--- ValBot/Python/check_intrawiki_section_links.py 2022/03/21 21:23:25 1171 +++ ValBot/Python/check_intrawiki_section_links.py 2022/09/25 23:58:33 1176 @@ -69,6 +69,17 @@ def scan_for_iw_links(page_text): if not '#' in link_text: #pywikibot.stdout('Link doesn\'t have a section anchor in it. Skipping.') continue + + # If this link has an interwiki prefix, it can be ignored + is_interwiki = False + if found_iw_match == False: + for prefix in interwiki_prefixes: + if prefix + ":" in link_text: + #pywikibot.stdout('Skipping link {} because it is an interwiki link.'.format(link_text)) + is_interwiki = True + break + if is_interwiki: + continue # If there is a '{' in the link, then probably it's a link built on transcluded text # like "Quotes/Diary#{{C3}}", which we cannot expand and work with, so skip it @@ -82,7 +93,7 @@ def scan_for_iw_links(page_text): # we're out of luck. if link_text.startswith('/'): link_text = page_name + link_text - pywikibot.stdout('Changed link_text to {} on account of "/".'.format(link_text)) + #pywikibot.stdout('Changed link_text to {} on account of "/".'.format(link_text)) # If this is a relative "../" link, find the parent page and set ourselves to that page, # then remove the relative portion of the link. Note that this is only performed once, @@ -129,22 +140,10 @@ def scan_for_iw_links(page_text): found_iw_match = True break - # If we didn't match the prefix against any intrawiki prefixes, see if it matches - # against an interwiki prefix; if so, this link can be ignored - is_interwiki = False - if found_iw_match == False: - for prefix in interwiki_prefixes: - if prefix + ":" in link_text: - #pywikibot.stdout('Skipping link {} because it is an interwiki link.'.format(link_text)) - is_interwiki = True - break - if is_interwiki: - continue - # If we still haven't turned this match into a URL, something's gone wrong if (found_iw_match == False) or (iw_url == ""): - pywikibot.stdout('ERROR: Couldn\'t figure out link {}. Aborting script.'.format(link_text)) - quit() + pywikibot.stdout('ERROR: Couldn\'t figure out link {}.'.format(link_text)) + continue # Test the URL iw_url = iw_url.replace(' ', '_')