# Check Interwiki Links
# by iritscen@yahoo.com
# Looks at each link on a page (or on all the pages in a category) that uses a registered interwiki prefix, loads the linked page, and verifies that the page
# exists and that any section link, if present, is valid as well. The output uses the word "ERROR" whenever it cannot validate an interwiki link.
# Recommended viewing width:
# |---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----|
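#
# Usage (hypothetical invocations, assuming this file is saved as "check_interwiki_links.py" in Pywikibot's "scripts" directory;
# adjust the script name and paths to your own setup):
#   python pwb.py check_interwiki_links -page:"Easter eggs"
#   python pwb.py check_interwiki_links -cat:"Some category" -dbg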
|
import pywikibot
import re
import requests  # for listing members with dir() when debugging

from bs4 import BeautifulSoup
from pywikibot import pagegenerators
from pywikibot.comms.http import fetch
|
class IWLink:
    def __init__(self, iw_prefix, prefix_url, full_url, page_name, page_name_only, page_slug, hosting_page, curl_response):
        self.iw_prefix = iw_prefix             # e.g. "wp"
        self.prefix_url = prefix_url           # e.g. "https://en.wikipedia.org/wiki/"
        self.full_url = full_url               # e.g. "https://en.wikipedia.org/wiki/Marathon_(series)#Rampancy"
        self.page_name = page_name             # e.g. "Marathon (series)#Rampancy"
        self.page_name_only = page_name_only   # e.g. "Marathon (series)"
        self.page_slug = page_slug             # e.g. "Marathon_(series)#Rampancy"
        self.hosting_page = hosting_page       # e.g. "Easter eggs"; the page where the link was found
        self.curl_response = curl_response     # a Response object as defined in the Requests library
|
# Parallel arrays based on https://wiki.oni2.net/Special:Interwiki
interwiki_prefixes = ('acronym', 'cache', 'commons', 'dictionary', 'google', 'metawikimedia', 'mw', 'wikibooks', 'wikidata', 'wikimedia', 'wikinews', 'wikipedia', 'wikiquote', 'wikisource', 'wikispecies', 'wikiversity', 'wikivoyage', 'wikt', 'wiktionary', 'wp')

interwiki_urls = ('http://www.acronymfinder.com/~/search/af.aspx?string=exact&Acronym=', 'http://www.google.com/search?q=cache:', 'https://commons.wikimedia.org/wiki/', 'http://www.dict.org/bin/Dict?Database=*&Form=Dict1&Strategy=*&Query=', 'http://www.google.com/search?q=', 'https://meta.wikimedia.org/wiki/', 'https://www.mediawiki.org/wiki/', 'https://en.wikibooks.org/wiki/', 'https://www.wikidata.org/wiki/', 'https://foundation.wikimedia.org/wiki/', 'https://en.wikinews.org/wiki/', 'https://en.wikipedia.org/wiki/', 'https://en.wikiquote.org/wiki/', 'https://wikisource.org/wiki/', 'https://species.wikimedia.org/wiki/', 'https://en.wikiversity.org/wiki/', 'https://en.wikivoyage.org/wiki/', 'https://en.wiktionary.org/wiki/', 'https://en.wiktionary.org/wiki/', 'https://en.wikipedia.org/wiki/')
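
# The two tuples above are parallel arrays indexed by the same counter, so a length mismatch would silently pair prefixes with the wrong URLs.
# This sanity check is an addition to the original script, included here as a fail-fast guard.
assert len(interwiki_prefixes) == len(interwiki_urls), 'interwiki_prefixes and interwiki_urls must be the same length'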
|
# Initialize globals
debug = 0
pages_checked = 0
iw_found = 0
errors_issued = 0
unintended_redirects_found = 0
name_printed = 0
|
# Prints the name of a page on which something occurred, if it has not been printed before
def possibly_print(the_link):
    global debug
    global name_printed

    if not name_printed and not debug:
        pywikibot.stdout('')
        pywikibot.stdout('From page "{}":'.format(the_link.hosting_page))
        name_printed = 1
|
# Search a page for the section specified in the link
def find_section(the_link, print_result):
    global errors_issued

    # Isolate section link (split on the first '#' only, in case the anchor itself contains one)
    _, anchor_name = the_link.page_slug.split('#', 1)

    # Convert dot-notation hex entities to proper characters
    replacements = [(r'\.22', '"'), (r'\.27', "'"), (r'\.28', '('), (r'\.29', ')')]
    for pattern, replacement in replacements:
        anchor_name = re.sub(pattern, replacement, anchor_name)
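
    # For illustration (a hypothetical example, not data from the script): MediaWiki's legacy anchor encoding replaces certain
    # characters with ".XX" hex escapes, so an anchor like "Rampancy_.28Marathon.29" becomes "Rampancy_(Marathon)" after the
    # substitutions above, matching the id attribute in the rendered HTML.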
|
    # Read linked page to see if it really has this anchor link
    soup = BeautifulSoup(the_link.curl_response.text, 'html.parser')
    tags_to_search = ['span', 'div', 'h2', 'h3', 'h4']
    found_section = False
    for tag_name in tags_to_search:
        for the_tag in soup.find_all(tag_name):
            if the_tag.get('id') == anchor_name:
                found_section = True
                break
        if found_section:
            break

    # Tell user what we found
    if not found_section:
        possibly_print(the_link)
        pywikibot.stdout('  ERROR: Could not find section "{0}" on {1} page "{2}".'.format(anchor_name, the_link.iw_prefix, the_link.page_name))
        errors_issued = errors_issued + 1
    elif print_result:
        pywikibot.stdout('  The section "{0}" was found on {1} page "{2}".'.format(anchor_name, the_link.iw_prefix, the_link.page_name))
|
# For a link that redirected us to another page, extract the name of the target page from the target page's source
def find_canonical_link(the_link):
    global errors_issued

    # Extract link from this markup, which contains the name of the redirected-to page:
    # <link rel="canonical" href="https://en.wikipedia.org/wiki/Page_name"/>
    canonical_name = the_link.curl_response.text.split('<link rel="canonical" href="')[-1]
    prefix_length = len(the_link.prefix_url)
    canonical_name = canonical_name[prefix_length:]
    tag_end = canonical_name.find('">')

    if tag_end == -1:
        pywikibot.stdout('  ERROR: The {0} link "{1}" is a redirect page, but this script could not isolate the target page name.'.format(the_link.iw_prefix, the_link.page_slug))
        errors_issued = errors_issued + 1
    else:
        canonical_name = canonical_name[:tag_end]
        if len(canonical_name) > 100:
            # Certain things can cause the trim to fail; report an error and avoid flooding the output with massive page source from a failed trim
            pywikibot.stdout('  ERROR: The {0} link "{1}" is a redirect to "{2}…" (string overflow).'.format(the_link.iw_prefix, the_link.page_slug, canonical_name[:100]))
            errors_issued = errors_issued + 1
        else:
            the_link.page_name = canonical_name.replace('_', ' ')
            if '#' in the_link.page_slug:
                the_link.page_name_only, _ = the_link.page_slug.split('#', 1)
                pywikibot.stdout('  The {0} link "{1}" is a redirect to "{2}", which is a valid page. Checking for section on that page….'.format(the_link.iw_prefix, the_link.page_name_only, the_link.page_name))
                find_section(the_link, True)
            else:
                pywikibot.stdout('  The {0} link "{1}" is a redirect to "{2}", which is a valid page.'.format(the_link.iw_prefix, the_link.page_slug, the_link.page_name))
|
# Test an interwiki link and look for a section link if applicable
def test_interwiki_link(the_link):
    global errors_issued
    global unintended_redirects_found

    the_link.curl_response = fetch(the_link.full_url)

    # One way we tell that a redirect occurred is by checking fetch's history, since fetch automatically follows redirects. This will catch formal redirects
    # which come from pages such as Special:PermanentLink.
    if the_link.curl_response.history:
        possibly_print(the_link)

        # If the linked page is in all caps, e.g. WP:BEANS, it's likely a deliberate use of a redirect
        if the_link.page_slug.startswith('WP:') and the_link.page_slug == the_link.page_slug.upper():
            pywikibot.stdout('  Got redirection code "{0}" for {1} link "{2}". This appears to be a deliberate use of a Wikipedia shortcut. Checking the target page….'.format(the_link.curl_response.history[0], the_link.iw_prefix, the_link.page_slug))
            find_canonical_link(the_link)
        else:
            permalink1 = 'Special:PermanentLink/'.lower()
            permalink2 = 'Special:Permalink/'.lower()
            page_slug_lower = the_link.page_slug.lower()
            if page_slug_lower.startswith(permalink1) or page_slug_lower.startswith(permalink2):
                pywikibot.stdout('  Got redirection code "{0}" for {1} permanent revision link "{2}". Checking the target page….'.format(the_link.curl_response.history[0], the_link.iw_prefix, the_link.page_slug))
                find_canonical_link(the_link)
            else:
                pywikibot.stdout('  ERROR: Unrecognized type of redirection (code "{0}") for {1} link "{2}". You should check the link manually.'.format(the_link.curl_response.history[0], the_link.iw_prefix, the_link.page_slug))
                errors_issued = errors_issued + 1
    elif the_link.curl_response.status_code != 200:
        possibly_print(the_link)
        pywikibot.stdout('  ERROR: Got response code {0} for {1} link "{2}". The page may not exist.'.format(the_link.curl_response.status_code, the_link.iw_prefix, the_link.page_slug))
        errors_issued = errors_issued + 1
    # However, the usual way that a redirect occurs is that MediaWiki redirects us sneakily using JavaScript while returning code "200 OK" as if the link were
    # correct; this happens when a redirect page is accessed. We must detect these soft redirects by looking at the page source to find the redirect note
    # inserted at the top of the page for the reader.
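    # (Illustrative note, an addition to the original comments: in rendered MediaWiki pages, this marker typically appears as
    # '<span class="mw-redirectedfrom">(Redirected from <a href="…">…</a>)</span>', which is why searching the raw HTML for
    # 'Redirected from <a' detects these soft redirects.)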
    elif 'Redirected from <a' in the_link.curl_response.text:
        unintended_redirects_found = unintended_redirects_found + 1
        possibly_print(the_link)
        pywikibot.stdout('  WARNING: Got silently redirected by {0} link "{1}". Checking the target page….'.format(the_link.iw_prefix, the_link.page_slug))
        find_canonical_link(the_link)  # calls find_section() at the end
    elif '#' in the_link.page_slug:
        find_section(the_link, False)
|
# Searches the given page text for interwiki links
def scan_for_interwiki_links(page_text, page_name):
    global debug
    global pages_checked
    global iw_found
    global name_printed
    pages_checked = pages_checked + 1
    cur_prefix = 0
    name_printed = 0

    for prefix in interwiki_prefixes:
        # Isolate strings that start with "[[prefix:" and end with "|" or "]"
        iw_link = r"\[\[" + prefix + r":[^|\]]*(\||\])"
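        # For illustration (hypothetical wikitext, not data from the script): with the prefix "wp", the pattern matches
        # '[[wp:Marathon (series)#Rampancy|' in the wikitext '[[wp:Marathon (series)#Rampancy|the series]]', and the
        # slicing with s and e below then isolates 'Marathon (series)#Rampancy'.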
        for match in re.finditer(iw_link, page_text):
            the_link = IWLink(prefix, interwiki_urls[cur_prefix], "", "", "", "", page_name, "")

            # Extract just the page title from this regex match
            s = match.start() + 2 + len(the_link.iw_prefix) + 1
            e = match.end() - 1

            # Use underscores in the slug used to construct the URL, but retain spaces for the printable name
            the_link.page_slug = page_text[s:e].replace(' ', '_')
            the_link.page_name = page_text[s:e]
            if debug: pywikibot.stdout('  Validating {0} link "{1}"'.format(the_link.iw_prefix, the_link.page_name))
            iw_found = iw_found + 1

            # Construct the full URL for the particular wiki
            the_link.full_url = the_link.prefix_url + the_link.page_slug

            # Adjust the URL if this is a foreign-language WP link
            if re.match("^[a-zA-Z]{2}:", the_link.page_slug):
                lang_code = the_link.page_slug[0:2] + "."
                # "wp:" is the Wikipedia: namespace, not a language
                if lang_code != "wp." and lang_code != "WP.":
                    the_link.full_url = the_link.full_url.replace('en.', lang_code)
                    the_link.full_url = the_link.full_url.replace(the_link.page_slug[0:3], '')

            # Test the URL
            test_interwiki_link(the_link)
        cur_prefix = cur_prefix + 1
|
# Print a wrap-up message
def print_summary():
    global pages_checked
    global iw_found
    global errors_issued
    global unintended_redirects_found

    page_str = "pages"
    if pages_checked == 1:
        page_str = "page"

    link_str = "links"
    if iw_found == 1:
        link_str = "link"

    pywikibot.stdout('Checked {0} {1} and found {2} interwiki {3}.'.format(pages_checked, page_str, iw_found, link_str))

    error_str = "errors were"
    if errors_issued == 1:
        error_str = "error was"

    pywikibot.stdout('{0} {1} encountered in validating these links.'.format(errors_issued, error_str))

    warning_str = "likely-unintended redirects were"
    if unintended_redirects_found == 1:
        warning_str = "likely-unintended redirect was"

    pywikibot.stdout('{0} {1} encountered in validating these links.'.format(unintended_redirects_found, warning_str))
|
# Main function
def main(*args):
    global debug
    search_cat = ''
    search_page = ''

    # Process arguments
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        if arg.startswith('-cat:'):
            search_cat = arg[5:]
        elif arg.startswith('-page:'):
            search_page = arg[6:]
        elif arg == '-dbg':
            debug = 1
        else:
            pywikibot.stdout('Unknown argument "{}". Exiting.'.format(arg))
            return

    #pywikibot.stdout('The members of the requests.models.Response class are:')
    #pywikibot.stdout(format(dir(requests.models.Response)))
    #return

    # Check the specified page, or loop through the specified category and check all of its pages
    site = pywikibot.Site()
    if search_cat != '':
        cat_obj = pywikibot.Category(site, search_cat)
        generator = pagegenerators.CategorizedPageGenerator(cat_obj, recurse=True)
        for page in pagegenerators.PreloadingGenerator(generator, 100):
            if debug: pywikibot.stdout('Checking page "{}"'.format(page.title()))
            scan_for_interwiki_links(page.text, page.title())
    elif search_page != '':
        page = pywikibot.Page(site, search_page)
        if debug: pywikibot.stdout('Checking page "{}"'.format(page.title()))
        scan_for_interwiki_links(page.text, page.title())
    else:
        pywikibot.stdout('No -cat or -page argument given. Exiting.')
        return

    # Print the results
    print_summary()
|
if __name__ == '__main__':
    main()