[ViewVC] Diff of: Oni2/ValBot/Python/check_interwiki_links.py

Comparing ValBot/Python/check_interwiki_links.py (file contents):
Revision 1170 by iritscen, Mon Mar 21 21:22:33 2022 UTC vs.
Revision 1174 by iritscen, Tue Jun 28 22:11:41 2022 UTC

+import pywikibot
+import re
-<
+import requests
->
+import requests # for listing members with dir()
+from pywikibot.bot import QuitKeyboardInterrupt
+from pywikibot import pagegenerators
+            # Sometimes we used a space char. instead of a '_', so fix that before querying
+            page_title = page_text[s:e].replace(' ', '_')
-+
+            # Use only spaces for title when printing it
-+
+            page_title_human = page_title.replace('_', ' ')
-+
+            pywikibot.stdout('   Validating {0} link "{1}"'.format(prefix, page_title_human))
-+
+            iw_found = iw_found + 1
-+
+            # Construct full URL for the particular wiki
+            iw_url = interwiki_urls[cur] + page_title
-–
+            pywikibot.stdout('   Validating {0} link "{1}"'.format(prefix, page_title))
-–
+            iw_found = iw_found + 1
+            # Adjust URL if this is a foreign-language WP link
+            if re.match("^[a-zA-Z]{2}:", page_title):
+                    iw_url = iw_url.replace(page_title[0:3], '')
+            # Test the URL
-–
+            #pywikibot.stdout('   Testing URL "{}"'.format(iw_url))
+            response = fetch(iw_url)
-<
+            # Redirects are followed automatically by fetch() and treated as "200"s, so the
-<
+            # way we tell that a redirect occurred is by checking the history
->
+            # One way we tell that a redirect occurred is by checking the history
+            if response.history != []:
+                pywikibot.stdout('   ERROR: Got redirection code ({0}) on URL "{1}".'.format(response.history[0], iw_url))
+                errors_issued = errors_issued + 1
+            elif response.status_code != 200:
+                pywikibot.stdout('   ERROR: Got response code {0} on URL "{1}".'.format(response.status_code, iw_url))
+                errors_issued = errors_issued + 1
-+
+            # The usual way that a redirect occurs is that MediaWiki redirects us sneakily
-+
+            # using JavaScript, while returning code OK 200 as if the link was correct; we
-+
+            # must detect this from the page source
-+
+            elif 'Redirected from <a' in response.text:
-+
+                # Extract link from this source which contains name of redirected-to page:
-+
+                # <link rel="canonical" href="https://en.wikipedia.org/wiki/Page_name"/>
-+
+                canonical_name = response.text.split('<link rel="canonical" href="')[-1]
-+
+                prefix_length = len(interwiki_urls[cur])
-+
+                canonical_name = canonical_name[prefix_length:]
-+
+                tag_end = canonical_name.find('"/>')
-+
+                if tag_end == -1:
-+
+                   pywikibot.stdout('   ERROR: This is a redirect page (but I could not isolate the correct page name).')
-+
+                else:
-+
+                   canonical_name = canonical_name[:tag_end]
-+
+                   if len(canonical_name) > 100:
-+
+                      # Certain things can cause the trim to fail; here we avoid slamming
-+
+                      # the output with massive page source from a failed trim
-+
+                      pywikibot.stdout('   ERROR: This is a redirect to "{}" (string trimmed to 100 chars due to excessive length).'.format(canonical_name[:100]))
-+
+                   else:
-+
+                      canonical_name = canonical_name.replace('_', ' ')
-+
+                      pywikibot.stdout('   ERROR: This is a redirect to "{}".'.format(canonical_name))
-+
+                errors_issued = errors_issued + 1
+            elif '#' in page_title:
+                # Isolate section link
+                page_name, anchor_name = page_title.split('#')
+                for span_tag in soup.findAll('span'):
+                    span_name = span_tag.get('id', None)
+                    if span_name == anchor_name:
-–
+                        #pywikibot.stdout('Found section!')
+                        found_section = True
+                        break
+                if found_section == False:

Diff Legend

– Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)

Comparing ValBot/Python/check_interwiki_links.py (file contents): Revision 1170 by iritscen, Mon Mar 21 21:22:33 2022 UTC vs. Revision 1174 by iritscen, Tue Jun 28 22:11:41 2022 UTC

Diff Legend

Comparing ValBot/Python/check_interwiki_links.py (file contents):
Revision 1170 by iritscen, Mon Mar 21 21:22:33 2022 UTC vs.
Revision 1174 by iritscen, Tue Jun 28 22:11:41 2022 UTC