ViewVC Help
View File | Revision Log | View Changeset | Root Listing
root/Oni2/ValBot/check_interwiki_links.py
(Generate patch)

Comparing ValBot/check_interwiki_links.py (file contents):
Revision 1151 by iritscen, Mon Mar 8 14:15:52 2021 UTC vs.
Revision 1152 by iritscen, Wed Mar 31 16:29:48 2021 UTC

# Line 42 | Line 42 | def scan_for_iw_links(page_text):
42  
43              # Construct full URL for the particular wiki
44              iw_url = interwiki_urls[cur] + page_title
45 <            pywikibot.output('Found {0} link {1}'.format(prefix, page_title))
45 >            pywikibot.output('Found {0} link {1}.'.format(prefix, page_title))
46              iw_found = iw_found + 1
47  
48              # Adjust URL if this is a foreign-language WP link
# Line 54 | Line 54 | def scan_for_iw_links(page_text):
54                      iw_url = iw_url.replace(page_title[0:3], '')
55  
56              # Test the URL
57 <            #pywikibot.output('Testing URL {}'.format(iw_url))
57 >            #pywikibot.output('Testing URL {}...'.format(iw_url))
58              response = fetch(iw_url)
59  
60              # Redirects are followed automatically by fetch() and treated as "200"s, so the
61              # way we tell that a redirect occurred is by checking the history
62              if response.history != []:
63 <                pywikibot.output('WARNING: Initially got {}.'.format(response.history))
63 >                pywikibot.output('WARNING: Redirected from {}.'.format(response.history))
64                  problems_found = problems_found + 1
65              elif response.status_code != 200:
66                  #pywikibot.output('WARNING: Got response code {}.'.format(response.status_code)) # commented out because fetch() already prints such a msg
67                  problems_found = problems_found + 1
68 +            elif '#' in page_title:
69 +                # Isolate section link
70 +                pywikibot.output('Detected section link on page {0}.'.format(page_title))
71 +                page_name, anchor_name = page_title.split('#')
72 +                
73 +                # Convert dot-notation hex entities to proper characters
74 +                anchor_name = anchor_name.replace('.22', '"')
75 +                anchor_name = anchor_name.replace('.27', '\'')
76 +                anchor_name = anchor_name.replace('.28', '(')
77 +                anchor_name = anchor_name.replace('.29', ')')
78 +                
79 +                # Read linked page to see if it really has this anchor link
80 +                soup = BeautifulSoup(response.text, 'html.parser')
81 +                found_section = False
82 +                for tag in soup.findAll('a'):
83 +                    link = tag.get('href', None)
84 +                    if not link:
85 +                        #pywikibot.output('It is not a link.')
86 +                        continue
87 +                    #pywikibot.output('Got link {0}.'.format(link))
88 +                    if not link.startswith('#'):
89 +                        continue
90 +                        
91 +                    if link == '#' + anchor_name:
92 +                        pywikibot.output('Found section link!')
93 +                        found_section = True
94 +                        break
95 +                if found_section == False:
96 +                    pywikibot.output('Could not find section {0} on page {1}.'.format(anchor_name, page_name))
97 +                    problems_found = problems_found + 1
98          cur = cur + 1
99  
100   def main(*args):

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)