ViewVC Help
View File | Revision Log | View Changeset | Root Listing
root/Oni2/ValBot/Python/check_interwiki_links.py
(Generate patch)

Comparing ValBot/Python/check_interwiki_links.py (file contents):
Revision 1185 by iritscen, Tue Aug 15 02:03:16 2023 UTC vs.
Revision 1192 by iritscen, Mon Sep 16 23:08:26 2024 UTC

# Line 62 | Line 62 | def find_section(page_text, page_name, p
62     # Read linked page to see if it really has this anchor link
63     soup = BeautifulSoup(page_text, 'html.parser')
64     found_section = False
65 <   for span_tag in soup.findAll('span'): # search for span with ID matching the section name
66 <      span_name = span_tag.get('id', None)
67 <      if span_name == anchor_name:
65 >   for the_tag in soup.findAll('span'): # search for span with ID matching the section name
66 >      tag_name = the_tag.get('id', None)
67 >      if tag_name == anchor_name:
68           found_section = True
69           break
70     if found_section == False:
71 <      for span_tag in soup.findAll('div'): # search for div with ID matching the section name
72 <         span_name = span_tag.get('id', None)
73 <         if span_name == anchor_name:
71 >      for the_tag in soup.findAll('div'): # search for div with ID matching the section name
72 >         tag_name = the_tag.get('id', None)
73 >         if tag_name == anchor_name:
74 >            found_section = True
75 >            break
76 >   if found_section == False:
77 >      for the_tag in soup.findAll('h2'): # search for h2 with ID matching the section name
78 >         tag_name = the_tag.get('id', None)
79 >         if tag_name == anchor_name:
80 >            found_section = True
81 >            break
82 >   if found_section == False:
83 >      for the_tag in soup.findAll('h3'): # search for h3 with ID matching the section name
84 >         tag_name = the_tag.get('id', None)
85 >         if tag_name == anchor_name:
86 >            found_section = True
87 >            break
88 >   if found_section == False:
89 >      for the_tag in soup.findAll('h4'): # search for h4 with ID matching the section name
90 >         tag_name = the_tag.get('id', None)
91 >         if tag_name == anchor_name:
92              found_section = True
93              break
94     if found_section == False:
# Line 161 | Line 179 | def scan_for_interwiki_links(page_text,
179  
180     for prefix in interwiki_prefixes:
181        # Isolate strings that start with "[[prefix:" and end with "|" or "]"
182 <      iw_link = "\[\[" + prefix + ":[^|\]]*(\||\])"
182 >      iw_link = r"\[\[" + prefix + r":[^|\]]*(\||\])"
183        for match in re.finditer(iw_link, page_text):
184           # Extract just the page title from this regex match
185           s = match.start() + 2 + len(prefix) + 1

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)