[ViewVC] Diff of: Oni2/ValBot/Python/check_interwiki_links.py

Comparing ValBot/Python/check_interwiki_links.py (file contents):
Revision 1185 by iritscen, Tue Aug 15 02:03:16 2023 UTC vs.
Revision 1192 by iritscen, Mon Sep 16 23:08:26 2024 UTC

+   # Read linked page to see if it really has this anchor link
+   soup = BeautifulSoup(page_text, 'html.parser')
+   found_section = False
-<
+   for span_tag in soup.findAll('span'): # search for span with ID matching the section name
-<
+      span_name = span_tag.get('id', None)
-<
+      if span_name == anchor_name:
->
+   for the_tag in soup.findAll('span'): # search for span with ID matching the section name
->
+      tag_name = the_tag.get('id', None)
->
+      if tag_name == anchor_name:
+         found_section = True
+         break
+   if found_section == False:
-<
+      for span_tag in soup.findAll('div'): # search for div with ID matching the section name
-<
+         span_name = span_tag.get('id', None)
-<
+         if span_name == anchor_name:
->
+      for the_tag in soup.findAll('div'): # search for div with ID matching the section name
->
+         tag_name = the_tag.get('id', None)
->
+         if tag_name == anchor_name:
->
+            found_section = True
->
+            break
->
+   if found_section == False:
->
+      for the_tag in soup.findAll('h2'): # search for h2 with ID matching the section name
->
+         tag_name = the_tag.get('id', None)
->
+         if tag_name == anchor_name:
->
+            found_section = True
->
+            break
->
+   if found_section == False:
->
+      for the_tag in soup.findAll('h3'): # search for h3 with ID matching the section name
->
+         tag_name = the_tag.get('id', None)
->
+         if tag_name == anchor_name:
->
+            found_section = True
->
+            break
->
+   if found_section == False:
->
+      for the_tag in soup.findAll('h4'): # search for h4 with ID matching the section name
->
+         tag_name = the_tag.get('id', None)
->
+         if tag_name == anchor_name:
+            found_section = True
+            break
+   if found_section == False:
+   for prefix in interwiki_prefixes:
+      # Isolate strings that start with "[[prefix:" and end with "|" or "]"
-<
+      iw_link = "\[\[" + prefix + ":[^|\]]*(\||\])"
->
+      iw_link = r"\[\[" + prefix + r":[^|\]]*(\||\])"
+      for match in re.finditer(iw_link, page_text):
+         # Extract just the page title from this regex match
+         s = match.start() + 2 + len(prefix) + 1

Diff Legend

–  Removed lines
+  Added lines
<  Changed lines (old)
>  Changed lines (new)

Comparing ValBot/Python/check_interwiki_links.py (file contents): Revision 1185 by iritscen, Tue Aug 15 02:03:16 2023 UTC vs. Revision 1192 by iritscen, Mon Sep 16 23:08:26 2024 UTC

Diff Legend

Comparing ValBot/Python/check_interwiki_links.py (file contents):
Revision 1185 by iritscen, Tue Aug 15 02:03:16 2023 UTC vs.
Revision 1192 by iritscen, Mon Sep 16 23:08:26 2024 UTC