| 58 |   # Search a page for the section specified in the link
| 59 |   def find_section(page_text, page_name, page_slug, print_result):
| 60 |       global errors_issued
| 61 | <
| 62 | <     # Isolate section link
| 61 | >     found_section = False
| 62 | >
| 63 | >     # Isolate section link or text fragment link
| 64 |       target_page_name, anchor_name = page_slug.split('#', 1)
| 65 |       target_page_name_human = target_page_name.replace('_', ' ')
| 66 | <     if debug: pywikibot.stdout(' Searching for section link {} on page.'.format(anchor_name))
| 67 | <
| 68 | <     # Read linked page to see if it really has this anchor link
| 69 | <     soup = BeautifulSoup(page_text, 'html.parser')
| 70 | <     found_section = False
| 71 | <     for span_tag in soup.findAll('span'):
| 72 | <         span_name = span_tag.get('id', None)
| 73 | <         if span_name == anchor_name:
| 74 | <             if debug and not print_result: pywikibot.stdout(' Found section in a span!')
| 66 | >
| 67 | >     # First check if this is a text fragment directive, and look for it if so
| 68 | >     if anchor_name.startswith(':~:text='):
| 69 | >         if debug: pywikibot.stdout(' Found text fragment directive {} from URL {}.'.format(anchor_name, page_slug))
| 70 | >         anchor_name = anchor_name[8:]
| 71 | >         # We're only checking the first text directive, so strip add'l ones if present
| 72 | >         addl_fragment = anchor_name.find('&text=')
| 73 | >         if addl_fragment != -1:
| 74 | >             anchor_name = anchor_name[:addl_fragment]
| 75 | >         search_terms = anchor_name.split(',')
| 76 | >         # Delete prefix and suffix terms because they aren't needed
| 77 | >         if search_terms[0].endswith('-'):
| 78 | >             search_terms.pop(0)
| 79 | >         if search_terms[-1].startswith('-'):
| 80 | >             search_terms.pop()
| 81 | >         # Remake text directive with the terms separated by spaces as they should be in the page text
| 82 | >         newSep = ' '
| 83 | >         search_string = newSep.join(search_terms)
| 84 | >         if debug: pywikibot.stdout(' Converted text fragment to string "{}".'.format(search_string))
| 85 | >         if search_string in page_text:
| 86 |               found_section = True
| 87 | <             break
| 87 | >             if debug and not print_result: pywikibot.stdout(' Found text fragment!')
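A quick way to sanity-check the new fragment handling is to pull the conversion steps into a standalone helper. This is only a sketch mirroring the logic above; the function name and sample fragment are made up for illustration.

    # Sketch of the fragment-to-search-string conversion above; the helper
    # name and example value are illustrative, not part of the script.
    def fragment_to_search_string(anchor_name):
        anchor_name = anchor_name[8:]               # drop the 8-char ':~:text=' prefix
        addl_fragment = anchor_name.find('&text=')  # honor only the first directive
        if addl_fragment != -1:
            anchor_name = anchor_name[:addl_fragment]
        search_terms = anchor_name.split(',')
        if search_terms[0].endswith('-'):           # discard the 'prefix-' context term
            search_terms.pop(0)
        if search_terms[-1].startswith('-'):        # discard the '-suffix' context term
            search_terms.pop()
        return ' '.join(search_terms)

    print(fragment_to_search_string(':~:text=blue,whale'))  # prints 'blue whale'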
| 88 | >
| 89 | >     # If we're still here, it's a section link; read linked page to see if it really has this
| 90 | >     # anchor link
| 91 | >     if found_section == False:
| 92 | >         if debug: pywikibot.stdout(' Searching for section link {} on page.'.format(anchor_name))
| 93 | >         soup = BeautifulSoup(page_text, 'html.parser')
| 94 | >         # Search for a span with this ID
| 95 | >         for span_tag in soup.findAll('span'):
| 96 | >             span_name = span_tag.get('id', None)
| 97 | >             if span_name == anchor_name:
| 98 | >                 if debug and not print_result: pywikibot.stdout(' Found section in a span!')
| 99 | >                 found_section = True
| 100 | >                 break
| 101 |       if found_section == False:
| 102 |           # Search for a div with this ID
| 103 |           for span_tag in soup.findAll('div'):
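For reference, here is what the span/div scan is doing, reduced to a self-contained snippet; the HTML is a made-up stand-in for the kind of heading anchor MediaWiki generates.

    from bs4 import BeautifulSoup

    # Made-up snippet standing in for a rendered wiki page
    html = '<h2><span id="Early_history">Early history</span></h2><div id="notes"></div>'
    soup = BeautifulSoup(html, 'html.parser')

    anchor_name = 'Early_history'
    found = any(tag.get('id') == anchor_name for tag in soup.findAll(['span', 'div']))
    print(found)  # True

BeautifulSoup could also do this in one call with soup.find(id=anchor_name), which matches any tag carrying that ID; the patch keeps the span and div passes separate, presumably so the debug messages can report which kind of tag matched.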
| 152 |       # automatically follows redirects. This will catch formal redirects which come from pages
| 153 |       # such as Special:PermanentLink.
| 154 |       if response.history != []:
| 130 | <
| 155 |           permalink1 = 'Special:PermanentLink/'.lower()
| 156 |           permalink2 = 'Special:Permalink/'.lower()
| 157 |           page_slug_lower = page_slug.lower()
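The check above leans on the requests library's default behavior: redirects are followed automatically, and each intermediate response is kept in response.history. A minimal sketch, with a placeholder URL:

    import requests

    # Placeholder URL; any page that answers with a 301/302 will do
    response = requests.get('http://example.com/some-redirecting-page')
    if response.history != []:
        for hop in response.history:
            print('{} {}'.format(hop.status_code, hop.url))  # each redirected request
        print('Final URL: {}'.format(response.url))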