--- ValBot/Python/check_intrawiki_section_links.py	2024/01/23 03:53:05	1188
+++ ValBot/Python/check_intrawiki_section_links.py	2024/11/18 04:00:08	1194
@@ -35,7 +35,7 @@ chapter_names = ['CHAPTER_00_._COMBAT_TR
 # Tuple of patterns for recognizing wikilinks
 # Pattern 1: Detect "[[anything]]", "[[any:thing]]", "[[any|thing]]", "[[any:thi|ng]]"
 # Pattern 2: Detect "{{SectionLink|Page|Section name}}", "{{SectionLink||Section name}}"
-link_patterns = ("\[\[[^|\]]*(\||\])", "\{\{SectionLink\|[^|\}]*\|[^|\}]*\}\}")
+link_patterns = (r"\[\[[^|\]]*(\||\])", r"\{\{SectionLink\|[^|\}]*\|[^|\}]*\}\}")
 
 # Initialize globals
 debug = 0
@@ -58,21 +58,46 @@ def possibly_print(page_name):
 # Search a page for the section specified in the link
 def find_section(page_text, page_name, page_slug, print_result):
    global errors_issued
-
-   # Isolate section link
+   found_section = False
+
+   # Isolate section link or text fragment link
    target_page_name, anchor_name = page_slug.split('#', 1)
    target_page_name_human = target_page_name.replace('_', ' ')
-   if debug: pywikibot.stdout('   Searching for section link {} on page.'.format(anchor_name))
-
-   # Read linked page to see if it really has this anchor link
-   soup = BeautifulSoup(page_text, 'html.parser')
-   found_section = False
-   for span_tag in soup.findAll('span'):
-      span_name = span_tag.get('id', None)
-      if span_name == anchor_name:
-         if debug and not print_result: pywikibot.stdout('   Found section in a span!')
+
+   # First check if this is a text fragment directive, and look for it if so
+   if anchor_name.startswith(':~:text='):
+      if debug: pywikibot.stdout('   Found text fragment directive {} from URL {}.'.format(anchor_name, page_slug))
+      anchor_name = anchor_name[8:]
+      # We're only checking the first text directive, so strip add'l ones if present
+      addl_fragment = anchor_name.find('&text=')
+      if addl_fragment != -1:
+         anchor_name = anchor_name[:addl_fragment]
+      search_terms = anchor_name.split(',')
+      # Delete prefix and suffix terms because they aren't needed
+      if search_terms[0].endswith('-'):
+         search_terms.pop(0)
+      if search_terms[-1].startswith('-'):
+         search_terms.pop()
+      # Remake text directive with the terms separated by spaces as they should be in the page text
+      newSep = ' '
+      search_string = newSep.join(search_terms)
+      if debug: pywikibot.stdout('   Converted text fragment to string "{}".'.format(search_string))
+      if search_string in page_text:
          found_section = True
-         break
+         if debug and not print_result: pywikibot.stdout('   Found text fragment!')
+
+   # If we're still here, it's a section link; read linked page to see if it really has this
+   # anchor link
+   if found_section == False:
+      if debug: pywikibot.stdout('   Searching for section link {} on page.'.format(anchor_name))
+      soup = BeautifulSoup(page_text, 'html.parser')
+      # Search for a span with this ID
+      for span_tag in soup.findAll('span'):
+         span_name = span_tag.get('id', None)
+         if span_name == anchor_name:
+            if debug and not print_result: pywikibot.stdout('   Found section in a span!')
+            found_section = True
+            break
    if found_section == False:
       # Search for a div with this ID
       for span_tag in soup.findAll('div'):
@@ -127,7 +152,6 @@ def test_intrawiki_link(iw_url, page_nam
 # automatically follows redirects. This will catch formal redirects which come from pages
 # such as Special:PermanentLink.
 if response.history != []:
-   permalink1 = 'Special:PermanentLink/'.lower()
    permalink2 = 'Special:Permalink/'.lower()
    page_slug_lower = page_slug.lower()