ViewVC Help
View File | Revision Log | View Changeset | Root Listing
root/Oni2/ValBot/Python/check_intrawiki_section_links.py
(Generate patch)

Comparing ValBot/Python/check_intrawiki_section_links.py (file contents):
Revision 1192 by iritscen, Mon Sep 16 23:08:26 2024 UTC vs.
Revision 1194 by iritscen, Mon Nov 18 04:00:08 2024 UTC

# Line 58 | Line 58 | def possibly_print(page_name):
58   # Search a page for the section specified in the link
59   def find_section(page_text, page_name, page_slug, print_result):
60     global errors_issued
61 <
62 <   # Isolate section link
61 >   found_section = False
62 >  
63 >   # Isolate section link or text fragment link
64     target_page_name, anchor_name = page_slug.split('#', 1)
65     target_page_name_human = target_page_name.replace('_', ' ')
66 <   if debug: pywikibot.stdout('         Searching for section link {} on page.'.format(anchor_name))
67 <
68 <   # Read linked page to see if it really has this anchor link
69 <   soup = BeautifulSoup(page_text, 'html.parser')
70 <   found_section = False
71 <   for span_tag in soup.findAll('span'):
72 <      span_name = span_tag.get('id', None)
73 <      if span_name == anchor_name:
74 <         if debug and not print_result: pywikibot.stdout('         Found section in a span!')
66 >  
67 >   # First check if this is a text fragment directive, and look for it if so
68 >   if anchor_name.startswith(':~:text='):
69 >      if debug: pywikibot.stdout('         Found text fragment directive {} from URL {}.'.format(anchor_name, page_slug))
70 >      anchor_name = anchor_name[8:]
71 >      # We're only checking the first text directive, so strip add'l ones if present
72 >      addl_fragment = anchor_name.find('&text=')
73 >      if addl_fragment != -1:
74 >         anchor_name = anchor_name[:addl_fragment]
75 >      search_terms = anchor_name.split(',')
76 >      # Delete prefix and suffix terms because they aren't needed
77 >      if search_terms[0].endswith('-'):
78 >         search_terms.pop(0)
79 >      if search_terms[-1].startswith('-'):
80 >         search_terms.pop()
81 >      # Remake text directive with the terms separated by spaces as they should be in the page text
82 >      newSep = ' '
83 >      search_string = newSep.join(search_terms)
84 >      if debug: pywikibot.stdout('         Converted text fragment to string "{}".'.format(search_string))
85 >      if search_string in page_text:
86           found_section = True
87 <         break
87 >         if debug and not print_result: pywikibot.stdout('         Found text fragment!')
88 >  
89 >   # If we're still here, it's a section link; read linked page to see if it really has this
90 >   # anchor link
91 >   if found_section == False:
92 >      if debug: pywikibot.stdout('         Searching for section link {} on page.'.format(anchor_name))
93 >      soup = BeautifulSoup(page_text, 'html.parser')
94 >      # Search for a span with this ID
95 >      for span_tag in soup.findAll('span'):
96 >         span_name = span_tag.get('id', None)
97 >         if span_name == anchor_name:
98 >            if debug and not print_result: pywikibot.stdout('         Found section in a span!')
99 >            found_section = True
100 >            break
101     if found_section == False:
102        # Search for a div with this ID
103        for span_tag in soup.findAll('div'):
# Line 127 | Line 152 | def test_intrawiki_link(iw_url, page_nam
152     # automatically follows redirects. This will catch formal redirects which come from pages
153     # such as Special:PermanentLink.
154     if response.history != []:
130 -
155        permalink1 = 'Special:PermanentLink/'.lower()
156        permalink2 = 'Special:Permalink/'.lower()
157        page_slug_lower = page_slug.lower()

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)