ViewVC Help
View File | Revision Log | View Changeset | Root Listing
root/Oni2/Validate External Links/validate_external_links.sh
(Generate patch)

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1183 by iritscen, Tue May 16 01:10:09 2023 UTC vs.
Revision 1184 by iritscen, Sun May 21 22:22:55 2023 UTC

# Line 33 | Line 33 | LINKS_URL=""           # download extern
33   EXCEPT_URL=""          # location of wiki page with a list of exceptions for NG results
34   OUTPUT_DIR=""           # place reports and all other output in a folder inside this existing folder
35   RECORD_OK_LINKS=0      # record response code to the log even when it's a value in OK_CODES
36 + ONLY_200_OK=0          # only treat code 200 as "OK" and not any other code in OK_CODES
37   SHOW_SLASH=0           # record issue when a slash is added to the end of a URL
38   SHOW_HTTPS=0           # record issue when "http" is upgraded to "https"
39   SHOW_YT_RD=0           # record redirection for a youtu.be URL expanding to the full URL
# Line 67 | Line 68 | declare -a NS_IDS=(-2 -1 0 1 2 3 4 5 6 7
68   declare -a NS_NAMES=("Media" "Special" "Main" "Talk" "User" "User_talk" "OniGalore" "OniGalore_talk" "File" "File_talk" "MediaWiki" "MediaWiki_talk" "Template" "Template_talk" "Help" "Help_talk" "Category" "Category_talk" "BSL" "BSL_talk" "OBD" "OBD_talk" "AE" "AE_talk" "Oni2" "Oni2_talk" "XML" "XML_talk")
69  
70   # These arrays tell the script which suffixes at the ends of URLs represent files and which are pages.
71 < # This determines whether the script tries to take a screenshot of the URL or just gets its HTTP code.
71 > # This determines whether the script tries to take a screenshot of the URL (when screenshots are
72 > # requested).
73   declare -a HTTP_FILES=(3ds 7z avi BINA blend bsl cpp csv dae dll dmg doc east exe fbx first63 flv gamedata gif jpg last32 log m4a mhm mov mp3 mp4 oni ONWC pdf png psd py rar tga TRMA txt vbs wav wmv xaf xcf xlsx xml zip)
74   declare -a HTTP_TLDS_AND_PAGES=(abstract action ars asp aspx cfm cgi com css de do full htm html it js jsp net org pgi php php3 phtml pl ru shtml stm uk x)
75  
# Line 134 | Line 136 | NAME
136   SYNOPSIS
137         validate_external_links.sh --help
138         validate_external_links.sh --links URL --output DIR [--exceptions URL]
139 <          [--record-ok-links] [--show-added-slashes] [--show-https-upgrades]
140 <          [--show-yt-redirects] [--suggest-snapshots] [--check-archive-links]
141 <          [--take-screenshots FILE] [--timeout NUM] [--start-url NUM]
142 <          [--end-url NUM] [--upload FILE]
139 >          [--record-ok-links] [--only-200-ok] [--show-added-slashes]
140 >          [--show-https-upgrades] [--show-yt-redirects] [--suggest-snapshots]
141 >          [--check-archive-links] [--take-screenshots FILE] [--timeout NUM]
142 >          [--start-url NUM] [--end-url NUM] [--upload FILE]
143  
144   DESCRIPTION
145         This script parses a list of external links found in the OniGalore wiki
# Line 172 | Line 174 | OPTIONS
174                                 beginning with "file://".
175         --record-ok-links       Log a link in the report even if its response
176                                 code is "OK".
177 +       --only-200-ok           Only treat response code 200 as "OK". Normally
178 +                               several additional codes are treated as "OK" (see
179 +                               the OK_CODES array in this script) because they
180 +                               are typically not an indicator of a bad link.
181         --show-added-slashes    Report on redirects that simply add a '/' to the
182                                 end of the URL.
183         --show-https-upgrades   Report on redirects that simply upgrade a
# Line 225 | Line 231 | while (( "$#" )); do
231        --exceptions )           EXCEPT_URL="$2";                    shift 2;;
232        --output )               OUTPUT_DIR="$2";                    shift 2;;
233        --record-ok-links )      RECORD_OK_LINKS=1;                  shift;;
234 +      --only-200-ok )          ONLY_200_OK=1;                      shift;;
235        --show-added-slashes )   SHOW_SLASH=1;                       shift;;
236        --show-https-upgrades )  SHOW_HTTPS=1;                       shift;;
237        --show-yt-redirects )    SHOW_YT_RD=1;                       shift;;
# Line 704 | Line 711 | valPrint ctrh "Site query timeout: $TIME
711   valPrint ctrhn "Show OK links: "
712   if [ $RECORD_OK_LINKS -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi
713  
714 + valPrint ctrhn "Treat these response codes as OK: "
715 + if [ $ONLY_200_OK -eq 1 ]; then valPrint ctrh "200"; else valPrint ctrh "${OK_CODES[*]}"; fi
716 +
717   valPrint ctrhn "Take screenshots: "
718   if [ $TAKE_PAGE_SHOT -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi
719  
# Line 1006 | Line 1016 | for LINE in `cat "$LINKS_FILE"`; do
1016     # If we didn't match an interwiki domain, see if the status code is in our "OK" codes list
1017     if [ $STATUS == "??" ]; then
1018        for CODE in "${OK_CODES[@]}"; do
1019 +         if [ $ONLY_200_OK -eq 1 ] && [ $CODE -ne 200 ]; then
1020 +            continue
1021 +         fi
1022 +      
1023           if [[ $CODE == $CURL_CODE ]]; then
1024              STATUS="OK"
1025              let OK_LINKS+=1
# Line 1142 | Line 1156 | for LINE in `cat "$LINKS_FILE"`; do
1156              break
1157           fi
1158        done
1159 +      # If --only-200-ok was passed, a response code that is in OK_CODES but is not 200 is marked "NG"
1160 +      if [ $ONLY_200_OK -eq 1 ]; then
1161 +         for CODE in "${OK_CODES[@]}"; do
1162 +            if [ $CODE -eq 200 ]; then
1163 +               continue
1164 +            fi
1165 +            if [[ $CODE == $CURL_CODE ]]; then
1166 +               STATUS="NG"
1167 +               let NG_LINKS+=1
1168 +               break
1169 +            fi
1170 +         done
1171 +      fi
1172     fi
1173  
1174     # If we didn't match a known status code, advise the reader

Diff Legend

Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)