--- Validate External Links/validate_external_links.sh	2023/01/13 22:26:56	1177
+++ Validate External Links/validate_external_links.sh	2023/01/23 01:51:32	1178
@@ -47,7 +47,7 @@ URL_LIMIT=0            # if non-zero, st
 UPLOAD_INFO=""         # path to a file on your hard drive with the login info needed to upload a report
 
 # Fixed strings -- see the occurrences of these variables to learn their purpose
-AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36"
+AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
 ARCHIVE_API="http://archive.org/wayback/available"
 ARCHIVE_GENERIC="https://web.archive.org/web/*"
 ARCHIVE_OK_CODES="statuscodes=200&statuscodes=203&statuscodes=206"
@@ -76,7 +76,7 @@ declare -a HTTP_TLDS_AND_PAGES=(abstract
 # if you add a new code.
 declare -a OK_CODES=(200 401 405 406 418 501)
 declare -a RD_CODES=(301 302 303 307 308)
-declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 530)
+declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 520 530)
 
 # Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using
 # transcluded text, and if the transclusion fails, then the braces show up in the URL
@@ -123,7 +123,7 @@ END_RUN=0
 
 ### HELP OUTPUT ###
 # A pseudo-man page. Here is the 80-character rule for the page text:
-# 234567890123456789012345678901234567890123456789012345678901234567890123456789
+# 345678901234567890123456789012345678901234567890123456789012345678901234567890
 function printHelp()
 {
   cat << EOF
@@ -534,7 +534,7 @@ function wrapupAndExit()
    # Print processed link totals
    if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi
    if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi
-   if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) were not checked"; fi
+   if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) $(pluralCheckWas $SKIP_ARCHIVES) not checked"; fi
    if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi
    if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr "  (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h "&nbsp;&nbsp;(excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
    if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi
@@ -1136,8 +1136,14 @@ for LINE in `cat "$LINKS_FILE"`; do
          # Check for URL match
          EXCEPT_URL="${EXCEPT_LINE#*,}"
          EXCEPT_URL="${EXCEPT_URL%,*}"
-         if [ "$EXCEPT_URL" != "$URL" ]; then
-            continue
+         if [[ "$EXCEPT_URL" =~ \* ]]; then # if this exception URL contains the '*' wildcard, use pattern-matching with it
+            if [[ "$URL" != $EXCEPT_URL ]]; then # pattern left unquoted so '*' globs; skip this exception unless the URL matches
+               continue
+            fi
+         else
+            if [ "$EXCEPT_URL" != "$URL" ]; then # otherwise just use a straight string comparison
+               continue
+            fi
          fi
 
          # Check for page name match