--- Validate External Links/validate_external_links.sh	2021/05/09 21:53:48	1157
+++ Validate External Links/validate_external_links.sh	2021/06/13 20:50:43	1158
@@ -38,7 +38,7 @@ SHOW_HTTPS=0 # record issue wh
 SHOW_YT_RD=0 # record redirection for a youtu.be URL expanding to the full URL
 SUGGEST_SNAPSHOTS_NG=0 # query the Internet Archive for a possible snapshot URL for each NG page
 SUGGEST_SNAPSHOTS_OK=0 # query the Internet Archive for an existing snapshot of each OK page
-CHECK_ARCHIVE_LINKS=0 # check URLs under the archive.org domain
+CHECK_ARCHIVE_LINKS=0 # check URLs on archive.org and archive.is
 TAKE_PAGE_SHOT=0 # take a screenshot of each OK page
 TIMEOUT=10 # time to wait for a response when querying a site
 CHROME_PATH="" # path to a copy of Google Chrome that has the command-line screenshot feature
@@ -110,7 +110,7 @@ SKIP_EXPECT_IW=0
 SKIP_HTTPS_UP=0
 SKIP_SLASH_ADD=0
 SKIP_YOUTU_BE=0
-SKIP_ARCHIVE_ORG=0
+SKIP_ARCHIVES=0
 FILE_LINKS=0
 PAGE_LINKS=0
 SKIPPED_HEADER_ROW=0
@@ -185,8 +185,9 @@ OPTIONS
                        does nothing unless you also use the --record-ok-links argument.
 --check-archive-links  Check links that are already pointing to a page
-                       on the Internet Archive. In theory these links
-                       should be totally stable and not need validation.
+                       on the Internet Archive or archive.is (AKA
+                       archive.today). In theory these links should be
+                       totally stable and not need validation.
 --take-screenshots FILE Call the Google Chrome binary at this path to take
                        screenshots of each "OK" page.
 --timeout NUM          Wait this many seconds for a site to respond. The
@@ -531,7 +532,7 @@ function wrapupAndExit()
 	# Print processed link totals
 	if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi
 	if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi
-	if [ $SKIP_ARCHIVE_ORG -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVE_ORG Archive.org $(pluralCheckNoun link $SKIP_ARCHIVE_ORG) were not checked"; fi
+	if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) were not checked"; fi
 	if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi
 	if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
 	if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi
@@ -676,7 +677,7 @@ if [ $SHOW_HTTPS -eq 1 ]; then valPrint
 valPrint ctrhn "Ignore youtu.be redirects: "
 if [ $SHOW_YT_RD -eq 1 ]; then valPrint ctrh "No"; else valPrint ctrh "Yes"; fi
 
-valPrint ctrhn "Check archive.org links: "
+valPrint ctrhn "Check archive.org and archive.is links: "
 if [ $CHECK_ARCHIVE_LINKS -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi
 
 valPrint tr "A summary of my findings will be found at the bottom of the report."
@@ -809,10 +810,10 @@ for LINE in `cat "$LINKS_FILE"`; do
 		continue
 	fi
 
-	# If we're skipping Archive.org links, see if this is one
-	if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then
-		valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links."
-		let SKIP_ARCHIVE_ORG+=1
+	# If we're skipping archive links, see if this is one
+	if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ ( $URL == *web.archive.org* || $URL == *archive.is* ) ]]; then
+		valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check archive links."
+		let SKIP_ARCHIVES+=1
 		let PAGE_LINKS+=1
 		continue
 	fi
@@ -916,7 +917,7 @@ for LINE in `cat "$LINKS_FILE"`; do
 	# Get response code using 'curl' to see if this link is valid; the --insecure option avoids an
 	# issue with sites that require HTTPS
-	CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL)
+	CURL_CODE=$(curl -o /dev/null --silent --insecure --compressed --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL)
 	CURL_ERR=$(echo $?)
 	CURL_RESULT=$CURL_CODE
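The broadened skip test in the -809 hunk can be exercised on its own. Below is a minimal standalone sketch (not part of the patch) that feeds a few hypothetical URLs through the same glob match; the sample URLs and echo messages are illustrative only, while CHECK_ARCHIVE_LINKS and the bracket expression come straight from the patched script:

#!/bin/bash
# Standalone sketch of the archive-link test introduced above; the URLs are made-up examples.
CHECK_ARCHIVE_LINKS=0   # same default as in the script: archive links are skipped unless --check-archive-links is passed
for URL in 'https://web.archive.org/web/2021/http://example.com/' \
           'https://archive.is/AbCdE' \
           'http://example.com/page.html'; do
	if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ ( $URL == *web.archive.org* || $URL == *archive.is* ) ]]; then
		echo "Would skip (archive link): $URL"
	else
		echo "Would validate: $URL"
	fi
done

Run as-is, the first two URLs print "Would skip" (they match the web.archive.org and archive.is patterns that feed the SKIP_ARCHIVES counter) and the third prints "Would validate".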