ViewVC Help
View File | Revision Log | View Changeset | Root Listing
root/Oni2/Validate External Links/validate_external_links.sh
(Generate patch)

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1157 by iritscen, Sun May 9 21:53:48 2021 UTC vs.
Revision 1158 by iritscen, Sun Jun 13 20:50:43 2021 UTC

# Line 38 | Line 38 | SHOW_HTTPS=0           # record issue wh
38   SHOW_YT_RD=0           # record redirection for a youtu.be URL expanding to the full URL
39   SUGGEST_SNAPSHOTS_NG=0 # query the Internet Archive for a possible snapshot URL for each NG page
40   SUGGEST_SNAPSHOTS_OK=0 # query the Internet Archive for an existing snapshot of each OK page
41 < CHECK_ARCHIVE_LINKS=0  # check URLs under the archive.org domain
41 > CHECK_ARCHIVE_LINKS=0  # check URLs on archive.org and archive.is
42   TAKE_PAGE_SHOT=0       # take a screenshot of each OK page
43   TIMEOUT=10             # time to wait for a response when querying a site
44   CHROME_PATH=""         # path to a copy of Google Chrome that has the command-line screenshot feature
# Line 110 | Line 110 | SKIP_EXPECT_IW=0
110   SKIP_HTTPS_UP=0
111   SKIP_SLASH_ADD=0
112   SKIP_YOUTU_BE=0
113 < SKIP_ARCHIVE_ORG=0
113 > SKIP_ARCHIVES=0
114   FILE_LINKS=0
115   PAGE_LINKS=0
116   SKIPPED_HEADER_ROW=0
# Line 185 | Line 185 | OPTIONS
185                                 does nothing unless you also use the
186                                 --record-ok-links argument.
187         --check-archive-links   Check links that are already pointing to a page
188 <                               on the Internet Archive. In theory these links
189 <                               should be totally stable and not need validation.
188 >                               on the Internet Archive or archive.is (AKA
189 >                               archive.today). In theory these links should be
190 >                               totally stable and not need validation.
191         --take-screenshots FILE Call the Google Chrome binary at this path to
192                                 take screenshots of each "OK" page.
193         --timeout NUM           Wait this many seconds for a site to respond. The
# Line 531 | Line 532 | function wrapupAndExit()
532     # Print processed link totals
533     if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi
534     if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi
535 <   if [ $SKIP_ARCHIVE_ORG -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVE_ORG Archive.org $(pluralCheckNoun link $SKIP_ARCHIVE_ORG) were not checked"; fi
535 >   if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) were not checked"; fi
536     if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi
537     if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr "  (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h "&nbsp;&nbsp;(excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
538     if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi
# Line 676 | Line 677 | if [ $SHOW_HTTPS -eq 1 ]; then valPrint
677   valPrint ctrhn "Ignore youtu.be redirects: "
678   if [ $SHOW_YT_RD -eq 1 ]; then valPrint ctrh "No"; else valPrint ctrh "Yes"; fi
679  
680 < valPrint ctrhn "Check archive.org links: "
680 > valPrint ctrhn "Check archive.org and archive.is links: "
681   if [ $CHECK_ARCHIVE_LINKS -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi
682  
683   valPrint tr "A summary of my findings will be found at the bottom of the report."
# Line 809 | Line 810 | for LINE in `cat "$LINKS_FILE"`; do
810        continue
811     fi
812  
813 <   # If we're skipping Archive.org links, see if this is one
814 <   if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then
815 <      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links."
816 <      let SKIP_ARCHIVE_ORG+=1
813 >   # If we're skipping archive links, see if this is one
814 >   if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ ( $URL == *web.archive.org* || $URL == *archive.is* ) ]]; then
815 >      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check archive links."
816 >      let SKIP_ARCHIVES+=1
817        let PAGE_LINKS+=1
818        continue
819     fi
# Line 916 | Line 917 | for LINE in `cat "$LINKS_FILE"`; do
917  
918     # Get response code using 'curl' to see if this link is valid; the --insecure option avoids an
919     # issue with sites that require HTTPS
920 <   CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL)
920 >   CURL_CODE=$(curl -o /dev/null --silent --insecure --compressed --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL)
921     CURL_ERR=$(echo $?)
922     CURL_RESULT=$CURL_CODE
923  

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)