| 38 |
|
SHOW_YT_RD=0 # record redirection for a youtu.be URL expanding to the full URL |
| 39 |
|
SUGGEST_SNAPSHOTS_NG=0 # query the Internet Archive for a possible snapshot URL for each NG page |
| 40 |
|
SUGGEST_SNAPSHOTS_OK=0 # query the Internet Archive for an existing snapshot of each OK page |
| 41 |
< |
CHECK_ARCHIVE_LINKS=0 # check URLs under the archive.org domain |
| 41 |
> |
CHECK_ARCHIVE_LINKS=0 # check URLs on archive.org and archive.is |
| 42 |
|
TAKE_PAGE_SHOT=0 # take a screenshot of each OK page |
| 43 |
|
TIMEOUT=10 # time to wait for a response when querying a site |
| 44 |
|
CHROME_PATH="" # path to a copy of Google Chrome that has the command-line screenshot feature |
| 110 |
|
SKIP_HTTPS_UP=0 |
| 111 |
|
SKIP_SLASH_ADD=0 |
| 112 |
|
SKIP_YOUTU_BE=0 |
| 113 |
< |
SKIP_ARCHIVE_ORG=0 |
| 113 |
> |
SKIP_ARCHIVES=0 |
| 114 |
|
FILE_LINKS=0 |
| 115 |
|
PAGE_LINKS=0 |
| 116 |
|
SKIPPED_HEADER_ROW=0 |
| 185 |
|
does nothing unless you also use the |
| 186 |
|
--record-ok-links argument. |
| 187 |
|
--check-archive-links Check links that are already pointing to a page |
| 188 |
< |
on the Internet Archive. In theory these links |
| 189 |
< |
should be totally stable and not need validation. |
| 188 |
> |
on the Internet Archive or archive.is (AKA |
| 189 |
> |
archive.today). In theory these links should be |
| 190 |
> |
totally stable and not need validation. |
| 191 |
|
--take-screenshots FILE Call the Google Chrome binary at this path to |
| 192 |
|
take screenshots of each "OK" page. |
| 193 |
|
--timeout NUM Wait this many seconds for a site to respond. The |
| 532 |
|
# Print processed link totals |
| 533 |
|
if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi |
| 534 |
|
if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi |
| 535 |
< |
if [ $SKIP_ARCHIVE_ORG -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVE_ORG Archive.org $(pluralCheckNoun link $SKIP_ARCHIVE_ORG) were not checked"; fi |
| 535 |
> |
if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) were not checked"; fi |
| 536 |
|
if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi |
| 537 |
|
if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi |
| 538 |
|
if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi |
| 677 |
|
valPrint ctrhn "Ignore youtu.be redirects: " |
| 678 |
|
if [ $SHOW_YT_RD -eq 1 ]; then valPrint ctrh "No"; else valPrint ctrh "Yes"; fi |
| 679 |
|
|
| 680 |
< |
valPrint ctrhn "Check archive.org links: " |
| 680 |
> |
valPrint ctrhn "Check archive.org and archive.is links: " |
| 681 |
|
if [ $CHECK_ARCHIVE_LINKS -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi |
| 682 |
|
|
| 683 |
|
valPrint tr "A summary of my findings will be found at the bottom of the report." |
| 810 |
|
continue |
| 811 |
|
fi |
| 812 |
|
|
| 813 |
< |
# If we're skipping Archive.org links, see if this is one |
| 814 |
< |
if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then |
| 815 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links." |
| 816 |
< |
let SKIP_ARCHIVE_ORG+=1 |
| 813 |
> |
# If we're skipping archive links, see if this is one |
| 814 |
> |
if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ ( $URL == *web.archive.org* || $URL == *archive.is* ) ]]; then |
| 815 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check archive links." |
| 816 |
> |
let SKIP_ARCHIVES+=1 |
| 817 |
|
let PAGE_LINKS+=1 |
| 818 |
|
continue |
| 819 |
|
fi |
| 917 |
|
|
| 918 |
|
# Get response code using 'curl' to see if this link is valid; the --insecure option avoids an |
| 919 |
|
# issue with sites that require HTTPS |
| 920 |
< |
CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL) |
| 920 |
> |
CURL_CODE=$(curl -o /dev/null --silent --insecure --compressed --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL) |
| 921 |
|
CURL_ERR=$(echo $?) |
| 922 |
|
CURL_RESULT=$CURL_CODE |
| 923 |
|
|