38 |
|
SHOW_YT_RD=0 # record redirection for a youtu.be URL expanding to the full URL |
39 |
|
SUGGEST_SNAPSHOTS_NG=0 # query the Internet Archive for a possible snapshot URL for each NG page |
40 |
|
SUGGEST_SNAPSHOTS_OK=0 # query the Internet Archive for an existing snapshot of each OK page |
41 |
< |
CHECK_ARCHIVE_LINKS=0 # check URLs under the archive.org domain |
41 |
> |
CHECK_ARCHIVE_LINKS=0 # check URLs on archive.org and archive.is |
42 |
|
TAKE_PAGE_SHOT=0 # take a screenshot of each OK page |
43 |
|
TIMEOUT=10 # time to wait for a response when querying a site |
44 |
|
CHROME_PATH="" # path to a copy of Google Chrome that has the command-line screenshot feature |
110 |
|
SKIP_HTTPS_UP=0 |
111 |
|
SKIP_SLASH_ADD=0 |
112 |
|
SKIP_YOUTU_BE=0 |
113 |
< |
SKIP_ARCHIVE_ORG=0 |
113 |
> |
SKIP_ARCHIVES=0 |
114 |
|
FILE_LINKS=0 |
115 |
|
PAGE_LINKS=0 |
116 |
|
SKIPPED_HEADER_ROW=0 |
185 |
|
does nothing unless you also use the |
186 |
|
--record-ok-links argument. |
187 |
|
--check-archive-links Check links that are already pointing to a page |
188 |
< |
on the Internet Archive. In theory these links |
189 |
< |
should be totally stable and not need validation. |
188 |
> |
on the Internet Archive or archive.is (AKA |
189 |
> |
archive.today). In theory these links should be |
190 |
> |
totally stable and not need validation. |
191 |
|
--take-screenshots FILE Call the Google Chrome binary at this path to |
192 |
|
take screenshots of each "OK" page. |
193 |
|
--timeout NUM Wait this many seconds for a site to respond. The |
532 |
|
# Print processed link totals |
533 |
|
if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi |
534 |
|
if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi |
535 |
< |
if [ $SKIP_ARCHIVE_ORG -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVE_ORG Archive.org $(pluralCheckNoun link $SKIP_ARCHIVE_ORG) were not checked"; fi |
535 |
> |
if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) $(pluralCheckWas $SKIP_ARCHIVES) not checked"; fi |
536 |
|
if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi |
537 |
|
if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi |
538 |
|
if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi |
677 |
|
valPrint ctrhn "Ignore youtu.be redirects: " |
678 |
|
if [ $SHOW_YT_RD -eq 1 ]; then valPrint ctrh "No"; else valPrint ctrh "Yes"; fi |
679 |
|
|
680 |
< |
valPrint ctrhn "Check archive.org links: " |
680 |
> |
valPrint ctrhn "Check archive.org and archive.is links: " |
681 |
|
if [ $CHECK_ARCHIVE_LINKS -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi |
682 |
|
|
683 |
|
valPrint tr "A summary of my findings will be found at the bottom of the report." |
810 |
|
continue |
811 |
|
fi |
812 |
|
|
813 |
< |
# If we're skipping Archive.org links, see if this is one |
814 |
< |
if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then |
815 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links." |
816 |
< |
let SKIP_ARCHIVE_ORG+=1 |
813 |
> |
# If we're skipping archive links, see if this is one |
814 |
> |
if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ ( $URL == *web.archive.org* || $URL == *archive.is* ) ]]; then |
815 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check archive links." |
816 |
> |
let SKIP_ARCHIVES+=1 |
817 |
|
let PAGE_LINKS+=1 |
818 |
|
continue |
819 |
|
fi |
917 |
|
|
918 |
|
# Get response code using 'curl' to see if this link is valid; the --insecure option avoids an |
919 |
|
# issue with sites that require HTTPS |
920 |
< |
CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL) |
920 |
> |
CURL_CODE=$(curl -o /dev/null --silent --insecure --compressed --head --user-agent "$AGENT" --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL) |
921 |
|
CURL_ERR=$(echo $?) |
922 |
|
CURL_RESULT=$CURL_CODE |
923 |
|
|