| 6 |  | # - TXT (for easy diffing with an earlier log) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 7 |  | # - RTF (for reading as a local file with clickable links) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 8 |  | # - HTML (for uploading as a web page). | 
 
 
 
 
 
 
 
 
 
 
 | 9 | < | # Call script with "--help" argument for documentation. Also see Read Me First.rtf for critical notes. | 
 
 
 
 
 
 
 
 
 | 9 | > | # Call script with "--help" argument for documentation. Also see Read Me.rtf for critical notes. | 
 
 
 
 
 
 
 
 
 
 
 | 10 |  | # | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 11 |  | # Recommended rule: | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 12 |  | # |----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----| | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 41 |  | TAKE_PAGE_SHOT=0     # take a screenshot of each OK page | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 42 |  | TIMEOUT=10           # time to wait for a response when querying a site | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 43 |  | CHROME_PATH=""       # path to a copy of Google Chrome that has the command-line screenshot feature | 
 
 
 
 
 
 
 
 
 
 
 | 44 | < | URL_START=1          # start at this URL in LINKS_FILE (1 by default) | 
 
 
 
 
 
 
 
 
 | 44 | > | URL_START=1          # start at this URL in LINKS_FILE | 
 
 
 
 
 
 
 
 
 
 
 | 45 |  | URL_LIMIT=0          # if non-zero, stop at this URL in LINKS_FILE | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 46 |  | UPLOAD_INFO=""       # path to a file on your hard drive with the login info needed to upload a report | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 47 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 48 |  | # Fixed strings -- see the occurrences of these variables to learn their purpose | 
 
 
 
 
 
 
 
 
 
 
 | 49 | < | AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36 OPR/69.0.3686.77" | 
 
 
 
 
 
 
 
 
 | 49 | > | AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36 OPR/70.0.3728.154" | 
 
 
 
 
 
 
 
 
 
 
 | 50 |  | ARCHIVE_API="http://archive.org/wayback/available" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 51 |  | ARCHIVE_GENERIC="https://web.archive.org/web/*" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 52 |  | ARCHIVE_OK_CODES="statuscodes=200&statuscodes=203&statuscodes=206" | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 103 |  | SKIP_UNK_SUFFIX=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 104 |  | SKIP_UNK_CODE=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 105 |  | SKIP_EXPECT_NG=0 | 
 
 
 
 
 
 
 
 | 106 | + | SKIP_EXPECT_RD=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 107 |  | SKIP_EXPECT_EI=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 108 |  | SKIP_EXPECT_IW=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 109 |  | SKIP_HTTPS_UP=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 181 |  | --take-screenshots FILE Call the Google Chrome binary at this path to | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 182 |  | take screenshots of each "OK" page. | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 183 |  | --timeout NUM           Wait this many seconds for a site to respond. The | 
 
 
 
 
 
 
 
 
 
 
 | 184 | < | default is 10. | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 184 | > | default is 10. Important note: Val will attempt | 
 
 
 
 
 | 185 | > | to reach each URL three times, so the time taken | 
 
 
 
 
 | 186 | > | to ping an unresponsive site will be three times | 
 
 
 
 
 | 187 | > | this setting. | 
 
 
 
 
 
 
 
 
 
 
 | 188 |  | --start-url NUM         Start at this link in the links CSV file. | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 189 |  | --end-url NUM           Stop at this link in the links CSV file. | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 190 |  | --upload FILE           Upload report using the credentials and path | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 483 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 484 |  | # Do some math on results of session | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 485 |  | LINKS_PROCESSED=$((LINK_NUM-URL_START+1)) | 
 
 
 
 
 
 
 
 
 | 482 | – | LINK_PROBLEMS=$((EI_LINKS+IW_LINKS+RD_LINKS+NG_LINKS)) | 
 
 
 
 
 
 
 
 
 | 483 | – | LINK_ERRORS=$((SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE)) | 
 
 
 
 
 
 
 
 
 | 484 | – | LINKS_EXCEPTED=$((SKIP_EXPECT_NG+SKIP_EXPECT_EI+SKIP_EXPECT_IW)) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 486 |  | TRIVIAL_RDS=$((SKIP_SLASH_ADD+SKIP_HTTPS_UP+SKIP_YOUTU_BE)) | 
 
 
 
 
 
 
 
 
 
 
 | 487 | < | LINKS_CHECKED=$((LINKS_PROCESSED-LINK_ERRORS)) | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 487 | > | LINK_ERRORS=$((SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE)) | 
 
 
 
 
 | 488 | > | LINKS_EXCEPTED=$((SKIP_EXPECT_NG+SKIP_EXPECT_RD+SKIP_EXPECT_EI+SKIP_EXPECT_IW)) | 
 
 
 
 
 | 489 | > | LINK_PROBLEMS_TOTAL=$((NG_LINKS+RD_LINKS+EI_LINKS+IW_LINKS)) | 
 
 
 
 
 | 490 | > | LINK_PROBLEMS_NG=$((NG_LINKS-SKIP_EXPECT_NG)) | 
 
 
 
 
 | 491 | > | LINK_PROBLEMS_RD=$((RD_LINKS-SKIP_EXPECT_RD)) | 
 
 
 
 
 | 492 | > | LINK_PROBLEMS_EI=$((EI_LINKS-SKIP_EXPECT_EI)) | 
 
 
 
 
 | 493 | > | LINK_PROBLEMS_IW=$((IW_LINKS-SKIP_EXPECT_IW)) | 
 
 
 
 
 | 494 | > | LINK_PROBLEMS_NET=$((LINK_PROBLEMS_NG+LINK_PROBLEMS_RD+LINK_PROBLEMS_EI+LINK_PROBLEMS_IW)) | 
 
 
 
 
 
 
 
 
 
 
 | 495 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 496 |  | ## SUMMARY OUTPUT ## | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 497 |  | valPrint ct "Summary ($ELAPSED):" | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 503 |  | if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 504 |  | if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 505 |  | if [ $SKIP_ARCHIVE_ORG -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVE_ORG Archive.org $(pluralCheckNoun link $SKIP_ARCHIVE_ORG) were not checked"; fi | 
 
 
 
 
 
 
 
 
 
 
 | 506 | < | if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had $(pluralCheckAn $LINK_PROBLEMS)$(pluralCheckNoun issue $LINK_PROBLEMS)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 507 | < | if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; valPrint h "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi | 
 
 
 
 
 
 
 
 
 | 506 | > | if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi | 
 
 
 
 
 | 507 | > | if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr "  (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi | 
 
 
 
 
 
 
 
 
 
 
 | 508 |  | if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 509 |  | if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctr "  (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; valPrint h "  (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 510 |  |  | 
 
 
 
 
 
 
 
 
 | 503 | – | # Print excepted link totals | 
 
 
 
 
 
 
 
 
 | 504 | – | if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "$LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) excepted (see RTF or TXT report for specific links):"; fi | 
 
 
 
 
 
 
 
 
 | 505 | – | if [ $SKIP_EXPECT_NG -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_NG/$NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi | 
 
 
 
 
 
 
 
 
 | 506 | – | if [ $SKIP_EXPECT_EI -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_EI/$EI_LINKS external internal $(pluralCheckNoun link $EI_LINKS)"; fi | 
 
 
 
 
 
 
 
 
 | 507 | – | if [ $SKIP_EXPECT_IW -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_IW/$IW_LINKS potential intrawiki $(pluralCheckNoun link $IW_LINKS)"; fi | 
 
 
 
 
 
 
 
 
 | 508 | – |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 511 |  | # Print errored link totals | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 512 |  | if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "$LINK_ERRORS link $(pluralCheckNoun error $LINK_ERRORS) (see RTF or TXT report for specific links):"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 513 |  | if [ $SKIP_UNK_NS -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_NS missing/unknown $(pluralCheckNoun namespace $SKIP_UNK_NS)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 517 |  | if [ $SKIP_UNK_SUFFIX -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_SUFFIX unknown URL $(pluralCheckNoun suffix $SKIP_UNK_SUFFIX)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 518 |  | if [ $SKIP_UNK_CODE -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_CODE unknown response $(pluralCheckNoun code $SKIP_UNK_CODE)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 519 |  |  | 
 
 
 
 
 
 
 
 | 520 | + | # Print excepted link totals | 
 
 
 
 
 
 
 
 | 521 | + | if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "$LINKS_EXCEPTED link $(pluralCheckNoun problem $LINKS_EXCEPTED) excepted (see RTF or TXT report for specific links):"; fi | 
 
 
 
 
 
 
 
 | 522 | + | if [ $SKIP_EXPECT_NG -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_NG/$NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi | 
 
 
 
 
 
 
 
 | 523 | + | if [ $SKIP_EXPECT_RD -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_RD/$RD_LINKS $(pluralCheckNoun redirection $RD_LINKS)"; fi | 
 
 
 
 
 
 
 
 | 524 | + | if [ $SKIP_EXPECT_EI -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_EI/$EI_LINKS external internal $(pluralCheckNoun link $EI_LINKS)"; fi | 
 
 
 
 
 
 
 
 | 525 | + | if [ $SKIP_EXPECT_IW -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_IW/$IW_LINKS potential intrawiki $(pluralCheckNoun link $IW_LINKS)"; fi | 
 
 
 
 
 
 
 
 | 526 | + |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 527 |  | # Print checked link totals | 
 
 
 
 
 
 
 
 
 
 
 | 528 | < | if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issue $LINK_PROBLEMS):"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 529 | < | if [ $NG_LINKS -gt 0 ]; then valPrint ctrh "- $NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 530 | < | if [ $RD_LINKS -gt 0 ]; then valPrint ctrh "- $RD_LINKS $(pluralCheckNoun redirection $RD_LINKS)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 531 | < | if [ $EI_LINKS -gt 0 ]; then valPrint ctrh "- $EI_LINKS $(pluralCheckNoun link $EI_LINKS) that could be intrawiki"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 532 | < | if [ $IW_LINKS -gt 0 ]; then valPrint ctrh "- $IW_LINKS $(pluralCheckNoun link $IW_LINKS) that could be interwiki"; fi | 
 
 
 
 
 
 
 
 
 | 528 | > | if [ $LINK_PROBLEMS_NET -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS_NET link $(pluralCheckNoun issue $LINK_PROBLEMS_NET):"; fi | 
 
 
 
 
 | 529 | > | if [ $LINK_PROBLEMS_NG -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_NG NG $(pluralCheckNoun link $LINK_PROBLEMS_NG)"; fi | 
 
 
 
 
 | 530 | > | if [ $LINK_PROBLEMS_RD -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_RD $(pluralCheckNoun redirection $LINK_PROBLEMS_RD)"; fi | 
 
 
 
 
 | 531 | > | if [ $LINK_PROBLEMS_EI -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_EI $(pluralCheckNoun link $LINK_PROBLEMS_EI) that could be intrawiki"; fi | 
 
 
 
 
 | 532 | > | if [ $LINK_PROBLEMS_IW -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_IW $(pluralCheckNoun link $LINK_PROBLEMS_IW) that could be interwiki"; fi | 
 
 
 
 
 
 
 
 
 
 
 | 533 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 534 |  | # Close the log files' markup | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 535 |  | valPrint trh "ValExtLinks says goodbye." | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 624 |  | valPrint ctrhn "Take screenshots: " | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 625 |  | if [ $TAKE_PAGE_SHOT -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 626 |  |  | 
 
 
 
 
 
 
 
 
 
 
 | 627 | < | valPrint ctrhn "Suggest Archive.org snapshots: " | 
 
 
 
 
 
 
 
 
 | 627 | > | valPrint ctrhn "Suggest archive.org snapshots: " | 
 
 
 
 
 
 
 
 
 
 
 | 628 |  | if [ $SUGGEST_SNAPSHOTS -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 629 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 630 |  | valPrint ctrhn "Ignore slash-adding redirects: " | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 870 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 871 |  | # Get response code using 'curl' to see if this link is valid; the --insecure option avoids an | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 872 |  | # issue with sites that require HTTPS | 
 
 
 
 
 
 
 
 
 
 
 | 873 | < | CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time $TIMEOUT --write-out '%{http_code}\n' $URL) | 
 
 
 
 
 
 
 
 
 | 873 | > | CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL) | 
 
 
 
 
 
 
 
 
 
 
 | 874 |  | CURL_ERR=$(echo $?) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 875 |  | CURL_RESULT=$CURL_CODE | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 876 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1006 |  | { | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1007 |  | EXCEPT_LINE="${EXCEPT_ARRAY[$i]}" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1008 |  |  | 
 
 
 
 
 
 
 
 | 1009 | + | # Undo any HTML-encoding from the wiki page; for now we just worry about the ampersand, as most | 
 
 
 
 
 
 
 
 | 1010 | + | # other HTML-encoded characters are not found in URLs | 
 
 
 
 
 
 
 
 | 1011 | + | EXCEPT_LINE=$(echo "$EXCEPT_LINE" | sed 's/\&amp;/\&/') | 
 
 
 
 
 
 
 
 | 1012 | + |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1013 |  | # Match URL | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1014 |  | EXCEPT_URL="${EXCEPT_LINE#*,}" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1015 |  | EXCEPT_URL="${EXCEPT_URL%,*}" | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1029 |  | let SKIP_EXPECT_EI+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1030 |  | elif [ $STATUS == "IW" ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1031 |  | let SKIP_EXPECT_IW+=1 | 
 
 
 
 
 
 
 
 | 1032 | + | elif [ $STATUS == "RD" ]; then | 
 
 
 
 
 
 
 
 | 1033 | + | let SKIP_EXPECT_RD+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1034 |  | else | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1035 |  | let SKIP_EXPECT_NG+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1036 |  | fi |