| 6 |  | # (for reading as a local file with clickable links), and HTML (for uploading as a web page). | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 7 |  | # Call script with "--help" argument for documentation. Also see Read Me First.rtf for critical notes. | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 8 |  | # Recommended rule: | 
 
 
 
 
 
 
 
 
 
 
 | 9 | < | # ------------------------------------------------------------------------------------------------------ | 
 
 
 
 
 
 
 
 
 | 9 | > | # |----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----| | 
 
 
 
 
 
 
 
 
 
 
 | 10 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 11 |  | # Set separator token to newline | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 12 |  | IFS=" | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 26 |  | UPLOAD_INFO=""      # path to a file on your hard drive with the login info needed to upload a report | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 27 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 28 |  | # Fixed strings -- see the occurrences of these variables to learn their purpose | 
 
 
 
 
 
 
 
 
 
 
 | 29 | < | AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0" | 
 
 
 
 
 
 
 
 
 | 29 | > | AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36 OPR/67.0.3575.53" | 
 
 
 
 
 
 
 
 
 
 
 | 30 |  | ARCHIVE_API="http://archive.org/wayback/available" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 31 |  | ARCHIVE_GENERIC="https://web.archive.org/web/*" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 32 |  | ARCHIVE_OK_CODES="statuscodes=200&statuscodes=203&statuscodes=206" | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 87 |  | PAGE_LINKS=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 88 |  | SKIPPED_HEADER_ROW=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 89 |  | FINISHED_LIST="no" | 
 
 
 
 
 
 
 
 | 90 | + | START_RUN=0 | 
 
 
 
 
 
 
 
 | 91 | + | END_RUN=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 92 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 93 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 94 |  | ### HELP ### | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 422 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 423 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 424 |  |  | 
 
 
 
 
 
 
 
 | 425 | + | # Generate string with elapsed time | 
 
 
 
 
 
 
 
 | 426 | + | END_RUN=$(date +%s) | 
 
 
 
 
 
 
 
 | 427 | + | ELAPSED=$(echo $(($END_RUN - $START_RUN)) | awk '{printf "%d min. %d sec. elapsed", int($1/60), int($1%60)}') | 
 
 
 
 
 
 
 
 | 428 | + |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 429 |  | # Output results of session and close the log file's markup | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 430 |  | LINKS_PROCESSED=$((LINK_NUM-URL_START+1)) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 431 |  | LINKS_SKIPPED=$((SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE)) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 432 |  | LINKS_CHECKED=$((LINKS_PROCESSED-LINKS_SKIPPED)) | 
 
 
 
 
 
 
 
 
 
 
 | 433 | < | valPrint ct "Summary:" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 434 | < | valPrint r "\b1 Summary \b0" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 435 | < | valPrint hn "<h3><span id=\"summary\">Summary</span></h3>" | 
 
 
 
 
 
 
 
 
 | 433 | > | valPrint ct "Summary ($ELAPSED):" | 
 
 
 
 
 | 434 | > | valPrint r "\b1 Summary \b0 ($ELAPSED)" | 
 
 
 
 
 | 435 | > | valPrint hn "<h3><span id=\"summary\">Summary ($ELAPSED)</span></h3>" | 
 
 
 
 
 
 
 
 
 
 
 | 436 |  | valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT)." | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 437 |  | valPrint ctrh "I skipped $LINKS_SKIPPED $(pluralCheckNoun link $LINKS_SKIPPED), and found $FILE_LINKS $(pluralCheckNoun file $FILE_LINKS) and $PAGE_LINKS $(pluralCheckNoun page $PAGE_LINKS)." | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 438 |  | if [ $LINKS_SKIPPED -gt 0 ]; then valPrint ctrh "Skip breakdown: "; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 546 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 547 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 548 |  | ### MAIN LOOP ### | 
 
 
 
 
 
 
 
 | 549 | + | START_RUN=$(date +%s) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 550 |  | # Process each line of the .csv in LINKS_FILE | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 551 |  | for LINE in `cat "$LINKS_FILE"`; do | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 552 |  | let LINK_NUM+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 588 |  | NS_NAME="" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 589 |  | a=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 590 |  | while [ "x${NS_IDS[$a]}" != "x" ]; do # once this evaluates to "x", the array is done | 
 
 
 
 
 
 
 
 
 
 
 | 591 | < | if [ $NS_ID -eq ${NS_IDS[$a]} ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 591 | > | if [ $NS_ID == "NULL" ]; then | 
 
 
 
 
 | 592 | > | break | 
 
 
 
 
 | 593 | > | elif [ $NS_ID -eq ${NS_IDS[$a]} ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 594 |  | NS_NAME="${NS_NAMES[$a]}" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 595 |  | break | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 596 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 597 |  | let a+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 598 |  | done | 
 
 
 
 
 
 
 
 
 
 
 | 599 | < | if [ -z "$NS_NAME" ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 600 | < | valPrint tr "Skipping URL found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 599 | > | if [ "$NS_NAME" == "" ]; then | 
 
 
 
 
 | 600 | > | if [ $NS_ID == "NULL" ]; then | 
 
 
 
 
 | 601 | > | valPrint tr "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." | 
 
 
 
 
 | 602 | > | else | 
 
 
 
 
 | 603 | > | valPrint tr "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." | 
 
 
 
 
 | 604 | > | fi | 
 
 
 
 
 
 
 
 
 
 
 | 605 |  | let SKIP_UNK_NS+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 606 |  | continue | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 607 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 615 |  | # JavaScript code, so it will return erroneous links | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 616 |  | PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 617 |  | if [ $PAGE_NAME_SUFFIX == "js" ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 618 | < | valPrint tr "Skipping URL found on JavaScript page $PAGE_NAME." | 
 
 
 
 
 
 
 
 
 | 618 | > | valPrint tr "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." | 
 
 
 
 
 
 
 
 
 
 
 | 619 |  | let SKIP_JS_PAGE+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 620 |  | continue | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 621 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 899 |  | if [ $STATUS == "NG" ] && [ $SUGGEST_SNAPSHOTS -eq 1 ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 900 |  | ARCHIVE_QUERY=$(curl --silent --max-time 10 "$ARCHIVE_API?url=$URL&$ARCHIVE_OK_CODES") | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 901 |  |  | 
 
 
 
 
 
 
 
 
 
 
 | 902 | < | # Isolate "url" property in response and log it if a "closest" snapshot was received... | 
 
 
 
 
 
 
 
 
 | 902 | > | # If a "closest" snapshot was received... | 
 
 
 
 
 
 
 
 
 
 
 | 903 |  | if [[ "$ARCHIVE_QUERY" == *\"closest\":* ]]; then | 
 
 
 
 
 
 
 
 
 
 
 | 904 | < | SNAPSHOT_URL=${ARCHIVE_QUERY##*\"url\": \"} | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 905 | < | SNAPSHOT_URL=${SNAPSHOT_URL%\", \"timestamp*} | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 904 | > | # In case the URL has a shebang in it (like mega.nz links do), escape the '!' to break it | 
 
 
 
 
 | 905 | > | ARCHIVE_QUERY=$(echo "$ARCHIVE_QUERY" | sed 's/#!/#\\!/') | 
 
 
 
 
 | 906 | > |  | 
 
 
 
 
 | 907 | > | # ...isolate "url" property in the response that follows the "closest" tag | 
 
 
 
 
 | 908 | > | SNAPSHOT_URL=${ARCHIVE_QUERY##*\"closest\":} # everything after '"closest":' | 
 
 
 
 
 | 909 | > | SNAPSHOT_URL=${SNAPSHOT_URL##*\"url\": \"} # everything after '"url": "' | 
 
 
 
 
 | 910 | > | SNAPSHOT_URL=${SNAPSHOT_URL%%\"*} # everything before '"' | 
 
 
 
 
 | 911 | > |  | 
 
 
 
 
 | 912 | > | # Inform the user of the snapshot URL | 
 
 
 
 
 
 
 
 
 
 
 | 913 |  | valPrint t "  IA suggests $SNAPSHOT_URL" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 914 |  | valPrint r "                IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 915 |  | valPrint hn "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td><a href=\"$SNAPSHOT_URL\" target=\"_blank\">$SNAPSHOT_URL</a></td></tr>" |