| 18 |  | EXCEPT_URL=""       # ditto above for file with exceptions to NG results | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 19 |  | OUTPUT_DIR=""       # place reports and all other output in a folder inside this existing folder | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 20 |  | RECORD_OK_LINKS=0   # record response code to the log even when it's a value in OK_CODES | 
 
 
 
 
 
 
 
 | 21 | + | SHOW_SLASH=0        # record response code to the log when a slash is added to the end of a URL | 
 
 
 
 
 
 
 
 | 22 | + | SHOW_HTTPS=0        # record response code to the log when "http" is upgraded to "https" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 23 |  | SUGGEST_SNAPSHOTS=0 # query the Internet Archive for a possible snapshot URL for each NG page | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 24 |  | TAKE_PAGE_SHOT=0    # take a screenshot of each OK page | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 25 |  | CHROME_PATH=""      # path to a copy of Google Chrome that has the command-line screenshot feature | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 36 |  | CURL_CODES="http://iritscen.oni2.net/val/curl_codes.txt" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 37 |  | EXPECT_SCRIPT_NAME="val_expect_sftp.txt" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 38 |  | HTTP_CODES="http://iritscen.oni2.net/val/http_codes.txt" | 
 
 
 
 
 
 
 
 
 
 
 | 39 | < | MY_WIKI_PAGE="http://wiki.oni2.net/User:Iritscen" | 
 
 
 
 
 
 
 
 
 | 39 | > | MY_WIKI_PAGE="https://wiki.oni2.net/User:Iritscen" | 
 
 
 
 
 
 
 
 
 
 
 | 40 |  | THIS_DIR=$(cd "$(dirname "$0")" && pwd) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 41 |  | WORKING_DIR=$(pwd) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 42 |  | WIKI_PATH="wiki.oni2.net" | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 85 |  | SKIP_EXPECT_NG=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 86 |  | SKIP_EXPECT_EI=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 87 |  | SKIP_EXPECT_IW=0 | 
 
 
 
 
 
 
 
 | 88 | + | SKIP_HTTPS_UP=0 | 
 
 
 
 
 
 
 
 | 89 | + | SKIP_SLASH_ADD=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 90 |  | FILE_LINKS=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 91 |  | PAGE_LINKS=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 92 |  | SKIPPED_HEADER_ROW=0 | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 143 |  | you supply a file:// path. | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 144 |  | --record-ok-links       Log a link in the report even if its response | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 145 |  | code is "OK". | 
 
 
 
 
 
 
 
 | 146 | + | --show-added-slashes    Report on redirects that simply add a '/' to the | 
 
 
 
 
 
 
 
 | 147 | + | end of the URL. | 
 
 
 
 
 
 
 
 | 148 | + | --show-https-upgrade    Report on redirects that simply upgrade a | 
 
 
 
 
 
 
 
 | 149 | + | "http://" URL to a "https://" URL. | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 150 |  | --suggest-snapshots     Query the Internet Archive for a possible | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 151 |  | snapshot URL for each "NG" page. | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 152 |  | --take-screenshots FILE Call the Google Chrome binary at this path to | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 175 |  | # Parse arguments as long as there are more arguments to process | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 176 |  | while (( "$#" )); do | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 177 |  | case "$1" in | 
 
 
 
 
 
 
 
 
 
 
 | 178 | < | --links )             LINKS_URL="$2";                     shift 2;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 179 | < | --exceptions )        EXCEPT_URL="$2";                    shift 2;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 180 | < | --output )            OUTPUT_DIR="$2";                    shift 2;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 181 | < | --record-ok-links )   RECORD_OK_LINKS=1;                  shift;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 182 | < | --suggest-snapshots ) SUGGEST_SNAPSHOTS=1;                shift;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 183 | < | --take-screenshots )  TAKE_PAGE_SHOT=1; CHROME_PATH="$2"; shift 2;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 184 | < | --start-url )         URL_START=$2;                       shift 2;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 185 | < | --end-url )           URL_LIMIT=$2;                       shift 2;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 186 | < | --upload )            UPLOAD_INFO=$2;                     shift 2;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 187 | < | * )                   echo "Invalid argument $1 detected. Aborting."; exit 1;; | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 178 | > | --links )              LINKS_URL="$2";                     shift 2;; | 
 
 
 
 
 | 179 | > | --exceptions )         EXCEPT_URL="$2";                    shift 2;; | 
 
 
 
 
 | 180 | > | --output )             OUTPUT_DIR="$2";                    shift 2;; | 
 
 
 
 
 | 181 | > | --record-ok-links )    RECORD_OK_LINKS=1;                  shift;; | 
 
 
 
 
 | 182 | > | --show-added-slashes ) SHOW_SLASH=1;                       shift;; | 
 
 
 
 
 | 183 | > | --show-https-upgrade ) SHOW_HTTPS=1;                       shift;; | 
 
 
 
 
 | 184 | > | --suggest-snapshots )  SUGGEST_SNAPSHOTS=1;                shift;; | 
 
 
 
 
 | 185 | > | --take-screenshots )   TAKE_PAGE_SHOT=1; CHROME_PATH="$2"; shift 2;; | 
 
 
 
 
 | 186 | > | --start-url )          URL_START=$2;                       shift 2;; | 
 
 
 
 
 | 187 | > | --end-url )            URL_LIMIT=$2;                       shift 2;; | 
 
 
 
 
 | 188 | > | --upload )             UPLOAD_INFO=$2;                     shift 2;; | 
 
 
 
 
 | 189 | > | * )                    echo "Invalid argument $1 detected. Aborting."; exit 1;; | 
 
 
 
 
 
 
 
 
 
 
 | 190 |  | esac | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 191 |  | done | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 192 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 443 |  | END_RUN=$(date +%s) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 444 |  | ELAPSED=$(echo $(($END_RUN - $START_RUN)) | awk '{printf "%d min. %d sec. elapsed", int($1/60), int($1%60)}') | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 445 |  |  | 
 
 
 
 
 
 
 
 
 
 
 | 446 | < | # Output results of session and close the log file's markup | 
 
 
 
 
 
 
 
 
 | 446 | > | # Do some math on results of session | 
 
 
 
 
 
 
 
 
 
 
 | 447 |  | LINKS_PROCESSED=$((LINK_NUM-URL_START+1)) | 
 
 
 
 
 
 
 
 
 
 
 | 448 | < | LINKS_SKIPPED=$((SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE)) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 449 | < | LINKS_CHECKED=$((LINKS_PROCESSED-LINKS_SKIPPED)) | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 448 | > | LINK_PROBLEMS=$((EI_LINKS+IW_LINKS+RD_LINKS+NG_LINKS)) | 
 
 
 
 
 | 449 | > | LINK_ERRORS=$((SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE)) | 
 
 
 
 
 | 450 | > | LINKS_EXCEPTED=$((SKIP_EXPECT_NG+SKIP_EXPECT_EI+SKIP_EXPECT_IW)) | 
 
 
 
 
 | 451 | > | TRIVIAL_RDS=$((SKIP_SLASH_ADD+SKIP_HTTPS_UP)) | 
 
 
 
 
 | 452 | > | LINKS_CHECKED=$((LINKS_PROCESSED-LINK_ERRORS)) | 
 
 
 
 
 | 453 | > |  | 
 
 
 
 
 | 454 | > | # Print summary header | 
 
 
 
 
 
 
 
 
 
 
 | 455 |  | valPrint ct "Summary ($ELAPSED):" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 456 |  | valPrint r "\b1 Summary \b0 ($ELAPSED)" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 457 |  | valPrint hn "<h3><span id=\"summary\">Summary ($ELAPSED)</span></h3>" | 
 
 
 
 
 
 
 
 
 
 
 | 458 | < | valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT)." | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 459 | < | valPrint ctrh "I skipped $LINKS_SKIPPED $(pluralCheckNoun link $LINKS_SKIPPED), and found $FILE_LINKS $(pluralCheckNoun file $FILE_LINKS) and $PAGE_LINKS $(pluralCheckNoun page $PAGE_LINKS)." | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 460 | < | if [ $LINKS_SKIPPED -gt 0 ]; then valPrint ctrh "Skip breakdown: "; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 461 | < | if [ $SKIP_UNK_NS -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_NS unknown $(pluralCheckNoun namespace $SKIP_UNK_NS)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 458 | > | valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there were $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))." | 
 
 
 
 
 | 459 | > |  | 
 
 
 
 
 | 460 | > | # Print processed link totals | 
 
 
 
 
 | 461 | > | if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi | 
 
 
 
 
 | 462 | > | if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi | 
 
 
 
 
 | 463 | > | if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had issues"; fi | 
 
 
 
 
 | 464 | > | if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi | 
 
 
 
 
 | 465 | > | if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) were OK"; fi | 
 
 
 
 
 | 466 | > | if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctrh "  (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi | 
 
 
 
 
 | 467 | > |  | 
 
 
 
 
 | 468 | > | # Print excepted link totals | 
 
 
 
 
 | 469 | > | if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "$LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) excepted (see RTF or TXT report for specific links):"; fi | 
 
 
 
 
 | 470 | > | if [ $SKIP_EXPECT_NG -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_NG/$NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi | 
 
 
 
 
 | 471 | > | if [ $SKIP_EXPECT_EI -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_EI/$EI_LINKS external internal $(pluralCheckNoun link $EI_LINKS)"; fi | 
 
 
 
 
 | 472 | > | if [ $SKIP_EXPECT_IW -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_IW/$IW_LINKS potential interwiki $(pluralCheckNoun link $IW_LINKS)"; fi | 
 
 
 
 
 | 473 | > |  | 
 
 
 
 
 | 474 | > | # Print errored link totals | 
 
 
 
 
 | 475 | > | if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "$LINK_ERRORS link $(pluralCheckNoun error $LINK_ERRORS) (see RTF or TXT report for specific links):"; fi | 
 
 
 
 
 | 476 | > | if [ $SKIP_UNK_NS -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_NS missing/unknown $(pluralCheckNoun namespace $SKIP_UNK_NS)"; fi | 
 
 
 
 
 
 
 
 
 
 
 | 477 |  | if [ $SKIP_JS_PAGE -gt 0 ]; then valPrint ctrh "- $SKIP_JS_PAGE $(pluralCheckNoun link $SKIP_JS_PAGE) on $(pluralCheckA $SKIP_JS_PAGE)JavaScript $(pluralCheckNoun page $SKIP_JS_PAGE)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 478 |  | if [ $SKIP_BAD_URL -gt 0 ]; then valPrint ctrh "- $SKIP_BAD_URL illegal $(pluralCheckNoun URL $SKIP_BAD_URL)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 479 |  | if [ $SKIP_NON_ASCII -gt 0 ]; then valPrint ctrh "- $SKIP_NON_ASCII non-ASCII $(pluralCheckNoun URL $SKIP_NON_ASCII)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 480 |  | if [ $SKIP_UNK_SUFFIX -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_SUFFIX unknown URL $(pluralCheckNoun suffix $SKIP_UNK_SUFFIX)"; fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 481 |  | if [ $SKIP_UNK_CODE -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_CODE unknown response $(pluralCheckNoun code $SKIP_UNK_CODE)"; fi | 
 
 
 
 
 
 
 
 
 
 
 | 482 | < | valPrint ctrh "Out of the $LINKS_CHECKED links checked, $EI_LINKS could be $(pluralCheckAn $EI_LINKS)intrawiki $(pluralCheckNoun link $EI_LINKS), $IW_LINKS could be $(pluralCheckAn $IW_LINKS)interwiki $(pluralCheckNoun link $IW_LINKS), $OK_LINKS $(pluralCheckWas $OK_LINKS) OK, $RD_LINKS $(pluralCheckWas $RD_LINKS) $(pluralCheckA $RD_LINKS)redirection $(pluralCheckNoun notice $RD_LINKS), and $NG_LINKS $(pluralCheckWas $NG_LINKS) NG." | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 483 | < | if [ $SKIP_EXPECT_NG -gt 0 ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 484 | < | valPrint ctrh "$SKIP_EXPECT_NG/$NG_LINKS NG $(pluralCheckNoun link $NG_LINKS) went unlisted due to being found in the exceptions file." | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 485 | < | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 486 | < | if [ $SKIP_EXPECT_EI -gt 0 ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 487 | < | valPrint ctrh "$SKIP_EXPECT_EI/$EI_LINKS external internal $(pluralCheckNoun link $EI_LINKS) went unlisted due to being found in the exceptions file." | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 488 | < | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 489 | < | if [ $SKIP_EXPECT_IW -gt 0 ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 490 | < | valPrint ctrh "$SKIP_EXPECT_IW/$IW_LINKS potential intrawiki $(pluralCheckNoun link $IW_LINKS) went unlisted due to being found in the exceptions file." | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 461 | < | fi | 
 
 
 
 
 
 
 
 
 | 482 | > |  | 
 
 
 
 
 | 483 | > | # Print checked link totals | 
 
 
 
 
 | 484 | > | if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issue $LINK_PROBLEMS):"; fi | 
 
 
 
 
 | 485 | > | if [ $NG_LINKS -gt 0 ]; then valPrint ctrh "- $NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi | 
 
 
 
 
 | 486 | > | if [ $RD_LINKS -gt 0 ]; then valPrint ctrh "- $RD_LINKS $(pluralCheckNoun redirection $RD_LINKS)"; fi | 
 
 
 
 
 | 487 | > | if [ $EI_LINKS -gt 0 ]; then valPrint ctrh "- $EI_LINKS $(pluralCheckNoun link $EI_LINKS) that could be intrawiki"; fi | 
 
 
 
 
 | 488 | > | if [ $IW_LINKS -gt 0 ]; then valPrint ctrh "- $IW_LINKS $(pluralCheckNoun link $IW_LINKS) that could be interwiki"; fi | 
 
 
 
 
 | 489 | > |  | 
 
 
 
 
 | 490 | > | # Close the log files' markup | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 491 |  | valPrint trh "ValExtLinks says goodbye." | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 492 |  | printRTFfooter | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 493 |  | printHTMfooter | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 663 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 664 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 665 |  | # Build longer wiki page URLs from namespace and page names | 
 
 
 
 
 
 
 
 
 
 
 | 666 | < | FULL_PAGE_PATH=http://$WIKI_PATH/$NS_NAME:$PAGE_NAME | 
 
 
 
 
 
 
 
 
 | 666 | > | FULL_PAGE_PATH=https://$WIKI_PATH/$NS_NAME:$PAGE_NAME | 
 
 
 
 
 
 
 
 
 
 
 | 667 |  | LOCAL_PAGE_PATH=$NS_NAME:$PAGE_NAME | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 668 |  | # Namespace "Main:" cannot be a part of the path; it's an implicit namespace, and naming it | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 669 |  | # explicitly breaks the link | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 670 |  | if [ $NS_ID -eq 0 ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 671 | < | FULL_PAGE_PATH=http://$WIKI_PATH/$PAGE_NAME | 
 
 
 
 
 
 
 
 
 | 671 | > | FULL_PAGE_PATH=https://$WIKI_PATH/$PAGE_NAME | 
 
 
 
 
 
 
 
 
 
 
 | 672 |  | LOCAL_PAGE_PATH=$PAGE_NAME | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 673 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 674 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 823 |  | # Get URL header again in order to retrieve the URL we are being redirected to | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 824 |  | NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent "$AGENT" --max-time 10 --write-out '%{redirect_url}\n' "$URL") | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 825 |  |  | 
 
 
 
 
 
 
 
 
 
 
 | 826 | < | # Filter out cases where the redirect URL is just the original URL with https:// instead of | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 827 | < | # http://, or with an added '/' at the end. These corrections happen a lot and are not | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 828 | < | # important to us. | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 829 | < | URL_NO_PROTOCOL=${URL#http://} | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 801 | < | URL_NO_PROTOCOL=${URL_NO_PROTOCOL%/} | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 802 | < | NEW_URL_NO_PROTOCOL=${NEW_URL#https://} | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 803 | < | NEW_URL_NO_PROTOCOL=${NEW_URL_NO_PROTOCOL%/} | 
 
 
 
 
 
 
 
 
 | 826 | > | # Adjust the old and new URLs to both use HTTP for comparison purposes, so we can filter | 
 
 
 
 
 | 827 | > | # those changes out if the user didn't ask for them | 
 
 
 
 
 | 828 | > | URL_HTTP=$(echo $URL | sed -E 's/^https:/http:/') | 
 
 
 
 
 | 829 | > | NEW_URL_HTTP=$(echo $NEW_URL | sed -E 's/^https:/http:/') | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 830 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 831 |  | # Sometimes 'curl' fails to get the redirect_url due to time-out or bad web site config | 
 
 
 
 
 
 
 
 
 
 
 | 832 | < | NEW_URL_LENGTH=$(echo | awk -v input=$NEW_URL_NO_PROTOCOL '{print length(input)}') | 
 
 
 
 
 
 
 
 
 | 832 | > | NEW_URL_LENGTH=${#NEW_URL_HTTP} | 
 
 
 
 
 
 
 
 
 
 
 | 833 |  | if [ $NEW_URL_LENGTH -lt $MIN_URL_LENGTH ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 834 | < | NEW_URL_NO_PROTOCOL="[new URL not retrieved]" | 
 
 
 
 
 
 
 
 
 | 834 | > | NEW_URL_HTTP="[new URL not retrieved]" | 
 
 
 
 
 
 
 
 
 
 
 | 835 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 836 |  |  | 
 
 
 
 
 
 
 
 
 
 
 | 837 | < | # If the URLs match after the above filters were applied, then the link is OK | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 838 | < | if [ $URL_NO_PROTOCOL == $NEW_URL_NO_PROTOCOL ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 837 | > | # Remove slash at end of new URL, if present, so we can filter out the redirects that | 
 
 
 
 
 | 838 | > | # merely add an ending slash if the user didn't ask for them | 
 
 
 
 
 | 839 | > | NEW_URL_NO_SLASH=$(echo $NEW_URL_HTTP | sed -E 's:/$::') | 
 
 
 
 
 | 840 | > |  | 
 
 
 
 
 | 841 | > | # If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user | 
 
 
 
 
 | 842 | > | # wants those to be reported) | 
 
 
 
 
 | 843 | > | if [ $SHOW_HTTPS -eq 0 ] && [ "$URL_HTTP" == "$NEW_URL_HTTP" ]; then | 
 
 
 
 
 | 844 | > | valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show http->https upgrades, and we were redirected to $NEW_URL." | 
 
 
 
 
 | 845 | > | STATUS="OK" | 
 
 
 
 
 | 846 | > | let OK_LINKS+=1 | 
 
 
 
 
 | 847 | > | let SKIP_HTTPS_UP+=1 | 
 
 
 
 
 | 848 | > | # If the URLs differ only by an added ending slash, then the link is OK (unless user wants | 




 | 849 | > | # those reported); a redirect that changes nothing but http->https is not counted as a slash add | 




 | 850 | > | elif [ $SHOW_SLASH -eq 0 ] && [ "$URL_HTTP" != "$NEW_URL_HTTP" ] && [ "$URL_HTTP" == "$NEW_URL_NO_SLASH" ]; then | 
 
 
 
 
 | 851 | > | valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show added trailing slashes, and we were redirected to $NEW_URL." | 
 
 
 
 
 
 
 
 
 
 
 | 852 |  | STATUS="OK" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 853 |  | let OK_LINKS+=1 | 
 
 
 
 
 
 
 
 | 854 | + | let SKIP_SLASH_ADD+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 855 |  | else | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 856 |  | STATUS="RD" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 857 |  | let RD_LINKS+=1 |