| 76 |  | # if you add a new code. | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 77 |  | declare -a OK_CODES=(200 401 405 406 418 501) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 78 |  | declare -a RD_CODES=(301 302 303 307 308) | 
 
 
 
 
 
 
 
 
 
 
 | 79 | < | declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 530) | 
 
 
 
 
 
 
 
 
 | 79 | > | declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 530) | 
 
 
 
 
 
 
 
 
 
 
 | 80 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 81 |  | # Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 82 |  | # transcluded text, and if the transclusion fails, then the braces show up in the URL | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 777 |  | PAGE_NAME=${LINE#$NS_ID,} | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 778 |  | PAGE_NAME=${PAGE_NAME%%,*} | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 779 |  |  | 
 
 
 
 
 
 
 
 
 | 780 | – | # We don't want to consider wiki pages ending in .js, as the MW parser cannot reliably isolate URLS | 
 
 
 
 
 
 
 
 
 | 781 | – | # in JavaScript code, so it returns erroneous links | 
 
 
 
 
 
 
 
 
 | 782 | – | PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') | 
 
 
 
 
 
 
 
 
 | 783 | – | if [ $PAGE_NAME_SUFFIX == "js" ]; then | 
 
 
 
 
 
 
 
 
 | 784 | – | valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'." | 
 
 
 
 
 
 
 
 
 | 785 | – | let SKIP_JS_PAGE+=1 | 
 
 
 
 
 
 
 
 
 | 786 | – | let PAGE_LINKS+=1 | 
 
 
 
 
 
 
 
 
 | 787 | – | continue | 
 
 
 
 
 
 
 
 
 | 788 | – | fi | 
 
 
 
 
 
 
 
 
 | 789 | – |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 780 |  | # Build longer wiki page URLs from namespace and page names | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 781 |  | FULL_PAGE_PATH=https://$WIKI_PATH/$NS_NAME:$PAGE_NAME | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 782 |  | LOCAL_PAGE_PATH=$NS_NAME:$PAGE_NAME | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 787 |  | LOCAL_PAGE_PATH=$PAGE_NAME | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 788 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 789 |  |  | 
 
 
 
 
 
 
 
 | 790 | + | # We don't want to consider wiki pages ending in .js, as the MW parser cannot reliably isolate URLS | 
 
 
 
 
 
 
 
 | 791 | + | # in JavaScript code, so it returns erroneous links | 
 
 
 
 
 
 
 
 | 792 | + | PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') | 
 
 
 
 
 
 
 
 | 793 | + | if [ $PAGE_NAME_SUFFIX == "js" ]; then | 
 
 
 
 
 
 
 
 | 794 | + | valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$LOCAL_PAGE_PATH'." | 
 
 
 
 
 
 
 
 | 795 | + | let SKIP_JS_PAGE+=1 | 
 
 
 
 
 
 
 
 | 796 | + | let PAGE_LINKS+=1 | 
 
 
 
 
 
 
 
 | 797 | + | continue | 
 
 
 
 
 
 
 
 | 798 | + | fi | 
 
 
 
 
 
 
 
 | 799 | + |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 800 |  | # The URL being linked to is everything after the previous two fields (this allows commas to be in | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 801 |  | # the URLs, but a comma in the previous field, the page name, will break this) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 802 |  | URL=${LINE#$NS_ID,$PAGE_NAME,} | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 803 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 804 |  | # Scan for illegal characters | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 805 |  | if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then | 
 
 
 
 
 
 
 
 
 
 
 | 806 | < | valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL." | 
 
 
 
 
 
 
 
 
 | 806 | > | valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because it contains characters illegal in a URL." | 
 
 
 
 
 
 
 
 
 
 
 | 807 |  | let SKIP_BAD_URL+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 808 |  | let PAGE_LINKS+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 809 |  | continue | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 811 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 812 |  | # If we're skipping Archive.org links, see if this is one | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 813 |  | if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then | 
 
 
 
 
 
 
 
 
 
 
 | 814 | < | valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to check Wayback Machine links." | 
 
 
 
 
 
 
 
 
 | 814 | > | valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links." | 
 
 
 
 
 
 
 
 
 
 
 | 815 |  | let SKIP_ARCHIVE_ORG+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 816 |  | let PAGE_LINKS+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 817 |  | continue | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 829 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 830 |  | # 'sed' cannot handle Unicode in my Bash shell, so skip non-ASCII URL and make user check it | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 831 |  | if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then | 
 
 
 
 
 
 
 
 
 
 
 | 832 | < | valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters." | 
 
 
 
 
 
 
 
 
 | 832 | > | valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I cannot handle non-ASCII characters." | 
 
 
 
 
 
 
 
 
 
 
 | 833 |  | let SKIP_NON_ASCII+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 834 |  | let PAGE_LINKS+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 835 |  | continue | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 903 |  | # If this suffix escaped identification as either a file, page or TLD, inform the user | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 904 |  | STR_TYPE="" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 905 |  | if [ $IS_FILE -eq -1 ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 906 | < | valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." | 
 
 
 
 
 
 
 
 
 | 906 | > | valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." | 
 
 
 
 
 
 
 
 
 
 
 | 907 |  | let SKIP_UNK_SUFFIX+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 908 |  | continue | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 909 |  | elif [ $IS_FILE -eq 1 ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1005 |  | # If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1006 |  | # wants those to be reported) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1007 |  | if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 1008 | < | valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'." | 
 
 
 
 
 
 
 
 
 | 1008 | > | valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'." | 
 
 
 
 
 
 
 
 
 
 
 | 1009 |  | STATUS="OK" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1010 |  | let OK_LINKS+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1011 |  | let SKIP_HTTPS_UP+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1012 |  | # If the URLs match besides an added ending slash, then the link is OK (unless user wants | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1013 |  | # those to be reported) | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1014 |  | elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 1015 | < | valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'." | 
 
 
 
 
 
 
 
 
 | 1015 | > | valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'." | 
 
 
 
 
 
 
 
 
 
 
 | 1016 |  | STATUS="OK" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1017 |  | let OK_LINKS+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1018 |  | let SKIP_SLASH_ADD+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1024 |  | let NG_LINKS+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1025 |  | else | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1026 |  | if [ $SHOW_YT_RD -eq 0 ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 1027 | < | valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'." | 
 
 
 
 
 
 
 
 
 | 1027 | > | valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'." | 
 
 
 
 
 
 
 
 
 
 
 | 1028 |  | STATUS="OK" | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1029 |  | let OK_LINKS+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1030 |  | let SKIP_YOUTU_BE+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1055 |  |  | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1056 |  | # If we didn't match a known status code, advise the reader | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1057 |  | if [ $STATUS == "??" ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 1058 | < | valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown response code $CURL_CODE." | 
 
 
 
 
 
 
 
 
 | 1058 | > | valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown response code $CURL_CODE." | 
 
 
 
 
 
 
 
 
 
 
 | 1059 |  | let SKIP_UNK_CODE+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1060 |  | continue | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1061 |  | fi | 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1094 |  | # Match result code | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1095 |  | EXCEPT_CODE=${EXCEPT_LINE%%,*} | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1096 |  | if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then | 
 
 
 
 
 
 
 
 
 
 
 | 1097 | < | valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, '$EXPECT_CODE', is in the exceptions list." | 
 
 
 
 
 
 
 
 
 | 1097 | > | valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because its expected result, '$EXPECT_CODE', is in the exceptions list." | 
 
 
 
 
 
 
 
 
 
 
 | 1098 |  | if [ $STATUS == "EI" ]; then | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1099 |  | let SKIP_EXPECT_EI+=1 | 
 
 
 
 
 
 
 
 
 
 
 
 
 | 1100 |  | elif [ $STATUS == "IW" ]; then |