| 76 |
|
# if you add a new code. |
| 77 |
|
declare -a OK_CODES=(200 401 405 406 418 501) |
| 78 |
|
declare -a RD_CODES=(301 302 303 307 308) |
| 79 |
< |
declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 530) |
| 79 |
> |
declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 530) |
| 80 |
|
|
| 81 |
|
# Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using |
| 82 |
|
# transcluded text, and if the transclusion fails, then the braces show up in the URL |
| 777 |
|
PAGE_NAME=${LINE#$NS_ID,} |
| 778 |
|
PAGE_NAME=${PAGE_NAME%%,*} |
| 779 |
|
|
| 780 |
– |
# We don't want to consider wiki pages ending in .js, as the MW parser cannot reliably isolate URLS |
| 781 |
– |
# in JavaScript code, so it returns erroneous links |
| 782 |
– |
PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') |
| 783 |
– |
if [ $PAGE_NAME_SUFFIX == "js" ]; then |
| 784 |
– |
valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'." |
| 785 |
– |
let SKIP_JS_PAGE+=1 |
| 786 |
– |
let PAGE_LINKS+=1 |
| 787 |
– |
continue |
| 788 |
– |
fi |
| 789 |
– |
|
| 780 |
|
# Build longer wiki page URLs from namespace and page names |
| 781 |
|
FULL_PAGE_PATH=https://$WIKI_PATH/$NS_NAME:$PAGE_NAME |
| 782 |
|
LOCAL_PAGE_PATH=$NS_NAME:$PAGE_NAME |
| 787 |
|
LOCAL_PAGE_PATH=$PAGE_NAME |
| 788 |
|
fi |
| 789 |
|
|
| 790 |
+ |
# We don't want to consider wiki pages ending in .js, as the MW parser cannot reliably isolate URLs |
| 791 |
+ |
# in JavaScript code, so it returns erroneous links |
| 792 |
+ |
PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') |
| 793 |
+ |
if [ $PAGE_NAME_SUFFIX == "js" ]; then |
| 794 |
+ |
valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$LOCAL_PAGE_PATH'." |
| 795 |
+ |
let SKIP_JS_PAGE+=1 |
| 796 |
+ |
let PAGE_LINKS+=1 |
| 797 |
+ |
continue |
| 798 |
+ |
fi |
| 799 |
+ |
|
| 800 |
|
# The URL being linked to is everything after the previous two fields (this allows commas to be in |
| 801 |
|
# the URLs, but a comma in the previous field, the page name, will break this) |
| 802 |
|
URL=${LINE#$NS_ID,$PAGE_NAME,} |
| 803 |
|
|
| 804 |
|
# Scan for illegal characters |
| 805 |
|
if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then |
| 806 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL." |
| 806 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because it contains characters illegal in a URL." |
| 807 |
|
let SKIP_BAD_URL+=1 |
| 808 |
|
let PAGE_LINKS+=1 |
| 809 |
|
continue |
| 811 |
|
|
| 812 |
|
# If we're skipping Archive.org links, see if this is one |
| 813 |
|
if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then |
| 814 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to check Wayback Machine links." |
| 814 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links." |
| 815 |
|
let SKIP_ARCHIVE_ORG+=1 |
| 816 |
|
let PAGE_LINKS+=1 |
| 817 |
|
continue |
| 829 |
|
|
| 830 |
|
# 'sed' cannot handle Unicode in my Bash shell, so skip non-ASCII URLs and make the user check them |
| 831 |
|
if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then |
| 832 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters." |
| 832 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I cannot handle non-ASCII characters." |
| 833 |
|
let SKIP_NON_ASCII+=1 |
| 834 |
|
let PAGE_LINKS+=1 |
| 835 |
|
continue |
| 903 |
|
# If this suffix escaped identification as either a file, page or TLD, inform the user |
| 904 |
|
STR_TYPE="" |
| 905 |
|
if [ $IS_FILE -eq -1 ]; then |
| 906 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." |
| 906 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." |
| 907 |
|
let SKIP_UNK_SUFFIX+=1 |
| 908 |
|
continue |
| 909 |
|
elif [ $IS_FILE -eq 1 ]; then |
| 1005 |
|
# If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user |
| 1006 |
|
# wants those to be reported) |
| 1007 |
|
if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then |
| 1008 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'." |
| 1008 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'." |
| 1009 |
|
STATUS="OK" |
| 1010 |
|
let OK_LINKS+=1 |
| 1011 |
|
let SKIP_HTTPS_UP+=1 |
| 1012 |
|
# If the URLs match besides an added ending slash, then the link is OK (unless user wants |
| 1013 |
|
# those to be reported) |
| 1014 |
|
elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then |
| 1015 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'." |
| 1015 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'." |
| 1016 |
|
STATUS="OK" |
| 1017 |
|
let OK_LINKS+=1 |
| 1018 |
|
let SKIP_SLASH_ADD+=1 |
| 1024 |
|
let NG_LINKS+=1 |
| 1025 |
|
else |
| 1026 |
|
if [ $SHOW_YT_RD -eq 0 ]; then |
| 1027 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'." |
| 1027 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'." |
| 1028 |
|
STATUS="OK" |
| 1029 |
|
let OK_LINKS+=1 |
| 1030 |
|
let SKIP_YOUTU_BE+=1 |
| 1055 |
|
|
| 1056 |
|
# If we didn't match a known status code, advise the reader |
| 1057 |
|
if [ $STATUS == "??" ]; then |
| 1058 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown response code $CURL_CODE." |
| 1058 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown response code $CURL_CODE." |
| 1059 |
|
let SKIP_UNK_CODE+=1 |
| 1060 |
|
continue |
| 1061 |
|
fi |
| 1094 |
|
# Match result code |
| 1095 |
|
EXCEPT_CODE=${EXCEPT_LINE%%,*} |
| 1096 |
|
if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then |
| 1097 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, '$EXPECT_CODE', is in the exceptions list." |
| 1097 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because its expected result, '$EXPECT_CODE', is in the exceptions list." |
| 1098 |
|
if [ $STATUS == "EI" ]; then |
| 1099 |
|
let SKIP_EXPECT_EI+=1 |
| 1100 |
|
elif [ $STATUS == "IW" ]; then |