ViewVC Help
View File | Revision Log | View Changeset | Root Listing
root/Oni2/Validate External Links/validate_external_links.sh
(Generate patch)

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1148 by iritscen, Thu Feb 4 23:15:20 2021 UTC vs.
Revision 1149 by iritscen, Sun Feb 7 22:36:56 2021 UTC

# Line 76 | Line 76 | declare -a HTTP_TLDS_AND_PAGES=(action a
76   # if you add a new code.
77   declare -a OK_CODES=(200 401 405 406 418 501)
78   declare -a RD_CODES=(301 302 303 307 308)
79 < declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 530)
79 > declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 530)
80  
81   # Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using
82   # transcluded text, and if the transclusion fails, then the braces show up in the URL
# Line 777 | Line 777 | for LINE in `cat "$LINKS_FILE"`; do
777     PAGE_NAME=${LINE#$NS_ID,}
778     PAGE_NAME=${PAGE_NAME%%,*}
779  
780 -   # We don't want to consider wiki pages ending in .js, as the MW parser cannot reliably isolate URLS
781 -   # in JavaScript code, so it returns erroneous links
782 -   PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
783 -   if [ $PAGE_NAME_SUFFIX == "js" ]; then
784 -      valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'."
785 -      let SKIP_JS_PAGE+=1
786 -      let PAGE_LINKS+=1
787 -      continue
788 -   fi
789 -
780     # Build longer wiki page URLs from namespace and page names
781     FULL_PAGE_PATH=https://$WIKI_PATH/$NS_NAME:$PAGE_NAME
782     LOCAL_PAGE_PATH=$NS_NAME:$PAGE_NAME
# Line 797 | Line 787 | for LINE in `cat "$LINKS_FILE"`; do
787        LOCAL_PAGE_PATH=$PAGE_NAME
788     fi
789  
790 +   # We don't want to consider wiki pages ending in .js, as the MW parser cannot reliably isolate URLS
791 +   # in JavaScript code, so it returns erroneous links
792 +   PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
793 +   if [ $PAGE_NAME_SUFFIX == "js" ]; then
794 +      valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$LOCAL_PAGE_PATH'."
795 +      let SKIP_JS_PAGE+=1
796 +      let PAGE_LINKS+=1
797 +      continue
798 +   fi
799 +
800     # The URL being linked to is everything after the previous two fields (this allows commas to be in
801     # the URLs, but a comma in the previous field, the page name, will break this)
802     URL=${LINE#$NS_ID,$PAGE_NAME,}
803  
804     # Scan for illegal characters
805     if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
806 <      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL."
806 >      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because it contains characters illegal in a URL."
807        let SKIP_BAD_URL+=1
808        let PAGE_LINKS+=1
809        continue
# Line 811 | Line 811 | for LINE in `cat "$LINKS_FILE"`; do
811  
812     # If we're skipping Archive.org links, see if this is one
813     if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then
814 <      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to check Wayback Machine links."
814 >      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links."
815        let SKIP_ARCHIVE_ORG+=1
816        let PAGE_LINKS+=1
817        continue
# Line 829 | Line 829 | for LINE in `cat "$LINKS_FILE"`; do
829  
830     # 'sed' cannot handle Unicode in my Bash shell, so skip non-ASCII URL and make user check it
831     if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
832 <      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters."
832 >      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I cannot handle non-ASCII characters."
833        let SKIP_NON_ASCII+=1
834        let PAGE_LINKS+=1
835        continue
# Line 903 | Line 903 | for LINE in `cat "$LINKS_FILE"`; do
903     # If this suffix escaped identification as either a file, page or TLD, inform the user
904     STR_TYPE=""
905     if [ $IS_FILE -eq -1 ]; then
906 <      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
906 >      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
907        let SKIP_UNK_SUFFIX+=1
908        continue
909     elif [ $IS_FILE -eq 1 ]; then
# Line 1005 | Line 1005 | for LINE in `cat "$LINKS_FILE"`; do
1005              # If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user
1006              # wants those to be reported)
1007              if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then
1008 <               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'."
1008 >               valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'."
1009                 STATUS="OK"
1010                 let OK_LINKS+=1
1011                 let SKIP_HTTPS_UP+=1
1012              # If the URLs match besides an added ending slash, then the link is OK (unless user wants
1013              # those to be reported)
1014              elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then
1015 <               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'."
1015 >               valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'."
1016                 STATUS="OK"
1017                 let OK_LINKS+=1
1018                 let SKIP_SLASH_ADD+=1
# Line 1024 | Line 1024 | for LINE in `cat "$LINKS_FILE"`; do
1024                    let NG_LINKS+=1
1025                 else
1026                    if [ $SHOW_YT_RD -eq 0 ]; then
1027 <                     valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'."
1027 >                     valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'."
1028                       STATUS="OK"
1029                       let OK_LINKS+=1
1030                       let SKIP_YOUTU_BE+=1
# Line 1055 | Line 1055 | for LINE in `cat "$LINKS_FILE"`; do
1055  
1056     # If we didn't match a known status code, advise the reader
1057     if [ $STATUS == "??" ]; then
1058 <      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown response code $CURL_CODE."
1058 >      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown response code $CURL_CODE."
1059        let SKIP_UNK_CODE+=1
1060        continue
1061     fi
# Line 1094 | Line 1094 | for LINE in `cat "$LINKS_FILE"`; do
1094              # Match result code
1095              EXCEPT_CODE=${EXCEPT_LINE%%,*}
1096              if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then
1097 <               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, '$EXPECT_CODE', is in the exceptions list."
1097 >               valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because its expected result, '$EXPECT_CODE', is in the exceptions list."
1098                 if [ $STATUS == "EI" ]; then
1099                    let SKIP_EXPECT_EI+=1
1100                 elif [ $STATUS == "IW" ]; then

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)