ViewVC Help
View File | Revision Log | View Changeset | Root Listing
root/Oni2/Validate External Links/validate_external_links.sh
(Generate patch)

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1147 by iritscen, Tue Feb 2 20:10:39 2021 UTC vs.
Revision 1148 by iritscen, Thu Feb 4 23:15:20 2021 UTC

# Line 47 | Line 47 | URL_LIMIT=0            # if non-zero, st
47   UPLOAD_INFO=""         # path to a file on your hard drive with the login info needed to upload a report
48  
49   # Fixed strings -- see the occurrences of these variables to learn their purpose
50 < AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36 OPR/70.0.3728.154"
50 > AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/88.0.4324.146 Safari/537.36"
51   ARCHIVE_API="http://archive.org/wayback/available"
52   ARCHIVE_GENERIC="https://web.archive.org/web/*"
53   ARCHIVE_OK_CODES="statuscodes=200&statuscodes=203&statuscodes=206"
# Line 76 | Line 76 | declare -a HTTP_TLDS_AND_PAGES=(action a
76   # if you add a new code.
77   declare -a OK_CODES=(200 401 405 406 418 501)
78   declare -a RD_CODES=(301 302 303 307 308)
79 < declare -a NG_CODES=(000 400 403 404 410 500 502 503 530)
79 > declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 530)
80  
81   # Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using
82   # transcluded text, and if the transclusion fails, then the braces show up in the URL
# Line 720 | Line 720 | for LINE in `cat "$LINKS_FILE"`; do
720     if [ $SKIPPED_HEADER_ROW -eq 0 ]; then
721        if [ $LINE == "namespace,title,target" ]; then
722           SKIPPED_HEADER_ROW=1
723 <         LINK_NUM=0 # this line is it's not a link, so reset the link counter
723 >         LINK_NUM=0 # this line is not a link, so reset the link counter
724           valPrint hn "<table>"
725           continue
726        else
# Line 768 | Line 768 | for LINE in `cat "$LINKS_FILE"`; do
768           valPrint trs "Skipping line $LINK_NUM ('$LINE') because I could not find a name for namespace ID $NS_ID."
769        fi
770        let SKIP_UNK_NS+=1
771 +      let PAGE_LINKS+=1
772        continue
773     fi
774  
# Line 782 | Line 783 | for LINE in `cat "$LINKS_FILE"`; do
783     if [ $PAGE_NAME_SUFFIX == "js" ]; then
784        valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'."
785        let SKIP_JS_PAGE+=1
786 +      let PAGE_LINKS+=1
787        continue
788     fi
789  
# Line 803 | Line 805 | for LINE in `cat "$LINKS_FILE"`; do
805     if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
806        valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL."
807        let SKIP_BAD_URL+=1
808 +      let PAGE_LINKS+=1
809        continue
810     fi
811  
# Line 810 | Line 813 | for LINE in `cat "$LINKS_FILE"`; do
813     if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then
814        valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to check Wayback Machine links."
815        let SKIP_ARCHIVE_ORG+=1
816 +      let PAGE_LINKS+=1
817        continue
818     fi
819  
# Line 827 | Line 831 | for LINE in `cat "$LINKS_FILE"`; do
831     if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
832        valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters."
833        let SKIP_NON_ASCII+=1
834 +      let PAGE_LINKS+=1
835        continue
836     fi
837  
# Line 904 | Line 909 | for LINE in `cat "$LINKS_FILE"`; do
909     elif [ $IS_FILE -eq 1 ]; then
910        STR_TYPE="file"
911        let FILE_LINKS+=1
912 <   elif [ $IS_FILE -eq 0 ]; then
912 >   else
913        STR_TYPE="page"
914        let PAGE_LINKS+=1
915     fi
# Line 952 | Line 957 | for LINE in `cat "$LINKS_FILE"`; do
957           if [[ $CODE == $CURL_CODE ]]; then
958              STATUS="OK"
959              let OK_LINKS+=1
960 +
961 +            # If this is a YouTube link, we have to look at the actual page source to know if the video
962 +            # is good or not
963 +            if [[ $URL == *www.youtube.com* ]]; then
964 +               PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $URL | grep "\"simpleText\":\"Video unavailable\"")
965 +               if [ ! -z "$PAGE_TEXT" ]; then
966 +                  STATUS="NG"
967 +                  let OK_LINKS-=1
968 +                  let NG_LINKS+=1
969 +               fi
970 +            fi
971              break
972           fi
973        done
# Line 1000 | Line 1016 | for LINE in `cat "$LINKS_FILE"`; do
1016                 STATUS="OK"
1017                 let OK_LINKS+=1
1018                 let SKIP_SLASH_ADD+=1
1019 <            elif [ $SHOW_YT_RD -eq 0 ] && [ $YOUTU_BE -eq 1 ]; then
1020 <               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'."
1021 <               STATUS="OK"
1022 <               let OK_LINKS+=1
1023 <               let SKIP_YOUTU_BE+=1
1019 >            elif [ $YOUTU_BE -eq 1 ]; then
1020 >               # We have to look at the actual page source to know if a YouTube video is good or not
1021 >               PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $NEW_URL | grep "\"simpleText\":\"Video unavailable\"")
1022 >               if [ ! -z "$PAGE_TEXT" ]; then
1023 >                  STATUS="NG"
1024 >                  let NG_LINKS+=1
1025 >               else
1026 >                  if [ $SHOW_YT_RD -eq 0 ]; then
1027 >                     valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'."
1028 >                     STATUS="OK"
1029 >                     let OK_LINKS+=1
1030 >                     let SKIP_YOUTU_BE+=1
1031 >                  else
1032 >                     STATUS="RD"
1033 >                     let RD_LINKS+=1
1034 >                  fi
1035 >               fi
1036              else
1037                 STATUS="RD"
1038                 let RD_LINKS+=1

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)