--- Validate External Links/validate_external_links.sh 2023/01/23 01:51:32 1178 +++ Validate External Links/validate_external_links.sh 2023/05/07 19:53:19 1182 @@ -72,8 +72,8 @@ declare -a HTTP_FILES=(3ds 7z avi BINA b declare -a HTTP_TLDS_AND_PAGES=(abstract action ars asp aspx cfm cgi com css de do full htm html it js jsp net org pgi php php3 phtml pl ru shtml stm uk x) # These arrays tells us which HTTP response codes are OK (good), which are RD (redirections), and which -# are NG (no good). Pages that return OK codes will be screenshotted. Remember to update http_codes.txt -# if you add a new code. +# are NG (no good). Pages that return OK codes will be screenshotted when screenshots are asked for. +# Remember to update http_codes.txt if you add a new code. declare -a OK_CODES=(200 401 405 406 418 501) declare -a RD_CODES=(301 302 303 307 308) declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 520 530) @@ -1013,14 +1013,46 @@ for LINE in `cat "$LINKS_FILE"`; do # If this is a YouTube link, we have to look at the actual page source to know if the video # is good or not; override the link's info if it's actually NG if [[ $URL == *www.youtube.com* ]]; then - PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $URL | grep "\"simpleText\":\"Video unavailable\"") - if [ ! -z "$PAGE_TEXT" ]; then + PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $URL) + CURL_ERR=$(echo $?) + if [ "$CURL_ERR" != "0" ]; then + STATUS="NG" + CURL_RESULT="000-$CURL_ERR" + let OK_LINKS-=1 + let NG_LINKS+=1 + elif [[ "$PAGE_TEXT" =~ "simpleText\":\"Video unavailable" ]]; then + STATUS="NG" + CURL_CODE="404" + CURL_RESULT=$CURL_CODE + let OK_LINKS-=1 + let NG_LINKS+=1 + fi + fi + + # If this is a OneDrive link, we have to look at the actual page source to know if the file + # is really still at this URL; override the link's info if it's actually NG or RD + if [[ $URL == *skydrive.live.com* ]]; then + PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $URL) + CURL_ERR=$(echo $?) + if [ "$CURL_ERR" != "0" ]; then + STATUS="NG" + CURL_RESULT="000-$CURL_ERR" + let OK_LINKS-=1 + let NG_LINKS+=1 + elif [[ "$PAGE_TEXT" =~ "