--- Validate External Links/validate_external_links.sh 2024/06/23 23:36:00 1190 +++ Validate External Links/validate_external_links.sh 2024/10/31 22:03:44 1193 @@ -883,10 +883,10 @@ for LINE in `cat "$LINKS_FILE"`; do # URL ends in a suffix HAS_SUFFIX=0 - # If the URL ends in something like ".php?foo=bar", strip everything from the '?' onward + # If the URL ends in a query string like ".php?foo=bar", strip everything from the '?' onward CLEAN_URL=${URL%%\?*} - # If the URL ends in something like "#section_15", strip everything from the '#' onward + # If the URL ends in an anchor link like "#section_15", strip everything from the '#' onward CLEAN_URL=${CLEAN_URL%%\#*} # 'sed' cannot handle Unicode in my Bash shell, so skip non-ASCII URL and make reader check it @@ -1025,7 +1025,8 @@ for LINE in `cat "$LINKS_FILE"`; do let OK_LINKS+=1 # If this is a YouTube link, we have to look at the actual page source to know if the video - # is good or not; override the link's info if it's actually NG + # is good or not; override the link's info if it's actually NG. Also see RD_CODES section + # below for duplicative code. if [[ $URL == *www.youtube.com* ]]; then PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $URL) CURL_ERR=$(echo $?) 
@@ -1034,7 +1035,7 @@ for LINE in `cat "$LINKS_FILE"`; do CURL_RESULT="000-$CURL_ERR" let OK_LINKS-=1 let NG_LINKS+=1 - elif [[ "$PAGE_TEXT" =~ "simpleText\":\"Video unavailable" ]]; then + elif [[ "$PAGE_TEXT" =~ "simpleText\":\"Video unavailable" ]] || [[ "$PAGE_TEXT" =~ "simpleText\":\"Private video" ]]; then STATUS="NG" CURL_CODE="404" CURL_RESULT=$CURL_CODE @@ -1122,8 +1123,9 @@ for LINE in `cat "$LINKS_FILE"`; do let OK_LINKS+=1 let SKIP_SLASH_ADD+=1 elif [ $YOUTU_BE -eq 1 ]; then - # We have to look at the actual page source to know if a YouTube video is good or not - PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $NEW_URL | grep "\"simpleText\":\"Video unavailable\"") + # We have to look at the actual page source to know if a YouTube video is good or not. + # Also see OK_CODES section above for duplicative code. + PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $NEW_URL | grep -e "simpleText\":\"Video unavailable" -e "simpleText\":\"Private video") if [ ! -z "$PAGE_TEXT" ]; then STATUS="NG" let NG_LINKS+=1