883 |
|
# URL ends in a suffix |
884 |
|
HAS_SUFFIX=0 |
885 |
|
|
886 |
< |
# If the URL ends in something like ".php?foo=bar", strip everything from the '?' onward |
886 |
> |
# If the URL ends in a query string like ".php?foo=bar", strip everything from the '?' onward |
887 |
|
CLEAN_URL=${URL%%\?*} |
888 |
|
|
889 |
< |
# If the URL ends in something like "#section_15", strip everything from the '#' onward |
889 |
> |
# If the URL ends in an anchor link like "#section_15", strip everything from the '#' onward |
890 |
|
CLEAN_URL=${CLEAN_URL%%\#*} |
891 |
|
|
892 |
|
# 'sed' cannot handle Unicode in my Bash shell, so skip non-ASCII URLs and make the reader check them |
1025 |
|
let OK_LINKS+=1 |
1026 |
|
|
1027 |
|
# If this is a YouTube link, we have to look at the actual page source to know if the video |
1028 |
< |
# is good or not; override the link's info if it's actually NG |
1028 |
> |
# is good or not; override the link's info if it's actually NG. Also see RD_CODES section |
1029 |
> |
# below for duplicative code. |
1030 |
|
if [[ $URL == *www.youtube.com* ]]; then |
1031 |
|
PAGE_TEXT=$(curl --silent --insecure --user-agent "$AGENT" --max-time "$TIMEOUT" "$URL")  # double quotes: expand $AGENT (single quotes sent the literal text), and keep '?'/'*'/'&' in the URL from being split or globbed |
1032 |
|
CURL_ERR=$?  # exit status of the curl command substitution above; must be read before any other command runs |
1035 |
|
CURL_RESULT="000-$CURL_ERR" |
1036 |
|
let OK_LINKS-=1 |
1037 |
|
let NG_LINKS+=1 |
1038 |
< |
elif [[ "$PAGE_TEXT" =~ "simpleText\":\"Video unavailable" ]]; then |
1038 |
> |
elif [[ "$PAGE_TEXT" =~ "simpleText\":\"Video unavailable" ]] || [[ "$PAGE_TEXT" =~ "simpleText\":\"Private video" ]]; then  # logical OR: a single '|' forms a pipeline whose status is only that of the last test, so "Video unavailable" pages were never flagged |
1039 |
|
STATUS="NG" |
1040 |
|
CURL_CODE="404" |
1041 |
|
CURL_RESULT=$CURL_CODE |
1123 |
|
let OK_LINKS+=1 |
1124 |
|
let SKIP_SLASH_ADD+=1 |
1125 |
|
elif [ $YOUTU_BE -eq 1 ]; then |
1126 |
< |
# We have to look at the actual page source to know if a YouTube video is good or not |
1127 |
< |
PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $NEW_URL | grep "\"simpleText\":\"Video unavailable\"") |
1126 |
> |
# We have to look at the actual page source to know if a YouTube video is good or not. |
1127 |
> |
# Also see OK_CODES section above for duplicative code. |
1128 |
> |
PAGE_TEXT=$(curl --silent --insecure --user-agent "$AGENT" --max-time "$TIMEOUT" "$NEW_URL" | grep -e "simpleText\":\"Video unavailable" -e "simpleText\":\"Private video")  # double quotes so $AGENT is expanded and the redirect URL is passed as one unglobbed word; non-empty result means the video is unavailable or private |
1129 |
|
if [ ! -z "$PAGE_TEXT" ]; then |
1130 |
|
STATUS="NG" |
1131 |
|
let NG_LINKS+=1 |