--- Validate External Links/validate_external_links.sh 2020/03/20 22:13:48 1122
+++ Validate External Links/validate_external_links.sh 2020/03/21 22:08:35 1123
@@ -455,15 +455,15 @@ function wrapupAndExit()
valPrint ct "Summary ($ELAPSED):"
valPrint r "\b1 Summary \b0 ($ELAPSED)"
valPrint hn "
Summary ($ELAPSED)
"
- valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there were $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))."
+ valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there $(pluralCheckWas $FILE_LINKS) $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))."
# Print processed link totals
if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi
if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi
- if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had issues"; fi
- if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
- if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) were OK"; fi
- if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctrh " (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi
+ if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had $(pluralCheckAn $LINK_PROBLEMS)$(pluralCheckNoun issue $LINK_PROBLEMS)"; fi
+	if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; valPrint h "&nbsp;&nbsp;(excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
+ if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi
+	if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctr " (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; valPrint h "&nbsp;&nbsp;(counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi
# Print excepted link totals
if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "$LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) excepted (see RTF or TXT report for specific links):"; fi
@@ -481,7 +481,7 @@ function wrapupAndExit()
if [ $SKIP_UNK_CODE -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_CODE unknown response $(pluralCheckNoun code $SKIP_UNK_CODE)"; fi
# Print checked link totals
- if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issues $LINKS_CHECKED):"; fi
+ if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issue $LINK_PROBLEMS):"; fi
if [ $NG_LINKS -gt 0 ]; then valPrint ctrh "- $NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi
if [ $RD_LINKS -gt 0 ]; then valPrint ctrh "- $RD_LINKS $(pluralCheckNoun redirection $RD_LINKS)"; fi
if [ $EI_LINKS -gt 0 ]; then valPrint ctrh "- $EI_LINKS $(pluralCheckNoun link $EI_LINKS) that could be intrawiki"; fi
@@ -640,9 +640,9 @@ for LINE in `cat "$LINKS_FILE"`; do
done
if [ "$NS_NAME" == "" ]; then
if [ $NS_ID == "NULL" ]; then
- valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki."
+ valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace (and probably the page too) is 'NULL'. Probably the link is no longer in existence on the wiki."
else
- valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID."
+ valPrint trs "Skipping line $LINK_NUM ('$LINE') because I could not find a name for namespace ID $NS_ID."
fi
let SKIP_UNK_NS+=1
continue
@@ -657,7 +657,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# JavaScript code, so it will return erroneous links
PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
if [ $PAGE_NAME_SUFFIX == "js" ]; then
- valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME."
+ valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'."
let SKIP_JS_PAGE+=1
continue
fi
@@ -678,7 +678,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# Scan for illegal characters
if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
- valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL."
+ valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL."
let SKIP_BAD_URL+=1
continue
fi
@@ -695,7 +695,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it
if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
- valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters."
+ valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters."
let SKIP_NON_ASCII+=1
continue
fi
@@ -758,7 +758,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# If this suffix escaped identification as either a file, page or TLD, inform the user
STR_TYPE=""
if [ $IS_FILE -eq -1 ]; then
- valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
+ valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
let SKIP_UNK_SUFFIX+=1
continue
elif [ $IS_FILE -eq 1 ]; then
@@ -771,7 +771,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# Get response code using 'curl' to see if this link is valid; the --insecure option avoids an
# issue with sites that require HTTPS
- CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{http_code}\n' $URL)
+	CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent "$AGENT" --max-time 10 --write-out '%{http_code}\n' $URL)
CURL_ERR=$(echo $?)
CURL_RESULT=$CURL_CODE
@@ -821,7 +821,7 @@ for LINE in `cat "$LINKS_FILE"`; do
for CODE in "${RD_CODES[@]}"; do
if [[ $CODE == $CURL_CODE ]]; then
# Get URL header again in order to retrieve the URL we are being redirected to
- NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{redirect_url}\n' $URL)
+			NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent "$AGENT" --max-time 10 --write-out '%{redirect_url}\n' $URL)
# Adjust the old and new URLs to both use HTTP for comparison purposes, so we can filter
# those changes out if the user didn't ask for them
@@ -841,14 +841,14 @@ for LINE in `cat "$LINKS_FILE"`; do
# If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user
# wants those to be reported)
if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then
- valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show http->https upgrades, and we were redirected to $NEW_URL."
+ valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'."
STATUS="OK"
let OK_LINKS+=1
let SKIP_HTTPS_UP+=1
# If the URLs match besides an added ending slash, then the link is OK (unless user wants
# those to be reported)
elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then
- valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show added trailing slashes, and we were redirected to $NEW_URL."
+ valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'."
STATUS="OK"
let OK_LINKS+=1
let SKIP_SLASH_ADD+=1
@@ -874,7 +874,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# If we didn't match a known status code, advise the reader
if [ $STATUS == "??" ]; then
- valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE."
+ valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown return code $CURL_CODE."
let SKIP_UNK_CODE+=1
continue
fi
@@ -895,7 +895,7 @@ for LINE in `cat "$LINKS_FILE"`; do
if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == $LOCAL_PAGE_PATH ]; then
EXCEPT_CODE=${GREP_RESULT%%,*}
if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then
- valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file."
+ valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, $EXPECT_CODE, is listed in the exceptions file."
if [ $STATUS == "EI" ]; then
let SKIP_EXPECT_EI+=1
elif [ $STATUS == "IW" ]; then
@@ -925,6 +925,9 @@ for LINE in `cat "$LINKS_FILE"`; do
valPrint hn "| $STATUS ($CURL_RESULT) | $STR_TYPE | $URL |
"
valPrint hn "| linked from | $LOCAL_PAGE_PATH |
"
+ # Place vertical space here since we won't be printing anything more about this link
+ if [ $STATUS == "OK" ]; then valPrint trh ""; fi
+
# Record redirect URL if one was given by a 3xx response page
if [ $STATUS == "RD" ]; then
valPrint ts " Server suggests $NEW_URL"
@@ -989,7 +992,7 @@ for LINE in `cat "$LINKS_FILE"`; do
valPrint trhs "Screenshot of URL $URL seems to have failed!"
fi
else
- valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists."
+ valPrint trhs "Skipping screenshot of URL '$URL' because file '$SHOT_FILE' already exists."
fi
fi
done