--- Validate External Links/validate_external_links.sh 2020/03/20 22:13:48 1122 +++ Validate External Links/validate_external_links.sh 2020/03/21 22:08:35 1123 @@ -455,15 +455,15 @@ function wrapupAndExit() valPrint ct "Summary ($ELAPSED):" valPrint r "\b1 Summary \b0 ($ELAPSED)" valPrint hn "

Summary ($ELAPSED)

" - valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there were $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))." + valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there $(pluralCheckWas $FILE_LINKS) $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))." # Print processed link totals if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi - if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had issues"; fi - if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi - if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) were OK"; fi - if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctrh " (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi + if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had $(pluralCheckAn $LINK_PROBLEMS)$(pluralCheckNoun issue $LINK_PROBLEMS)"; fi + if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; valPrint h "nbsp;nbsp;(excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi + if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi + if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctr " (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; 
valPrint h "  (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi # Print excepted link totals if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "$LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) excepted (see RTF or TXT report for specific links):"; fi @@ -481,7 +481,7 @@ function wrapupAndExit() if [ $SKIP_UNK_CODE -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_CODE unknown response $(pluralCheckNoun code $SKIP_UNK_CODE)"; fi # Print checked link totals - if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issues $LINKS_CHECKED):"; fi + if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issue $LINK_PROBLEMS):"; fi if [ $NG_LINKS -gt 0 ]; then valPrint ctrh "- $NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi if [ $RD_LINKS -gt 0 ]; then valPrint ctrh "- $RD_LINKS $(pluralCheckNoun redirection $RD_LINKS)"; fi if [ $EI_LINKS -gt 0 ]; then valPrint ctrh "- $EI_LINKS $(pluralCheckNoun link $EI_LINKS) that could be intrawiki"; fi @@ -640,9 +640,9 @@ for LINE in `cat "$LINKS_FILE"`; do done if [ "$NS_NAME" == "" ]; then if [ $NS_ID == "NULL" ]; then - valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." + valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace (and probably the page too) is 'NULL'. Probably the link is no longer in existence on the wiki." else - valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." + valPrint trs "Skipping line $LINK_NUM ('$LINE') because I could not find a name for namespace ID $NS_ID." 
fi let SKIP_UNK_NS+=1 continue @@ -657,7 +657,7 @@ for LINE in `cat "$LINKS_FILE"`; do # JavaScript code, so it will return erroneous links PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') if [ $PAGE_NAME_SUFFIX == "js" ]; then - valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." + valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'." let SKIP_JS_PAGE+=1 continue fi @@ -678,7 +678,7 @@ for LINE in `cat "$LINKS_FILE"`; do # Scan for illegal characters if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then - valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL." + valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL." let SKIP_BAD_URL+=1 continue fi @@ -695,7 +695,7 @@ for LINE in `cat "$LINKS_FILE"`; do # 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then - valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters." + valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters." let SKIP_NON_ASCII+=1 continue fi @@ -758,7 +758,7 @@ for LINE in `cat "$LINKS_FILE"`; do # If this suffix escaped identification as either a file, page or TLD, inform the user STR_TYPE="" if [ $IS_FILE -eq -1 ]; then - valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." + valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." 
let SKIP_UNK_SUFFIX+=1 continue elif [ $IS_FILE -eq 1 ]; then @@ -771,7 +771,7 @@ for LINE in `cat "$LINKS_FILE"`; do # Get response code using 'curl' to see if this link is valid; the --insecure option avoids an # issue with sites that require HTTPS - CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{http_code}\n' $URL) + CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent "$AGENT" --max-time 10 --write-out '%{http_code}\n' $URL) CURL_ERR=$(echo $?) CURL_RESULT=$CURL_CODE @@ -821,7 +821,7 @@ for LINE in `cat "$LINKS_FILE"`; do for CODE in "${RD_CODES[@]}"; do if [[ $CODE == $CURL_CODE ]]; then # Get URL header again in order to retrieve the URL we are being redirected to - NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{redirect_url}\n' $URL) + NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent "$AGENT" --max-time 10 --write-out '%{redirect_url}\n' $URL) # Adjust the old and new URLs to both use HTTP for comparison purposes, so we can filter # those changes out if the user didn't ask for them @@ -841,14 +841,14 @@ for LINE in `cat "$LINKS_FILE"`; do # If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user # wants those to be reported) if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then - valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show http->https upgrades, and we were redirected to $NEW_URL." + valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'." 
STATUS="OK" let OK_LINKS+=1 let SKIP_HTTPS_UP+=1 # If the URLs match besides an added ending slash, then the link is OK (unless user wants # those to be reported) elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then - valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show added trailing slashes, and we were redirected to $NEW_URL." + valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'." STATUS="OK" let OK_LINKS+=1 let SKIP_SLASH_ADD+=1 @@ -874,7 +874,7 @@ for LINE in `cat "$LINKS_FILE"`; do # If we didn't match a known status code, advise the reader if [ $STATUS == "??" ]; then - valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE." + valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown return code $CURL_CODE." let SKIP_UNK_CODE+=1 continue fi @@ -895,7 +895,7 @@ for LINE in `cat "$LINKS_FILE"`; do if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == $LOCAL_PAGE_PATH ]; then EXCEPT_CODE=${GREP_RESULT%%,*} if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then - valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file." + valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, $EXPECT_CODE, is listed in the exceptions file." 
if [ $STATUS == "EI" ]; then let SKIP_EXPECT_EI+=1 elif [ $STATUS == "IW" ]; then @@ -925,6 +925,9 @@ for LINE in `cat "$LINKS_FILE"`; do valPrint hn "$STATUS ($CURL_RESULT)$STR_TYPE$URL" valPrint hn "linked from$LOCAL_PAGE_PATH" + # Place vertical space here since we won't be printing anything more about this link + if [ $STATUS == "OK" ]; then valPrint trh ""; fi + # Record redirect URL if one was given by a 3xx response page if [ $STATUS == "RD" ]; then valPrint ts " Server suggests $NEW_URL" @@ -989,7 +992,7 @@ for LINE in `cat "$LINKS_FILE"`; do valPrint trhs "Screenshot of URL $URL seems to have failed!" fi else - valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists." + valPrint trhs "Skipping screenshot of URL '$URL' because file '$SHOT_FILE' already exists." fi fi done