| 455 |
|
valPrint ct "Summary ($ELAPSED):" |
| 456 |
|
valPrint r "\b1 Summary \b0 ($ELAPSED)" |
| 457 |
|
valPrint hn "<h3><span id=\"summary\">Summary ($ELAPSED)</span></h3>" |
| 458 |
< |
valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there were $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))." |
| 458 |
> |
valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there $(pluralCheckWas $FILE_LINKS) $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))." |
| 459 |
|
|
| 460 |
|
# Print processed link totals |
| 461 |
|
if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi |
| 462 |
|
if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi |
| 463 |
< |
if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had issues"; fi |
| 464 |
< |
if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi |
| 465 |
< |
if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) were OK"; fi |
| 466 |
< |
if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctrh " (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi |
| 463 |
> |
if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had $(pluralCheckAn $LINK_PROBLEMS)$(pluralCheckNoun issue $LINK_PROBLEMS)"; fi |
| 464 |
> |
# The copy sent to the 'h' output is indented with "&nbsp;" entities; without the ampersands the text "nbsp;nbsp;" is printed literally
if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; valPrint h "&nbsp;&nbsp;(excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi |
| 465 |
> |
if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi |
| 466 |
> |
if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctr " (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; valPrint h " (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi |
| 467 |
|
|
| 468 |
|
# Print excepted link totals |
| 469 |
|
if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "$LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) excepted (see RTF or TXT report for specific links):"; fi |
| 481 |
|
if [ $SKIP_UNK_CODE -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_CODE unknown response $(pluralCheckNoun code $SKIP_UNK_CODE)"; fi |
| 482 |
|
|
| 483 |
|
# Print checked link totals |
| 484 |
< |
if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issues $LINKS_CHECKED):"; fi |
| 484 |
> |
if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issue $LINK_PROBLEMS):"; fi |
| 485 |
|
if [ $NG_LINKS -gt 0 ]; then valPrint ctrh "- $NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi |
| 486 |
|
if [ $RD_LINKS -gt 0 ]; then valPrint ctrh "- $RD_LINKS $(pluralCheckNoun redirection $RD_LINKS)"; fi |
| 487 |
|
if [ $EI_LINKS -gt 0 ]; then valPrint ctrh "- $EI_LINKS $(pluralCheckNoun link $EI_LINKS) that could be intrawiki"; fi |
| 640 |
|
done |
| 641 |
|
if [ "$NS_NAME" == "" ]; then |
| 642 |
|
if [ $NS_ID == "NULL" ]; then |
| 643 |
< |
valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." |
| 643 |
> |
valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace (and probably the page too) is 'NULL'. Probably the link is no longer in existence on the wiki." |
| 644 |
|
else |
| 645 |
< |
valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." |
| 645 |
> |
valPrint trs "Skipping line $LINK_NUM ('$LINE') because I could not find a name for namespace ID $NS_ID." |
| 646 |
|
fi |
| 647 |
|
let SKIP_UNK_NS+=1 |
| 648 |
|
continue |
| 657 |
|
# JavaScript code, so it will return erroneous links |
| 658 |
|
PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') |
| 659 |
|
if [ $PAGE_NAME_SUFFIX == "js" ]; then |
| 660 |
< |
valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." |
| 660 |
> |
valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'." |
| 661 |
|
let SKIP_JS_PAGE+=1 |
| 662 |
|
continue |
| 663 |
|
fi |
| 678 |
|
|
| 679 |
|
# Scan for illegal characters |
| 680 |
|
if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then |
| 681 |
< |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL." |
| 681 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL." |
| 682 |
|
let SKIP_BAD_URL+=1 |
| 683 |
|
continue |
| 684 |
|
fi |
| 695 |
|
|
| 696 |
|
# 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it |
| 697 |
|
if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then |
| 698 |
< |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters." |
| 698 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters." |
| 699 |
|
let SKIP_NON_ASCII+=1 |
| 700 |
|
continue |
| 701 |
|
fi |
| 758 |
|
# If this suffix escaped identification as either a file, page or TLD, inform the user |
| 759 |
|
STR_TYPE="" |
| 760 |
|
if [ $IS_FILE -eq -1 ]; then |
| 761 |
< |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." |
| 761 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." |
| 762 |
|
let SKIP_UNK_SUFFIX+=1 |
| 763 |
|
continue |
| 764 |
|
elif [ $IS_FILE -eq 1 ]; then |
| 771 |
|
|
| 772 |
|
# Get response code using 'curl' to see if this link is valid; the --insecure option avoids an |
| 773 |
|
# issue with sites that require HTTPS |
| 774 |
< |
CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{http_code}\n' $URL) |
| 774 |
> |
# $AGENT must be double-quoted: single quotes would send the literal text "$AGENT" as the user agent. $URL is quoted so that it is
# passed to curl as exactly one argument, with no word splitting or filename globbing on characters such as '?' and '*'
CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent "$AGENT" --max-time 10 --write-out '%{http_code}\n' "$URL") |
| 775 |
|
CURL_ERR=$(echo $?) |
| 776 |
|
CURL_RESULT=$CURL_CODE |
| 777 |
|
|
| 821 |
|
for CODE in "${RD_CODES[@]}"; do |
| 822 |
|
if [[ $CODE == $CURL_CODE ]]; then |
| 823 |
|
# Get URL header again in order to retrieve the URL we are being redirected to |
| 824 |
< |
NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{redirect_url}\n' $URL) |
| 824 |
> |
# $AGENT must be double-quoted: single quotes would send the literal text "$AGENT" as the user agent. $URL is quoted so that it is
# passed to curl as exactly one argument, with no word splitting or filename globbing on characters such as '?' and '*'
NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent "$AGENT" --max-time 10 --write-out '%{redirect_url}\n' "$URL") |
| 825 |
|
|
| 826 |
|
# Adjust the old and new URLs to both use HTTP for comparison purposes, so we can filter |
| 827 |
|
# those changes out if the user didn't ask for them |
| 841 |
|
# If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user |
| 842 |
|
# wants those to be reported) |
| 843 |
|
if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then |
| 844 |
< |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show http->https upgrades, and we were redirected to $NEW_URL." |
| 844 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'." |
| 845 |
|
STATUS="OK" |
| 846 |
|
let OK_LINKS+=1 |
| 847 |
|
let SKIP_HTTPS_UP+=1 |
| 848 |
|
# If the URLs match besides an added ending slash, then the link is OK (unless user wants |
| 849 |
|
# those to be reported) |
| 850 |
|
elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then |
| 851 |
< |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show added trailing slashes, and we were redirected to $NEW_URL." |
| 851 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'." |
| 852 |
|
STATUS="OK" |
| 853 |
|
let OK_LINKS+=1 |
| 854 |
|
let SKIP_SLASH_ADD+=1 |
| 874 |
|
|
| 875 |
|
# If we didn't match a known status code, advise the reader |
| 876 |
|
if [ $STATUS == "??" ]; then |
| 877 |
< |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE." |
| 877 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown return code $CURL_CODE." |
| 878 |
|
let SKIP_UNK_CODE+=1 |
| 879 |
|
continue |
| 880 |
|
fi |
| 895 |
|
if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == $LOCAL_PAGE_PATH ]; then |
| 896 |
|
EXCEPT_CODE=${GREP_RESULT%%,*} |
| 897 |
|
if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then |
| 898 |
< |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file." |
| 898 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, $EXPECT_CODE, is listed in the exceptions file." |
| 899 |
|
if [ $STATUS == "EI" ]; then |
| 900 |
|
let SKIP_EXPECT_EI+=1 |
| 901 |
|
elif [ $STATUS == "IW" ]; then |
| 925 |
|
valPrint hn "<tr><td style=\"white-space:nowrap\">$STATUS ($CURL_RESULT)</td><td align=\"right\">$STR_TYPE</td><td><a href=\"$URL\" target=\"_blank\">$URL</a></td></tr>" |
| 926 |
|
valPrint hn "<tr><td colspan=\"2\" align=\"right\">linked from</td><td><a href=\"$FULL_PAGE_PATH\" target=\"_blank\">$LOCAL_PAGE_PATH</a></td></tr>" |
| 927 |
|
|
| 928 |
+ |
# Place vertical space here since we won't be printing anything more about this link |
| 929 |
+ |
if [ $STATUS == "OK" ]; then valPrint trh ""; fi |
| 930 |
+ |
|
| 931 |
|
# Record redirect URL if one was given by a 3xx response page |
| 932 |
|
if [ $STATUS == "RD" ]; then |
| 933 |
|
valPrint ts " Server suggests $NEW_URL" |
| 992 |
|
valPrint trhs "Screenshot of URL $URL seems to have failed!" |
| 993 |
|
fi |
| 994 |
|
else |
| 995 |
< |
valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists." |
| 995 |
> |
valPrint trhs "Skipping screenshot of URL '$URL' because file '$SHOT_FILE' already exists." |
| 996 |
|
fi |
| 997 |
|
fi |
| 998 |
|
done |