root/Oni2/Validate External Links/validate_external_links.sh

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1122 by iritscen, Fri Mar 20 22:13:48 2020 UTC vs.
Revision 1123 by iritscen, Sat Mar 21 22:08:35 2020 UTC

# Line 455 | Line 455 | function wrapupAndExit()
455     valPrint ct "Summary ($ELAPSED):"
456     valPrint r "\b1 Summary \b0 ($ELAPSED)"
457     valPrint hn "<h3><span id=\"summary\">Summary ($ELAPSED)</span></h3>"
458 <   valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there were $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))."
458 >   valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there $(pluralCheckWas $FILE_LINKS) $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))."
459  
460     # Print processed link totals
461     if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi
462     if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi
463 <   if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had issues"; fi
464 <   if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
465 <   if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) were OK"; fi
466 <   if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctrh "  (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi
463 >   if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had $(pluralCheckAn $LINK_PROBLEMS)$(pluralCheckNoun issue $LINK_PROBLEMS)"; fi
464 >   if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; valPrint h "&nbsp;&nbsp;(excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
465 >   if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi
466 >   if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctr "  (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; valPrint h "&nbsp;&nbsp;(counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi
467  
468     # Print excepted link totals
469     if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "$LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) excepted (see RTF or TXT report for specific links):"; fi
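The summary lines above lean on small pluralization helpers (pluralCheckNoun, pluralCheckWas, pluralCheckAn) defined earlier in the script and not shown in this diff. A minimal sketch of how such helpers could behave, inferred from the call pattern visible here (noun then count for pluralCheckNoun, count alone for the other two); the actual definitions may differ:

   # Sketches only; the real helpers live in a part of the script not shown here
   function pluralCheckNoun()
   {
      # Echo noun ($1) with "s" appended when the count ($2) is not 1
      if [ $2 -ne 1 ]; then echo "${1}s"; else echo "$1"; fi
   }
   function pluralCheckWas()
   {
      # Echo "was" for a count ($1) of 1, "were" otherwise
      if [ $1 -eq 1 ]; then echo "was"; else echo "were"; fi
   }
   function pluralCheckAn()
   {
      # Echo "an " only when the count ($1) is 1, so "had an issue" vs. "had issues"
      if [ $1 -eq 1 ]; then echo "an "; fi
   }

With helpers like these, "there $(pluralCheckWas 1) 1 file link" renders as "there was 1 file link".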
# Line 481 | Line 481 | function wrapupAndExit()
481     if [ $SKIP_UNK_CODE -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_CODE unknown response $(pluralCheckNoun code $SKIP_UNK_CODE)"; fi
482  
483     # Print checked link totals
484 <   if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issues $LINKS_CHECKED):"; fi
484 >   if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issue $LINK_PROBLEMS):"; fi
485     if [ $NG_LINKS -gt 0 ]; then valPrint ctrh "- $NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi
486     if [ $RD_LINKS -gt 0 ]; then valPrint ctrh "- $RD_LINKS $(pluralCheckNoun redirection $RD_LINKS)"; fi
487     if [ $EI_LINKS -gt 0 ]; then valPrint ctrh "- $EI_LINKS $(pluralCheckNoun link $EI_LINKS) that could be intrawiki"; fi
# Line 640 | Line 640 | for LINE in `cat "$LINKS_FILE"`; do
640     done
641     if [ "$NS_NAME" == "" ]; then
642        if [ $NS_ID == "NULL" ]; then
643 <         valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki."
643 >         valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace (and probably the page too) is 'NULL'. Probably the link is no longer in existence on the wiki."
644        else
645 <         valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID."
645 >         valPrint trs "Skipping line $LINK_NUM ('$LINE') because I could not find a name for namespace ID $NS_ID."
646        fi
647        let SKIP_UNK_NS+=1
648        continue
# Line 657 | Line 657 | for LINE in `cat "$LINKS_FILE"`; do
657     # JavaScript code, so it will return erroneous links
658     PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
659     if [ $PAGE_NAME_SUFFIX == "js" ]; then
660 <      valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME."
660 >      valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'."
661        let SKIP_JS_PAGE+=1
662        continue
663     fi
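The revised message pulls the URL out of $LINE with a prefix-stripping parameter expansion, which implies each input line has the form NS_ID,PAGE_NAME,URL. A standalone illustration with made-up values:

   # Made-up values in the assumed NS_ID,PAGE_NAME,URL field order
   LINE='0,Main_Page,http://example.com/page'
   NS_ID='0'
   PAGE_NAME='Main_Page'
   echo "${LINE#$NS_ID,$PAGE_NAME,}"   # prints: http://example.com/page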
# Line 678 | Line 678 | for LINE in `cat "$LINKS_FILE"`; do
678  
679     # Scan for illegal characters
680     if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
681 <      valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL."
681 >      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL."
682        let SKIP_BAD_URL+=1
683        continue
684     fi
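The illegal-character test expands $ILLEGAL_CHARS inside a glob bracket expression, so the [[ ]] pattern matches when the URL contains any single character from that set. ILLEGAL_CHARS itself is defined elsewhere in the script; a self-contained illustration with a made-up character set:

   # Hypothetical character set; the script defines its own ILLEGAL_CHARS elsewhere
   ILLEGAL_CHARS='"<>'
   URL='http://example.com/bad"page'
   if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then echo "URL contains an illegal character"; fi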
# Line 695 | Line 695 | for LINE in `cat "$LINKS_FILE"`; do
695  
696     # 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it
697     if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
698 <      valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters."
698 >      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters."
699        let SKIP_NON_ASCII+=1
700        continue
701     fi
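The non-ASCII check uses the negated [:ascii:] character class, which bash supports inside [[ ]] pattern matching, to catch any character outside the ASCII range. For example:

   URL_WITH_UNICODE='http://example.com/café'
   if [[ $URL_WITH_UNICODE == *[![:ascii:]]* ]]; then echo "URL contains non-ASCII characters"; fi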
# Line 758 | Line 758 | for LINE in `cat "$LINKS_FILE"`; do
758     # If this suffix escaped identification as either a file, page or TLD, inform the user
759     STR_TYPE=""
760     if [ $IS_FILE -eq -1 ]; then
761 <      valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
761 >      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
762        let SKIP_UNK_SUFFIX+=1
763        continue
764     elif [ $IS_FILE -eq 1 ]; then
# Line 771 | Line 771 | for LINE in `cat "$LINKS_FILE"`; do
771  
772     # Get response code using 'curl' to see if this link is valid; the --insecure option avoids an
773     # issue with sites that require HTTPS
774 <   CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{http_code}\n' $URL)
774 >   CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time 10 --write-out '%{http_code}\n' $URL)
775     CURL_ERR=$(echo $?)
776     CURL_RESULT=$CURL_CODE
777  
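One thing worth noting about the changed curl invocation: in bash, single quotes suppress parameter expansion, so --user-agent '$AGENT' sends the literal text $AGENT rather than the value of the AGENT variable; passing the value would require double quotes. A minimal comparison, assuming AGENT holds the user-agent string set earlier in the script:

   AGENT='Mozilla/5.0 (link validator)'   # assumed value for illustration
   curl --silent --head --user-agent '$AGENT' --max-time 10 -o /dev/null 'http://example.com'   # sends literal $AGENT
   curl --silent --head --user-agent "$AGENT" --max-time 10 -o /dev/null 'http://example.com'   # sends the variable's value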
# Line 821 | Line 821 | for LINE in `cat "$LINKS_FILE"`; do
821        for CODE in "${RD_CODES[@]}"; do
822           if [[ $CODE == $CURL_CODE ]]; then
823              # Get URL header again in order to retrieve the URL we are being redirected to
824 <            NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{redirect_url}\n' $URL)
824 >            NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time 10 --write-out '%{redirect_url}\n' $URL)
825  
826              # Adjust the old and new URLs to both use HTTP for comparison purposes, so we can filter
827              # those changes out if the user didn't ask for them
# Line 841 | Line 841 | for LINE in `cat "$LINKS_FILE"`; do
841              # If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user
842              # wants those to be reported)
843              if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then
844 <               valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show http->https upgrades, and we were redirected to $NEW_URL."
844 >               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'."
845                 STATUS="OK"
846                 let OK_LINKS+=1
847                 let SKIP_HTTPS_UP+=1
848              # If the URLs match besides an added ending slash, then the link is OK (unless user wants
849              # those to be reported)
850              elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then
851 <               valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show added trailing slashes, and we were redirected to $NEW_URL."
851 >               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'."
852                 STATUS="OK"
853                 let OK_LINKS+=1
854                 let SKIP_SLASH_ADD+=1
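The variables URL_HTTP, NEW_URL_HTTP and NEW_URL_NO_SLASH compared above are prepared in lines omitted from this hunk. One plausible way to derive them, assuming simple parameter expansions (the script's actual code may differ):

   # Hypothetical reconstruction of the omitted normalization step
   URL_HTTP=${URL/#https:/http:}            # compare both URLs as http
   NEW_URL_HTTP=${NEW_URL/#https:/http:}
   NEW_URL_NO_SLASH=${NEW_URL_HTTP%/}       # redirect target without a trailing slash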
# Line 874 | Line 874 | for LINE in `cat "$LINKS_FILE"`; do
874  
875     # If we didn't match a known status code, advise the reader
876     if [ $STATUS == "??" ]; then
877 <      valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE."
877 >      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown return code $CURL_CODE."
878        let SKIP_UNK_CODE+=1
879        continue
880     fi
# Line 895 | Line 895 | for LINE in `cat "$LINKS_FILE"`; do
895        if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == $LOCAL_PAGE_PATH ]; then
896           EXCEPT_CODE=${GREP_RESULT%%,*}
897           if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then
898 <            valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file."
898 >            valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, $EXPECT_CODE, is listed in the exceptions file."
899              if [ $STATUS == "EI" ]; then
900                 let SKIP_EXPECT_EI+=1
901              elif [ $STATUS == "IW" ]; then
# Line 925 | Line 925 | for LINE in `cat "$LINKS_FILE"`; do
925        valPrint hn "<tr><td style=\"white-space:nowrap\">$STATUS ($CURL_RESULT)</td><td align=\"right\">$STR_TYPE</td><td><a href=\"$URL\" target=\"_blank\">$URL</a></td></tr>"
926        valPrint hn "<tr><td colspan=\"2\" align=\"right\">linked from</td><td><a href=\"$FULL_PAGE_PATH\" target=\"_blank\">$LOCAL_PAGE_PATH</a></td></tr>"
927  
928 +      # Place vertical space here since we won't be printing anything more about this link
929 +      if [ $STATUS == "OK" ]; then valPrint trh ""; fi
930 +
931        # Record redirect URL if one was given by a 3xx response page
932        if [ $STATUS == "RD" ]; then
933           valPrint ts "  Server suggests $NEW_URL"
# Line 989 | Line 992 | for LINE in `cat "$LINKS_FILE"`; do
992              valPrint trhs "Screenshot of URL $URL seems to have failed!"
993           fi
994        else
995 <         valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists."
995 >         valPrint trhs "Skipping screenshot of URL '$URL' because file '$SHOT_FILE' already exists."
996        fi
997     fi
998   done
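Throughout these hunks, valPrint's first argument is a string of flags that appears to route the message to one or more outputs (console, text report, RTF report, HTML report). The function itself is defined earlier in the script; a speculative sketch of that routing, with assumed report-path variable names and without modeling the other modifier letters (such as 'n' and 's') seen in the calls:

   function valPrint()
   {
      local DESTS=$1
      shift
      if [[ $DESTS == *c* ]]; then echo -e "$@"; fi                    # console
      if [[ $DESTS == *t* ]]; then echo -e "$@" >> "$TXT_REPORT"; fi   # text report (assumed name)
      if [[ $DESTS == *r* ]]; then echo -e "$@" >> "$RTF_REPORT"; fi   # RTF report (assumed name)
      if [[ $DESTS == *h* ]]; then echo -e "$@" >> "$HTML_REPORT"; fi  # HTML report (assumed name)
   }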

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)