ViewVC Help
View File | Revision Log | View Changeset | Root Listing
root/Oni2/Validate External Links/validate_external_links.sh
(Generate patch)

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1118 by iritscen, Tue Mar 17 16:07:35 2020 UTC vs.
Revision 1119 by iritscen, Wed Mar 18 00:24:42 2020 UTC

# Line 295 | Line 295 | function printHTMfooter()
295  
296   # The central logging function. The first parameter is a string composed of one or more characters that
297   # indicate which output to use: 'c' means console, 't' means the TXT log, 'r' means the RTF log, and
298 < # 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 'w' means "Don't
299 < # pass console output through 'fmt'" ("fmt" fits the output to an 80-column CLI but can break special
300 < # formatting and the 'n' option).
298 > # 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 's' means "Print an extra newline at the end." 'w' means "Don't pass console output through 'fmt'" ("fmt" fits the output
299 > # to an 80-column CLI but can break special formatting and the 'n' option).
300   function valPrint()
301   {
302     if [[ "$1" == *c* ]]; then
# Line 305 | Line 304 | function valPrint()
304           echo -n "$2"
305        elif [[ "$1" == *w* ]]; then
306           echo "$2"
307 +      elif [[ "$1" == *s* ]]; then
308 +         echo -e "$2\n"
309        else
310           echo "$2" | fmt -w 80
311        fi
# Line 312 | Line 313 | function valPrint()
313     if [[ "$1" == *t* ]]; then
314        if [[ "$1" == *n* ]]; then
315           echo -n "$2" >> "$LOG_TXT"
316 +      elif [[ "$1" == *s* ]]; then
317 +         echo -e "$2\n" >> "$LOG_TXT"
318        else
319           echo "$2" >> "$LOG_TXT"
320        fi
# Line 319 | Line 322 | function valPrint()
322     if [[ "$1" == *r* ]]; then
323        if [[ "$1" == *n* ]]; then
324           echo "$2" >> "$LOG_RTF"
325 +      elif [[ "$1" == *s* ]]; then
326 +         echo "$2\line\line" >> "$LOG_RTF"
327        else
328 <         echo "$2\\" >> "$LOG_RTF"
328 >         echo "$2\line" >> "$LOG_RTF"
329        fi
330     fi
331     if [[ "$1" == *h* ]]; then
332 <      if [[ "$1" == *n* ]]; then
332 >      if [[ "$1" == *s* ]]; then
333 >         echo "$2<tr><td>&nbsp;</td></tr>" >> "$LOG_HTM"
334 >      elif [[ "$1" == *n* ]]; then
335           echo "$2" >> "$LOG_HTM"
336        else
337           echo "$2<br />" >> "$LOG_HTM"
# Line 598 | Line 605 | for LINE in `cat "$LINKS_FILE"`; do
605     done
606     if [ "$NS_NAME" == "" ]; then
607        if [ $NS_ID == "NULL" ]; then
608 <         valPrint tr "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki."
608 >         valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki."
609        else
610 <         valPrint tr "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID."
610 >         valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID."
611        fi
612        let SKIP_UNK_NS+=1
613        continue
# Line 615 | Line 622 | for LINE in `cat "$LINKS_FILE"`; do
622     # JavaScript code, so it will return erroneous links
623     PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
624     if [ $PAGE_NAME_SUFFIX == "js" ]; then
625 <      valPrint tr "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME."
625 >      valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME."
626        let SKIP_JS_PAGE+=1
627        continue
628     fi
# Line 636 | Line 643 | for LINE in `cat "$LINKS_FILE"`; do
643  
644     # Scan for illegal characters
645     if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
646 <      valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL."
646 >      valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL."
647        let SKIP_BAD_URL+=1
648        continue
649     fi
# Line 653 | Line 660 | for LINE in `cat "$LINKS_FILE"`; do
660  
661     # 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it
662     if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
663 <      valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters."
663 >      valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters."
664        let SKIP_NON_ASCII+=1
665        continue
666     fi
# Line 716 | Line 723 | for LINE in `cat "$LINKS_FILE"`; do
723     # If this suffix escaped identification as either a file, page or TLD, inform the user
724     STR_TYPE=""
725     if [ $IS_FILE -eq -1 ]; then
726 <      valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
726 >      valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
727        let SKIP_UNK_SUFFIX+=1
728        continue
729     elif [ $IS_FILE -eq 1 ]; then
# Line 821 | Line 828 | for LINE in `cat "$LINKS_FILE"`; do
828  
829     # If we didn't match a known status code, advise the reader
830     if [ $STATUS == "??" ]; then
831 <      valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE."
831 >      valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE."
832        let SKIP_UNK_CODE+=1
833        continue
834     fi
# Line 842 | Line 849 | for LINE in `cat "$LINKS_FILE"`; do
849        if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == $LOCAL_PAGE_PATH ]; then
850           EXCEPT_CODE=${GREP_RESULT%%,*}
851           if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then
852 <            valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file."
852 >            valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file."
853              if [ $STATUS == "EI" ]; then
854                 let SKIP_EXPECT_EI+=1
855              elif [ $STATUS == "IW" ]; then
# Line 874 | Line 881 | for LINE in `cat "$LINKS_FILE"`; do
881  
882        # Record redirect URL if one was given by a 3xx response page
883        if [ $STATUS == "RD" ]; then
884 <         valPrint t "  Server suggests $NEW_URL"
885 <         valPrint r "   Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}"
886 <         valPrint hn "<tr><td colspan=\"2\" align=\"right\">Server suggests</td><td><a href=\"$NEW_URL\" target=\"_blank\">$NEW_URL</a></td></tr>"
884 >         valPrint ts "  Server suggests $NEW_URL"
885 >         valPrint rs "  Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}"
886 >         valPrint hs "<tr><td colspan=\"2\" align=\"right\">Server suggests</td><td><a href=\"$NEW_URL\" target=\"_blank\">$NEW_URL</a></td></tr>"
887        fi
888  
889        # Notify reader if we can use an intrawiki link for this URL
890        if [ $STATUS == "EI" ]; then
891           INTRA_PAGE=${URL#*://*/}
892 <         valPrint t "  Just use [[$INTRA_PAGE]]"
893 <         valPrint r "           Just use [[$INTRA_PAGE]]"
894 <         valPrint hn "<tr><td colspan=\"2\" align=\"right\">Just use</td><td>[[$INTRA_PAGE]]</td></tr>"
892 >         valPrint ts "  Just use [[$INTRA_PAGE]]"
893 >         valPrint rs "          Just use [[$INTRA_PAGE]]"
894 >         valPrint hs "<tr><td colspan=\"2\" align=\"right\">Just use</td><td>[[$INTRA_PAGE]]</td></tr>"
895        fi
896  
897        # Notify reader if we can use an interwiki prefix for this URL
898        if [ $STATUS == "IW" ]; then
899           INTER_PAGE=$(echo "$URL" | sed 's/.*\///')
900 <         valPrint t "  You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
901 <         valPrint r "           You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
902 <         valPrint hn "<tr><td colspan=\"2\" align=\"right\">You can use</td><td>[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]</td></tr>"
900 >         valPrint ts "  You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
901 >         valPrint rs "          You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
902 >         valPrint hs "<tr><td colspan=\"2\" align=\"right\">You can use</td><td>[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]</td></tr>"
903        fi
904  
905        # Query Internet Archive for latest "OK" snapshot for "NG" page
# Line 906 | Line 913 | for LINE in `cat "$LINKS_FILE"`; do
913  
914              # ...isolate "url" property in the response that follows the "closest" tag
915              SNAPSHOT_URL=${ARCHIVE_QUERY##*\"closest\":} # everything after '"closest":'
916 <            SNAPSHOT_URL=${SNAPSHOT_URL##*\"url\": \"} # everything after '"url": "'
916 >            SNAPSHOT_URL=${SNAPSHOT_URL#*\"url\": \"} # everything after '"url": "'
917              SNAPSHOT_URL=${SNAPSHOT_URL%%\"*} # everything before '"'
918  
919              # Inform the user of the snapshot URL
920 <            valPrint t "  IA suggests $SNAPSHOT_URL"
921 <            valPrint r "                IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}"
922 <            valPrint hn "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td><a href=\"$SNAPSHOT_URL\" target=\"_blank\">$SNAPSHOT_URL</a></td></tr>"
920 >            valPrint ts "  IA suggests $SNAPSHOT_URL"
921 >            valPrint rs "               IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}"
922 >            valPrint hs "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td><a href=\"$SNAPSHOT_URL\" target=\"_blank\">$SNAPSHOT_URL</a></td></tr>"
923           else # ...otherwise give generic Wayback Machine link for this URL
924 <            valPrint t " Try browsing $ARCHIVE_GENERIC/$URL"
925 <            valPrint r "                Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}"
926 <            valPrint hn "<tr><td colspan=\"2\" align=\"right\">Try browsing</td><td><a href=\"$ARCHIVE_GENERIC/$URL\" target=\"_blank\">$ARCHIVE_GENERIC/$URL</a></td></tr>"
924 >            valPrint ts " Try browsing $ARCHIVE_GENERIC/$URL"
925 >            valPrint rs "               Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}"
926 >            valPrint hs "<tr><td colspan=\"2\" align=\"right\">Try browsing</td><td><a href=\"$ARCHIVE_GENERIC/$URL\" target=\"_blank\">$ARCHIVE_GENERIC/$URL</a></td></tr>"
927           fi
928        fi
929     fi
# Line 933 | Line 940 | for LINE in `cat "$LINKS_FILE"`; do
940           if [ -f "$WORKING_DIR/$CHROME_SCREENSHOT" ]; then
941              mv -n "$WORKING_DIR/$CHROME_SCREENSHOT" "$SHOT_FILE"
942           else
943 <            valPrint trh "Screenshot of URL $URL seems to have failed!"
943 >            valPrint trhs "Screenshot of URL $URL seems to have failed!"
944           fi
945        else
946 <         valPrint trh "Skipping screenshot of URL $URL because $SHOT_FILE already exists."
946 >         valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists."
947        fi
948     fi
949   done

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)