295 |
|
|
296 |
|
# The central logging function. The first parameter is a string composed of one or more characters that |
297 |
|
# indicate which output to use: 'c' means console, 't' means the TXT log, 'r' means the RTF log, and |
298 |
< |
# 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 'w' means "Don't |
299 |
< |
# pass console output through 'fmt'" ("fmt" fits the output to an 80-column CLI but can break special |
300 |
< |
# formatting and the 'n' option). |
298 |
> |
# 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 's' means "Print an extra newline at the end." 'w' means "Don't pass console output through 'fmt'" ("fmt" fits the output |
299 |
> |
# to an 80-column CLI but can break special formatting and the 'n' option). |
300 |
|
function valPrint() |
301 |
|
{ |
302 |
|
if [[ "$1" == *c* ]]; then |
304 |
|
echo -n "$2" |
305 |
|
elif [[ "$1" == *w* ]]; then |
306 |
|
echo "$2" |
307 |
+ |
elif [[ "$1" == *s* ]]; then |
308 |
+ |
echo -e "$2\n" |
309 |
|
else |
310 |
|
echo "$2" | fmt -w 80 |
311 |
|
fi |
313 |
|
if [[ "$1" == *t* ]]; then |
314 |
|
if [[ "$1" == *n* ]]; then |
315 |
|
echo -n "$2" >> "$LOG_TXT" |
316 |
+ |
elif [[ "$1" == *s* ]]; then |
317 |
+ |
echo -e "$2\n" >> "$LOG_TXT" |
318 |
|
else |
319 |
|
echo "$2" >> "$LOG_TXT" |
320 |
|
fi |
322 |
|
if [[ "$1" == *r* ]]; then |
323 |
|
if [[ "$1" == *n* ]]; then |
324 |
|
echo "$2" >> "$LOG_RTF" |
325 |
+ |
elif [[ "$1" == *s* ]]; then |
326 |
+ |
echo "$2\line\line" >> "$LOG_RTF" |
327 |
|
else |
328 |
< |
echo "$2\\" >> "$LOG_RTF" |
328 |
> |
echo "$2\line" >> "$LOG_RTF" |
329 |
|
fi |
330 |
|
fi |
331 |
|
if [[ "$1" == *h* ]]; then |
332 |
< |
if [[ "$1" == *n* ]]; then |
332 |
> |
if [[ "$1" == *s* ]]; then |
333 |
> |
echo "$2<tr><td> </td></tr>" >> "$LOG_HTM" |
334 |
> |
elif [[ "$1" == *n* ]]; then |
335 |
|
echo "$2" >> "$LOG_HTM" |
336 |
|
else |
337 |
|
echo "$2<br />" >> "$LOG_HTM" |
605 |
|
done |
606 |
|
if [ "$NS_NAME" == "" ]; then |
607 |
|
if [ $NS_ID == "NULL" ]; then |
608 |
< |
valPrint tr "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." |
608 |
> |
valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." |
609 |
|
else |
610 |
< |
valPrint tr "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." |
610 |
> |
valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." |
611 |
|
fi |
612 |
|
let SKIP_UNK_NS+=1 |
613 |
|
continue |
622 |
|
# JavaScript code, so it will return erroneous links |
623 |
|
PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') |
624 |
|
if [ $PAGE_NAME_SUFFIX == "js" ]; then |
625 |
< |
valPrint tr "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." |
625 |
> |
valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." |
626 |
|
let SKIP_JS_PAGE+=1 |
627 |
|
continue |
628 |
|
fi |
643 |
|
|
644 |
|
# Scan for illegal characters |
645 |
|
if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then |
646 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL." |
646 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL." |
647 |
|
let SKIP_BAD_URL+=1 |
648 |
|
continue |
649 |
|
fi |
660 |
|
|
661 |
|
# 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make the user check it |
662 |
|
if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then |
663 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters." |
663 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters." |
664 |
|
let SKIP_NON_ASCII+=1 |
665 |
|
continue |
666 |
|
fi |
723 |
|
# If this suffix escaped identification as a file, page, or TLD, inform the user |
724 |
|
STR_TYPE="" |
725 |
|
if [ $IS_FILE -eq -1 ]; then |
726 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." |
726 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." |
727 |
|
let SKIP_UNK_SUFFIX+=1 |
728 |
|
continue |
729 |
|
elif [ $IS_FILE -eq 1 ]; then |
828 |
|
|
829 |
|
# If we didn't match a known status code, advise the reader |
830 |
|
if [ $STATUS == "??" ]; then |
831 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE." |
831 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE." |
832 |
|
let SKIP_UNK_CODE+=1 |
833 |
|
continue |
834 |
|
fi |
849 |
|
if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == $LOCAL_PAGE_PATH ]; then |
850 |
|
EXCEPT_CODE=${GREP_RESULT%%,*} |
851 |
|
if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then |
852 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file." |
852 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file." |
853 |
|
if [ $STATUS == "EI" ]; then |
854 |
|
let SKIP_EXPECT_EI+=1 |
855 |
|
elif [ $STATUS == "IW" ]; then |
881 |
|
|
882 |
|
# Record redirect URL if one was given by a 3xx response page |
883 |
|
if [ $STATUS == "RD" ]; then |
884 |
< |
valPrint t " Server suggests $NEW_URL" |
885 |
< |
valPrint r " Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}" |
886 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">Server suggests</td><td><a href=\"$NEW_URL\" target=\"_blank\">$NEW_URL</a></td></tr>" |
884 |
> |
valPrint ts " Server suggests $NEW_URL" |
885 |
> |
valPrint rs " Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}" |
886 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">Server suggests</td><td><a href=\"$NEW_URL\" target=\"_blank\">$NEW_URL</a></td></tr>" |
887 |
|
fi |
888 |
|
|
889 |
|
# Notify reader if we can use an intrawiki link for this URL |
890 |
|
if [ $STATUS == "EI" ]; then |
891 |
|
INTRA_PAGE=${URL#*://*/} |
892 |
< |
valPrint t " Just use [[$INTRA_PAGE]]" |
893 |
< |
valPrint r " Just use [[$INTRA_PAGE]]" |
894 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">Just use</td><td>[[$INTRA_PAGE]]</td></tr>" |
892 |
> |
valPrint ts " Just use [[$INTRA_PAGE]]" |
893 |
> |
valPrint rs " Just use [[$INTRA_PAGE]]" |
894 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">Just use</td><td>[[$INTRA_PAGE]]</td></tr>" |
895 |
|
fi |
896 |
|
|
897 |
|
# Notify reader if we can use an interwiki prefix for this URL |
898 |
|
if [ $STATUS == "IW" ]; then |
899 |
|
INTER_PAGE=$(echo "$URL" | sed 's/.*\///') |
900 |
< |
valPrint t " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" |
901 |
< |
valPrint r " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" |
902 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">You can use</td><td>[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]</td></tr>" |
900 |
> |
valPrint ts " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" |
901 |
> |
valPrint rs " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" |
902 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">You can use</td><td>[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]</td></tr>" |
903 |
|
fi |
904 |
|
|
905 |
|
# Query Internet Archive for latest "OK" snapshot for "NG" page |
913 |
|
|
914 |
|
# ...isolate "url" property in the response that follows the "closest" tag |
915 |
|
SNAPSHOT_URL=${ARCHIVE_QUERY##*\"closest\":} # everything after '"closest":' |
916 |
< |
SNAPSHOT_URL=${SNAPSHOT_URL##*\"url\": \"} # everything after '"url": "' |
916 |
> |
SNAPSHOT_URL=${SNAPSHOT_URL#*\"url\": \"} # everything after '"url": "' |
917 |
|
SNAPSHOT_URL=${SNAPSHOT_URL%%\"*} # everything before '"' |
918 |
|
|
919 |
|
# Inform the user of the snapshot URL |
920 |
< |
valPrint t " IA suggests $SNAPSHOT_URL" |
921 |
< |
valPrint r " IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}" |
922 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td><a href=\"$SNAPSHOT_URL\" target=\"_blank\">$SNAPSHOT_URL</a></td></tr>" |
920 |
> |
valPrint ts " IA suggests $SNAPSHOT_URL" |
921 |
> |
valPrint rs " IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}" |
922 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td><a href=\"$SNAPSHOT_URL\" target=\"_blank\">$SNAPSHOT_URL</a></td></tr>" |
923 |
|
else # ...otherwise give generic Wayback Machine link for this URL |
924 |
< |
valPrint t " Try browsing $ARCHIVE_GENERIC/$URL" |
925 |
< |
valPrint r " Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}" |
926 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">Try browsing</td><td><a href=\"$ARCHIVE_GENERIC/$URL\" target=\"_blank\">$ARCHIVE_GENERIC/$URL</a></td></tr>" |
924 |
> |
valPrint ts " Try browsing $ARCHIVE_GENERIC/$URL" |
925 |
> |
valPrint rs " Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}" |
926 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">Try browsing</td><td><a href=\"$ARCHIVE_GENERIC/$URL\" target=\"_blank\">$ARCHIVE_GENERIC/$URL</a></td></tr>" |
927 |
|
fi |
928 |
|
fi |
929 |
|
fi |
940 |
|
if [ -f "$WORKING_DIR/$CHROME_SCREENSHOT" ]; then |
941 |
|
mv -n "$WORKING_DIR/$CHROME_SCREENSHOT" "$SHOT_FILE" |
942 |
|
else |
943 |
< |
valPrint trh "Screenshot of URL $URL seems to have failed!" |
943 |
> |
valPrint trhs "Screenshot of URL $URL seems to have failed!" |
944 |
|
fi |
945 |
|
else |
946 |
< |
valPrint trh "Skipping screenshot of URL $URL because $SHOT_FILE already exists." |
946 |
> |
valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists." |
947 |
|
fi |
948 |
|
fi |
949 |
|
done |