| 295 |
|
|
| 296 |
|
# The central logging function. The first parameter is a string composed of one or more characters that |
| 297 |
|
# indicate which output to use: 'c' means console, 't' means the TXT log, 'r' means the RTF log, and |
| 298 |
< |
# 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 'w' means "Don't |
| 299 |
< |
# pass console output through 'fmt'" ("fmt" fits the output to an 80-column CLI but can break special |
| 300 |
< |
# formatting and the 'n' option). |
| 298 |
> |
# 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 's' means "Print an extra newline at the end." 'w' means "Don't pass console output through 'fmt'" ("fmt" fits the output |
| 299 |
> |
# to an 80-column CLI but can break special formatting and the 'n' option). |
| 300 |
|
function valPrint() |
| 301 |
|
{ |
| 302 |
|
if [[ "$1" == *c* ]]; then |
| 304 |
|
echo -n "$2" |
| 305 |
|
elif [[ "$1" == *w* ]]; then |
| 306 |
|
echo "$2" |
| 307 |
+ |
elif [[ "$1" == *s* ]]; then |
| 308 |
+ |
echo -e "$2\n" |
| 309 |
|
else |
| 310 |
|
echo "$2" | fmt -w 80 |
| 311 |
|
fi |
| 313 |
|
if [[ "$1" == *t* ]]; then |
| 314 |
|
if [[ "$1" == *n* ]]; then |
| 315 |
|
echo -n "$2" >> "$LOG_TXT" |
| 316 |
+ |
elif [[ "$1" == *s* ]]; then |
| 317 |
+ |
echo -e "$2\n" >> "$LOG_TXT" |
| 318 |
|
else |
| 319 |
|
echo "$2" >> "$LOG_TXT" |
| 320 |
|
fi |
| 322 |
|
if [[ "$1" == *r* ]]; then |
| 323 |
|
if [[ "$1" == *n* ]]; then |
| 324 |
|
echo "$2" >> "$LOG_RTF" |
| 325 |
+ |
elif [[ "$1" == *s* ]]; then |
| 326 |
+ |
echo "$2\line\line" >> "$LOG_RTF" |
| 327 |
|
else |
| 328 |
< |
echo "$2\\" >> "$LOG_RTF" |
| 328 |
> |
echo "$2\line" >> "$LOG_RTF" |
| 329 |
|
fi |
| 330 |
|
fi |
| 331 |
|
if [[ "$1" == *h* ]]; then |
| 332 |
< |
if [[ "$1" == *n* ]]; then |
| 332 |
> |
if [[ "$1" == *s* ]]; then |
| 333 |
> |
echo "$2<tr><td> </td></tr>" >> "$LOG_HTM" |
| 334 |
> |
elif [[ "$1" == *n* ]]; then |
| 335 |
|
echo "$2" >> "$LOG_HTM" |
| 336 |
|
else |
| 337 |
|
echo "$2<br />" >> "$LOG_HTM" |
| 605 |
|
done |
| 606 |
|
if [ "$NS_NAME" == "" ]; then |
| 607 |
|
if [ $NS_ID == "NULL" ]; then |
| 608 |
< |
valPrint tr "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." |
| 608 |
> |
valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." |
| 609 |
|
else |
| 610 |
< |
valPrint tr "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." |
| 610 |
> |
valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." |
| 611 |
|
fi |
| 612 |
|
let SKIP_UNK_NS+=1 |
| 613 |
|
continue |
| 622 |
|
# JavaScript code, so it will return erroneous links |
| 623 |
|
PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') |
| 624 |
|
if [ $PAGE_NAME_SUFFIX == "js" ]; then |
| 625 |
< |
valPrint tr "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." |
| 625 |
> |
valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." |
| 626 |
|
let SKIP_JS_PAGE+=1 |
| 627 |
|
continue |
| 628 |
|
fi |
| 643 |
|
|
| 644 |
|
# Scan for illegal characters |
| 645 |
|
if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then |
| 646 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL." |
| 646 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL." |
| 647 |
|
let SKIP_BAD_URL+=1 |
| 648 |
|
continue |
| 649 |
|
fi |
| 660 |
|
|
| 661 |
|
# 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it |
| 662 |
|
if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then |
| 663 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters." |
| 663 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters." |
| 664 |
|
let SKIP_NON_ASCII+=1 |
| 665 |
|
continue |
| 666 |
|
fi |
| 723 |
|
# If this suffix escaped identification as either a file, page or TLD, inform the user |
| 724 |
|
STR_TYPE="" |
| 725 |
|
if [ $IS_FILE -eq -1 ]; then |
| 726 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." |
| 726 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." |
| 727 |
|
let SKIP_UNK_SUFFIX+=1 |
| 728 |
|
continue |
| 729 |
|
elif [ $IS_FILE -eq 1 ]; then |
| 828 |
|
|
| 829 |
|
# If we didn't match a known status code, advise the reader |
| 830 |
|
if [ $STATUS == "??" ]; then |
| 831 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE." |
| 831 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE." |
| 832 |
|
let SKIP_UNK_CODE+=1 |
| 833 |
|
continue |
| 834 |
|
fi |
| 849 |
|
if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == $LOCAL_PAGE_PATH ]; then |
| 850 |
|
EXCEPT_CODE=${GREP_RESULT%%,*} |
| 851 |
|
if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then |
| 852 |
< |
valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file." |
| 852 |
> |
valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file." |
| 853 |
|
if [ $STATUS == "EI" ]; then |
| 854 |
|
let SKIP_EXPECT_EI+=1 |
| 855 |
|
elif [ $STATUS == "IW" ]; then |
| 881 |
|
|
| 882 |
|
# Record redirect URL if one was given by a 3xx response page |
| 883 |
|
if [ $STATUS == "RD" ]; then |
| 884 |
< |
valPrint t " Server suggests $NEW_URL" |
| 885 |
< |
valPrint r " Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}" |
| 886 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">Server suggests</td><td><a href=\"$NEW_URL\" target=\"_blank\">$NEW_URL</a></td></tr>" |
| 884 |
> |
valPrint ts " Server suggests $NEW_URL" |
| 885 |
> |
valPrint rs " Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}" |
| 886 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">Server suggests</td><td><a href=\"$NEW_URL\" target=\"_blank\">$NEW_URL</a></td></tr>" |
| 887 |
|
fi |
| 888 |
|
|
| 889 |
|
# Notify reader if we can use an intrawiki link for this URL |
| 890 |
|
if [ $STATUS == "EI" ]; then |
| 891 |
|
INTRA_PAGE=${URL#*://*/} |
| 892 |
< |
valPrint t " Just use [[$INTRA_PAGE]]" |
| 893 |
< |
valPrint r " Just use [[$INTRA_PAGE]]" |
| 894 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">Just use</td><td>[[$INTRA_PAGE]]</td></tr>" |
| 892 |
> |
valPrint ts " Just use [[$INTRA_PAGE]]" |
| 893 |
> |
valPrint rs " Just use [[$INTRA_PAGE]]" |
| 894 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">Just use</td><td>[[$INTRA_PAGE]]</td></tr>" |
| 895 |
|
fi |
| 896 |
|
|
| 897 |
|
# Notify reader if we can use an interwiki prefix for this URL |
| 898 |
|
if [ $STATUS == "IW" ]; then |
| 899 |
|
INTER_PAGE=$(echo "$URL" | sed 's/.*\///') |
| 900 |
< |
valPrint t " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" |
| 901 |
< |
valPrint r " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" |
| 902 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">You can use</td><td>[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]</td></tr>" |
| 900 |
> |
valPrint ts " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" |
| 901 |
> |
valPrint rs " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" |
| 902 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">You can use</td><td>[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]</td></tr>" |
| 903 |
|
fi |
| 904 |
|
|
| 905 |
|
# Query Internet Archive for latest "OK" snapshot for "NG" page |
| 913 |
|
|
| 914 |
|
# ...isolate "url" property in the response that follows the "closest" tag |
| 915 |
|
SNAPSHOT_URL=${ARCHIVE_QUERY##*\"closest\":} # everything after '"closest":' |
| 916 |
< |
SNAPSHOT_URL=${SNAPSHOT_URL##*\"url\": \"} # everything after '"url": "' |
| 916 |
> |
SNAPSHOT_URL=${SNAPSHOT_URL#*\"url\": \"} # everything after '"url": "' |
| 917 |
|
SNAPSHOT_URL=${SNAPSHOT_URL%%\"*} # everything before '"' |
| 918 |
|
|
| 919 |
|
# Inform the user of the snapshot URL |
| 920 |
< |
valPrint t " IA suggests $SNAPSHOT_URL" |
| 921 |
< |
valPrint r " IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}" |
| 922 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td><a href=\"$SNAPSHOT_URL\" target=\"_blank\">$SNAPSHOT_URL</a></td></tr>" |
| 920 |
> |
valPrint ts " IA suggests $SNAPSHOT_URL" |
| 921 |
> |
valPrint rs " IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}" |
| 922 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td><a href=\"$SNAPSHOT_URL\" target=\"_blank\">$SNAPSHOT_URL</a></td></tr>" |
| 923 |
|
else # ...otherwise give generic Wayback Machine link for this URL |
| 924 |
< |
valPrint t " Try browsing $ARCHIVE_GENERIC/$URL" |
| 925 |
< |
valPrint r " Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}" |
| 926 |
< |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">Try browsing</td><td><a href=\"$ARCHIVE_GENERIC/$URL\" target=\"_blank\">$ARCHIVE_GENERIC/$URL</a></td></tr>" |
| 924 |
> |
valPrint ts " Try browsing $ARCHIVE_GENERIC/$URL" |
| 925 |
> |
valPrint rs " Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}" |
| 926 |
> |
valPrint hs "<tr><td colspan=\"2\" align=\"right\">Try browsing</td><td><a href=\"$ARCHIVE_GENERIC/$URL\" target=\"_blank\">$ARCHIVE_GENERIC/$URL</a></td></tr>" |
| 927 |
|
fi |
| 928 |
|
fi |
| 929 |
|
fi |
| 940 |
|
if [ -f "$WORKING_DIR/$CHROME_SCREENSHOT" ]; then |
| 941 |
|
mv -n "$WORKING_DIR/$CHROME_SCREENSHOT" "$SHOT_FILE" |
| 942 |
|
else |
| 943 |
< |
valPrint trh "Screenshot of URL $URL seems to have failed!" |
| 943 |
> |
valPrint trhs "Screenshot of URL $URL seems to have failed!" |
| 944 |
|
fi |
| 945 |
|
else |
| 946 |
< |
valPrint trh "Skipping screenshot of URL $URL because $SHOT_FILE already exists." |
| 946 |
> |
valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists." |
| 947 |
|
fi |
| 948 |
|
fi |
| 949 |
|
done |