--- Validate External Links/validate_external_links.sh 2020/03/17 16:07:35 1118
+++ Validate External Links/validate_external_links.sh 2020/03/18 00:24:42 1119
@@ -295,9 +295,8 @@ function printHTMfooter()
 # The central logging function. The first parameter is a string composed of one or more characters that
 # indicate which output to use: 'c' means console, 't' means the TXT log, 'r' means the RTF log, and
-# 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 'w' means "Don't
-# pass console output through 'fmt'" ("fmt" fits the output to an 80-column CLI but can break special
-# formatting and the 'n' option).
+# 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 's' means "Print an extra newline at the end." 'w' means "Don't pass console output through 'fmt'" ("fmt" fits the output
+# to an 80-column CLI but can break special formatting and the 'n' option).
 function valPrint()
 {
    if [[ "$1" == *c* ]]; then
@@ -305,6 +304,8 @@ function valPrint()
       echo -n "$2"
    elif [[ "$1" == *w* ]]; then
       echo "$2"
+   elif [[ "$1" == *s* ]]; then
+      echo -e "$2\n"
    else
       echo "$2" | fmt -w 80
    fi
@@ -312,6 +313,8 @@ function valPrint()
    if [[ "$1" == *t* ]]; then
       if [[ "$1" == *n* ]]; then
          echo -n "$2" >> "$LOG_TXT"
+      elif [[ "$1" == *s* ]]; then
+         echo -e "$2\n" >> "$LOG_TXT"
       else
          echo "$2" >> "$LOG_TXT"
       fi
@@ -319,12 +322,16 @@ function valPrint()
    if [[ "$1" == *r* ]]; then
       if [[ "$1" == *n* ]]; then
          echo "$2" >> "$LOG_RTF"
+      elif [[ "$1" == *s* ]]; then
+         echo "$2\line\line" >> "$LOG_RTF"
       else
-         echo "$2\\" >> "$LOG_RTF"
+         echo "$2\line" >> "$LOG_RTF"
       fi
    fi
    if [[ "$1" == *h* ]]; then
-      if [[ "$1" == *n* ]]; then
+      if [[ "$1" == *s* ]]; then
+         echo "$2<br /><br />" >> "$LOG_HTM"
+      elif [[ "$1" == *n* ]]; then
          echo "$2" >> "$LOG_HTM"
       else
          echo "$2<br />" >> "$LOG_HTM"
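A note on the valPrint() hunks above: the flag string is tested with plain substring matches, so flags can be combined in any order ("trhs", "cn", and so on), but within each output block only the first matching branch runs. In both the console and TXT blocks, 'n' is tested before the new 's', so combining the two resolves to 'n'. A minimal standalone sketch of the same dispatch pattern, with DEMO_LOG as a hypothetical log path (an illustration, not the script's own code):

   #!/bin/bash
   # Sketch of the flag-dispatch pattern used by valPrint(); DEMO_LOG is a stand-in path.
   DEMO_LOG=/tmp/demo.txt
   function demoPrint()
   {
      if [[ "$1" == *c* ]]; then           # console output requested
         if [[ "$1" == *n* ]]; then echo -n "$2"
         elif [[ "$1" == *s* ]]; then echo -e "$2\n"   # message plus a blank line
         else echo "$2"
         fi
      fi
      if [[ "$1" == *t* ]]; then           # TXT log requested
         if [[ "$1" == *s* ]]; then echo -e "$2\n" >> "$DEMO_LOG"
         else echo "$2" >> "$DEMO_LOG"
         fi
      fi
   }
   demoPrint cts "Skipping URL on line 42."   # console + TXT, blank line after each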
" >> "$LOG_HTM" @@ -598,9 +605,9 @@ for LINE in `cat "$LINKS_FILE"`; do done if [ "$NS_NAME" == "" ]; then if [ $NS_ID == "NULL" ]; then - valPrint tr "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." + valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." else - valPrint tr "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." + valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." fi let SKIP_UNK_NS+=1 continue @@ -615,7 +622,7 @@ for LINE in `cat "$LINKS_FILE"`; do # JavaScript code, so it will return erroneous links PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') if [ $PAGE_NAME_SUFFIX == "js" ]; then - valPrint tr "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." + valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." let SKIP_JS_PAGE+=1 continue fi @@ -636,7 +643,7 @@ for LINE in `cat "$LINKS_FILE"`; do # Scan for illegal characters if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then - valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL." + valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL." let SKIP_BAD_URL+=1 continue fi @@ -653,7 +660,7 @@ for LINE in `cat "$LINKS_FILE"`; do # 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then - valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters." + valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters." let SKIP_NON_ASCII+=1 continue fi @@ -716,7 +723,7 @@ for LINE in `cat "$LINKS_FILE"`; do # If this suffix escaped identification as either a file, page or TLD, inform the user STR_TYPE="" if [ $IS_FILE -eq -1 ]; then - valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." + valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." let SKIP_UNK_SUFFIX+=1 continue elif [ $IS_FILE -eq 1 ]; then @@ -821,7 +828,7 @@ for LINE in `cat "$LINKS_FILE"`; do # If we didn't match a known status code, advise the reader if [ $STATUS == "??" ]; then - valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE." + valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE." let SKIP_UNK_CODE+=1 continue fi @@ -842,7 +849,7 @@ for LINE in `cat "$LINKS_FILE"`; do if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == $LOCAL_PAGE_PATH ]; then EXCEPT_CODE=${GREP_RESULT%%,*} if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then - valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file." 
+ valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file." if [ $STATUS == "EI" ]; then let SKIP_EXPECT_EI+=1 elif [ $STATUS == "IW" ]; then @@ -874,25 +881,25 @@ for LINE in `cat "$LINKS_FILE"`; do # Record redirect URL if one was given by a 3xx response page if [ $STATUS == "RD" ]; then - valPrint t " Server suggests $NEW_URL" - valPrint r " Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}" - valPrint hn "Server suggests$NEW_URL" + valPrint ts " Server suggests $NEW_URL" + valPrint rs " Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}" + valPrint hs "Server suggests$NEW_URL" fi # Notify reader if we can use an intrawiki link for this URL if [ $STATUS == "EI" ]; then INTRA_PAGE=${URL#*://*/} - valPrint t " Just use [[$INTRA_PAGE]]" - valPrint r " Just use [[$INTRA_PAGE]]" - valPrint hn "Just use[[$INTRA_PAGE]]" + valPrint ts " Just use [[$INTRA_PAGE]]" + valPrint rs " Just use [[$INTRA_PAGE]]" + valPrint hs "Just use[[$INTRA_PAGE]]" fi # Notify reader if we can use an interwiki prefix for this URL if [ $STATUS == "IW" ]; then INTER_PAGE=$(echo "$URL" | sed 's/.*\///') - valPrint t " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" - valPrint r " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" - valPrint hn "You can use[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" + valPrint ts " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" + valPrint rs " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" + valPrint hs "You can use[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]" fi # Query Internet Archive for latest "OK" snapshot for "NG" page @@ -906,17 +913,17 @@ for LINE in `cat "$LINKS_FILE"`; do # ...isolate "url" property in the response that follows the "closest" tag SNAPSHOT_URL=${ARCHIVE_QUERY##*\"closest\":} # everything after '"closest":' - SNAPSHOT_URL=${SNAPSHOT_URL##*\"url\": \"} # everything after '"url": "' + SNAPSHOT_URL=${SNAPSHOT_URL#*\"url\": \"} # everything after '"url": "' SNAPSHOT_URL=${SNAPSHOT_URL%%\"*} # everything before '"' # Inform the user of the snapshot URL - valPrint t " IA suggests $SNAPSHOT_URL" - valPrint r " IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}" - valPrint hn "IA suggests$SNAPSHOT_URL" + valPrint ts " IA suggests $SNAPSHOT_URL" + valPrint rs " IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}" + valPrint hs "IA suggests$SNAPSHOT_URL" else # ...otherwise give generic Wayback Machine link for this URL - valPrint t " Try browsing $ARCHIVE_GENERIC/$URL" - valPrint r " Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}" - valPrint hn "Try browsing$ARCHIVE_GENERIC/$URL" + valPrint ts " Try browsing $ARCHIVE_GENERIC/$URL" + valPrint rs " Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}" + valPrint hs "Try browsing$ARCHIVE_GENERIC/$URL" fi fi fi @@ -933,10 +940,10 @@ for LINE in `cat "$LINKS_FILE"`; do if [ -f "$WORKING_DIR/$CHROME_SCREENSHOT" ]; then mv -n "$WORKING_DIR/$CHROME_SCREENSHOT" "$SHOT_FILE" else - valPrint trh "Screenshot of URL $URL seems to have failed!" + valPrint trhs "Screenshot of URL $URL seems to have failed!" fi else - valPrint trh "Skipping screenshot of URL $URL because $SHOT_FILE already exists." 
+ valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists." fi fi done