--- Validate External Links/validate_external_links.sh 2020/03/17 16:07:35 1118
+++ Validate External Links/validate_external_links.sh 2020/03/18 00:24:42 1119
@@ -295,9 +295,8 @@ function printHTMfooter()
# The central logging function. The first parameter is a string composed of one or more characters that
# indicate which output to use: 'c' means console, 't' means the TXT log, 'r' means the RTF log, and
-# 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 'w' means "Don't
-# pass console output through 'fmt'" ("fmt" fits the output to an 80-column CLI but can break special
-# formatting and the 'n' option).
+# 'h' means the HTML log. 'n' means "Don't print a newline at the end of the line." 's' means "Print an
+# extra newline at the end." 'w' means "Don't pass console output through 'fmt'" ("fmt" fits the output
+# to an 80-column CLI but can break special formatting and the 'n' option).
function valPrint()
{
if [[ "$1" == *c* ]]; then
@@ -305,6 +304,8 @@ function valPrint()
echo -n "$2"
elif [[ "$1" == *w* ]]; then
echo "$2"
+ elif [[ "$1" == *s* ]]; then
+ echo -e "$2\n"
else
echo "$2" | fmt -w 80
fi
@@ -312,6 +313,8 @@ function valPrint()
if [[ "$1" == *t* ]]; then
if [[ "$1" == *n* ]]; then
echo -n "$2" >> "$LOG_TXT"
+ elif [[ "$1" == *s* ]]; then
+ echo -e "$2\n" >> "$LOG_TXT"
else
echo "$2" >> "$LOG_TXT"
fi
@@ -319,12 +322,16 @@ function valPrint()
if [[ "$1" == *r* ]]; then
if [[ "$1" == *n* ]]; then
echo "$2" >> "$LOG_RTF"
+ elif [[ "$1" == *s* ]]; then
+ echo "$2\line\line" >> "$LOG_RTF"
else
- echo "$2\\" >> "$LOG_RTF"
+ echo "$2\line" >> "$LOG_RTF"
fi
fi
if [[ "$1" == *h* ]]; then
- if [[ "$1" == *n* ]]; then
+ if [[ "$1" == *s* ]]; then
+   echo "$2<tr><td>&nbsp;</td></tr>" >> "$LOG_HTM"
+ elif [[ "$1" == *n* ]]; then
echo "$2" >> "$LOG_HTM"
else
echo "$2
" >> "$LOG_HTM"
@@ -598,9 +605,9 @@ for LINE in `cat "$LINKS_FILE"`; do
done
if [ "$NS_NAME" == "" ]; then
if [ $NS_ID == "NULL" ]; then
- valPrint tr "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki."
+ valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki."
else
- valPrint tr "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID."
+ valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID."
fi
let SKIP_UNK_NS+=1
continue
@@ -615,7 +622,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# JavaScript code, so it will return erroneous links
PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
if [ $PAGE_NAME_SUFFIX == "js" ]; then
- valPrint tr "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME."
+ valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME."
let SKIP_JS_PAGE+=1
continue
fi
@@ -636,7 +643,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# Scan for illegal characters
if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
- valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL."
+ valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL."
let SKIP_BAD_URL+=1
continue
fi
@@ -653,7 +660,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it
if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
- valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters."
+ valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters."
let SKIP_NON_ASCII+=1
continue
fi
@@ -716,7 +723,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# If this suffix escaped identification as either a file, page or TLD, inform the user
STR_TYPE=""
if [ $IS_FILE -eq -1 ]; then
- valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
+ valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
let SKIP_UNK_SUFFIX+=1
continue
elif [ $IS_FILE -eq 1 ]; then
@@ -821,7 +828,7 @@ for LINE in `cat "$LINKS_FILE"`; do
# If we didn't match a known status code, advise the reader
if [ $STATUS == "??" ]; then
- valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE."
+ valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE."
let SKIP_UNK_CODE+=1
continue
fi
@@ -842,7 +849,7 @@ for LINE in `cat "$LINKS_FILE"`; do
if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == $LOCAL_PAGE_PATH ]; then
EXCEPT_CODE=${GREP_RESULT%%,*}
if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then
- valPrint tr "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file."
+ valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file."
if [ $STATUS == "EI" ]; then
let SKIP_EXPECT_EI+=1
elif [ $STATUS == "IW" ]; then
@@ -874,25 +881,25 @@ for LINE in `cat "$LINKS_FILE"`; do
# Record redirect URL if one was given by a 3xx response page
if [ $STATUS == "RD" ]; then
- valPrint t " Server suggests $NEW_URL"
- valPrint r " Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}"
-  valPrint hn "<tr><td>Server suggests</td><td>$NEW_URL</td></tr>
-"
+ valPrint ts " Server suggests $NEW_URL"
+ valPrint rs " Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}"
+  valPrint hs "<tr><td>Server suggests</td><td>$NEW_URL</td></tr>
+"
fi
# Notify reader if we can use an intrawiki link for this URL
if [ $STATUS == "EI" ]; then
INTRA_PAGE=${URL#*://*/}
- valPrint t " Just use [[$INTRA_PAGE]]"
- valPrint r " Just use [[$INTRA_PAGE]]"
-  valPrint hn "<tr><td>Just use</td><td>[[$INTRA_PAGE]]</td></tr>
-"
+ valPrint ts " Just use [[$INTRA_PAGE]]"
+ valPrint rs " Just use [[$INTRA_PAGE]]"
+  valPrint hs "<tr><td>Just use</td><td>[[$INTRA_PAGE]]</td></tr>
+"
fi
# Notify reader if we can use an interwiki prefix for this URL
if [ $STATUS == "IW" ]; then
INTER_PAGE=$(echo "$URL" | sed 's/.*\///')
- valPrint t " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
- valPrint r " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
-  valPrint hn "<tr><td>You can use</td><td>[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]</td></tr>
-"
+ valPrint ts " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
+ valPrint rs " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
+  valPrint hs "<tr><td>You can use</td><td>[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]</td></tr>
+"
fi
# Query Internet Archive for latest "OK" snapshot for "NG" page
@@ -906,17 +913,17 @@ for LINE in `cat "$LINKS_FILE"`; do
# ...isolate "url" property in the response that follows the "closest" tag
SNAPSHOT_URL=${ARCHIVE_QUERY##*\"closest\":} # everything after '"closest":'
- SNAPSHOT_URL=${SNAPSHOT_URL##*\"url\": \"} # everything after '"url": "'
+ SNAPSHOT_URL=${SNAPSHOT_URL#*\"url\": \"} # everything after '"url": "'
SNAPSHOT_URL=${SNAPSHOT_URL%%\"*} # everything before '"'
# Inform the user of the snapshot URL
- valPrint t " IA suggests $SNAPSHOT_URL"
- valPrint r " IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}"
-  valPrint hn "<tr><td>IA suggests</td><td>$SNAPSHOT_URL</td></tr>
-"
+ valPrint ts " IA suggests $SNAPSHOT_URL"
+ valPrint rs " IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}"
+  valPrint hs "<tr><td>IA suggests</td><td>$SNAPSHOT_URL</td></tr>
+"
else # ...otherwise give generic Wayback Machine link for this URL
- valPrint t " Try browsing $ARCHIVE_GENERIC/$URL"
- valPrint r " Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}"
-  valPrint hn "<tr><td>Try browsing</td><td>$ARCHIVE_GENERIC/$URL</td></tr>
-"
+ valPrint ts " Try browsing $ARCHIVE_GENERIC/$URL"
+ valPrint rs " Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}"
+ valPrint hs "| Try browsing | $ARCHIVE_GENERIC/$URL |
"
fi
fi
fi
@@ -933,10 +940,10 @@ for LINE in `cat "$LINKS_FILE"`; do
if [ -f "$WORKING_DIR/$CHROME_SCREENSHOT" ]; then
mv -n "$WORKING_DIR/$CHROME_SCREENSHOT" "$SHOT_FILE"
else
- valPrint trh "Screenshot of URL $URL seems to have failed!"
+ valPrint trhs "Screenshot of URL $URL seems to have failed!"
fi
else
- valPrint trh "Skipping screenshot of URL $URL because $SHOT_FILE already exists."
+ valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists."
fi
fi
done
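
A note on the 's' flag introduced above: it maps "print an extra blank line after this message" onto each output format ("\n" on the console and in the TXT log, "\line\line" in the RTF log, and a spacer table row in the HTML log). A minimal usage sketch in Bash, assuming only the flag semantics documented in the hunk at old line 295; the messages and the $LONG_LINE and $STATUS variables are illustrative, not taken from the script:

    # TXT + RTF logs, each message followed by a blank line ('s')
    valPrint trs "Skipping URL $URL because it failed validation."

    # Console only, bypassing the 80-column 'fmt' wrap ('w')
    valPrint cw "$LONG_LINE"

    # Build one TXT-log line from two calls: 'n' suppresses the newline...
    valPrint tn "Result: "
    # ...and a plain call finishes the line
    valPrint t "$STATUS"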
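
The hunk at old line 906 also fixes a subtle parameter-expansion bug: '##' deletes the longest matching prefix, so if '"url": "' occurred more than once after '"closest":', the old code would jump to the last occurrence, while '#' deletes the shortest prefix and stops at the first. A Bash sketch of the corrected extraction, using an illustrative (not verbatim) Internet Archive availability response:

    # Illustrative response layout; the real API's field order may differ.
    ARCHIVE_QUERY='{"closest": {"available": true, "url": "https://web.archive.org/web/2019/http://example.com/", "timestamp": "20190101"}}'
    SNAPSHOT_URL=${ARCHIVE_QUERY##*\"closest\":} # everything after '"closest":'
    SNAPSHOT_URL=${SNAPSHOT_URL#*\"url\": \"}    # shortest match: first '"url": "'
    SNAPSHOT_URL=${SNAPSHOT_URL%%\"*}            # everything before the next '"'
    echo "$SNAPSHOT_URL"                         # prints the snapshot URL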