[ViewVC] Diff of: Oni2/Validate External Links/validate_external_links.sh

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1175 by iritscen, Tue Aug 23 14:15:48 2022 UTC vs.
Revision 1177 by iritscen, Fri Jan 13 22:26:56 2023 UTC

+#!/bin/bash
-<
+# Validate External Links by Iritscen
->
+# Validate External Links by Iritscen (iritscen@yahoo.com)
+# Validates a list of external links in CSV format. The resulting logs are produced in three formats:
+# - TXT (for easy diffing with an earlier log)
+# Settings -- these will be changed from their defaults by the arguments passed in to the script
+LINKS_URL=""           # download external link CSV from this location (can use "file://" protocol)
+EXCEPT_URL=""          # location of wiki page with a list of exceptions for NG results
-<
+OUTPUT_DIR=""          # place reports and all other output in a folder inside this existing folder
->
+OUTPUT_DIR=""           # place reports and all other output in a folder inside this existing folder
+RECORD_OK_LINKS=0      # record response code to the log even when it's a value in OK_CODES
+SHOW_SLASH=0           # record issue when a slash is added to the end of a URL
+SHOW_HTTPS=0           # record issue when "http" is upgraded to "https"
+OK_LINKS=0
+RD_LINKS=0
+NG_LINKS=0
-+
+SKIP_PARSE_FAIL=0
-+
+SKIP_UNK_PROT=0
+SKIP_UNK_NS=0
+SKIP_JS_PAGE=0
+SKIP_BAD_URL=0
+   # Do some math on results of session
+   LINKS_PROCESSED=$((LINK_NUM-URL_START+1))
+   TRIVIAL_RDS=$((SKIP_SLASH_ADD+SKIP_HTTPS_UP+SKIP_YOUTU_BE))
-<
+   LINK_ERRORS=$((SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE))
->
+   LINK_ERRORS=$((SKIP_PARSE_FAIL+SKIP_UNK_PROT+SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE))
+   LINKS_EXCEPTED=$((SKIP_EXPECT_NG+SKIP_EXPECT_RD+SKIP_EXPECT_EI+SKIP_EXPECT_IW))
+   LINK_PROBLEMS_TOTAL=$((NG_LINKS+RD_LINKS+EI_LINKS+IW_LINKS))
+   LINK_PROBLEMS_NG=$((NG_LINKS-SKIP_EXPECT_NG))
+      valPrint h "$LINK_ERRORS link $(pluralCheckNoun error $LINK_ERRORS) (see <a href=\"$LOG_NAME_RTF\" target=\"_blank\">RTF</a> or <a href=\"$LOG_NAME_TXT\" target=\"_blank\">TXT</a> report for specific links):"
+      valPrint rt "$LINK_ERRORS link $(pluralCheckNoun error $LINK_ERRORS):"
+   fi
-+
+   if [ $SKIP_PARSE_FAIL -gt 0 ]; then valPrint ctrh "- $SKIP_PARSE_FAIL line-parsing $(pluralCheckNoun failure $SKIP_PARSE_FAIL)"; fi
-+
+   if [ $SKIP_UNK_PROT -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_PROT unknown $(pluralCheckNoun protocol $SKIP_UNK_PROT)"; fi
+   if [ $SKIP_UNK_NS -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_NS missing/unknown $(pluralCheckNoun namespace $SKIP_UNK_NS)"; fi
+   if [ $SKIP_JS_PAGE -gt 0 ]; then valPrint ctrh "- $SKIP_JS_PAGE $(pluralCheckNoun link $SKIP_JS_PAGE) on $(pluralCheckA $SKIP_JS_PAGE)JavaScript $(pluralCheckNoun page $SKIP_JS_PAGE)"; fi
+   if [ $SKIP_BAD_URL -gt 0 ]; then valPrint ctrh "- $SKIP_BAD_URL illegal $(pluralCheckNoun URL $SKIP_BAD_URL)"; fi
+      FINISHED_LIST="limit"
+      wrapupAndExit
+   fi
-+
-+
+   # Parse line into namespace ID number, containing wiki page, and external link URL
-+
+   NS_ID=${LINE%%,*}
-+
+   PAGE_NAME=${LINE#$NS_ID,}
-+
+   PAGE_NAME=${PAGE_NAME%%,*} # a comma in the page name will break this
-+
+   URL=${LINE#$NS_ID,$PAGE_NAME,} # commas can be in this
-+
+   if [ -z "$NS_ID" ] || [ -z "$PAGE_NAME" ] || [ -z "$URL" ]; then
-+
+      valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace, wiki page or link URL could not be read."
-+
+      let SKIP_PARSE_FAIL+=1
-+
+      continue
-+
+   fi
-+
-+
+   # Skip any link that isn't "http://" or "https://"
-+
+   if [[ ! $URL =~ ^http* ]]; then
-+
+      valPrint trs "Skipping line $LINK_NUM ('$LINE') because the protocol isn't 'http://' or 'https://'."
-+
+      let SKIP_UNK_PROT+=1
-+
+      continue
-+
+   fi
+   # Print progress to screen
+   if [ $LINK_NUM -gt 1 ]; then
+   fi
+   valPrint cn "Evaluating URL $LINK_NUM/$LINK_COUNT..."
-–
+   # The number of the namespace is the element before the first comma on the line
-–
+   NS_ID=${LINE%%,*}
-–
+   # Find namespace number in NS_IDS and use it to look up namespace's name in NS_NAMES
+   NS_NAME=""
+   a=0
+      continue
+   fi
-–
+   # The name of the page is everything between the namespace ID and the next comma on the line (commas
-–
+   # in page names will break this)
-–
+   PAGE_NAME=${LINE#$NS_ID,}
-–
+   PAGE_NAME=${PAGE_NAME%%,*}
-–
+   # Build longer wiki page URLs from namespace and page names
+   FULL_PAGE_PATH=https://$WIKI_PATH/$NS_NAME:$PAGE_NAME
+   LOCAL_PAGE_PATH=$NS_NAME:$PAGE_NAME
+      continue
+   fi
-–
+   # The URL being linked to is everything after the previous two fields (this allows commas to be in
-–
+   # the URLs, but a comma in the previous field, the page name, will break this)
-–
+   URL=${LINE#$NS_ID,$PAGE_NAME,}
-–
+   # Scan for illegal characters
+   if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
+      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because it contains characters illegal in a URL."

Diff Legend

-– Removed lines
-+ Added lines
-< Changed lines (old)
-> Changed lines (new)

Comparing Validate External Links/validate_external_links.sh (file contents): Revision 1175 by iritscen, Tue Aug 23 14:15:48 2022 UTC vs. Revision 1177 by iritscen, Fri Jan 13 22:26:56 2023 UTC

Diff Legend

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1175 by iritscen, Tue Aug 23 14:15:48 2022 UTC vs.
Revision 1177 by iritscen, Fri Jan 13 22:26:56 2023 UTC