| 6 |
|
# (for reading as a local file with clickable links), and HTML (for uploading as a web page). |
| 7 |
|
# Call script with "--help" argument for documentation. Also see Read Me First.rtf for critical notes. |
| 8 |
|
# Recommended rule: |
| 9 |
< |
# ------------------------------------------------------------------------------------------------------ |
| 9 |
> |
# |----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----| |
| 10 |
|
|
| 11 |
|
# Set separator token to newline |
| 12 |
|
IFS=" |
| 26 |
|
UPLOAD_INFO="" # path to a file on your hard drive with the login info needed to upload a report |
| 27 |
|
|
| 28 |
|
# Fixed strings -- see the occurrences of these variables to learn their purpose |
| 29 |
< |
AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0" |
| 29 |
> |
AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36 OPR/67.0.3575.53" |
| 30 |
|
ARCHIVE_API="http://archive.org/wayback/available" |
| 31 |
|
ARCHIVE_GENERIC="https://web.archive.org/web/*" |
| 32 |
|
ARCHIVE_OK_CODES="statuscodes=200&statuscodes=203&statuscodes=206" |
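# For reference, a sketch of the availability query these two constants are combined into
# later in the script (the example URL is illustrative):
#   curl --silent --max-time 10 "$ARCHIVE_API?url=http://example.com&$ARCHIVE_OK_CODES"
# The "statuscodes" arguments ask the Wayback Machine to suggest only snapshots that were
# captured with a success code (200, 203 or 206).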
| 87 |
|
PAGE_LINKS=0 |
| 88 |
|
SKIPPED_HEADER_ROW=0 |
| 89 |
|
FINISHED_LIST="no" |
| 90 |
+ |
START_RUN=0 |
| 91 |
+ |
END_RUN=0 |
| 92 |
|
|
| 93 |
|
|
| 94 |
|
### HELP ### |
| 422 |
|
fi |
| 423 |
|
fi |
| 424 |
|
|
| 425 |
+ |
# Generate string with elapsed time |
| 426 |
+ |
END_RUN=$(date +%s) |
| 427 |
+ |
ELAPSED=$(echo $(($END_RUN - $START_RUN)) | awk '{printf "%d min. %d sec. elapsed", int($1/60), int($1%60)}') |
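# (For example, a run of 330 seconds formats as "5 min. 30 sec. elapsed".)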
| 428 |
+ |
|
| 429 |
|
# Output results of session and close the log file's markup |
| 430 |
|
LINKS_PROCESSED=$((LINK_NUM-URL_START+1)) |
| 431 |
|
LINKS_SKIPPED=$((SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE)) |
| 432 |
|
LINKS_CHECKED=$((LINKS_PROCESSED-LINKS_SKIPPED)) |
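# (URL_START and LINK_NUM are inclusive bounds, hence the "+1"; the checked count is
# everything processed minus the links skipped for any of the six reasons summed above.)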
| 433 |
< |
valPrint ct "Summary:" |
| 434 |
< |
valPrint r "\b1 Summary \b0" |
| 435 |
< |
valPrint hn "<h3><span id=\"summary\">Summary</span></h3>" |
| 433 |
> |
valPrint ct "Summary ($ELAPSED):" |
| 434 |
> |
valPrint r "\b1 Summary \b0 ($ELAPSED)" |
| 435 |
> |
valPrint hn "<h3><span id=\"summary\">Summary ($ELAPSED)</span></h3>" |
| 436 |
|
valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT)." |
| 437 |
|
valPrint ctrh "I skipped $LINKS_SKIPPED $(pluralCheckNoun link $LINKS_SKIPPED), and found $FILE_LINKS $(pluralCheckNoun file $FILE_LINKS) and $PAGE_LINKS $(pluralCheckNoun page $PAGE_LINKS)." |
| 438 |
|
if [ $LINKS_SKIPPED -gt 0 ]; then valPrint ctrh "Skip breakdown: "; fi |
| 546 |
|
|
| 547 |
|
|
| 548 |
|
### MAIN LOOP ### |
| 549 |
+ |
START_RUN=$(date +%s) |
| 550 |
|
# Process each line of the .csv in LINKS_FILE |
| 551 |
|
for LINE in `cat "$LINKS_FILE"`; do |
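# (Word-splitting here relies on the IFS assignment at the top of the script: with the
# separator token set to a bare newline, each pass of the loop receives one full .csv row.)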
| 552 |
|
let LINK_NUM+=1 |
| 588 |
|
NS_NAME="" |
| 589 |
|
a=0 |
| 590 |
|
while [ "x${NS_IDS[$a]}" != "x" ]; do # once this evaluates to "x", the array is done |
| 591 |
< |
if [ $NS_ID -eq ${NS_IDS[$a]} ]; then |
| 591 |
> |
if [ "$NS_ID" == "NULL" ]; then
| 592 |
> |
break |
| 593 |
> |
elif [ $NS_ID -eq ${NS_IDS[$a]} ]; then |
| 594 |
|
NS_NAME="${NS_NAMES[$a]}" |
| 595 |
|
break |
| 596 |
|
fi |
| 597 |
|
let a+=1 |
| 598 |
|
done |
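# (NS_IDS and NS_NAMES are parallel arrays, presumably loaded earlier with the wiki's
# namespace IDs and their matching names; the scan stops at the first unset slot in NS_IDS.)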
| 599 |
< |
if [ -z "$NS_NAME" ]; then |
| 600 |
< |
valPrint tr "Skipping URL found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." |
| 599 |
> |
if [ "$NS_NAME" == "" ]; then |
| 600 |
> |
if [ "$NS_ID" == "NULL" ]; then
| 601 |
> |
valPrint tr "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki." |
| 602 |
> |
else |
| 603 |
> |
valPrint tr "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID." |
| 604 |
> |
fi |
| 605 |
|
let SKIP_UNK_NS+=1 |
| 606 |
|
continue |
| 607 |
|
fi |
| 615 |
|
# JavaScript code, so it will return erroneous links |
| 616 |
|
PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') |
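# (The greedy ".*\." deletes everything through the last period, so e.g. "Common.js" yields "js".)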
| 617 |
|
if [ $PAGE_NAME_SUFFIX == "js" ]; then |
| 618 |
< |
valPrint tr "Skipping URL found on JavaScript page $PAGE_NAME." |
| 618 |
> |
valPrint tr "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME." |
| 619 |
|
let SKIP_JS_PAGE+=1 |
| 620 |
|
continue |
| 621 |
|
fi |
| 899 |
|
if [ $STATUS == "NG" ] && [ $SUGGEST_SNAPSHOTS -eq 1 ]; then |
| 900 |
|
ARCHIVE_QUERY=$(curl --silent --max-time 10 "$ARCHIVE_API?url=$URL&$ARCHIVE_OK_CODES") |
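# (The reply is JSON. An illustrative hit looks like {"archived_snapshots": {"closest":
# {"available": true, "url": "http://web.archive.org/web/20150101000000/http://example.com",
# "timestamp": "20150101000000", "status": "200"}}}; a miss returns an empty
# "archived_snapshots" object, so the "closest" test below fails and no suggestion is logged.)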
| 901 |
|
|
| 902 |
< |
# Isolate "url" property in response and log it if a "closest" snapshot was received... |
| 902 |
> |
# If a "closest" snapshot was received... |
| 903 |
|
if [[ "$ARCHIVE_QUERY" == *\"closest\":* ]]; then |
| 904 |
< |
SNAPSHOT_URL=${ARCHIVE_QUERY##*\"url\": \"} |
| 905 |
< |
SNAPSHOT_URL=${SNAPSHOT_URL%\", \"timestamp*} |
| 904 |
> |
# In case the URL has a shebang in it (like mega.nz links do), escape the '!' so the shell won't interpret it
| 905 |
> |
ARCHIVE_QUERY=$(echo "$ARCHIVE_QUERY" | sed 's/#!/#\\!/') |
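# (e.g. "https://mega.nz/#!abc123" becomes "https://mega.nz/#\!abc123")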
| 906 |
> |
|
| 907 |
> |
# ...isolate "url" property in the response that follows the "closest" tag |
| 908 |
> |
SNAPSHOT_URL=${ARCHIVE_QUERY##*\"closest\":} # everything after '"closest":' |
| 909 |
> |
SNAPSHOT_URL=${SNAPSHOT_URL##*\"url\": \"} # everything after '"url": "' |
| 910 |
> |
SNAPSHOT_URL=${SNAPSHOT_URL%%\"*} # everything before '"' |
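# (Worked example: given the illustrative reply above, these three expansions leave
# SNAPSHOT_URL="http://web.archive.org/web/20150101000000/http://example.com".)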
| 911 |
> |
|
| 912 |
> |
# Inform the user of the snapshot URL |
| 913 |
|
valPrint t " IA suggests $SNAPSHOT_URL" |
| 914 |
|
valPrint r " IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}" |
| 915 |
|
valPrint hn "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td><a href=\"$SNAPSHOT_URL\" target=\"_blank\">$SNAPSHOT_URL</a></td></tr>" |