38 |
|
SHOW_YT_RD=0 # record redirection for a youtu.be URL expanding to the full URL |
39 |
|
SUGGEST_SNAPSHOTS_NG=0 # query the Internet Archive for a possible snapshot URL for each NG page |
40 |
|
SUGGEST_SNAPSHOTS_OK=0 # query the Internet Archive for an existing snapshot of each OK page |
41 |
< |
CHECK_ARCHIVE_LINKS=0 # check URLs under the archive.org domain |
41 |
> |
CHECK_ARCHIVE_LINKS=0 # check URLs on archive.org and archive.is |
42 |
|
TAKE_PAGE_SHOT=0 # take a screenshot of each OK page |
43 |
|
TIMEOUT=10 # time to wait for a response when querying a site |
44 |
|
CHROME_PATH="" # path to a copy of Google Chrome that has the command-line screenshot feature |
110 |
|
SKIP_HTTPS_UP=0 |
111 |
|
SKIP_SLASH_ADD=0 |
112 |
|
SKIP_YOUTU_BE=0 |
113 |
< |
SKIP_ARCHIVE_ORG=0 |
113 |
> |
SKIP_ARCHIVES=0 |
114 |
|
FILE_LINKS=0 |
115 |
|
PAGE_LINKS=0 |
116 |
|
SKIPPED_HEADER_ROW=0 |
185 |
|
does nothing unless you also use the |
186 |
|
--record-ok-links argument. |
187 |
|
--check-archive-links Check links that are already pointing to a page |
188 |
< |
on the Internet Archive. In theory these links |
189 |
< |
should be totally stable and not need validation. |
188 |
> |
on the Internet Archive or archive.is (AKA |
189 |
> |
archive.today). In theory these links should be |
190 |
> |
totally stable and not need validation. |
191 |
|
--take-screenshots FILE Call the Google Chrome binary at this path to |
192 |
|
take screenshots of each "OK" page. |
193 |
|
--timeout NUM Wait this many seconds for a site to respond. The |
532 |
|
# Print processed link totals |
533 |
|
if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi |
534 |
|
if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi |
535 |
< |
if [ $SKIP_ARCHIVE_ORG -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVE_ORG Archive.org $(pluralCheckNoun link $SKIP_ARCHIVE_ORG) were not checked"; fi |
535 |
> |
if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) $(pluralCheckWas $SKIP_ARCHIVES) not checked"; fi |
536 |
|
if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi |
537 |
|
if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi |
538 |
|
if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi |
677 |
|
valPrint ctrhn "Ignore youtu.be redirects: " |
678 |
|
if [ $SHOW_YT_RD -eq 1 ]; then valPrint ctrh "No"; else valPrint ctrh "Yes"; fi |
679 |
|
|
680 |
< |
valPrint ctrhn "Check archive.org links: " |
680 |
> |
valPrint ctrhn "Check archive.org and archive.is links: " |
681 |
|
if [ $CHECK_ARCHIVE_LINKS -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi |
682 |
|
|
683 |
|
valPrint tr "A summary of my findings will be found at the bottom of the report." |
810 |
|
continue |
811 |
|
fi |
812 |
|
|
813 |
< |
# If we're skipping Archive.org links, see if this is one |
814 |
< |
if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then |
815 |
< |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links." |
816 |
< |
let SKIP_ARCHIVE_ORG+=1 |
813 |
> |
# If we're skipping archive links, see if this is one |
814 |
> |
if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ ( $URL == *web.archive.org* || $URL == *archive.is* ) ]]; then |
815 |
> |
valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check archive links." |
816 |
> |
let SKIP_ARCHIVES+=1 |
817 |
|
let PAGE_LINKS+=1 |
818 |
|
continue |
819 |
|
fi |
917 |
|
|
918 |
|
# Get response code using 'curl' to see if this link is valid; the --insecure option avoids an |
919 |
|
# issue with sites that require HTTPS |
920 |
< |
CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL) |
920 |
> |
CURL_CODE=$(curl -o /dev/null --silent --insecure --compressed --head --user-agent "$AGENT" --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL) |
921 |
|
CURL_ERR=$(echo $?) |
922 |
|
CURL_RESULT=$CURL_CODE |
923 |
|
|