[ViewVC] Diff of: Oni2/Validate External Links/validate_external_links.sh

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1147 by iritscen, Tue Feb 2 20:10:39 2021 UTC vs.
Revision 1148 by iritscen, Thu Feb 4 23:15:20 2021 UTC

+UPLOAD_INFO=""         # path to a file on your hard drive with the login info needed to upload a report
+# Fixed strings -- see the occurrences of these variables to learn their purpose
-<
+AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36 OPR/70.0.3728.154"
->
+AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/88.0.4324.146 Safari/537.36"
+ARCHIVE_API="http://archive.org/wayback/available"
+ARCHIVE_GENERIC="https://web.archive.org/web/*"
+ARCHIVE_OK_CODES="statuscodes=200&statuscodes=203&statuscodes=206"
+# if you add a new code.
+declare -a OK_CODES=(200 401 405 406 418 501)
+declare -a RD_CODES=(301 302 303 307 308)
-<
+declare -a NG_CODES=(000 400 403 404 410 500 502 503 530)
->
+declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 530)
+# Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using
+# transcluded text, and if the transclusion fails, then the braces show up in the URL
+   if [ $SKIPPED_HEADER_ROW -eq 0 ]; then
+      if [ $LINE == "namespace,title,target" ]; then
+         SKIPPED_HEADER_ROW=1
-<
+         LINK_NUM=0 # this line is it's not a link, so reset the link counter
->
+         LINK_NUM=0 # this line is not a link, so reset the link counter
+         valPrint hn "<table>"
+         continue
+      else
+         valPrint trs "Skipping line $LINK_NUM ('$LINE') because I could not find a name for namespace ID $NS_ID."
+      fi
+      let SKIP_UNK_NS+=1
-+
+      let PAGE_LINKS+=1
+      continue
+   fi
+   if [ $PAGE_NAME_SUFFIX == "js" ]; then
+      valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'."
+      let SKIP_JS_PAGE+=1
-+
+      let PAGE_LINKS+=1
+      continue
+   fi
+   if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
+      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL."
+      let SKIP_BAD_URL+=1
-+
+      let PAGE_LINKS+=1
+      continue
+   fi
+   if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then
+      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to check Wayback Machine links."
+      let SKIP_ARCHIVE_ORG+=1
-+
+      let PAGE_LINKS+=1
+      continue
+   fi
+   if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
+      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters."
+      let SKIP_NON_ASCII+=1
-+
+      let PAGE_LINKS+=1
+      continue
+   fi
+   elif [ $IS_FILE -eq 1 ]; then
+      STR_TYPE="file"
+      let FILE_LINKS+=1
-<
+   elif [ $IS_FILE -eq 0 ]; then
->
+   else
+      STR_TYPE="page"
+      let PAGE_LINKS+=1
+   fi
+         if [[ $CODE == $CURL_CODE ]]; then
+            STATUS="OK"
+            let OK_LINKS+=1
-+
-+
+            # If this is a YouTube link, we have to look at the actual page source to know if the video
-+
+            # is good or not
-+
+            if [[ $URL == *www.youtube.com* ]]; then
-+
+               PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $URL | grep "\"simpleText\":\"Video unavailable\"")
-+
+               if [ ! -z "$PAGE_TEXT" ]; then
-+
+                  STATUS="NG"
-+
+                  let OK_LINKS-=1
-+
+                  let NG_LINKS+=1
-+
+               fi
-+
+            fi
+            break
+         fi
+      done
+               STATUS="OK"
+               let OK_LINKS+=1
+               let SKIP_SLASH_ADD+=1
-<
+            elif [ $SHOW_YT_RD -eq 0 ] && [ $YOUTU_BE -eq 1 ]; then
-<
+               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'."
-<
+               STATUS="OK"
-<
+               let OK_LINKS+=1
-<
+               let SKIP_YOUTU_BE+=1
->
+            elif [ $YOUTU_BE -eq 1 ]; then
->
+               # We have to look at the actual page source to know if a YouTube video is good or not
->
+               PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $NEW_URL | grep "\"simpleText\":\"Video unavailable\"")
->
+               if [ ! -z "$PAGE_TEXT" ]; then
->
+                  STATUS="NG"
->
+                  let NG_LINKS+=1
->
+               else
->
+                  if [ $SHOW_YT_RD -eq 0 ]; then
->
+                     valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'."
->
+                     STATUS="OK"
->
+                     let OK_LINKS+=1
->
+                     let SKIP_YOUTU_BE+=1
->
+                  else
->
+                     STATUS="RD"
->
+                     let RD_LINKS+=1
->
+                  fi
->
+               fi
+            else
+               STATUS="RD"
+               let RD_LINKS+=1

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines (old)
->
+Changed lines (new)

Comparing Validate External Links/validate_external_links.sh (file contents): Revision 1147 by iritscen, Tue Feb 2 20:10:39 2021 UTC vs. Revision 1148 by iritscen, Thu Feb 4 23:15:20 2021 UTC

Diff Legend

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1147 by iritscen, Tue Feb 2 20:10:39 2021 UTC vs.
Revision 1148 by iritscen, Thu Feb 4 23:15:20 2021 UTC