[ViewVC] Diff of: Oni2/Validate External Links/validate_external_links.sh

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1148 by iritscen, Thu Feb 4 23:15:20 2021 UTC vs.
Revision 1149 by iritscen, Sun Feb 7 22:36:56 2021 UTC

+# if you add a new code.
+declare -a OK_CODES=(200 401 405 406 418 501)
+declare -a RD_CODES=(301 302 303 307 308)
-<
+declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 530)
->
+declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 530)
+# Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using
+# transcluded text, and if the transclusion fails, then the braces show up in the URL
+   PAGE_NAME=${LINE#$NS_ID,}
+   PAGE_NAME=${PAGE_NAME%%,*}
-–
+   # We don't want to consider wiki pages ending in .js, as the MW parser cannot reliably isolate URLs
-–
+   # in JavaScript code, so it returns erroneous links
-–
+   PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
-–
+   if [ $PAGE_NAME_SUFFIX == "js" ]; then
-–
+      valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'."
-–
+      let SKIP_JS_PAGE+=1
-–
+      let PAGE_LINKS+=1
-–
+      continue
-–
+   fi
-–
+   # Build longer wiki page URLs from namespace and page names
+   FULL_PAGE_PATH=https://$WIKI_PATH/$NS_NAME:$PAGE_NAME
+   LOCAL_PAGE_PATH=$NS_NAME:$PAGE_NAME
+      LOCAL_PAGE_PATH=$PAGE_NAME
+   fi
-+
+   # We don't want to consider wiki pages ending in .js, as the MW parser cannot reliably isolate URLs
-+
+   # in JavaScript code, so it returns erroneous links
-+
+   PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
-+
+   if [ $PAGE_NAME_SUFFIX == "js" ]; then
-+
+      valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$LOCAL_PAGE_PATH'."
-+
+      let SKIP_JS_PAGE+=1
-+
+      let PAGE_LINKS+=1
-+
+      continue
-+
+   fi
-+
+   # The URL being linked to is everything after the previous two fields (this allows commas to be in
+   # the URLs, but a comma in the previous field, the page name, will break this)
+   URL=${LINE#$NS_ID,$PAGE_NAME,}
+   # Scan for illegal characters
+   if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
-<
+      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL."
->
+      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because it contains characters illegal in a URL."
+      let SKIP_BAD_URL+=1
+      let PAGE_LINKS+=1
+      continue
+   # If we're skipping Archive.org links, see if this is one
+   if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then
-<
+      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to check Wayback Machine links."
->
+      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links."
+      let SKIP_ARCHIVE_ORG+=1
+      let PAGE_LINKS+=1
+      continue
+   # 'sed' cannot handle Unicode in my Bash shell, so skip non-ASCII URLs and make the user check them
+   if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
-<
+      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters."
->
+      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I cannot handle non-ASCII characters."
+      let SKIP_NON_ASCII+=1
+      let PAGE_LINKS+=1
+      continue
+   # If this suffix escaped identification as either a file, page or TLD, inform the user
+   STR_TYPE=""
+   if [ $IS_FILE -eq -1 ]; then
-<
+      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
->
+      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
+      let SKIP_UNK_SUFFIX+=1
+      continue
+   elif [ $IS_FILE -eq 1 ]; then
+            # If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user
+            # wants those to be reported)
+            if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then
-<
+               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'."
->
+               valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'."
+               STATUS="OK"
+               let OK_LINKS+=1
+               let SKIP_HTTPS_UP+=1
+            # If the URLs match besides an added ending slash, then the link is OK (unless user wants
+            # those to be reported)
+            elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then
-<
+               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'."
->
+               valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'."
+               STATUS="OK"
+               let OK_LINKS+=1
+               let SKIP_SLASH_ADD+=1
+                  let NG_LINKS+=1
+               else
+                  if [ $SHOW_YT_RD -eq 0 ]; then
-<
+                     valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'."
->
+                     valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'."
+                     STATUS="OK"
+                     let OK_LINKS+=1
+                     let SKIP_YOUTU_BE+=1
+   # If we didn't match a known status code, advise the reader
+   if [ $STATUS == "??" ]; then
-<
+      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown response code $CURL_CODE."
->
+      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown response code $CURL_CODE."
+      let SKIP_UNK_CODE+=1
+      continue
+   fi
+            # Match result code
+            EXCEPT_CODE=${EXCEPT_LINE%%,*}
+            if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then
-<
+               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, '$EXPECT_CODE', is in the exceptions list."
->
+               valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because its expected result, '$EXPECT_CODE', is in the exceptions list."
+               if [ $STATUS == "EI" ]; then
+                  let SKIP_EXPECT_EI+=1
+               elif [ $STATUS == "IW" ]; then

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines (old)
->
+Changed lines (new)

Comparing Validate External Links/validate_external_links.sh (file contents): Revision 1148 by iritscen, Thu Feb 4 23:15:20 2021 UTC vs. Revision 1149 by iritscen, Sun Feb 7 22:36:56 2021 UTC

Diff Legend

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1148 by iritscen, Thu Feb 4 23:15:20 2021 UTC vs.
Revision 1149 by iritscen, Sun Feb 7 22:36:56 2021 UTC