ViewVC Help
View File | Revision Log | View Changeset | Root Listing
root/Oni2/Validate External Links/validate_external_links.sh
(Generate patch)

Comparing Validate External Links/validate_external_links.sh (file contents):
Revision 1066 by iritscen, Tue Aug 1 14:30:24 2017 UTC vs.
Revision 1067 by iritscen, Tue Aug 1 17:09:42 2017 UTC

# Line 48 | Line 48 | declare -a NS_NAMES=("Media" "Special" "
48   declare -a HTTP_FILES=(txt zip wmv jpg png m4a bsl rar oni mp3 mov ONWC vbs TRMA mp4 doc avi log gif pdf dmg exe cpp tga 7z wav east BINA xml dll dae xaf fbx 3ds blend flv csv)
49   declare -a HTTP_TLDS_AND_PAGES=(com net org uk ru de it htm html php pl asp aspx shtml pgi cgi php3 x jsp phtml cfm css action stm js)
50  
51 < # These arrays tells us which HTTP response codes are OK (good) and which are NG (no good). Pages that
52 < # return NG codes will not be screenshotted. Remember to update http_codes.txt if you add a new code.
53 < declare -a OK_CODES=(200 301 307 401 405 406 501)
54 < declare -a NG_CODES=(000 302 403 404 410 500 503)
51 > # These arrays tell us which HTTP response codes are OK (good), which are RD (redirections), and which
52 > # are NG (no good). Pages that return OK codes will be screenshotted. Remember to update http_codes.txt
53 > # if you add a new code.
54 > declare -a OK_CODES=(200 401 405 406 501)
55 > declare -a RD_CODES=(301 302 303 307 308)
56 > declare -a NG_CODES=(000 403 404 410 500 503)
57  
58   # Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using
59   # transcluded text, and if the transclusion fails, then the braces show up in the URL
# Line 65 | Line 67 | declare -a INTERWIKI_DOMAINS=(meta.wikip
67   # Variables for keeping track of main loop progress and findings
68   LINK_NUM=0
69   OK_LINKS=0
70 + RD_LINKS=0
71 + IW_LINKS=0
72   NG_LINKS=0
73   SKIP_UNK_NS=0
74   SKIP_JS_PAGE=0
# Line 319 | Line 323 | function pluralCheckNoun()
323     fi
324   }
325  
326 + # Output "is" if parameter 1 is 1, otherwise "are"
327 + function pluralCheckIs()
328 + {
329 +   if [ $1 -ne 1 ]; then
330 +      echo "are"
331 +   else
332 +      echo "is"
333 +   fi
334 + }
335 +
336   # Output "was" if parameter 1 is 1, otherwise "were"
337   function pluralCheckWas()
338   {
# Line 329 | Line 343 | function pluralCheckWas()
343     fi
344   }
345  
346 + # Output "a " if parameter 1 is 1, otherwise nothing
347 + function pluralCheckA()
348 + {
349 +   if [ $1 -eq 1 ]; then
350 +      echo "a "
351 +   fi
352 + }
353 +
354 + # Output "an " if parameter 1 is 1, otherwise nothing
355 + function pluralCheckAn()
356 + {
357 +   if [ $1 -eq 1 ]; then
358 +      echo "an "
359 +   fi
360 + }
361 +
362   # Upload HTML report using info specified in the --upload argument. ONLY USE "valPrint c" here, as the
363   # reports being saved to disk have already been closed.
364   function uploadReport()
# Line 385 | Line 415 | function wrapupAndExit()
415     if [ $SKIP_NON_ASCII -gt 0 ]; then valPrint ctrh "- $SKIP_NON_ASCII non-ASCII $(pluralCheckNoun URL $SKIP_NON_ASCII)"; fi
416     if [ $SKIP_UNK_SUFFIX -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_SUFFIX unknown URL $(pluralCheckNoun suffix $SKIP_UNK_SUFFIX)"; fi
417     if [ $SKIP_UNK_CODE -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_CODE unknown response $(pluralCheckNoun code $SKIP_UNK_CODE)"; fi
418 <   valPrint ctrh "Out of the $LINKS_CHECKED links checked, $OK_LINKS $(pluralCheckWas $OK_LINKS) OK and $NG_LINKS $(pluralCheckWas $NG_LINKS) NG."
418 >   valPrint ctrh "Out of the $LINKS_CHECKED links checked, $OK_LINKS $(pluralCheckWas $OK_LINKS) OK, $RD_LINKS $(pluralCheckWas $RD_LINKS) $(pluralCheckA $RD_LINKS)redirection $(pluralCheckNoun notice $RD_LINKS), and $NG_LINKS $(pluralCheckWas $NG_LINKS) NG."
419 >   if [ $IW_LINKS -gt 0 ]; then
420 >      valPrint ctrh "$IW_LINKS/$OK_LINKS OK $(pluralCheckNoun link $OK_LINKS) $(pluralCheckIs $IW_LINKS) $(pluralCheckAn $IW_LINKS)external $(pluralCheckNoun link $IW_LINKS) that could be $(pluralCheckAn $IW_LINKS)interwiki $(pluralCheckNoun link $IW_LINKS)."
421 >   fi
422     if [ $SKIP_EXCEPT -gt 0 ]; then
423        valPrint ctrh "$SKIP_EXCEPT/$NG_LINKS NG $(pluralCheckNoun link $NG_LINKS) went unlisted due to being found in the exceptions file."
424     fi
# Line 466 | Line 499 | valPrint t "Legend:"
499   valPrint r "\b1 Legend \b0"
500   valPrint hn "<h3>Legend</h3>"
501   valPrint trh "OK = URL seems to be working."
502 < valPrint trh "NG = URL no longer seems to work. You should click each URL marked as NG before attempting to fix it. False negatives will occur from time to time due to hiccups in the Internet. Please report any persistent false negatives or other issues to Iritscen."
502 > valPrint trh "NG = URL no longer seems to work. You should click each URL marked as NG before attempting to fix it, because false negatives will occur from time to time due to hiccups in the Internet. Please report any persistent false negatives or other issues to Iritscen. An NG link should be followed by a link to the Internet Archive's Wayback Machine which may help you repair the link. If the link cannot be repaired, you can disable it on the wiki (which prevents it from showing up in future ValExtLinks reports) by wrapping it in nowiki tags."
503 > valPrint trh "RD = The server responding to this URL is saying that the page moved and you should instead use the supplied new URL. Some RD links represent minor adjustments in the organization of a web site, and some are soft 404s (the file/page has been removed and you are being redirected to something like the main page of the web site). You will have to look at the new URL yourself to determine if it represents an OK link and the link on the wiki should be updated to this one, or if the desired file/page is actually gone and we need to replace the wiki link with an Internet Archive snapshot link -- or disable the URL if it has not been archived."
504   valPrint trh "IW = URL is working but should be converted to interwiki link using the suggested markup."
505   valPrint t "(xxx) = Unix tool 'curl' obtained this HTTP response status code (see here for code reference: $HTTP_CODES)."
506   valPrint r "(xxx) = Unix tool 'curl' obtained this HTTP response status code (see {\field{\*\fldinst{HYPERLINK \"$HTTP_CODES\"}}{\fldrslt here}} for code reference)."
# Line 658 | Line 692 | for LINE in `cat "$LINKS_FILE"`; do
692  
693     # Determine if this code is in our "OK" list
694     STATUS="??"
695 +   NEW_URL=""
696     INTERWIKI_INDEX=-1
697     for CODE in "${OK_CODES[@]}"; do
698        if [[ $CODE == $CURL_CODE ]]; then
# Line 667 | Line 702 | for LINE in `cat "$LINKS_FILE"`; do
702           for ((i = 0; i < ${#INTERWIKI_DOMAINS[@]}; ++i)); do
703              if [[ $URL == *${INTERWIKI_DOMAINS[$i]}* ]]; then
704                 STATUS="IW"
705 +               let IW_LINKS+=1
706                 INTERWIKI_INDEX=$i
707                 break
708              fi
# Line 680 | Line 716 | for LINE in `cat "$LINKS_FILE"`; do
716        fi
717     done
718  
719 <   # If we didn't get a match with the "OK" codes, check it against the "NG" codes
719 >   # If we didn't get a match with the "OK" codes, check it against the "RD" codes
720 >   if [ $STATUS == "??" ]; then
721 >      for CODE in "${RD_CODES[@]}"; do
722 >         if [[ $CODE == $CURL_CODE ]]; then
723 >            STATUS="RD"
724 >            let RD_LINKS+=1
725 >
726 >            # Get URL header again in order to retrieve the URL we are being redirected to
727 >            NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{redirect_url}\n' $URL)
728 >
729 >            break
730 >         fi
731 >      done
732 >   fi
733 >
734 >   # If we didn't get a match with the "RD" codes, check it against the "NG" codes
735     if [ $STATUS == "??" ]; then
736        for CODE in "${NG_CODES[@]}"; do
737           if [[ $CODE == $CURL_CODE ]]; then
# Line 719 | Line 770 | for LINE in `cat "$LINKS_FILE"`; do
770           LOCAL_PAGE_PATH=$PAGE_NAME
771        fi
772  
773 <      # Stupid hack since the text "IW" is narrower than "OK" or "NG" and it takes an extra tab to get
774 <      # to the desired level of indentation in the RTF log
773 >      # Stupid hack since the text "IW" is narrower than "OK", "RD", or "NG" and it takes an extra tab
774 >      # to get to the desired level of indentation in the RTF log
775        RTF_TABS="        "
776        if [ $STATUS == "IW" ]; then
777           RTF_TABS="             "
# Line 734 | Line 785 | for LINE in `cat "$LINKS_FILE"`; do
785        valPrint hn "<tr><td style=\"white-space:nowrap\">$STATUS ($CURL_RESULT)</td><td align=\"right\">$STR_TYPE</td><td><a href=\"$URL\" target=\"_blank\">$URL</a></td></tr>"
786        valPrint hn "<tr><td colspan=\"2\" align=\"right\">linked from</td><td><a href=\"$FULL_PAGE_PATH\" target=\"_blank\">$LOCAL_PAGE_PATH</a></td></tr>"
787  
788 +      # Record redirect URL if one was given by a 3xx response page
789 +      if [ $STATUS == "RD" ]; then
790 +         valPrint t "  Server suggests $NEW_URL"
791 +         valPrint r "   Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}"
792 +         valPrint hn "<tr><td colspan=\"2\" align=\"right\">Server suggests</td><td><a href=\"$NEW_URL\" target=\"_blank\">$NEW_URL</a></td></tr>"
793 +      fi
794 +
795        # Notify reader if we can use an interwiki prefix for this URL
796        if [ $STATUS == "IW" ]; then
797           valPrint t "  You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$POST_SLASH]]"

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old)
> Changed lines (new)