48 |
|
declare -a HTTP_FILES=(txt zip wmv jpg png m4a bsl rar oni mp3 mov ONWC vbs TRMA mp4 doc avi log gif pdf dmg exe cpp tga 7z wav east BINA xml dll dae xaf fbx 3ds blend flv csv) |
49 |
|
declare -a HTTP_TLDS_AND_PAGES=(com net org uk ru de it htm html php pl asp aspx shtml pgi cgi php3 x jsp phtml cfm css action stm js) |
50 |
|
|
51 |
< |
# These arrays tells us which HTTP response codes are OK (good) and which are NG (no good). Pages that |
52 |
< |
# return NG codes will not be screenshotted. Remember to update http_codes.txt if you add a new code. |
53 |
< |
declare -a OK_CODES=(200 301 307 401 405 406 501) |
54 |
< |
declare -a NG_CODES=(000 302 403 404 410 500 503) |
51 |
> |
# These arrays tell us which HTTP response codes are OK (good), which are RD (redirections), and which |
52 |
> |
# are NG (no good). Pages that return OK codes will be screenshotted. Remember to update http_codes.txt |
53 |
> |
# if you add a new code. |
54 |
> |
declare -a OK_CODES=(200 401 405 406 501) |
55 |
> |
declare -a RD_CODES=(301 302 303 307 308) |
56 |
> |
declare -a NG_CODES=(000 403 404 410 500 503) |
57 |
|
|
58 |
|
# Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using |
59 |
|
# transcluded text, and if the transclusion fails, then the braces show up in the URL |
67 |
|
# Variables for keeping track of main loop progress and findings |
68 |
|
LINK_NUM=0 |
69 |
|
OK_LINKS=0 |
70 |
+ |
RD_LINKS=0 |
71 |
+ |
IW_LINKS=0 |
72 |
|
NG_LINKS=0 |
73 |
|
SKIP_UNK_NS=0 |
74 |
|
SKIP_JS_PAGE=0 |
323 |
|
fi |
324 |
|
} |
325 |
|
|
# Output "is" if parameter 1 is 1, otherwise "are". A missing or empty parameter is treated as 0
# (and therefore yields "are") instead of making the numeric test fail with a syntax error.
function pluralCheckIs()
{
   # Quote and default the count so that '[' always receives a well-formed comparison
   if [ "${1:-0}" -ne 1 ]; then
      echo "are"
   else
      echo "is"
   fi
}
335 |
+ |
|
336 |
|
# Output "was" if parameter 1 is 1, otherwise "were" |
337 |
|
function pluralCheckWas() |
338 |
|
{ |
343 |
|
fi |
344 |
|
} |
345 |
|
|
# Output "a " (with the trailing space) if parameter 1 is 1, otherwise nothing. A missing or empty
# parameter is treated as 0, so nothing is printed and no test error is written to stderr.
function pluralCheckA()
{
   # Quote and default the count so that '[' always receives a well-formed comparison
   if [ "${1:-0}" -eq 1 ]; then
      echo "a "
   fi
}
353 |
+ |
|
# Output "an " (with the trailing space) if parameter 1 is 1, otherwise nothing. A missing or empty
# parameter is treated as 0, so nothing is printed and no test error is written to stderr.
function pluralCheckAn()
{
   # Quote and default the count so that '[' always receives a well-formed comparison
   if [ "${1:-0}" -eq 1 ]; then
      echo "an "
   fi
}
361 |
+ |
|
362 |
|
# Upload HTML report using info specified in the --upload argument. ONLY USE "valPrint c" here, as the |
363 |
|
# reports being saved to disk have already been closed. |
364 |
|
function uploadReport() |
415 |
|
if [ $SKIP_NON_ASCII -gt 0 ]; then valPrint ctrh "- $SKIP_NON_ASCII non-ASCII $(pluralCheckNoun URL $SKIP_NON_ASCII)"; fi |
416 |
|
if [ $SKIP_UNK_SUFFIX -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_SUFFIX unknown URL $(pluralCheckNoun suffix $SKIP_UNK_SUFFIX)"; fi |
417 |
|
if [ $SKIP_UNK_CODE -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_CODE unknown response $(pluralCheckNoun code $SKIP_UNK_CODE)"; fi |
418 |
< |
valPrint ctrh "Out of the $LINKS_CHECKED links checked, $OK_LINKS $(pluralCheckWas $OK_LINKS) OK and $NG_LINKS $(pluralCheckWas $NG_LINKS) NG." |
418 |
> |
valPrint ctrh "Out of the $LINKS_CHECKED links checked, $OK_LINKS $(pluralCheckWas $OK_LINKS) OK, $RD_LINKS $(pluralCheckWas $RD_LINKS) $(pluralCheckA $RD_LINKS)redirection $(pluralCheckNoun notice $RD_LINKS), and $NG_LINKS $(pluralCheckWas $NG_LINKS) NG." |
419 |
> |
if [ $IW_LINKS -gt 0 ]; then |
420 |
> |
valPrint ctrh "$IW_LINKS/$OK_LINKS OK $(pluralCheckNoun link $OK_LINKS) $(pluralCheckIs $IW_LINKS) $(pluralCheckAn $IW_LINKS)external $(pluralCheckNoun link $IW_LINKS) that could be $(pluralCheckAn $IW_LINKS)interwiki $(pluralCheckNoun link $IW_LINKS)." |
421 |
> |
fi |
422 |
|
if [ $SKIP_EXCEPT -gt 0 ]; then |
423 |
|
valPrint ctrh "$SKIP_EXCEPT/$NG_LINKS NG $(pluralCheckNoun link $NG_LINKS) went unlisted due to being found in the exceptions file." |
424 |
|
fi |
499 |
|
valPrint r "\b1 Legend \b0" |
500 |
|
valPrint hn "<h3>Legend</h3>" |
501 |
|
valPrint trh "OK = URL seems to be working." |
502 |
< |
valPrint trh "NG = URL no longer seems to work. You should click each URL marked as NG before attempting to fix it. False negatives will occur from time to time due to hiccups in the Internet. Please report any persistent false negatives or other issues to Iritscen." |
502 |
> |
valPrint trh "NG = URL no longer seems to work. You should click each URL marked as NG before attempting to fix it, because false negatives will occur from time to time due to hiccups in the Internet. Please report any persistent false negatives or other issues to Iritscen. An NG link should be followed by a link to the Internet Archive's Wayback Machine which may help you repair the link. If the link cannot be repaired, you can disable it on the wiki (which prevents it from showing up in future ValExtLinks reports) by wrapping it in nowiki tags." |
503 |
> |
valPrint trh "RD = The server responding to this URL is saying that the page moved and you should instead use the supplied new URL. Some RD links represent minor adjustments in the organization of a web site, and some are soft 404s (the file/page has been removed and you are being redirected to something like the main page of the web site). You will have to look at the new URL yourself to determine if it represents an OK link and the link on the wiki should be updated to this one, or if the desired file/page is actually gone and we need to replace the wiki link with an Internet Archive snapshot link -- or disable the URL if it has not been archived." |
504 |
|
valPrint trh "IW = URL is working but should be converted to interwiki link using the suggested markup." |
505 |
|
valPrint t "(xxx) = Unix tool 'curl' obtained this HTTP response status code (see here for code reference: $HTTP_CODES)." |
506 |
|
valPrint r "(xxx) = Unix tool 'curl' obtained this HTTP response status code (see {\field{\*\fldinst{HYPERLINK \"$HTTP_CODES\"}}{\fldrslt here}} for code reference)." |
692 |
|
|
693 |
|
# Determine if this code is in our "OK" list |
694 |
|
STATUS="??" |
695 |
+ |
NEW_URL="" |
696 |
|
INTERWIKI_INDEX=-1 |
697 |
|
for CODE in "${OK_CODES[@]}"; do |
698 |
|
if [[ $CODE == $CURL_CODE ]]; then |
702 |
|
for ((i = 0; i < ${#INTERWIKI_DOMAINS[@]}; ++i)); do |
703 |
|
if [[ $URL == *${INTERWIKI_DOMAINS[$i]}* ]]; then |
704 |
|
STATUS="IW" |
705 |
+ |
let IW_LINKS+=1 |
706 |
|
INTERWIKI_INDEX=$i |
707 |
|
break |
708 |
|
fi |
716 |
|
fi |
717 |
|
done |
718 |
|
|
719 |
< |
# If we didn't get a match with the "OK" codes, check it against the "NG" codes |
# If we didn't get a match with the "OK" codes, check it against the "RD" codes
if [ $STATUS == "??" ]; then
   for CODE in "${RD_CODES[@]}"; do
      if [[ $CODE == $CURL_CODE ]]; then
         STATUS="RD"
         let RD_LINKS+=1

         # Get URL header again in order to retrieve the URL we are being redirected to. The URL is
         # quoted so that characters such as '?', '*' and '[' are not subject to word splitting or
         # pathname expansion before curl receives it.
         # NOTE(review): the single quotes around "$AGENT" stop the shell from expanding it, so curl
         # is sent the literal text "$AGENT" (double quotes included) as the user agent. This is left
         # unchanged here so that this request keeps whatever user agent the earlier header request
         # used -- confirm against that call and fix both together if expansion was intended.
         NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{redirect_url}\n' "$URL")

         break
      fi
   done
fi
733 |
> |
|
734 |
> |
# If we didn't get a match with the "RD" codes, check it against the "NG" codes |
735 |
|
if [ $STATUS == "??" ]; then |
736 |
|
for CODE in "${NG_CODES[@]}"; do |
737 |
|
if [[ $CODE == $CURL_CODE ]]; then |
770 |
|
LOCAL_PAGE_PATH=$PAGE_NAME |
771 |
|
fi |
772 |
|
|
773 |
< |
# Stupid hack since the text "IW" is narrower than "OK" or "NG" and it takes an extra tab to get |
774 |
< |
# to the desired level of indentation in the RTF log |
773 |
> |
# Stupid hack since the text "IW" is narrower than "OK", "RD", or "NG" and it takes an extra tab |
774 |
> |
# to get to the desired level of indentation in the RTF log |
775 |
|
RTF_TABS=" " |
776 |
|
if [ $STATUS == "IW" ]; then |
777 |
|
RTF_TABS=" " |
785 |
|
valPrint hn "<tr><td style=\"white-space:nowrap\">$STATUS ($CURL_RESULT)</td><td align=\"right\">$STR_TYPE</td><td><a href=\"$URL\" target=\"_blank\">$URL</a></td></tr>" |
786 |
|
valPrint hn "<tr><td colspan=\"2\" align=\"right\">linked from</td><td><a href=\"$FULL_PAGE_PATH\" target=\"_blank\">$LOCAL_PAGE_PATH</a></td></tr>" |
787 |
|
|
788 |
+ |
# Record redirect URL if one was given by a 3xx response page |
789 |
+ |
if [ $STATUS == "RD" ]; then |
790 |
+ |
valPrint t " Server suggests $NEW_URL" |
791 |
+ |
valPrint r " Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}" |
792 |
+ |
valPrint hn "<tr><td colspan=\"2\" align=\"right\">Server suggests</td><td><a href=\"$NEW_URL\" target=\"_blank\">$NEW_URL</a></td></tr>" |
793 |
+ |
fi |
794 |
+ |
|
795 |
|
# Notify reader if we can use an interwiki prefix for this URL |
796 |
|
if [ $STATUS == "IW" ]; then |
797 |
|
valPrint t " You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$POST_SLASH]]" |