| 47 |
|
UPLOAD_INFO="" # path to a file on your hard drive with the login info needed to upload a report |
| 48 |
|
|
| 49 |
|
# Fixed strings -- see the occurrences of these variables to learn their purpose |
| 50 |
< |
AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36" |
| 50 |
> |
AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36" |
| 51 |
|
ARCHIVE_API="http://archive.org/wayback/available" |
| 52 |
|
ARCHIVE_GENERIC="https://web.archive.org/web/*" |
| 53 |
|
ARCHIVE_OK_CODES="statuscodes=200&statuscodes=203&statuscodes=206" |
| 76 |
|
# if you add a new code. |
| 77 |
|
declare -a OK_CODES=(200 401 405 406 418 501) |
| 78 |
|
declare -a RD_CODES=(301 302 303 307 308) |
| 79 |
< |
declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 530) |
| 79 |
> |
declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 520 530) |
| 80 |
|
|
| 81 |
|
# Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using |
| 82 |
|
# transcluded text, and if the transclusion fails, then the braces show up in the URL |
| 123 |
|
|
| 124 |
|
### HELP OUTPUT ### |
| 125 |
|
# A pseudo-man page. Here is the 80-character rule for the page text: |
| 126 |
< |
# 234567890123456789012345678901234567890123456789012345678901234567890123456789 |
| 126 |
> |
# 345678901234567890123456789012345678901234567890123456789012345678901234567890 |
| 127 |
|
function printHelp() |
| 128 |
|
{ |
| 129 |
|
cat << EOF |
| 534 |
|
# Print processed link totals |
| 535 |
|
if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi |
| 536 |
|
if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi |
| 537 |
< |
if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) were not checked"; fi |
| 537 |
> |
if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) $(pluralCheckWas $SKIP_ARCHIVES) not checked"; fi |
| 538 |
|
if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi |
| 539 |
|
if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi |
| 540 |
|
if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi |
| 1136 |
|
# Check for URL match. The exception's URL is whatever lies between the first
# and the last comma of the exception line.
EXCEPT_URL="${EXCEPT_LINE#*,}"
EXCEPT_URL="${EXCEPT_URL%,*}"
if [[ "$EXCEPT_URL" =~ \* ]]; then # if this exception URL contains the '*' wildcard, use pattern-matching with it
   # $EXCEPT_URL is deliberately left unquoted on the right-hand side so that
   # '*' is treated as a glob wildcard (quoting it would force a literal string
   # comparison). As in the plain comparison below, we move on via 'continue'
   # only when the URL does NOT fit the exception.
   # NOTE(review): other glob metacharacters in the exception URL ('?', '[')
   # are also interpreted as pattern characters here.
   if [[ "$URL" != $EXCEPT_URL ]]; then
      continue
   fi
else
   if [ "$EXCEPT_URL" != "$URL" ]; then # otherwise just use a straight string comparison
      continue
   fi
fi
| 1148 |
|
|
| 1149 |
|
# Check for page name match |