33 |
|
# Defaults for the run-time options below.

# Location of the wiki page holding the list of exceptions for NG results
EXCEPT_URL=''

# Existing folder; reports and all other output go in a folder created inside it
OUTPUT_DIR=''

# When 1, log a link's response code even if that code is a value in OK_CODES
RECORD_OK_LINKS=0

# When 1, only code 200 counts as "OK"; no other code in OK_CODES does
ONLY_200_OK=0

# When 1, record an issue when a slash is added to the end of a URL
SHOW_SLASH=0

# When 1, record an issue when "http" is upgraded to "https"
SHOW_HTTPS=0

# When 1, record the redirection of a youtu.be URL expanding to the full URL
SHOW_YT_RD=0
68 |
|
# NOTE(review): these look like the wiki's namespace names; confirm how the
# array is indexed before reordering or inserting entries.
declare -a NS_NAMES=(
  "Media" "Special"
  "Main" "Talk"
  "User" "User_talk"
  "OniGalore" "OniGalore_talk"
  "File" "File_talk"
  "MediaWiki" "MediaWiki_talk"
  "Template" "Template_talk"
  "Help" "Help_talk"
  "Category" "Category_talk"
  "BSL" "BSL_talk"
  "OBD" "OBD_talk"
  "AE" "AE_talk"
  "Oni2" "Oni2_talk"
  "XML" "XML_talk"
)

# The two arrays below classify the suffix found at the end of a URL: suffixes
# in HTTP_FILES denote files, suffixes in HTTP_TLDS_AND_PAGES denote pages.
# That classification decides whether the script tries to take a screenshot of
# the URL (when screenshots are requested).
declare -a HTTP_FILES=(
  3ds 7z avi BINA blend bsl cpp csv dae dll dmg doc east exe fbx
  first63 flv gamedata gif jpg last32 log m4a mhm mov mp3 mp4 oni ONWC pdf
  png psd py rar tga TRMA txt vbs wav wmv xaf xcf xlsx xml zip
)
declare -a HTTP_TLDS_AND_PAGES=(
  abstract action ars asp aspx cfm cgi com css de do full htm html it
  js jsp net org pgi php php3 phtml pl ru shtml stm uk x
)
75 |
|
|
136 |
|
SYNOPSIS |
137 |
|
validate_external_links.sh --help |
138 |
|
validate_external_links.sh --links URL --output DIR [--exceptions URL] |
139 |
< |
[--record-ok-links] [--show-added-slashes] [--show-https-upgrades] |
140 |
< |
[--show-yt-redirects] [--suggest-snapshots] [--check-archive-links] |
141 |
< |
[--take-screenshots FILE] [--timeout NUM] [--start-url NUM] |
142 |
< |
[--end-url NUM] [--upload FILE] |
139 |
> |
[--record-ok-links] [--only-200-ok] [--show-added-slashes] |
140 |
> |
[--show-https-upgrades] [--show-yt-redirects] [--suggest-snapshots] |
141 |
> |
[--check-archive-links] [--take-screenshots FILE] [--timeout NUM] |
142 |
> |
[--start-url NUM] [--end-url NUM] [--upload FILE] |
143 |
|
|
144 |
|
DESCRIPTION |
145 |
|
This script parses a list of external links found in the OniGalore wiki |
174 |
|
beginning with "file://". |
175 |
|
--record-ok-links Log a link in the report even if its response |
176 |
|
code is "OK". |
177 |
+ |
--only-200-ok Only treat response code 200 as "OK". Normally |
178 |
+ |
several additional codes are treated as "OK" (see |
179 |
+ |
the array OK_CODES in script) because they are |
180 |
+ |
typically not an indicator of a bad link. |
181 |
|
--show-added-slashes Report on redirects that simply add a '/' to the |
182 |
|
end of the URL. |
183 |
|
--show-https-upgrades Report on redirects that simply upgrade a |
231 |
|
--exceptions ) EXCEPT_URL="$2"; shift 2;; |
232 |
|
--output ) OUTPUT_DIR="$2"; shift 2;; |
233 |
|
--record-ok-links ) RECORD_OK_LINKS=1; shift;; |
234 |
+ |
--only-200-ok ) ONLY_200_OK=1; shift;; |
235 |
|
--show-added-slashes ) SHOW_SLASH=1; shift;; |
236 |
|
--show-https-upgrades ) SHOW_HTTPS=1; shift;; |
237 |
|
--show-yt-redirects ) SHOW_YT_RD=1; shift;; |
711 |
|
# Whether links with an OK response are being recorded
valPrint ctrhn "Show OK links: "
if [ $RECORD_OK_LINKS -eq 1 ]; then
  valPrint ctrh "Yes"
else
  valPrint ctrh "No"
fi

# Which response codes count as OK for this run: just 200 when the
# only-200 option is on, otherwise every entry of OK_CODES
valPrint ctrhn "Treat these response codes as OK: "
if [ $ONLY_200_OK -eq 1 ]; then
  valPrint ctrh "200"
else
  valPrint ctrh "${OK_CODES[*]}"
fi

# Whether screenshots are being taken
valPrint ctrhn "Take screenshots: "
if [ $TAKE_PAGE_SHOT -eq 1 ]; then
  valPrint ctrh "Yes"
else
  valPrint ctrh "No"
fi
719 |
|
|
1016 |
|
# If we didn't match an interwiki domain, see if the status code is in our "OK" codes list |
1017 |
|
if [ $STATUS == "??" ]; then |
1018 |
|
for CODE in "${OK_CODES[@]}"; do |
1019 |
+ |
if [ $ONLY_200_OK -eq 1 ] && [ $CODE -ne 200 ]; then |
1020 |
+ |
continue |
1021 |
+ |
fi |
1022 |
+ |
|
1023 |
|
if [[ $CODE == $CURL_CODE ]]; then |
1024 |
|
STATUS="OK" |
1025 |
|
let OK_LINKS+=1 |
1156 |
|
break |
1157 |
|
fi |
1158 |
|
done |
1159 |
+ |
# When the --only-200-ok argument was received, every "OK" code other than 200
# is demoted: a response code found in OK_CODES (200 excepted) marks the link
# as NG and is counted in NG_LINKS.
# Reads:  ONLY_200_OK, OK_CODES, CURL_CODE
# Writes: STATUS, NG_LINKS (and the loop variable CODE)
# Operands are quoted so an empty value cannot turn a test into a syntax error,
# and the comparison against CURL_CODE is literal rather than a glob match.
if [ "${ONLY_200_OK:-0}" -eq 1 ]; then
  for CODE in "${OK_CODES[@]}"; do
    # 200 remains a genuine "OK" in this mode, so it is never demoted
    if [ "$CODE" -eq 200 ]; then
      continue
    fi
    if [[ "$CODE" == "$CURL_CODE" ]]; then
      STATUS="NG"
      NG_LINKS=$((NG_LINKS + 1))
      break
    fi
  done
fi
1172 |
|
fi |
1173 |
|
|
1174 |
|
# If we didn't match a known status code, advise the reader |