From afc3528e3bf3c4c7c8743a2b24349ba72176ee75 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Mon, 1 Aug 2022 19:49:13 +0200 Subject: [PATCH] Display ignored urls --- checklinks | 46 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/checklinks b/checklinks index 2f37c5a..a189022 100755 --- a/checklinks +++ b/checklinks @@ -5,25 +5,53 @@ RETRY=1 RETRY_DELAY=10 TIMEOUT=3 # seconds +EXCLUDED_URLS="https?://(\ +localhost|\ +old.nabble.com|\ +news.gmane.org|\ +[0-9]+|\ +.+:[0-9]+|\ +example|\ +[^/]+.example|\ +host|\ +somehost|\ +nohost|\ +link|\ +acme.org|\ +foo|\ +application|\ +registry.npmjs.org|\ +apache.org/xml/features|\ +java.sun.com/xml/ns|\ +javax.xml.XMLConstants)" + RED='\033[0;31m' GREEN='\033[0;32m' BLUE='\033[0;34m' +GRAY='\033[1;34m' NC='\033[0m' # No Color [ ! -d "$1" ] && echo "'$1' is not a directory" && exit 1 +echo "'$EXCLUDED_URLS'" + # Links are processed in a random order to reduce the risk of being blacklisted and temporarily blocked -for url in $(grep -RioEh 'https?://[^][{} "`<>),*$|\\]*[^][{} "`<>),*$|\\.:'"'"']' | grep -vE 'https?://(localhost|[0-9]+|.+:[0-9]+|example|host|somehost|nohost|link|acme.org|foo.com|application.com|[^/]+.example.com|registry.npmjs.org|apache.org/xml/features|java.sun.com/xml/ns|javax.xml.XMLConstants)' | sort | uniq | sort -R); do - # we could use --head, but it is not always supported... - status=$(curl -o /dev/null --silent --connect-timeout "$TIMEOUT" --retry $RETRY --retry-delay $RETRY_DELAY --user-agent "$USER_AGENT" --location --write-out '%{http_code}' "$url") +for url in $(grep -RioEh 'https?://[^][{} "`<>),*$|\\]*[^][{} "`<>),*$|\\.:'"'"']' | sort | uniq | sort -R); do + if [[ "$url" =~ $EXCLUDED_URLS ]]; then + echo -e "${GRAY}$url (IGNORED)${NC}" - if [ "$status" = "200" ]; then - if [[ $url =~ "http://" ]]; then - echo -e "${BLUE}$url ($status)${NC}" + else + # we could use --head, but it is not always supported... + status=$(curl -o /dev/null --silent --connect-timeout "$TIMEOUT" --retry $RETRY --retry-delay $RETRY_DELAY --user-agent "$USER_AGENT" --location --write-out '%{http_code}' "$url") + + if [ "$status" = "200" ]; then + if [[ $url =~ "http://" ]]; then + echo -e "${BLUE}$url ($status)${NC}" + else + echo -e "${GREEN}$url ($status)${NC}" + fi else - echo -e "${GREEN}$url ($status)${NC}" + echo -e "${RED}$url ($status)${NC}" fi - else - echo -e "${RED}$url ($status)${NC}" fi done