Skip to content

Commit

Permalink
Display ignored urls
Browse files Browse the repository at this point in the history
  • Loading branch information
marcwrobel committed Aug 1, 2022
1 parent a1fc1ba commit afc3528
Showing 1 changed file with 37 additions and 9 deletions.
46 changes: 37 additions & 9 deletions checklinks
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,53 @@ RETRY=1
# Value handed to curl's --retry-delay option when a request is retried.
RETRY_DELAY=10
# Value handed to curl's --connect-timeout option for every request.
TIMEOUT=3 # seconds

# Extended regular expression (used unquoted on the right-hand side of
# `[[ ... =~ ... ]]`) describing URLs that are reported as ignored instead of
# being fetched: placeholder hosts, numeric hosts, anything with an explicit
# port, and a few XML namespace identifiers that are not real web pages.
#
# Shape of the pattern:
#   ^https?://          anchored, so a URL merely *containing* an excluded
#                       URL further along (e.g. as a query value) still gets
#                       checked
#   ( alternatives )    host (or host + path prefix) to exclude; literal dots
#                       are escaped so "acme.org" cannot match "acmeXorg"
#   ([/:.?#]|$)         the alternative must end at a URL boundary, so "link"
#                       excludes "link" and "link.com" but not "linkedin.com"
#
# Inside double quotes a backslash before '.' is kept verbatim, a backslash
# before a newline joins the lines, and \$ yields a literal '$'.
EXCLUDED_URLS="^https?://(\
localhost|\
old\.nabble\.com|\
news\.gmane\.org|\
[0-9]+|\
.+:[0-9]+|\
example|\
[^/]+\.example|\
host|\
somehost|\
nohost|\
link|\
acme\.org|\
foo|\
application|\
registry\.npmjs\.org|\
apache\.org/xml/features|\
java\.sun\.com/xml/ns|\
javax\.xml\.XMLConstants\
)([/:.?#]|\$)"

# ANSI SGR escape sequences used (via `echo -e`) to colour the report lines.
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
# Bold black renders as dark gray; the previous value (1;34) was bold blue,
# which made ignored URLs nearly indistinguishable from the BLUE lines.
GRAY='\033[1;30m'
NC='\033[0m' # No Color

# The first positional parameter must name an existing directory; otherwise
# abort with status 1. An explicit `if` guarantees the exit happens even if
# writing the message fails, and the message goes to stderr so it is not
# mixed into the report printed on stdout.
if [[ ! -d "${1-}" ]]; then
  printf "'%s' is not a directory\n" "${1-}" >&2
  exit 1
fi

# Print the exclusion pattern in effect, wrapped in single quotes.
printf "'%s'\n" "$EXCLUDED_URLS"

# NOTE(review): this region looks like a rendered diff in which removed and
# added lines are interleaved (two `for` headers, the status/colour branches
# appear twice). As shown it is not a well-formed loop — confirm against the
# actual script before relying on any single line below.
#
# Links are processed in a random order to reduce the risk of being blacklisted and temporarily blocked
# Variant with the exclusions applied inside the pipeline: URLs matching the
# inline `grep -vE` pattern are dropped before the loop and never printed.
for url in $(grep -RioEh 'https?://[^][{} "`<>),*$|\\]*[^][{} "`<>),*$|\\.:'"'"']' | grep -vE 'https?://(localhost|[0-9]+|.+:[0-9]+|example|host|somehost|nohost|link|acme.org|foo.com|application.com|[^/]+.example.com|registry.npmjs.org|apache.org/xml/features|java.sun.com/xml/ns|javax.xml.XMLConstants)' | sort | uniq | sort -R); do
# we could use --head, but it is not always supported...
# The response body is discarded (-o /dev/null); only the HTTP status code
# written by --write-out is captured. --location makes curl follow redirects.
status=$(curl -o /dev/null --silent --connect-timeout "$TIMEOUT" --retry $RETRY --retry-delay $RETRY_DELAY --user-agent "$USER_AGENT" --location --write-out '%{http_code}' "$url")
# Variant without the inline filter: grep extracts every http(s) URL
# (-o prints only the match, -h omits file names, -R recurses, -i ignores
# case), `sort | uniq` removes duplicates and `sort -R` shuffles the list.
# NOTE(review): grep -R is given no path operand here even though "$1" is
# validated as a directory earlier — confirm which directory is meant to be
# searched.
for url in $(grep -RioEh 'https?://[^][{} "`<>),*$|\\]*[^][{} "`<>),*$|\\.:'"'"']' | sort | uniq | sort -R); do
# URLs matching the EXCLUDED_URLS regex are reported as ignored (in GRAY)
# and are not fetched.
if [[ "$url" =~ $EXCLUDED_URLS ]]; then
echo -e "${GRAY}$url (IGNORED)${NC}"

if [ "$status" = "200" ]; then
# The quoted right-hand side makes this a literal substring test: a URL
# containing "http://" that answered 200 is printed in BLUE.
if [[ $url =~ "http://" ]]; then
echo -e "${BLUE}$url ($status)${NC}"
else
# we could use --head, but it is not always supported...
status=$(curl -o /dev/null --silent --connect-timeout "$TIMEOUT" --retry $RETRY --retry-delay $RETRY_DELAY --user-agent "$USER_AGENT" --location --write-out '%{http_code}' "$url")

# Status 200: BLUE when the URL contains "http://", GREEN otherwise.
# Any other status is printed in RED.
if [ "$status" = "200" ]; then
if [[ $url =~ "http://" ]]; then
echo -e "${BLUE}$url ($status)${NC}"
else
echo -e "${GREEN}$url ($status)${NC}"
fi
else
echo -e "${GREEN}$url ($status)${NC}"
echo -e "${RED}$url ($status)${NC}"
fi
else
echo -e "${RED}$url ($status)${NC}"
fi
done

0 comments on commit afc3528

Please sign in to comment.