-
Notifications
You must be signed in to change notification settings - Fork 5
/
check_links.py
47 lines (37 loc) · 1.27 KB
/
check_links.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
Parse all URLs of the documentation and check that they work
"""
import os
import re
import requests
from rich import print as print
import urllib3
# Folder that is walked for documentation sources.
source_folder = "source/user"
# Every URL found while parsing is appended here; deduplicated after parsing.
links = []

# A single session shared by all link checks.
session = requests.Session()

# The link checks are issued with verify=False, which makes urllib3 emit an
# InsecureRequestWarning; silence it so the report stays readable.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Captures the URL of links written as `<http(s)://...>` immediately followed
# by a backtick and two underscores (the reST anonymous-hyperlink form).
# Links written in any other form are not collected.
# Compiled once here instead of being looked up again for every file.
link_pattern = re.compile(r'<\s*(http[s]?://[^>\s]+)\s*>`__')

for root, _, files in os.walk(source_folder):
    for file_name in files:
        # Only reStructuredText sources are scanned.
        if not file_name.endswith('.rst'):
            continue
        print(f"[blue]Parsing: {file_name}[/blue]")
        file_path = os.path.join(root, file_name)
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding; an undecodable byte is replaced rather
        # than aborting the whole link check.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as rst_file:
            content = rst_file.read()
        links += link_pattern.findall(content)

# Remove duplicates and put the links in a stable, sorted order
links = sorted(set(links))
# Check links: request each collected URL and print one colour-coded line per
# link (plain for success, orange for 404, magenta for any other non-OK
# status, red when the request itself fails).
for link in links:
    try:
        # We use GET and not HEAD, because HEAD does not return the correct status codes
        # Certificate verification is disabled and each request gets a 5 s timeout.
        response = session.get(link, allow_redirects=True, timeout=5, verify=False)
    except requests.RequestException as exc:
        # RequestException also covers timeouts, connection errors and
        # redirect loops, not just malformed URLs, so report the actual
        # failure type instead of always claiming the URL is invalid.
        print(f"[red]{link}, request failed ({type(exc).__name__})[/red]")
        continue

    if response.ok:
        # NOTE(review): confirm that "grey" is a style name rich recognises;
        # if it is not, this line is presumably printed unstyled.
        print(f"[grey]{link}[/grey]")
    elif response.status_code == 404:
        print(f"[orange1]{link}, {response.status_code}[/orange1]")
    else:
        print(f"[magenta3]{link}, {response.status_code}[/magenta3]")