-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract.py
66 lines (57 loc) · 2.4 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
import re
import sys
import argparse
import requests
# printf-style template: a dashed rule line followed by a single %s slot.
# NOTE(review): not referenced by any code visible in this file -- confirm
# whether it is still needed before removing it.
EXCERPT_FORMAT = "------------------------------------------------\n%s\n"
def sanitize_non_ascii(string):
    """Return *string* with every non-ASCII character replaced by ``_``.

    Args:
        string: Text to sanitize.

    Returns:
        A string of the same length that contains only ASCII characters.
    """
    # Substitute directly instead of round-tripping through
    # ``encode('ascii', errors='replace')``: when encoding, that handler emits
    # ``?`` (not U+FFFD), so a later replacement of U+FFFD never fires and
    # ``?`` -- which is meaningful inside URLs -- leaks into the output.
    return re.sub(r"[^\x00-\x7F]", "_", string)
# Quoted string literal whose content starts with "/".  Group 1 is the opening
# quote, group 2 is the endpoint text between the quotes.
# NOTE: the second character class contains both \w and \W, so it matches any
# character (newlines included); its lazy repetition can therefore run past a
# quote of the other kind until the quote that opened the literal reappears.
_ENDPOINT_RE = re.compile(r'(["\'])(/[^"\']*[\w\d\W\?/&=#.!:_-]*?)\1')


def extract_endpoints(content, show_line=False):
    """Print and return the unique endpoint-like strings found in *content*.

    An endpoint is the body of a single- or double-quoted literal that begins
    with ``/``.  Each distinct endpoint is reported once, in order of first
    appearance, and numbered consecutively.

    Args:
        content: Text to scan.
        show_line: When true, also print the full quoted match (quotes
            included) under each endpoint.

    Returns:
        The list of unique endpoints in order of first appearance; empty when
        nothing matched.
    """
    seen = set()  # O(1) duplicate detection
    endpoints = []  # preserves first-seen order for numbering and the result

    print("\n[ Extracted Endpoints ]")
    print("-" * 60)

    for match in _ENDPOINT_RE.finditer(content):
        endpoint = match.group(2)
        if endpoint in seen:
            continue
        seen.add(endpoint)
        endpoints.append(endpoint)

        # Number by position among the unique endpoints so that skipped
        # duplicates do not leave gaps in the printed sequence.
        print(f"{len(endpoints):02d}. Endpoint: {endpoint}")
        if show_line:
            print(f" Full Match: {match.group(0)}")
        print("-" * 60)

    if not endpoints:
        print("No endpoints found.")
        print("-" * 60)

    return endpoints
def _exit_with_error(message):
    """Print *message* to stderr and terminate the process with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def get_content(source, timeout=10):
    """Return the text of *source*, which is either a URL or a local file path.

    Sources starting with ``http://`` or ``https://`` are fetched with
    ``requests``; anything else is opened as a UTF-8 text file.

    Args:
        source: URL or filesystem path to read.
        timeout: Seconds to wait for the HTTP server before giving up.  Only
            used for URL sources.

    Returns:
        The response body or file contents as a string.

    Raises:
        SystemExit: With status 1 after reporting the problem on stderr when
            the source cannot be fetched or read.
    """
    if source.startswith(("http://", "https://")):
        try:
            # Without a timeout requests waits indefinitely on a stalled server.
            response = requests.get(source, timeout=timeout)
            response.raise_for_status()  # turn HTTP error statuses into exceptions
            return response.text
        except requests.exceptions.RequestException as e:
            _exit_with_error(f"Error fetching the URL: {e}")
        except Exception as e:  # CLI boundary: report instead of a traceback
            _exit_with_error(f"An error occurred: {e}")

    try:
        with open(source, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        _exit_with_error(f"Error: File '{source}' not found.")
    except Exception as e:  # CLI boundary: report instead of a traceback
        _exit_with_error(f"An error occurred: {e}")
def main():
    """Command-line entry point.

    Parses the ``source`` positional argument and the ``--show-line`` flag,
    loads the source text, sanitizes it to ASCII and prints the endpoints
    found in it.
    """
    cli = argparse.ArgumentParser(
        description="Extract endpoints from JavaScript files or URLs."
    )
    cli.add_argument(
        "source",
        help="Path to the JavaScript file or URL to be analyzed.",
    )
    cli.add_argument(
        "--show-line",
        action="store_true",
        help="Show the full line containing the endpoint.",
    )
    options = cli.parse_args()

    raw_text = get_content(options.source)
    extract_endpoints(sanitize_non_ascii(raw_text), options.show_line)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()