cloned_website.py
import argparse

import requests
from bs4 import BeautifulSoup as bs
from urllib.parse import urljoin

from tokenfinder import Tokenfinder


def check_cloned_website(url):
    # Example targets:
    # url = "http://localhost:8000"
    # url = "https://docs.microsoft.com/en-us/learn/modules/build-simple-website/"

    # Initialize a session and present ourselves as a regular browser.
    session = requests.Session()
    session.headers["User-Agent"] = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
    )

    # Get the HTML content and check it for honeytokens.
    html = session.get(url).content
    found_tokens = []
    token = Tokenfinder.find_tokens_in_string(html.decode(errors="ignore"))
    if token:
        found_tokens.append(token)

    # Parse the HTML with Beautiful Soup and collect the JavaScript files.
    soup = bs(html, "html.parser")
    script_files = []
    for script in soup.find_all("script"):
        # Only external scripts carry a 'src' attribute.
        if script.attrs.get("src"):
            script_url = urljoin(url, script.attrs.get("src"))
            script_files.append(script_url)
    print("Total script files in the page:", len(script_files))

    # Download each script and scan it line by line for honeytokens.
    for i, js_file in enumerate(script_files, start=1):
        print("Going through file " + str(i))
        js_content = session.get(js_file).text
        for line in js_content.splitlines():
            token = Tokenfinder.find_tokens_in_string(line)
            if token:
                found_tokens.append(token)

    if found_tokens:
        for entry in found_tokens:
            print(entry)
    else:
        print("No honeytoken found on this website")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", "-f", type=str, required=True,
                        help="URL of the website to scan for honeytokens")
    args = parser.parse_args()
    check_cloned_website(args.file)


if __name__ == "__main__":
    print("------- START CLONED WEBSITE ------")
    main()
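
The imported tokenfinder module is not part of this file; the only interface the script relies on is Tokenfinder.find_tokens_in_string(text), which is assumed to take a string and return the matched honeytoken (or a falsy value when nothing matches). Below is a minimal, hypothetical stand-in for that module, useful only for trying the script out; its structure and the regex are illustrative assumptions, not the real implementation.

# tokenfinder.py -- hypothetical stub, NOT the real module this repository ships.
# It only mirrors the single call used above:
#   Tokenfinder.find_tokens_in_string(text) -> matched token string, or None.
import re


class Tokenfinder:
    # Illustrative pattern only; the real honeytoken format is an assumption.
    _TOKEN_RE = re.compile(r"https?://\S*canarytokens\.com/\S*")

    @staticmethod
    def find_tokens_in_string(text):
        match = Tokenfinder._TOKEN_RE.search(text)
        return match.group(0) if match else None


# Example invocation (the URL is a placeholder):
#   python cloned_website.py --file https://example.com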