-
Notifications
You must be signed in to change notification settings - Fork 0
/
link_collector_button_1.py
36 lines (27 loc) · 1.08 KB
/
link_collector_button_1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import re
import sys
import requests
from bs4 import BeautifulSoup
import json
from urllib.parse import urlparse, parse_qs
import os
# Build the table-of-contents page links for a novel and save them as JSON.
#
# Steps:
#   1. Ask the user how many table-of-contents pages to generate links for.
#   2. Read the novel URL from data/temp/url.txt.
#   3. Derive the site origin (scheme + host) and the novel identifier
#      (the part of the URL between "novel/" and ".html").
#   4. Write a {link: page_number} mapping to data/temp/link_content.json.
page_contents = int(input(' Введите количество переходов в оглавлении:'))
directory = 'data/temp/'

with open(f"{directory}url.txt", "r", encoding="utf-8") as f:
    # Strip surrounding whitespace so that a trailing newline in url.txt is
    # not treated as part of the URL.
    url = f.read().strip()

parsed_url = urlparse(url)
domain = parsed_url.scheme + '://' + parsed_url.netloc

# re.search returns None when the pattern is absent; check explicitly so the
# failure names the offending URL instead of surfacing as an AttributeError
# on None.
name_match = re.search(r"novel/(.+)\.html", url)
if name_match is None:
    raise ValueError(
        f"Не удалось извлечь название новеллы из URL: {url!r}"
    )
name_novel = name_match.group(1)
# print(name_novel)

# One link per table-of-contents page, numbered from 0 to page_contents - 1.
link_content = {
    f"{domain}/e/extend/fy.php?page={page_number}&wjm={name_novel}":
        page_number
    for page_number in range(page_contents)
}

# ensure_ascii=False keeps any non-ASCII characters readable in the file.
json_data = json.dumps(link_content, indent=4, ensure_ascii=False)
with open(f"{directory}link_content.json", "w", encoding="utf-8") as outfile:
    outfile.write(json_data)

print(" Ссылки на главы получены и сохранены !")