a.py
# https://github.com/Saebasol/tag-info/blob/master/script/auto_complete.py
import logging
from asyncio import run, sleep
from json import dumps, loads

from aiohttp import ClientSession

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter("%(asctime)s - (%(name)s) - [%(levelname)s]: %(message)s")

# Console handler: INFO and above.
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)

# File handler: full DEBUG log in auto_complete.log.
fh = logging.FileHandler("auto_complete.log")
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
logger.addHandler(fh)


class Request:
    # Accumulates {query: korean_name} results across requests.
    crawled_dict = {}

    def __init__(self, key) -> None:
        self.key = key
        self.session = None
        self._req_list = []

    def load_character(self):
        logger.info("load character file")
        with open("test2.json", "r", encoding="UTF-8") as f:
            return loads(f.read())
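
    # NOTE (assumption): test2.json is expected to map names to Korean names,
    # with an empty string for entries that still need to be looked up, e.g.
    #   {"unresolved name": "", "resolved name": "<korean name>"}
    # Only keys whose value is "" are queried in __run below.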

    async def request(self, query):
        # Query the Google Knowledge Graph Search API for a single Korean-language entity.
        PARAMS = {
            "limit": 1,
            "indent": "true",
            "languages": "ko",
            "key": self.key,
            "query": query,
        }
        if not self.session:
            logger.info("Make Session")
            self.session = ClientSession()
        # "Hubo" (후보) = candidate keywords: the entity is accepted only if its
        # description or detailed description mentions one of these media-related terms.
        descHubo = ['애니메이션', '게임', 'TV 프로그램', '영화', '만화', '등장', '책']
        bodyHubo = ['라이트 노벨', '게임', '만화', '애니메이션', '소설']
        for _ in range(5):
            async with self.session.get("https://kgsearch.googleapis.com/v1/entities:search", params=PARAMS) as response:
                logger.info("get %s", query)
                if response.status == 429:
                    # Rate limited: back off and retry (up to 5 attempts).
                    logger.warning("429 sleep")
                    await sleep(30)
                    continue
                data = await response.json()
                logger.info(data)
                if element := data["itemListElement"]:
                    result = element[0]["result"]
                    desc = result.get("description")
                    body = result.get("detailedDescription", {}).get("articleBody")
                    if (desc and any(item in desc for item in descHubo)) or \
                            (body and any(item in body for item in bodyHubo)):
                        logger.info("found korean_name: %s", result["name"])
                        return {query: result["name"]}
                # Valid response but no acceptable match: stop retrying.
                break
        logger.info("Not found %s", query)
        return {query: ""}
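
    # NOTE: the parsing in request() assumes the Knowledge Graph response shape
    # implied by the accesses above (simplified sketch, not the full schema):
    #   {"itemListElement": [{"result": {"name": ...,
    #                                    "description": ...,
    #                                    "detailedDescription": {"articleBody": ...}}}]}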

    async def __run(self):
        try:
            logger.info("start append task")
            chs = self.load_character()
            for character_key in chs:
                # Skip entries whose Korean name is already filled in.
                if chs[character_key] != '':
                    logger.info(chs[character_key])
                    continue
                self._req_list.append(self.request(character_key))
            logger.info("Complete append task")
            logger.info("Start Tasks")
            # TODO: make this faster, e.g. by running the requests concurrently
            count = 0
            total = len(self._req_list)
            for req in self._req_list:
                count += 1
                logger.info("%s/%s", count, total)
                self.crawled_dict.update(await req)
        finally:
            # Always close the session (if one was opened) and save progress,
            # even when a request fails midway.
            if self.session:
                logger.info("close clientsession")
                await self.session.close()
            with open("acrawled.json", "w") as f:
                logger.info("Saving...")
                f.write(dumps(self.crawled_dict))
            logger.info("Done!")

    def run(self):
        logger.info("Starting")
        run(self.__run())
        logger.info("Done.")


Request("AIzaSyCQYJvJzWy-iUHCi9C6E8o1RpOY5K4ae-8").run()