-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlast_ranked.py
126 lines (91 loc) · 3.81 KB
/
last_ranked.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python3
import asyncio
from aiohttp import ClientSession
import typing as t
import json
from pathlib import Path
from main import fetch_leaders, BROWSER_USER_AGENT, DELAY, METRICS, LOGGER, Metric, MetricLeader, submit_updates
#########################################################
# START Configuration
#########################################################
PAGE_SKIP = 1_000
""" The amount of pages to skip when searching for new bounds """
MAX_PAGE = 20_000  # Last league ended with 14_184 pages.
""" The absolute last possible page of any metric """
LAST_PAGES_FILE = "last_pages.json"
""" The file where all previous last pages are stored """
#########################################################
# END Configuration
#########################################################
# The only metrics this script scrapes: during a League the hiscores are keyed
# on League Points. (NOTE(review): the meaning of the two integer arguments is
# defined by main.Metric — confirm there before changing them.)
LEAGUES_ONLY: t.Final[t.List[Metric]] = [
    Metric("League Points", 0, 1)
]
async def binary_search(session: ClientSession, metric: Metric, low: int = 1,
                        high: int = MAX_PAGE) -> t.Tuple[int, t.Optional[MetricLeader]]:
    """Find the last page with data (and its last-ranked player) for a metric.

    A page past the end of the hiscores is detected by its first entry having
    rank 1 — the API wraps back to the first page for out-of-range pages.

    :param session: open aiohttp session used for all requests
    :param metric: the hiscore metric to search
    :param low: first page of the search range (assumed to contain data)
    :param high: last page of the search range
    :return: ``(last_page_with_data, last_player)``. ``last_player`` is the
        bottom entry of the last page with data, or ``None`` when no probed
        page contained data (e.g. the board already ends at/before ``low``).
    """
    _low = low
    _high = high
    last_page_with_data = low
    # Bug fix: previously `last_player` was left unbound when every probed
    # page wrapped to rank 1, raising UnboundLocalError at the return below.
    last_player: t.Optional[MetricLeader] = None
    while _low <= _high:
        mid = (_low + _high) // 2
        board = await fetch_leaders(session, metric, mid)
        if board[0].rank != 1:
            # Page has real data: remember it and keep searching higher.
            last_page_with_data = mid
            last_player = board[-1]
            _low = mid + 1
        else:
            # Wrapped to the first page: the end is at or below `mid`.
            _high = mid - 1
        await asyncio.sleep(DELAY)  # rate-limit between hiscore requests
    return last_page_with_data, last_player
async def new_bounds(session: ClientSession, metric: Metric, low: int) -> t.Tuple[int, int]:
    """Find a PAGE_SKIP-wide window containing the board's end, starting from
    yesterday's last known page.

    Probes pages in PAGE_SKIP strides until one wraps back to rank 1 (i.e. is
    past the end), then returns the window ``(new_low, new_high)`` bracketing
    the boundary for :func:`binary_search`.

    :param session: open aiohttp session used for all requests
    :param metric: the hiscore metric to probe
    :param low: yesterday's last page with data; lower bound of the search
    :return: ``(low_page, high_page)`` bounds for the binary search
    """
    new_high = low + PAGE_SKIP
    new_low = low
    while True:
        # Bug fix: cap the probe at MAX_PAGE so the loop is guaranteed to
        # terminate — previously it could walk past MAX_PAGE forever if the
        # wrap-to-rank-1 sentinel was never observed.
        if new_high >= MAX_PAGE:
            return (new_low, MAX_PAGE)
        board = await fetch_leaders(session, metric, new_high)
        if board[0].rank == 1:
            # Past the end: the boundary lies within this window.
            return (new_low, new_high)
        # Still data here: slide the whole window up by one stride.
        new_high += PAGE_SKIP
        new_low += PAGE_SKIP
        await asyncio.sleep(DELAY)  # rate-limit, consistent with binary_search
async def find_last_players(session: ClientSession) -> t.List[MetricLeader]:
    """Find the last-ranked player for every tracked metric.

    Loads the previous run's last pages from LAST_PAGES_FILE to narrow each
    search, then persists the updated pages for the next scrape.

    :param session: open aiohttp session used for all requests
    :return: the players ranked last in each metric of LEAGUES_ONLY
    """
    if Path(LAST_PAGES_FILE).is_file():
        with open(LAST_PAGES_FILE, "r") as f:
            last_pages = json.load(f)
    else:
        # First run: no prior bounds, binary-search the full range.
        last_pages = {}
    # The players ranked last in each metric
    last_players: t.List[MetricLeader] = []
    for metric in LEAGUES_ONLY:
        LOGGER.info(f"Finding last player for {metric.name}.")
        if metric.name in last_pages:
            last_page = last_pages[metric.name]
            if last_page == MAX_PAGE:
                # Already at the cap: only the tail [last_page, MAX_PAGE]
                # needs probing, no bounds expansion possible.
                low, high = last_page, MAX_PAGE
            else:
                low, high = await new_bounds(session, metric, last_page)
            last_page_with_data, last_player = await binary_search(session, metric, low, high)
        else:
            last_page_with_data, last_player = await binary_search(session, metric)
        # Bug fix: the MAX_PAGE branch previously `continue`d here, skipping
        # both the last_pages bookkeeping and the success log below; every
        # path now flows through the same tail.
        last_players.append(last_player)
        last_pages[metric.name] = last_page_with_data
        LOGGER.info(
            f"Found last page of {metric.name} at page {last_page_with_data} and last player: {last_player.username}.")
    # Write last pages to file for use at next scrape
    data = json.dumps(last_pages, indent=4)
    with open(LAST_PAGES_FILE, "w") as f:
        LOGGER.info("Writing last pages to file...")
        f.write(data)
    return last_players
async def main() -> None:
    """Entry point: scrape the last pages of each metric and submit updates."""
    LOGGER.info("*" * 64)
    LOGGER.info("WOM Leagues Last Page Scraper starting...")
    # Bug fix: use the session as an async context manager so it is closed
    # even when the scrape raises — previously an exception in
    # find_last_players leaked the open ClientSession.
    async with ClientSession(headers={"User-Agent": BROWSER_USER_AGENT}) as session:
        last_players = await find_last_players(session)
    LOGGER.info("Scrape complete")
    await submit_updates(last_players)
    LOGGER.info("*" * 64)
if __name__ == "__main__":
    asyncio.run(main())