-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdump.py
69 lines (54 loc) · 2.74 KB
/
dump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import gzip
from io import BytesIO
from datetime import datetime, timedelta
import xml.etree.ElementTree as ET
from urllib.request import urlopen, Request
import json
# Despite the name, this holds yesterday's date (now minus one day) formatted
# as YYYY-MM-DD; main() derives the same value again for its own use.
todays_date = format(datetime.now() - timedelta(days=1), '%Y-%m-%d')
def download_and_save_xml(url, output_file, approved_keys, iter_str):
    """Download a gzipped XML document, prune it, and write it to disk.

    The payload at ``url`` is gunzipped and parsed as XML. For every element
    whose tag equals ``iter_str``, each direct child whose tag is not in
    ``approved_keys`` is dropped. The pruned tree is then serialised to
    ``output_file``.

    Failures are reported with ``print`` instead of being raised, so the
    caller can carry on; the return value tells it what happened.

    Args:
        url: Location of the gzip-compressed XML document.
        output_file: Path the filtered XML is written to. Missing parent
            directories are created.
        approved_keys: Container of child tag names to keep.
        iter_str: Tag name of the elements whose children are filtered.

    Returns:
        True if the filtered file was written, False otherwise.
    """
    try:
        with urlopen(Request(url, headers={'User-Agent': 'Kractero'})) as response:
            # Non-HTTP handlers (e.g. file://) expose no status code at all;
            # only an explicit non-200 code counts as a failed fetch.
            status = getattr(response, 'status', None)
            if status is not None and status != 200:
                print(f'Failed to fetch dump from NationStates with status {status}')
                return False

            # Decompress and parse straight off the response stream so the
            # compressed bytes and the decompressed text are never both held
            # in memory in full.
            with gzip.GzipFile(fileobj=response, mode='rb') as gzipped_file:
                tree = ET.parse(gzipped_file)

        for parent in tree.getroot().iter(iter_str):
            # Slice assignment replaces the children in one pass and avoids
            # removing items from a sequence while walking it.
            parent[:] = [child for child in parent if child.tag in approved_keys]

        directory = os.path.dirname(output_file)
        if directory:
            os.makedirs(directory, exist_ok=True)

        # Writing to a binary handle yields the same bytes as ET.tostring(root)
        # without building the whole serialised document in memory first.
        with open(output_file, 'wb') as xml_file:
            tree.write(xml_file)

        print(f'XML file downloaded and saved successfully: {output_file}')
        return True
    except Exception as e:
        # Deliberately broad: network, gzip and XML errors are all reported
        # the same way and must not abort the caller.
        print(f'An error occurred: {e}')
        return False
def _read_card_ids(path):
    """Return the set of non-blank, whitespace-stripped lines in ``path``."""
    with open(path, 'r') as file:
        return {line.strip() for line in file if line.strip()}


def _collect_dbids(nations_xml_path):
    """Return the lower-cased DBID text of every NATION element that has one."""
    dbids = set()
    for nation in ET.parse(nations_xml_path).getroot().iter('NATION'):
        dbid = nation.findtext('DBID')
        # findtext gives None for a missing DBID and '' for an empty one;
        # neither can match a card id, so skip them instead of crashing.
        if dbid:
            dbids.add(dbid.lower())
    return dbids


def _prune_old_outputs(directory, cutoff):
    """Delete generated files in ``directory`` dated strictly before ``cutoff``."""
    suffixes = ('-Nations.xml', '-Regions.xml', '-cards.json')
    for filename in os.listdir(directory):
        suffix = next((s for s in suffixes if filename.endswith(s)), None)
        if suffix is None:
            continue
        # Strip the suffix that actually matched, so the date prefix is
        # isolated for all three file kinds rather than only '-Regions.xml'.
        try:
            file_day = datetime.strptime(filename[:-len(suffix)], '%Y-%m-%d').date()
        except ValueError:
            # Not one of our date-stamped files; leave it alone.
            continue
        if file_day < cutoff:
            os.remove(os.path.join(directory, filename))


def main():
    """Fetch both dumps, write the card-status JSON, and prune old files.

    Steps:
      1. Download the regions and nations dumps into ``data/``, keeping only
         the listed child tags. Files are labelled with yesterday's date.
      2. Read ids from ``files/card_ids.txt`` and map each to ``True`` when it
         is absent from the DBIDs in the nations file, ``False`` otherwise.
         The mapping is written to ``data/<date>-cards.json``.
      3. Remove generated files in ``data/`` dated before two days ago.
    """
    # Sample the clock once so both derived dates are mutually consistent
    # even if the run crosses midnight.
    now = datetime.now()
    dump_date = (now - timedelta(days=1)).strftime('%Y-%m-%d')
    cutoff = (now - timedelta(days=2)).date()
    nations_path = f'data/{dump_date}-Nations.xml'

    download_and_save_xml('https://www.nationstates.net/pages/regions.xml.gz', f'data/{dump_date}-Regions.xml', ("NAME", "NUMNATIONS", "UNNATIONS", "DELEGATEVOTES", "DELEGATEAUTH", "LASTUPDATE", "FACTBOOK", "EMBASSIES"), "REGION")
    download_and_save_xml('https://www.nationstates.net/pages/nations.xml.gz', nations_path, ("NAME", "DBID", "ENDORSEMENTS"), "NATION")

    card_ids = _read_card_ids('files/card_ids.txt')
    ids_in_xml = _collect_dbids(nations_path)

    # Sorted so the JSON key order is stable from run to run (set iteration
    # order is not).
    cards = {card_id: card_id.lower() not in ids_in_xml for card_id in sorted(card_ids)}
    with open(f'data/{dump_date}-cards.json', 'w') as json_file:
        json.dump(cards, json_file, indent=2)

    _prune_old_outputs('data', cutoff)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()