From ed3e04e5fee6d206f609584ca6b83939dea93574 Mon Sep 17 00:00:00 2001
From: yixinBC
Date: Tue, 20 Jun 2023 21:32:43 +0800
Subject: [PATCH] complete basic functions

---
 .gitignore                           |  1 +
 README.md                            | 44 ++++++++++++++
 pyproject.toml                       |  9 ++-
 src/aria2_dht_dat_parser/__init__.py |  1 +
 src/aria2_dht_dat_parser/__main__.py | 18 ++++++
 src/aria2_dht_dat_parser/parser.py   | 87 ++++++++++++++++++++++++++++
 6 files changed, 156 insertions(+), 4 deletions(-)
 create mode 100644 src/aria2_dht_dat_parser/__init__.py
 create mode 100644 src/aria2_dht_dat_parser/__main__.py
 create mode 100644 src/aria2_dht_dat_parser/parser.py

diff --git a/.gitignore b/.gitignore
index f8f38ac..607bff5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 build/
 dist/
 .venv/
+.vscode/
 *.egg-info/
 __pycache__/
 *.py[cod]
diff --git a/README.md b/README.md
index eeeb2a5..75e48f7 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,47 @@
 # aria2_dht_dat_parser
 
 parse aria2's dht.dat into human-readable json
+
+## Install
+
+```bash
+pip install aria2-dht-dat-parser
+```
+
+## Usage
+
+use as a cli tool
+
+```bash
+aria2_dht_dat_parser -i dht.dat [-o dht.json]
+```
+
+use as a python module
+
+```python
+from aria2_dht_dat_parser import parse
+result = parse('dht.dat')
+```
+
+## Example
+
+```json
+{
+  "format_id": 2,
+  "version": 3,
+  "file_saved_time": 1620000000,
+  "local_node_id": "<40 hex characters>",
+  "num_node": 100,
+  "nodes": [
+    {
+      "ip": "127.0.0.1",
+      "port": 6881,
+      "id": "<40 hex characters>"
+    }, ...(in total 100 nodes)
+  ]
+}
+```
+
+## License
+
+MIT
diff --git a/pyproject.toml b/pyproject.toml
index e7f9f85..6aceb56 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,13 +3,14 @@
 name = "aria2_dht_dat_parser"
 version = "0.1.0"
 description = "parse aria2's dht.dat into human-readable json"
-authors = [
-    {name = "yixinBC", email = "yixinbc@foxmail.com"},
-]
+authors = [{ name = "yixinBC", email = "yixinbc@foxmail.com" }]
 dependencies = []
 requires-python = ">=3.7"
 readme = "README.md"
-license = {text = "MIT"}
+license = { text = "MIT" }
+
+[project.scripts]
+aria2_dht_dat_parser = "aria2_dht_dat_parser.__main__:main"
 
 [build-system]
 requires = ["pdm-backend"]
diff --git a/src/aria2_dht_dat_parser/__init__.py b/src/aria2_dht_dat_parser/__init__.py
new file mode 100644
index 0000000..17c35ad
--- /dev/null
+++ b/src/aria2_dht_dat_parser/__init__.py
@@ -0,0 +1 @@
+from .parser import parse
diff --git a/src/aria2_dht_dat_parser/__main__.py b/src/aria2_dht_dat_parser/__main__.py
new file mode 100644
index 0000000..df2ab11
--- /dev/null
+++ b/src/aria2_dht_dat_parser/__main__.py
@@ -0,0 +1,18 @@
+import argparse
+import sys
+import json
+from .parser import parse
+
+
+def main():
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument("-i", help="input dht.dat file", required=True)
+    arg_parser.add_argument(
+        "-o", help="output file", type=argparse.FileType("w"), default=sys.stdout
+    )
+    args = arg_parser.parse_args()
+    args.o.write(json.dumps(parse(args.i), indent=2))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/aria2_dht_dat_parser/parser.py b/src/aria2_dht_dat_parser/parser.py
new file mode 100644
index 0000000..136d7e0
--- /dev/null
+++ b/src/aria2_dht_dat_parser/parser.py
@@ -0,0 +1,87 @@
+import os
+from socket import inet_ntop, AF_INET, AF_INET6
+
+
+def _read_exact(f, size: int) -> bytes:
+    """Read exactly ``size`` bytes from ``f``; raise ValueError on a short read."""
+    data = f.read(size)
+    if len(data) != size:
+        raise ValueError("Unexpected end of file")
+    return data
+
+
+def parse(path: str) -> dict:
+    """
+    Parse an aria2 DHT file (dht.dat) and return its contents as a dictionary.
+
+    [WARNING] This function is not tested on IPv6 addresses.
+
+    Args:
+        path (str): The path to the aria2 DHT file.
+
+    Returns:
+        dict: A JSON-like dictionary containing the parsed contents of the file.
+        Node ids are hex strings (40 characters for the 20 raw bytes).
+        Its structure is as follows:
+        {
+            "format_id": 2,
+            "version": 3,
+            "file_saved_time": 1620000000,
+            "local_node_id": "<40 hex characters>",
+            "num_node": 100,
+            "nodes": [
+                {
+                    "ip": "127.0.0.1",
+                    "port": 6881,
+                    "id": "<40 hex characters>"
+                }, ...(in total 100 nodes)
+            ]
+        }
+
+    Raises:
+        ValueError: If the file has an invalid magic header, or if it ends
+            before a timestamp, node count, address, port or node id has
+            been read in full.
+        FileNotFoundError: If the file does not exist.
+        AssertionError: If the file has an invalid format id or version.
+        AssertionError: If the file has an invalid node length.
+    """
+    if not os.path.isfile(path):
+        raise FileNotFoundError(f"File not found: {path}")
+    result = {}
+    with open(path, "rb") as f:
+        if f.read(2) != b"\xA1\xA2":
+            raise ValueError("Invalid magic header")
+        # The format checks raise explicitly instead of using ``assert`` so
+        # that they are not stripped when Python runs with -O.
+        result["format_id"] = int.from_bytes(f.read(1), "big")
+        if result["format_id"] != 2:
+            raise AssertionError(f"Invalid format id: {result['format_id']}")
+        f.read(3)  # reserved bytes (3 bytes)
+        result["version"] = int.from_bytes(f.read(2), "big")
+        if result["version"] != 3:
+            raise AssertionError(f"Invalid version: {result['version']}")
+        result["file_saved_time"] = int.from_bytes(_read_exact(f, 8), "big")
+        f.read(8)  # reserved bytes (8 bytes)
+        result["local_node_id"] = _read_exact(f, 20).hex()
+        f.read(4)  # reserved bytes (4 bytes)
+        result["num_node"] = int.from_bytes(_read_exact(f, 4), "big")
+        f.read(4)  # reserved bytes (4 bytes)
+        result["nodes"] = []
+        for _ in range(result["num_node"]):
+            # Length of the compact address+port field: 6 (IPv4) or 18 (IPv6).
+            plen = int.from_bytes(f.read(1), "big")
+            if plen not in (6, 18):
+                raise AssertionError(f"Invalid node length: {plen}")
+            f.read(7)  # reserved bytes (7 bytes)
+            family = AF_INET if plen == 6 else AF_INET6  # IPv6 not tested
+            # The last 2 bytes of the field are the port; the rest is the IP.
+            node_ip = inet_ntop(family, _read_exact(f, plen - 2))
+            node_port = int.from_bytes(_read_exact(f, 2), "big")
+            f.read(24 - plen)  # reserved bytes (24-PLEN bytes)
+            node_id = _read_exact(f, 20).hex()
+            f.read(4)  # reserved bytes (4 bytes)
+            result["nodes"].append(
+                {"ip": node_ip, "port": node_port, "id": node_id}
+            )
+    return result