Skip to content

Commit

Permalink
complete basic functions
Browse files Browse the repository at this point in the history
  • Loading branch information
yixinBC committed Jun 20, 2023
1 parent 4855546 commit ed3e04e
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
build/
dist/
.venv/
.vscode/
*.egg-info/
__pycache__/
*.py[cod]
Expand Down
44 changes: 44 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,47 @@
# aria2_dht_dat_parser

parse aria2's dht.dat into human-readable json

## Install

```bash
pip install aria2-dht-dat-parser
```

## Usage

use as a cli tool

```bash
aria2_dht_dat_parser -i dht.dat [-o dht.json]
```

use as a python module

```python
from aria2_dht_dat_parser import parse
result = parse('dht.dat')
```

## Example

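```jsonc
{
  "format_id": 2,
  "version": 3,
  "file_saved_time": 1620000000,
  // node IDs are 20 bytes, rendered as 40 hexadecimal characters
  "local_node_id": "a1b2c3d4e5f60718293a4b5c6d7e8f9012345678",
  "num_node": 100,
  "nodes": [
    {
      "ip": "127.0.0.1",
      "port": 6881,
      "id": "0123456789abcdef0123456789abcdef01234567"
    }
    // ... 100 nodes in total
  ]
}
```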

## License

MIT
9 changes: 5 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
name = "aria2_dht_dat_parser"
version = "0.1.0"
description = "parse aria2's dht.dat into human-readable json"
authors = [
{name = "yixinBC", email = "[email protected]"},
]
authors = [{ name = "yixinBC", email = "[email protected]" }]
dependencies = []
requires-python = ">=3.7"
readme = "README.md"
license = {text = "MIT"}
license = { text = "MIT" }

[project.scripts]
aria2_dht_dat_parser = "aria2_dht_dat_parser.__main__:main"

[build-system]
requires = ["pdm-backend"]
Expand Down
1 change: 1 addition & 0 deletions src/aria2_dht_dat_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .parser import parse
18 changes: 18 additions & 0 deletions src/aria2_dht_dat_parser/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import argparse
import sys
import json
from .parser import parse


def main():
    """Command-line entry point: parse a dht.dat file and emit it as JSON.

    Options:
        -i  Path to the input dht.dat file (required).
        -o  Path to the output file. Defaults to standard output; "-" also
            selects standard output.

    The input is parsed *before* the output file is opened, so a missing or
    malformed input never truncates an already existing output file.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-i", help="input dht.dat file", required=True)
    # Take the output as a plain path instead of argparse.FileType("w"):
    # FileType opens (and truncates) the file during argument parsing and
    # never closes it.
    arg_parser.add_argument("-o", help="output file", default=None)
    args = arg_parser.parse_args()

    text = json.dumps(parse(args.i), indent=2)

    if args.o is None or args.o == "-":
        sys.stdout.write(text)
        return
    with open(args.o, "w") as out:
        out.write(text)


if __name__ == "__main__":
    main()
73 changes: 73 additions & 0 deletions src/aria2_dht_dat_parser/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os
from socket import inet_ntop, AF_INET, AF_INET6


def _read_exact(stream, size: int, what: str) -> bytes:
    """Read exactly *size* bytes from *stream*; raise ValueError on a short read."""
    data = stream.read(size)
    if len(data) != size:
        raise ValueError(
            f"Unexpected end of file while reading {what}: "
            f"expected {size} bytes, got {len(data)}"
        )
    return data


def parse(path: str) -> dict:
    """Parse an aria2 DHT routing table file (dht.dat) into a dictionary.

    [WARNING] IPv6 entries have not been tested against real aria2 files.

    Args:
        path (str): The path to the aria2 DHT file.

    Returns:
        dict: A JSON-like dictionary with the parsed contents of the file.
        Node IDs are 20 bytes rendered as 40 hexadecimal characters.
        Its structure is as follows:
        {
            "format_id": 2,
            "version": 3,
            "file_saved_time": 1620000000,
            "local_node_id": "a1b2c3d4e5f60718293a4b5c6d7e8f9012345678",
            "num_node": 100,
            "nodes": [
                {
                    "ip": "127.0.0.1",
                    "port": 6881,
                    "id": "0123456789abcdef0123456789abcdef01234567"
                },
                ... (100 nodes in total)
            ]
        }

    Raises:
        FileNotFoundError: If ``path`` is not an existing regular file.
        ValueError: If the file has an invalid magic header, or ends before
            all announced fields and node entries have been read.
        AssertionError: If the file has an unsupported format id or version.
        AssertionError: If a node entry has an invalid address length.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError(f"File not found: {path}")

    result = {}
    with open(path, "rb") as f:
        if f.read(2) != b"\xA1\xA2":
            raise ValueError("Invalid magic header")

        # Explicit raises instead of `assert`, so the checks survive
        # `python -O`; the exception type is kept for existing callers.
        result["format_id"] = _read_exact(f, 1, "format id")[0]
        if result["format_id"] != 2:
            raise AssertionError(
                f"Unsupported format id: {result['format_id']}"
            )
        _read_exact(f, 3, "reserved bytes")

        result["version"] = int.from_bytes(_read_exact(f, 2, "version"), "big")
        if result["version"] != 3:
            raise AssertionError(f"Unsupported version: {result['version']}")

        result["file_saved_time"] = int.from_bytes(
            _read_exact(f, 8, "file saved time"), "big"
        )
        _read_exact(f, 8, "reserved bytes")
        result["local_node_id"] = _read_exact(f, 20, "local node id").hex()
        _read_exact(f, 4, "reserved bytes")
        result["num_node"] = int.from_bytes(
            _read_exact(f, 4, "node count"), "big"
        )
        _read_exact(f, 4, "reserved bytes")

        nodes = []
        for index in range(result["num_node"]):
            where = f"node {index}"
            # Length of the compact peer info: address bytes + 2 port bytes.
            plen = _read_exact(f, 1, f"{where} address length")[0]
            if plen not in (6, 18):
                raise AssertionError(
                    f"Invalid address length {plen} in {where}"
                )
            _read_exact(f, 7, f"{where} reserved bytes")

            family = AF_INET if plen == 6 else AF_INET6  # IPv6 not tested
            node_ip = inet_ntop(
                family, _read_exact(f, plen - 2, f"{where} address")
            )
            node_port = int.from_bytes(
                _read_exact(f, 2, f"{where} port"), "big"
            )
            # The compact peer info is padded to a fixed 24-byte slot.
            _read_exact(f, 24 - plen, f"{where} reserved bytes")
            node_id = _read_exact(f, 20, f"{where} id").hex()
            _read_exact(f, 4, f"{where} reserved bytes")

            nodes.append({"ip": node_ip, "port": node_port, "id": node_id})
        result["nodes"] = nodes
    return result

0 comments on commit ed3e04e

Please sign in to comment.