-
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
265 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
import os | ||
from concurrent.futures import ProcessPoolExecutor, as_completed | ||
from typing import Iterator, Optional, Tuple | ||
from loguru import logger as log | ||
import typer | ||
from pathlib import Path | ||
import iscc_sdk as idk | ||
from rich.console import Console | ||
from rich.progress import ( | ||
Progress, | ||
BarColumn, | ||
TextColumn, | ||
TransferSpeedColumn, | ||
TimeRemainingColumn, | ||
DownloadColumn, | ||
) | ||
|
||
console = Console() | ||
app = typer.Typer(add_completion=False, no_args_is_help=True) | ||
|
||
|
||
def log_formatter(record: dict) -> str:  # pragma: no cover
    """Return the log format string for *record*, styled by the record's level name.

    The level name selects a bracketed markup style (``cyan`` when the level is
    not in the table) that wraps the ``{message}`` placeholder. The timestamp,
    module and level icon placeholders are left for the logger to fill in.
    """
    level_styles = dict(
        TRACE="blue",
        DEBUG="cyan",
        INFO="bold",
        SUCCESS="bold green",
        WARNING="yellow",
        ERROR="bold red",
        CRITICAL="bold white on red",
    )
    style = level_styles.get(record["level"].name, "cyan")
    # Static part: timestamp, padded module name and level icon.
    prefix = "[not bold green]{time:YYYY/MM/DD HH:mm:ss}[/not bold green] | {module:<12} | {level.icon}"
    # Doubled braces keep `{message}` as a literal placeholder in the result.
    message_part = f" - [{style}]{{message}}[/{style}]"
    return prefix + message_part
|
||
|
||
def iter_unprocessed(path, root_path=None):
    # type: (str|Path, Optional[str|Path]) -> Iterator[Tuple[Path, int]]
    """
    Walk directory tree recursively with deterministic ordering and yield unprocessed files.

    Yields (file_path, size) tuples:

    - file_path: pathlib.Path object built from the scanned directory entry
    - size: integer file size in number of bytes

    File-entries are yielded in reproducible and deterministic order (bottom-up): entries are
    sorted by name and sub-directories are walked before the files of the current directory.

    Ignored silently:

    - symlinks (neither followed into directories nor yielded as files)
    - result files (name ends with ``.iscc.json``)
    - files that already have a sibling ``<name>.iscc.json`` result file

    :param path: Directory to scan.
    :param root_path: Directory the walk started from. It is only forwarded to recursive
        calls; yielded paths are not made relative to it.

    Implementation Note: We use os.scandir to reduce the number of syscalls for metadata collection.
    """
    root_path = Path(root_path or path)
    with os.scandir(path) as entries:
        # Sorting consumes the iterator, so the scandir handle is not needed afterwards.
        sorted_entries = sorted(entries, key=lambda e: e.name)

    # Separate directories and files. DirEntry.is_dir()/is_file() follow symlinks by
    # default; disable that so links are skipped and link cycles cannot cause endless
    # recursion.
    dirs = [entry for entry in sorted_entries if entry.is_dir(follow_symlinks=False)]
    files = [entry for entry in sorted_entries if entry.is_file(follow_symlinks=False)]

    # Recursively process directories first (bottom-up traversal)
    for dir_entry in dirs:
        yield from iter_unprocessed(Path(dir_entry.path), root_path=root_path)

    # Process files in the current directory
    for file_entry in files:
        file_path = Path(file_entry)
        # Ignore result files
        if file_path.name.endswith(".iscc.json"):
            continue
        # Ignore files that have results
        if Path(file_path.as_posix() + ".iscc.json").exists():
            continue
        file_size = file_entry.stat().st_size
        yield file_path, file_size
|
||
|
||
def process_file(fp: Path):
    """Generate the ISCC for *fp* and return ``(fp, result)``.

    Any exception raised during generation is captured and returned in place
    of the result instead of being raised, so the caller can tell which file
    failed and why.
    """
    try:
        outcome = idk.code_iscc(fp.as_posix())
    except Exception as error:
        return fp, error
    return fp, outcome
|
||
|
||
@app.command()
def create(file: Path):
    """Create ISCC-CODE for single FILE."""
    # The docstring above is kept verbatim because typer shows it as the command help.
    # Guard clause: anything that is not an existing regular file is rejected with exit code 1.
    if not (file.is_file() and file.exists()):
        typer.echo(f"Invalid file path {file}")
        raise typer.Exit(code=1)

    # Print the generated metadata as indented JSON.
    iscc_meta = idk.code_iscc(file.as_posix())
    typer.echo(iscc_meta.json(indent=2))
|
||
|
||
@app.command()
def batch(folder: Path, workers: int = os.cpu_count()):
    """Create ISCC-CODEs for files in FOLDER (parallel & recursive)."""
    # The docstring above is kept verbatim because typer shows it as the command help.
    # Route log records through the shared console.
    log.add(console.print, level="TRACE", format=log_formatter, colorize=True)
    if not (folder.is_dir() and folder.exists()):
        typer.echo(f"Invalid folder {folder}")
        raise typer.Exit(1)

    # Collect all pending files up front: the progress bar total is the summed byte size.
    size_by_path = dict(iter_unprocessed(folder))
    pending_paths = list(size_by_path)
    total_bytes = sum(size_by_path.values())

    separator = "•"
    columns = (
        TextColumn("[bold blue]Processing {task.fields[dirname]}", justify="right"),
        BarColumn(),
        "[progress.percentage]{task.percentage:>3.1f}%",
        separator,
        DownloadColumn(),
        separator,
        TransferSpeedColumn(),
        separator,
        TimeRemainingColumn(),
    )
    progress = Progress(*columns, console=console)

    with progress:
        task_id = progress.add_task("Processing", dirname=folder.name, total=total_bytes)

        with ProcessPoolExecutor(max_workers=workers) as executor:
            jobs = [executor.submit(process_file, pending) for pending in pending_paths]
            for finished in as_completed(jobs):
                # process_file returns the path with either the metadata or the exception.
                fp, outcome = finished.result()
                if isinstance(outcome, idk.IsccMeta):
                    # Store the result next to the source file as "<name>.iscc.json".
                    result_path = Path(fp.as_posix() + ".iscc.json")
                    with result_path.open(mode="wt") as outf:
                        outf.write(outcome.json(indent=2))
                    log.debug(f"Finished {fp.name}")
                else:  # pragma: no cover
                    log.warning(f"Failed {fp.name}: {outcome}")
                # Advance by the file's size whether it succeeded or failed.
                progress.update(task_id, advance=size_by_path[fp], refresh=True)
|
||
|
||
if __name__ == "__main__": # pragma: no cover | ||
app() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
from pathlib import Path | ||
from typing import Tuple | ||
|
||
from typer.testing import CliRunner | ||
from iscc_sdk.cli import app | ||
import iscc_samples as iss | ||
import json | ||
|
||
|
||
runner = CliRunner() | ||
|
||
|
||
def test_iter_unprocessed():
    """iter_unprocessed yields (Path, int) tuples for the audio sample directory."""
    from iscc_sdk.cli import iter_unprocessed

    audio_dir = iss.audios()[0].parent
    entries = list(iter_unprocessed(audio_dir))
    first = entries[0]
    assert isinstance(first, Tuple)
    first_path = first[0]
    first_size = first[1]
    assert isinstance(first_path, Path)
    assert isinstance(first_size, int)
    assert len(entries) == 10
|
||
|
||
def test_process_file(jpg_file):
    """process_file returns the input path together with the expected ISCC for the JPG fixture."""
    from iscc_sdk.cli import process_file

    source = Path(jpg_file)
    returned_path, meta = process_file(source)
    assert returned_path == source
    assert meta.iscc == "ISCC:KECWRY3VY6R5SNV4YNBTBHR4T2HGP3HKVFO7TYUP2BKVFG724W63HVI"
|
||
|
||
def test_process_file_error():
    """process_file returns the exception instead of raising for a missing file."""
    from iscc_sdk.cli import process_file

    missing = Path("does-not-exist")
    returned_path, outcome = process_file(missing)
    assert returned_path == missing
    assert isinstance(outcome, Exception)
|
||
|
||
def test_cli_no_arg():
    """Invoking the app without arguments exits with code 0 and prints usage help."""
    outcome = runner.invoke(app)
    exit_code, output = outcome.exit_code, outcome.stdout
    assert exit_code == 0
    assert "Usage" in output
|
||
|
||
def test_cli_create_no_arg():
    """`create` without a FILE argument exits with code 2 and reports the missing argument."""
    cli_args = ["create"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 2
    assert "Missing argument 'FILE'" in outcome.stdout
|
||
|
||
def test_cli_create_not_file():
    """`create` with a non-existent path exits with code 1 and reports an invalid file path."""
    cli_args = ["create", "not-a-file"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 1
    assert "Invalid file path" in outcome.stdout
|
||
|
||
def test_cli_create():
    """`create` prints the full ISCC metadata of the MP3 sample as JSON."""
    expected = {
        "@context": "http://purl.org/iscc/context/0.4.0.jsonld",
        "$schema": "http://purl.org/iscc/schema/0.4.0.json",
        "@type": "AudioObject",
        "iscc": "ISCC:KIC2JKSX7OH5PBIENISKEJTS4TRKHYJBCZDNLQXYILWJHQAP3N3KPTQ",
        "name": "Belly Button",
        "mode": "audio",
        "filename": "demo.mp3",
        "filesize": 225707,
        "mediatype": "audio/mpeg",
        "duration": 15,
        "metahash": "1e20c4933dc8c03ea58568159a1cbfb04132c7db93b6b4cd025ffd4db37f52a4756f",
        "datahash": "1e20ec93c00fdb76a7cec587e4a2bddfa8d0a0bac8110d0c7130c351ea07c366d626",
    }
    sample = iss.audios(ext="mp3")[0]
    outcome = runner.invoke(app, ["create", sample.as_posix()])
    assert outcome.exit_code == 0
    assert json.loads(outcome.stdout) == expected
|
||
|
||
def test_cli_batch_no_arg():
    """`batch` without a FOLDER argument exits with code 2 and reports the missing argument."""
    cli_args = ["batch"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 2
    assert "Missing argument 'FOLDER'" in outcome.stdout
|
||
|
||
def test_cli_batch_not_a_folder():
    """`batch` with a non-existent folder exits with code 1 and reports an invalid folder."""
    cli_args = ["batch", "not-a-folder"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 1
    assert "Invalid folder" in outcome.stdout
|
||
|
||
def test_cli_batch(image_dir):
    """After a successful `batch` run no unprocessed files remain in the folder."""
    from iscc_sdk.cli import iter_unprocessed

    outcome = runner.invoke(app, ["batch", image_dir.as_posix()])
    assert outcome.exit_code == 0
    remaining = list(iter_unprocessed(image_dir))
    assert remaining == []