Skip to content

Commit

Permalink
Add basic command line interface
Browse files Browse the repository at this point in the history
  • Loading branch information
titusz committed Jun 20, 2023
1 parent 389fc32 commit 33bed59
Show file tree
Hide file tree
Showing 6 changed files with 265 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## 0.5.7 - Unreleased
- Added parallel processing of ISCC-UNITs
- Handle video thumbnail extraction errors gracefully
- Add basic command line interface
- Updated dependencies
- Fixed mkdocstrings

Expand Down
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ import iscc_sdk as idk
print(idk.code_iscc("/path/to/mediafile.jpg"))
```

Using the CLI tool to create an ISCC-CODE

```shell
idk create /path/to/mediafile.jpg
```

Batch creation of ISCC-CODEs

```shell
idk batch /folder_with_media_files
```

## Documentation

<https://sdk.iscc.codes>
Expand Down
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## 0.5.7 - Unreleased
- Added parallel processing of ISCC-UNITs
- Handle video thumbnail extraction errors gracefully
- Add basic command line interface
- Updated dependencies
- Fixed mkdocstrings

Expand Down
12 changes: 12 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ import iscc_sdk as idk
print(idk.code_iscc("/path/to/mediafile.jpg"))
```

Using the CLI tool to create an ISCC-CODE

```shell
idk create /path/to/mediafile.jpg
```

Batch creation of ISCC-CODEs

```shell
idk batch /folder_with_media_files
```

## Documentation

<https://sdk.iscc.codes>
Expand Down
146 changes: 146 additions & 0 deletions iscc_sdk/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import os
from concurrent.futures import ProcessPoolExecutor, as_completed
from typing import Iterator, Optional, Tuple
from loguru import logger as log
import typer
from pathlib import Path
import iscc_sdk as idk
from rich.console import Console
from rich.progress import (
Progress,
BarColumn,
TextColumn,
TransferSpeedColumn,
TimeRemainingColumn,
DownloadColumn,
)

# Shared rich console: passed to the progress display and used (via console.print) as the
# log sink inside `batch`.
console = Console()
# Typer application that the `create` and `batch` commands register on. With
# no_args_is_help=True, invoking the CLI without arguments prints the usage/help text.
app = typer.Typer(add_completion=False, no_args_is_help=True)


def log_formatter(record: dict) -> str:  # pragma: no cover
    """
    Return the log format template for a single log record.

    The template contains console markup tags. The message placeholder is wrapped in a
    style chosen from the record's level name; unknown level names fall back to "cyan".

    :param record: Log record mapping; only `record["level"].name` is read here.
    :return: Format template with `{time}`, `{module}`, `{level.icon}` and `{message}` fields.
    """
    level_styles = dict(
        TRACE="blue",
        DEBUG="cyan",
        INFO="bold",
        SUCCESS="bold green",
        WARNING="yellow",
        ERROR="bold red",
        CRITICAL="bold white on red",
    )
    level_name = record["level"].name
    style = level_styles[level_name] if level_name in level_styles else "cyan"
    # The prefix is a plain string so its braces stay as template fields for the logger.
    prefix = (
        "[not bold green]{time:YYYY/MM/DD HH:mm:ss}[/not bold green] | {module:<12} | {level.icon}"
    )
    # Doubled braces keep `{message}` as a literal placeholder in the returned template.
    suffix = f" - [{style}]{{message}}[/{style}]"
    return prefix + suffix


def iter_unprocessed(path, root_path=None):
    # type: (str|Path, Optional[str|Path]) -> Iterator[Tuple[Path, int]]
    """
    Walk directory tree recursively with deterministic ordering and yield unprocessed files.

    Each yielded item is a tuple `(file_path, size)`:
    - file_path: pathlib.Path of the file (the scanned `path` joined with the entry name)
    - size: integer file size in number of bytes

    Entries are sorted by name and sub-directories are traversed before the files of the
    current directory (bottom-up), so the order is reproducible. The following entries are
    skipped silently:
    - symlinks (to files or directories)
    - result files (name ends with ".iscc.json")
    - files that already have a sibling result file (`<file>.iscc.json` exists)

    Implementation Note: We use os.scandir to reduce the number of syscalls for metadata
    collection.

    :param path: Directory to scan.
    :param root_path: Directory where the walk started; only passed along to recursive calls.
    :return: Iterator of `(file_path, size)` tuples.
    """
    root_path = Path(root_path or path)
    with os.scandir(path) as entries:
        sorted_entries = sorted(entries, key=lambda e: e.name)
        # follow_symlinks=False makes both checks False for symlinks, which keeps links out of
        # the result and prevents endless recursion through directory link loops.
        dirs = [entry for entry in sorted_entries if entry.is_dir(follow_symlinks=False)]
        files = [entry for entry in sorted_entries if entry.is_file(follow_symlinks=False)]

    # The scandir handle is closed at this point, so deep trees do not keep one open
    # directory handle per recursion level.

    # Recursively process directories first (bottom-up traversal)
    for dir_entry in dirs:
        yield from iter_unprocessed(Path(dir_entry.path), root_path=root_path)

    # Process files in the current directory
    for file_entry in files:
        file_path = Path(file_entry)
        # Ignore result files
        if file_path.name.endswith(".iscc.json"):
            continue
        # Ignore files that have results
        if Path(file_path.as_posix() + ".iscc.json").exists():
            continue
        yield file_path, file_entry.stat().st_size


def process_file(fp: Path):
    """
    Generate the ISCC metadata for one file without ever raising.

    Any exception raised while processing is captured and returned in place of the result,
    so the caller can tell success from failure by inspecting the second tuple element.

    :param fp: Path of the file to process.
    :return: Tuple `(fp, result)` where result is the `idk.code_iscc` return value or the
        exception that was raised.
    """
    try:
        outcome = idk.code_iscc(fp.as_posix())
    except Exception as error:
        outcome = error
    return fp, outcome


@app.command()
def create(file: Path):
    """Create ISCC-CODE for single FILE."""
    # Guard clause: report anything that is not an existing regular file and exit with code 1.
    if not file.exists() or not file.is_file():
        typer.echo(f"Invalid file path {file}")
        raise typer.Exit(code=1)

    # Print the generated metadata as indented JSON.
    iscc_meta = idk.code_iscc(file.as_posix())
    typer.echo(iscc_meta.json(indent=2))


@app.command()
def batch(folder: Path, workers: int = os.cpu_count()):
    """Create ISCC-CODEs for files in FOLDER (parallel & recursive)."""
    # Route log messages through the shared console.
    log.add(console.print, level="TRACE", format=log_formatter, colorize=True)
    if not folder.is_dir() or not folder.exists():
        typer.echo(f"Invalid folder {folder}")
        raise typer.Exit(1)

    # Map every pending file to its size. Insertion order follows iter_unprocessed, so jobs
    # are submitted in the same order as they are discovered.
    file_sizes = dict(iter_unprocessed(folder))
    total_size = sum(file_sizes.values())
    progress = Progress(
        TextColumn("[bold blue]Processing {task.fields[dirname]}", justify="right"),
        BarColumn(),
        "[progress.percentage]{task.percentage:>3.1f}%",
        "•",
        DownloadColumn(),
        "•",
        TransferSpeedColumn(),
        "•",
        TimeRemainingColumn(),
        console=console,
    )

    with progress:
        # Progress is measured in bytes: the total is the summed size of all pending files.
        task_id = progress.add_task("Processing", dirname=folder.name, total=total_size)

        with ProcessPoolExecutor(max_workers=workers) as executor:
            futures = [executor.submit(process_file, fp) for fp in file_sizes]
            for future in as_completed(futures):
                # process_file returns the exception instead of raising it, so a failing
                # file does not abort the batch.
                fp, iscc_meta = future.result()
                if isinstance(iscc_meta, idk.IsccMeta):
                    out_path = Path(fp.as_posix() + ".iscc.json")
                    # Explicit UTF-8 so the JSON output does not depend on the platform's
                    # default text encoding.
                    with out_path.open(mode="wt", encoding="utf-8") as outf:
                        outf.write(iscc_meta.json(indent=2))
                    log.debug(f"Finished {fp.name}")
                else:  # pragma: no cover
                    log.warning(f"Failed {fp.name}: {iscc_meta}")
                # Advance by the file size whether or not processing succeeded.
                progress.update(task_id, advance=file_sizes[fp], refresh=True)


# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":  # pragma: no cover
    app()
93 changes: 93 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from pathlib import Path
from typing import Tuple

from typer.testing import CliRunner
from iscc_sdk.cli import app
import iscc_samples as iss
import json


# Shared Typer CliRunner; the CLI tests in this module call runner.invoke(app, [...]).
runner = CliRunner()


def test_iter_unprocessed():
    """iter_unprocessed yields (Path, int) tuples for the files of the audio samples folder."""
    from iscc_sdk.cli import iter_unprocessed

    audio_dir = iss.audios()[0].parent
    entries = list(iter_unprocessed(audio_dir))
    first = entries[0]
    assert isinstance(first, tuple)
    assert isinstance(first[0], Path)
    assert isinstance(first[1], int)
    assert len(entries) == 10


def test_process_file(jpg_file):
    """process_file returns the input path together with the generated ISCC metadata."""
    from iscc_sdk.cli import process_file

    source = Path(jpg_file)
    returned_path, meta = process_file(source)
    assert returned_path == source
    assert meta.iscc == "ISCC:KECWRY3VY6R5SNV4YNBTBHR4T2HGP3HKVFO7TYUP2BKVFG724W63HVI"


def test_process_file_error():
    """process_file hands back the exception instead of raising for a missing file."""
    from iscc_sdk.cli import process_file

    missing = Path("does-not-exist")
    returned_path, outcome = process_file(missing)
    assert returned_path == Path("does-not-exist")
    assert isinstance(outcome, Exception)


def test_cli_no_arg():
    """Invoking the app without arguments exits cleanly and prints the usage text."""
    outcome = runner.invoke(app)
    assert outcome.exit_code == 0
    assert "Usage" in outcome.stdout


def test_cli_create_no_arg():
    """`create` without a FILE argument fails with exit code 2 and a missing-argument message."""
    cli_args = ["create"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 2
    assert "Missing argument 'FILE'" in outcome.stdout


def test_cli_create_not_file():
    """`create` with a non-existing path exits with code 1 and reports an invalid file path."""
    cli_args = ["create", "not-a-file"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 1
    assert "Invalid file path" in outcome.stdout


def test_cli_create():
    """`create` prints the full ISCC metadata of the mp3 sample as JSON on stdout."""
    expected = {
        "@context": "http://purl.org/iscc/context/0.4.0.jsonld",
        "$schema": "http://purl.org/iscc/schema/0.4.0.json",
        "@type": "AudioObject",
        "iscc": "ISCC:KIC2JKSX7OH5PBIENISKEJTS4TRKHYJBCZDNLQXYILWJHQAP3N3KPTQ",
        "name": "Belly Button",
        "mode": "audio",
        "filename": "demo.mp3",
        "filesize": 225707,
        "mediatype": "audio/mpeg",
        "duration": 15,
        "metahash": "1e20c4933dc8c03ea58568159a1cbfb04132c7db93b6b4cd025ffd4db37f52a4756f",
        "datahash": "1e20ec93c00fdb76a7cec587e4a2bddfa8d0a0bac8110d0c7130c351ea07c366d626",
    }
    mp3_path = iss.audios(ext="mp3")[0].as_posix()
    outcome = runner.invoke(app, ["create", mp3_path])
    assert outcome.exit_code == 0
    assert json.loads(outcome.stdout) == expected


def test_cli_batch_no_arg():
    """`batch` without a FOLDER argument fails with exit code 2 and a missing-argument message."""
    cli_args = ["batch"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 2
    assert "Missing argument 'FOLDER'" in outcome.stdout


def test_cli_batch_not_a_folder():
    """`batch` with a non-existing folder exits with code 1 and reports an invalid folder."""
    cli_args = ["batch", "not-a-folder"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 1
    assert "Invalid folder" in outcome.stdout


def test_cli_batch(image_dir):
    """After a successful `batch` run no unprocessed files remain in the folder."""
    from iscc_sdk.cli import iter_unprocessed

    outcome = runner.invoke(app, ["batch", image_dir.as_posix()])
    assert outcome.exit_code == 0
    remaining = list(iter_unprocessed(image_dir))
    assert remaining == []

0 comments on commit 33bed59

Please sign in to comment.