From 961e0f4aa51e87ed5fb3c8028126d3f2e0a6bc6e Mon Sep 17 00:00:00 2001
From: Titusz Pan
Date: Tue, 20 Jun 2023 17:37:26 +0200
Subject: [PATCH] Support concurrent audio metadata extraction

---
Review notes (free text in this position is ignored by git-am / git-apply):
 * Line structure was restored from a whitespace-collapsed copy of this patch.
   Leading indentation of context lines follows 4-space Python conventions;
   verify against the target tree or apply with `git apply --ignore-whitespace`.
 * iscc_sdk/utils.py was amended in review: TempFile.__enter__ now removes the
   temporary directory when the copy fails (previously leaked, because
   __exit__ is not invoked when __enter__ raises) and the class gained a
   docstring. The blob id on that file's `index` line predates the amendment.

 CHANGELOG.md         |  1 +
 docs/changelog.md    |  1 +
 iscc_sdk/__init__.py |  1 +
 iscc_sdk/audio.py    | 29 ++++++++++++++++++++---------
 iscc_sdk/utils.py    | 38 ++++++++++++++++++++++++++++++++++++++
 tests/conftest.py    |  2 +-
 tests/test_audio.py  | 11 +++++++++--
 tests/test_main.py   |  8 +++-----
 tests/test_utils.py  |  8 ++++++++
 9 files changed, 82 insertions(+), 17 deletions(-)
 create mode 100644 iscc_sdk/utils.py
 create mode 100644 tests/test_utils.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 594243f..5446d1b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
 - Added parallel processing of ISCC-UNITs
 - Handle video thumbnail extraction errors gracefully
 - Add basic command line interface
+- Support concurrent audio metadata extraction
 - Updated dependencies
 - Fixed mkdocstrings
 
diff --git a/docs/changelog.md b/docs/changelog.md
index 594243f..5446d1b 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -4,6 +4,7 @@
 - Added parallel processing of ISCC-UNITs
 - Handle video thumbnail extraction errors gracefully
 - Add basic command line interface
+- Support concurrent audio metadata extraction
 - Updated dependencies
 - Fixed mkdocstrings
 
diff --git a/iscc_sdk/__init__.py b/iscc_sdk/__init__.py
index f7f9725..0787b48 100644
--- a/iscc_sdk/__init__.py
+++ b/iscc_sdk/__init__.py
@@ -29,3 +29,4 @@
 from iscc_sdk.epub import *
 from iscc_sdk.docx_ import *
 from iscc_sdk.thumbnail import *
+from iscc_sdk.utils import *
diff --git a/iscc_sdk/audio.py b/iscc_sdk/audio.py
index 16f5cd0..0bfd580 100644
--- a/iscc_sdk/audio.py
+++ b/iscc_sdk/audio.py
@@ -75,14 +75,27 @@ def audio_meta_extract(fp):
     :return: Metadata mapped to IsccMeta schema
     :rtype: dict
     """
-    try:
-        obj = taglib.File(fp)
-    except OSError as e:
-        log.error(f"Failed metadata extraction for {basename(fp)}: {e}")
-        return {}
-    meta = dict(obj.tags)
     mapped = dict()
     done = set()
+
+    try:
+        obj = taglib.File(fp)
+        meta = dict(obj.tags)
+        mapped["duration"] = obj.length
+        obj.close()
+    except OSError as e:  # pragma: no cover
+        # This is a workaround for the issue that taglib requires exclusive access even for reading.
+        log.warning(f"Create tempfile for taglib access {basename(fp)}: {e}")
+        try:
+            with idk.TempFile(fp) as tmp_path:
+                obj = taglib.File(tmp_path.as_posix())
+                meta = dict(obj.tags)
+                mapped["duration"] = obj.length
+                obj.close()
+        except Exception as e:
+            log.warning(f"Failed metadata extraction for {basename(fp)}: {e}")
+            return mapped
+
     for tag, mapped_field in AUDIO_META_MAP.items():
         if mapped_field in done:
             continue
@@ -91,12 +104,10 @@ def audio_meta_extract(fp):
             log.debug(f"Mapping audio metadata: {tag} -> {mapped_field} -> {value[0]}")
             mapped[mapped_field] = value[0]
             done.add(mapped_field)
-    mapped["duration"] = obj.length
     # Todo - add bitrate, channels, samplerate to iscc-schema
     # mapped["bitrate"] = obj.bitrate
     # mapped["channels"] = obj.channels
     # mapped["samplerate"] = obj.sampleRate
-    obj.close()
     return mapped
 
 
@@ -107,7 +118,7 @@ def audio_meta_embed(fp, meta):
 
     :param str fp: Filepath to source audio file
     :param IsccMeta meta: Metadata to embed into audio file
-    :return: Filepath to new video file with updated metadata
+    :return: Filepath to new audio file with updated metadata
     :rtype: str
     """
     tdir = tempfile.mkdtemp()
diff --git a/iscc_sdk/utils.py b/iscc_sdk/utils.py
new file mode 100644
index 0000000..6de0a74
--- /dev/null
+++ b/iscc_sdk/utils.py
@@ -0,0 +1,38 @@
+import shutil
+import tempfile
+from pathlib import Path
+
+
+__all__ = [
+    "TempFile",
+]
+
+
+class TempFile:
+    """
+    Context manager that provides a temporary copy of a file.
+
+    The copy keeps the original file name and is placed in a fresh temporary directory.
+    The directory and the copy are removed when the context exits.
+    """
+
+    def __init__(self, original_path):
+        # type: (str|Path) -> None
+        self.original_path = Path(original_path)
+        self.temp_dir = None
+
+    def __enter__(self):
+        # type: () -> Path
+        self.temp_dir = Path(tempfile.mkdtemp())
+        temp_filename = self.temp_dir / self.original_path.name
+        try:
+            shutil.copy2(self.original_path, temp_filename)
+        except BaseException:
+            # __exit__ is not called when __enter__ raises, so remove the directory here.
+            shutil.rmtree(self.temp_dir, ignore_errors=True)
+            self.temp_dir = None
+            raise
+        return temp_filename
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        shutil.rmtree(self.temp_dir)
diff --git a/tests/conftest.py b/tests/conftest.py
index 7b47c01..dae8b0a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -113,7 +113,7 @@ def epub_file(tmp_path_factory):
     return dst.as_posix()
 
 
-@pytest.fixture(scope="session")
+@pytest.fixture(scope="module")
 def asset_tree(tmp_path_factory):
     src = images()[0].parent
     dst = tmp_path_factory.mktemp("tree")
diff --git a/tests/test_audio.py b/tests/test_audio.py
index dab3e11..2d9213d 100644
--- a/tests/test_audio.py
+++ b/tests/test_audio.py
@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
 import os.path
-
 from PIL.Image import Image
-
 import iscc_sdk as idk
 import iscc_samples as iss
 
@@ -23,6 +21,15 @@ def test_audio_meta_extract(mp3_file):
     }
 
 
+def test_audio_meta_extract_concurrent(mp3_file):
+    with open(mp3_file, "rb") as infile:
+        data = infile.read(64)
+        assert idk.audio_meta_extract(mp3_file) == {
+            "name": "Belly Button",
+            "duration": 15,
+        }
+
+
 def test_audio_meta_extract_all():
     for fp in iss.audios():
         metadata = idk.audio_meta_extract(fp.as_posix())
diff --git a/tests/test_main.py b/tests/test_main.py
index 27a8410..1fd75f7 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -35,17 +35,15 @@ def test_code_iscc_image(jpg_file):
     }
 
 
-def test_code_iscc_audio():
-    from iscc_samples import audios
-
-    assert idk.code_iscc(audios("mp3")[0].as_posix()).dict() == {
+def test_code_iscc_audio(mp3_file):
+    assert idk.code_iscc(mp3_file).dict() == {
         "@type": "AudioObject",
         "iscc": "ISCC:KIC2JKSX7OH5PBIENISKEJTS4TRKHYJBCZDNLQXYILWJHQAP3N3KPTQ",
         "name": "Belly Button",
         "datahash": "1e20ec93c00fdb76a7cec587e4a2bddfa8d0a0bac8110d0c7130c351ea07c366d626",
         "duration": 15,
         "filesize": 225707,
-        "filename": "demo.mp3",
+        "filename": "audio.mp3",
         "mediatype": "audio/mpeg",
         "metahash": "1e20c4933dc8c03ea58568159a1cbfb04132c7db93b6b4cd025ffd4db37f52a4756f",
         "mode": "audio",
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..340f682
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+import iscc_sdk as idk
+
+
+def test_tempfile(jpg_file):
+    with idk.TempFile(jpg_file) as tf:
+        assert tf.exists()
+    assert not tf.exists()