Skip to content

Commit

Permalink
Support concurrent audio metadata extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
titusz committed Jun 20, 2023
1 parent af4c0f4 commit 961e0f4
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- Added parallel processing of ISCC-UNITs
- Handle video thumbnail extraction errors gracefully
- Add basic command line interface
- Support concurrent audio metadata extraction
- Updated dependencies
- Fixed mkdocstrings

Expand Down
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- Added parallel processing of ISCC-UNITs
- Handle video thumbnail extraction errors gracefully
- Add basic command line interface
- Support concurrent audio metadata extraction
- Updated dependencies
- Fixed mkdocstrings

Expand Down
1 change: 1 addition & 0 deletions iscc_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@
from iscc_sdk.epub import *
from iscc_sdk.docx_ import *
from iscc_sdk.thumbnail import *
from iscc_sdk.utils import *
29 changes: 20 additions & 9 deletions iscc_sdk/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,27 @@ def audio_meta_extract(fp):
:return: Metadata mapped to IsccMeta schema
:rtype: dict
"""
try:
obj = taglib.File(fp)
except OSError as e:
log.error(f"Failed metadata extraction for {basename(fp)}: {e}")
return {}
meta = dict(obj.tags)
mapped = dict()
done = set()

try:
obj = taglib.File(fp)
meta = dict(obj.tags)
mapped["duration"] = obj.length
obj.close()
except OSError as e: # pragma: no cover
# This is a workaround for the issue that taglib requires exclusive access even for reading.
log.warning(f"Create tempfile for taglib access {basename(fp)}: {e}")
try:
with idk.TempFile(fp) as tmp_path:
obj = taglib.File(tmp_path.as_posix())
meta = dict(obj.tags)
mapped["duration"] = obj.length
obj.close()
except Exception as e:
log.warning(f"Failed metadata extraction for {basename(fp)}: {e}")
return mapped

for tag, mapped_field in AUDIO_META_MAP.items():
if mapped_field in done:
continue
Expand All @@ -91,12 +104,10 @@ def audio_meta_extract(fp):
log.debug(f"Mapping audio metadata: {tag} -> {mapped_field} -> {value[0]}")
mapped[mapped_field] = value[0]
done.add(mapped_field)
mapped["duration"] = obj.length
# Todo - add bitrate, channels, samplerate to iscc-schema
# mapped["bitrate"] = obj.bitrate
# mapped["channels"] = obj.channels
# mapped["samplerate"] = obj.sampleRate
obj.close()
return mapped


Expand All @@ -107,7 +118,7 @@ def audio_meta_embed(fp, meta):
:param str fp: Filepath to source audio file
:param IsccMeta meta: Metadata to embed into audio file
:return: Filepath to new video file with updated metadata
:return: Filepath to new audio file with updated metadata
:rtype: str
"""
tdir = tempfile.mkdtemp()
Expand Down
25 changes: 25 additions & 0 deletions iscc_sdk/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import shutil
import tempfile
from pathlib import Path


__all__ = [
"TempFile",
]


class TempFile:
    """
    Context manager that provides a temporary copy of a file.

    On entry a new temporary directory is created and the original file is copied into it
    (with `shutil.copy2`) under its original file name. On exit the temporary directory,
    including the copy, is removed. The original file is never modified.

    :param str|Path original_path: Path of the file that should be copied.
    """

    def __init__(self, original_path):
        # type: (str|Path) -> None
        self.original_path = Path(original_path)
        # Directory that holds the copy; only set while the context is active.
        self.temp_dir = None

    def __enter__(self):
        # type: () -> Path
        """
        Create the temporary copy.

        :return: Path to the copied file inside the temporary directory
        :rtype: Path
        :raises OSError: If the original file cannot be copied (nothing is left behind).
        """
        temp_dir = Path(tempfile.mkdtemp())
        temp_filename = temp_dir / self.original_path.name
        try:
            shutil.copy2(self.original_path, temp_filename)
        except BaseException:
            # __exit__ is not called when __enter__ raises, so the directory
            # created above must be removed here to avoid leaking it.
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise
        self.temp_dir = temp_dir
        return temp_filename

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Remove the temporary directory.

        Returns None, so an exception raised inside the `with` body propagates.
        """
        # Reset the attribute first so a repeated or premature call is a no-op.
        temp_dir, self.temp_dir = self.temp_dir, None
        if temp_dir is not None:
            shutil.rmtree(temp_dir)
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def epub_file(tmp_path_factory):
return dst.as_posix()


@pytest.fixture(scope="session")
@pytest.fixture(scope="module")
def asset_tree(tmp_path_factory):
src = images()[0].parent
dst = tmp_path_factory.mktemp("tree")
Expand Down
11 changes: 9 additions & 2 deletions tests/test_audio.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# -*- coding: utf-8 -*-
import os.path

from PIL.Image import Image

import iscc_sdk as idk
import iscc_samples as iss

Expand All @@ -23,6 +21,15 @@ def test_audio_meta_extract(mp3_file):
}


def test_audio_meta_extract_concurrent(mp3_file):
    """Metadata extraction returns name and duration while the file is held open."""
    expected = {"name": "Belly Button", "duration": 15}
    # Keep a second read handle on the file for the duration of the extraction.
    with open(mp3_file, "rb") as handle:
        _ = handle.read(64)
        extracted = idk.audio_meta_extract(mp3_file)
        assert extracted == expected


def test_audio_meta_extract_all():
for fp in iss.audios():
metadata = idk.audio_meta_extract(fp.as_posix())
Expand Down
8 changes: 3 additions & 5 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,15 @@ def test_code_iscc_image(jpg_file):
}


def test_code_iscc_audio():
from iscc_samples import audios

assert idk.code_iscc(audios("mp3")[0].as_posix()).dict() == {
def test_code_iscc_audio(mp3_file):
assert idk.code_iscc(mp3_file).dict() == {
"@type": "AudioObject",
"iscc": "ISCC:KIC2JKSX7OH5PBIENISKEJTS4TRKHYJBCZDNLQXYILWJHQAP3N3KPTQ",
"name": "Belly Button",
"datahash": "1e20ec93c00fdb76a7cec587e4a2bddfa8d0a0bac8110d0c7130c351ea07c366d626",
"duration": 15,
"filesize": 225707,
"filename": "demo.mp3",
"filename": "audio.mp3",
"mediatype": "audio/mpeg",
"metahash": "1e20c4933dc8c03ea58568159a1cbfb04132c7db93b6b4cd025ffd4db37f52a4756f",
"mode": "audio",
Expand Down
8 changes: 8 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
import iscc_sdk as idk


def test_tempfile(jpg_file):
    """The temporary copy exists inside the context and is gone after leaving it."""
    manager = idk.TempFile(jpg_file)
    with manager as copied:
        present_inside = copied.exists()
        assert present_inside
    present_after = copied.exists()
    assert not present_after

0 comments on commit 961e0f4

Please sign in to comment.