Skip to content

Commit

Permalink
Add video feature extraction.
Browse files Browse the repository at this point in the history
  • Loading branch information
titusz committed Mar 20, 2022
1 parent d8343f6 commit e983811
Show file tree
Hide file tree
Showing 8 changed files with 869 additions and 63 deletions.
1 change: 1 addition & 0 deletions iscc_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@
from iscc_sdk.ipfs import *
from iscc_sdk.audio import *
from iscc_sdk.video import *
from iscc_sdk.mp7 import *
108 changes: 108 additions & 0 deletions iscc_sdk/mp7.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
from dataclasses import dataclass
from fractions import Fraction
from functools import cache
from typing import Tuple, List
from bitarray import bitarray
import bitarray
from bitarray.util import ba2int
import numpy as np


__all__ = [
"read_mp7_signature",
]


SIGELEM_SIZE = 380


@dataclass
class Frame:
    """
    A single decoded MP7 frame signature.

    Attributes:
        vector: 380 dimensional vector with element values in range 0..2.
        elapsed: Time elapsed since the start of the video.
        confidence: Signature confidence in range 0..255.
    """

    vector: np.ndarray
    elapsed: Fraction
    confidence: int


@cache
def calc_byte_to_bit3():
    # type: () -> np.ndarray
    """
    Build the lookup table that unpacks one byte into five ternary digits.

    Row ``i`` holds ``(i // 81) % 3, (i // 27) % 3, (i // 9) % 3, (i // 3) % 3, i % 3``,
    i.e. the five base-3 digits of ``i`` (most significant first), each in range 0..2.
    Byte values above 242 wrap around because the leading digit is taken modulo 3.

    The result is cached, so every caller receives the very same array object. It is
    therefore marked read-only to prevent one caller from corrupting the table for all
    others.

    :return: read-only table of shape (256, 5) with dtype uint8
    :rtype: np.ndarray
    """
    # Place values of the five base-3 digits, most significant first.
    weights = np.array([81, 27, 9, 3, 1], dtype=np.int64)
    byte_values = np.arange(256, dtype=np.int64).reshape(-1, 1)
    table_3_bit = ((byte_values // weights) % 3).astype(np.uint8)
    # The cached array is shared between callers - forbid in-place modification.
    table_3_bit.flags.writeable = False
    return table_3_bit


def pop_bits(data_bits, pos, bits=32):
    # type: (bitarray, int, int) -> Tuple[int, int]
    """
    Read a run of bits as an unsigned integer and advance the read position.

    :param bitarray data_bits: 0/1 data to read from
    :param int pos: bit offset at which reading starts
    :param int bits: number of bits to read (default 32)
    :return: the decoded unsigned value and the bit offset just past the read bits
    :rtype: Tuple[int, int]
    """
    end = pos + bits
    return ba2int(data_bits[pos:end], signed=False), end


def read_mp7_signature(byte_data):
    # type: (bytes) -> List[Frame]
    """
    Decode a binary MP7 video signature into its frame signatures.

    Only the frame count, the media time unit and the per-frame data are decoded;
    all other fields of the bitstream are skipped over.

    :param bytes byte_data: Raw MP7 video signature (as extracted by ffmpeg)
    :return: List of Frame Signatures
    :rtype: List[Frame]
    """
    lookup = calc_byte_to_bit3()
    data_bits = bitarray.bitarray()
    data_bits.frombytes(byte_data)

    # Header: skip the 129 bits that precede the frame count.
    pos = 129
    num_of_frames, pos = pop_bits(data_bits, pos)
    media_time_unit, pos = pop_bits(data_bits, pos, 16)
    pos += 1 + 32 + 32
    num_of_segments, pos = pop_bits(data_bits, pos)
    # Skip over all segment entries plus the single bit that follows them.
    pos += num_of_segments * (4 * 32 + 1 + 5 * 243) + 1

    # First pass: pull the raw per-frame values out of the bitstream.
    parsed = []
    for _ in range(num_of_frames):
        pos += 1
        raw_media_time, pos = pop_bits(data_bits, pos)
        frame_confidence, pos = pop_bits(data_bits, pos, 8)
        pos += 5 * 8
        vec = np.zeros((SIGELEM_SIZE,), dtype=np.uint8)
        # Each packed byte expands to five consecutive vector elements.
        for start in range(0, (SIGELEM_SIZE // 5) * 5, 5):
            packed, pos = pop_bits(data_bits, pos, 8)
            vec[start : start + 5] = lookup[packed]
        parsed.append((vec, raw_media_time, frame_confidence))

    # Second pass: wrap the raw values, converting media time to a fraction.
    return [
        Frame(vector=vec, elapsed=Fraction(media_time, media_time_unit), confidence=conf)
        for vec, media_time, conf in parsed
    ]
7 changes: 7 additions & 0 deletions iscc_sdk/options.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""*SDK configuration and options*."""
from typing import Optional

from iscc_core.options import CoreOptions
from pydantic import Field

Expand Down Expand Up @@ -30,5 +32,10 @@ class SdkOptions(CoreOptions):
60, description="Thumbnail image compression setting (0-100)"
)

video_fps: int = Field(
5,
description="Frames per second to process for video hash (ignored when 0).",
)


sdk_opts = SdkOptions()
47 changes: 47 additions & 0 deletions iscc_sdk/video.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
"""*Video handling module*"""
import os
from typing import Sequence, Tuple, List

from loguru import logger as log
import io
import subprocess
import sys
import tempfile
from os.path import join, basename
from pathlib import Path
from secrets import token_hex
from PIL import Image, ImageEnhance
import iscc_sdk as idk
import iscc_schema as iss
Expand All @@ -13,6 +19,8 @@
"video_meta_extract",
"video_meta_embed",
"video_thumbnail",
"video_mp7sig_extract",
"video_features_extract",
]

VIDEO_META_MAP = {
Expand Down Expand Up @@ -169,3 +177,42 @@ def video_thumbnail(fp):
result = subprocess.run(cmd, capture_output=True)
img_obj = Image.open(io.BytesIO(result.stdout))
return ImageEnhance.Sharpness(img_obj.convert("RGB")).enhance(1.4)


def video_features_extract(fp):
    # type: (str) -> List[Tuple[int, ...]]
    """
    Extract per-frame feature vectors from a video file.

    Extracts the MPEG-7 video signature of the file, decodes it and converts the
    vector of every frame signature into a plain tuple of integers.

    :param str fp: Filepath to video file.
    :return: One tuple of integers per decoded frame signature.
    :rtype: List[Tuple[int, ...]]
    """
    raw_signature = video_mp7sig_extract(fp)
    features = []
    for frame in idk.read_mp7_signature(raw_signature):
        features.append(tuple(frame.vector.tolist()))
    return features


def video_mp7sig_extract(fp):
    # type: (str) -> bytes
    """
    Extract MPEG-7 Video Signature.

    Runs ffmpeg with the ``signature`` video filter, lets it write the binary
    signature to a temporary file and returns the content of that file. The
    temporary directory and file are removed again, also when ffmpeg fails.

    :param str fp: Filepath to video file.
    :return: raw signature data
    :rtype: bytes
    :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    # The context manager removes the directory and the signature file on every exit
    # path, so nothing is left behind when ffmpeg fails.
    with tempfile.TemporaryDirectory() as tmpdir:
        sigfile_path = Path(tmpdir, token_hex(16) + ".bin")
        # Escape ":" (e.g. from Windows drive letters), as it separates filter options.
        sigfile_path_escaped = sigfile_path.as_posix().replace(":", "\\\\:")

        # Extract MP7 Signature
        vf = f"signature=format=binary:filename={sigfile_path_escaped}"
        # The option is documented as "ignored when 0": only resample if it is set.
        if idk.sdk_opts.video_fps:
            vf = f"fps=fps={idk.sdk_opts.video_fps}," + vf
        cmd = [idk.ffmpeg_bin(), "-i", fp, "-vf", vf, "-f", "null", "-"]
        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)

        with open(sigfile_path, "rb") as sig:
            return sig.read()
Loading

0 comments on commit e983811

Please sign in to comment.