DOC: add usage section #46

Merged: 16 commits, Jun 10, 2022
4 changes: 4 additions & 0 deletions docs/conf.py
@@ -23,19 +23,23 @@
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
pygments_style = None
extensions = [
'jupyter_sphinx',
'sphinx.ext.autodoc',
'sphinx.ext.napoleon', # support for Google-style docstrings
'sphinx_autodoc_typehints',
'sphinx.ext.viewcode',
'sphinx.ext.intersphinx',
'sphinx.ext.autosectionlabel',
'sphinx_copybutton', # for "copy to clipboard" buttons
]
intersphinx_mapping = {
'python': ('https://docs.python.org/3/', None),
'numpy': ('https://numpy.org/doc/stable/', None),
'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
'audformat': ('https://audeering.github.io/audformat/', None),
'audobject': ('https://audeering.github.io/audobject/', None),
'audresample': ('https://audeering.github.io/audresample/', None),
'opensmile': ('https://audeering.github.io/opensmile-python/', None),
}
# Disable Gitlab as we need to sign in
linkcheck_ignore = [
1 change: 1 addition & 0 deletions docs/index.rst
@@ -8,6 +8,7 @@
:hidden:

installation
usage

.. Warning: the usage of genindex is a hack to get a TOC entry, see
.. https://stackoverflow.com/a/42310803. This might break the usage of sphinx if
5 changes: 5 additions & 0 deletions docs/requirements.txt
@@ -1,4 +1,9 @@
audb
audeer
auditok
jupyter-sphinx
librosa
sphinx
sphinx-audeering-theme
sphinx-autodoc-typehints
sphinx-copybutton
263 changes: 263 additions & 0 deletions docs/usage.rst
@@ -0,0 +1,263 @@
.. Specify pandas format output in cells
.. jupyter-execute::
:hide-code:
:hide-output:

import pandas as pd


def series_to_html(self):
    df = self.to_frame()
    df.columns = ['']
    return df._repr_html_()


def index_to_html(self):
    return self.to_frame(index=False).to_html(index=False)


setattr(pd.Series, '_repr_html_', series_to_html)
setattr(pd.Index, '_repr_html_', index_to_html)
pd.set_option('display.max_rows', 6)
pd.set_option('display.max_columns', 3)

.. Specify version for storing and loading objects to YAML
.. jupyter-execute::
:hide-code:

__version__ = '1.0.0'


Usage
=====

The basic idea of :mod:`audinterface` is
to provide easy and standardized interfaces
for applying a machine learning model
or another digital signal processing algorithm
to audio files.
The only prerequisite is
that the algorithm provides a callable
that takes at least the signal
as a :class:`numpy.ndarray`
and the sampling rate as input.

The interface can then apply the algorithm
to a list of files,
a folder,
or an index conforming to the audformat_ database specification.
Results are always returned with a `segmented index`_.
In the following we load three files from the emodb_ database
and define a list of files,
a folder,
and an index.

.. jupyter-execute::

import audb
import os

media = ['wav/03a01Fa.wav', 'wav/03a01Nc.wav', 'wav/03a01Wa.wav']
db = audb.load('emodb', version='1.2.0', media=media, verbose=False)

files = list(db.files)
folder = os.path.dirname(files[0])
index = db['emotion'].index


Processing interface
--------------------

Let's assume we want to calculate the root mean square (RMS)
value in dB.
We first define the function
and create an interface for it using :class:`audinterface.Process`.

.. jupyter-execute::

import audinterface
import numpy as np

def rms(signal, sampling_rate):
    return 20 * np.log10(np.sqrt(np.mean(signal ** 2)))

interface = audinterface.Process(process_func=rms)

The following three commands
apply the algorithm
and all return the same result
as a :class:`pandas.Series`.

.. jupyter-execute::

y = interface.process_files(files)
y = interface.process_folder(folder)
y = interface.process_index(index)
y
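
The interfaces also work on signals that are already in memory.
The following is a minimal sketch
that assumes :meth:`audinterface.Process.process_signal`,
which takes the signal and its sampling rate directly;
the sine tone is created only for illustration.

.. jupyter-execute::

sampling_rate = 16000
t = np.arange(sampling_rate) / sampling_rate
signal = 0.5 * np.sin(2 * np.pi * 440 * t)  # 1 s sine tone at 440 Hz
signal = signal[np.newaxis, :].astype(np.float32)  # (num_channels, num_samples)
y_signal = interface.process_signal(signal, sampling_rate)
y_signal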


Feature interface
-----------------

When the result of the processing function has multiple dimensions,
it is recommended to use :class:`audinterface.Feature`,
which returns a :class:`pandas.DataFrame`
and assigns names to the dimensions/features.

.. jupyter-execute::

def features(signal, sampling_rate):
    return [signal.mean(), signal.std()]

interface = audinterface.Feature(
    ['mean', 'std'],
    process_func=features,
)

df = interface.process_index(index)
df


Framewise feature interface
---------------------------

If a processing function does not return
one set of features for the whole signal,
but returns features
in a framewise manner,
you should specify the ``win_dur``
and ``hop_dur`` arguments
of :class:`audinterface.Feature`.
It is also important
that the processing function returns the values
in the correct shape,
namely ``(num_channels, num_features, num_frames)``,
where the first dimension is optional.

.. jupyter-execute::

import librosa

def features(signal, sampling_rate, win_dur, hop_dur, n_mfcc):
    hop_length = int(hop_dur * sampling_rate)
    win_length = int(win_dur * sampling_rate)
    mfcc = librosa.feature.mfcc(
        y=signal,
        sr=sampling_rate,
        n_mfcc=n_mfcc,
        hop_length=hop_length,
        win_length=win_length,
    )
    return mfcc

win_dur = 0.02
hop_dur = 0.01
n_mfcc = 13
interface = audinterface.Feature(
    [f'mfcc-{idx}' for idx in range(n_mfcc)],
    process_func=features,
    process_func_args={
        'win_dur': win_dur,
        'hop_dur': hop_dur,
        'n_mfcc': n_mfcc,
    },
    hop_dur=hop_dur,
    win_dur=win_dur,
)
df = interface.process_index(index)
df
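
As a quick sanity check
(an illustrative snippet that assumes
the usual ``file``/``start``/``end`` levels of a segmented index),
the ``start`` times of consecutive frames
should be spaced by ``hop_dur``.

.. jupyter-execute::

df.index.get_level_values('start')[:3]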


Serializable feature interface
------------------------------

To use a feature extractor as an input transform
of a machine learning model
it is recommended to provide it in a serializable way
so it can be `stored as part of the model`_.
One example of such a feature extractor is :class:`opensmile.Smile`.

To create such a feature extractor,
we define a class that inherits
from :class:`audinterface.Feature`
and :class:`audobject.Object`.

.. jupyter-execute::

import audobject

class MeanStd(audinterface.Feature, audobject.Object):

    def __init__(self):
        super().__init__(
            ['mean', 'std'],
            process_func=self.features,
        )

    def features(self, signal, sampling_rate):
        return [signal.mean(), signal.std()]

fex = MeanStd()
df = fex.process_index(index)
df

The advantage of the feature extraction object is
that we can save it to a YAML file
and re-instantiate it from there.

.. jupyter-execute::

fex.to_yaml('mean-std.yaml')
fex2 = audobject.from_yaml('mean-std.yaml')
df = fex2.process_index(index)
df
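
To get an impression of what is stored,
we can print the content of the YAML file
(a small illustrative addition
that only reads the file written above).

.. jupyter-execute::

with open('mean-std.yaml') as fp:
    print(fp.read())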


Segmentation interface
----------------------

When the result of the processing function is an index,
it is recommended to use :class:`audinterface.Segment`,
which returns a segmented index conforming to audformat_.
An example for such a processing function
would be a voice activity detection algorithm.

.. jupyter-execute::

import auditok

def segments(signal, sampling_rate):

    # Convert floating point array to 16bit PCM little-endian
    ints = (signal[0, :] * 32767).astype(np.int16)
    little_endian = ints.astype('<u2')
    signal = little_endian.tobytes()

    regions = auditok.split(
        signal,
        sampling_rate=sampling_rate,
        sample_width=2,
        channels=1,
        min_dur=0.2,
        energy_threshold=70,
    )
    index = pd.MultiIndex.from_tuples(
        [
            (
                pd.Timedelta(region.meta.start, unit='s'),
                pd.Timedelta(region.meta.end, unit='s'),
            )
            for region in regions
        ],
        names=['start', 'end'],
    )
    return index

interface = audinterface.Segment(process_func=segments)
idx = interface.process_file(files[0])
idx
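
Since the returned index is a segmented index,
it can be passed on to another interface,
e.g. to extract features only for the detected segments.
This is a sketch under the assumption
that :meth:`audinterface.Feature.process_index`
accepts such an index;
it reuses the ``MeanStd`` extractor from above.

.. jupyter-execute::

df = fex.process_index(idx)
df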


.. _audformat: https://audeering.github.io/audformat/
.. _emodb: http://emodb.bilderbar.info
.. _segmented index: https://audeering.github.io/audformat/data-tables.html#segmented
.. _stored as part of the model: https://audeering.github.io/audonnx/usage.html#export-model