diff --git a/docs/conf.py b/docs/conf.py index 4953dce..7f023ec 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,19 +23,23 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] pygments_style = None extensions = [ + 'jupyter_sphinx', 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', # support for Google-style docstrings 'sphinx_autodoc_typehints', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx', 'sphinx.ext.autosectionlabel', + 'sphinx_copybutton', # for "copy to clipboard" buttons ] intersphinx_mapping = { 'python': ('https://docs.python.org/3/', None), 'numpy': ('https://numpy.org/doc/stable/', None), 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), 'audformat': ('https://audeering.github.io/audformat/', None), + 'audobject': ('https://audeering.github.io/audobject/', None), 'audresample': ('https://audeering.github.io/audresample/', None), + 'opensmile': ('https://audeering.github.io/opensmile-python/', None), } # Disable Gitlab as we need to sign in linkcheck_ignore = [ diff --git a/docs/index.rst b/docs/index.rst index c57560b..117be94 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,6 +8,7 @@ :hidden: installation + usage .. Warning: the usage of genindex is a hack to get a TOC entry, see .. https://stackoverflow.com/a/42310803. This might break the usage of sphinx if diff --git a/docs/requirements.txt b/docs/requirements.txt index 28e3961..025e103 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,9 @@ +audb audeer +auditok +jupyter-sphinx +librosa sphinx sphinx-audeering-theme sphinx-autodoc-typehints +sphinx-copybutton diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 0000000..47c9c70 --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,263 @@ +.. Specify pandas format output in cells +.. 
jupyter-execute::
+    :hide-code:
+    :hide-output:
+
+    import pandas as pd
+
+
+    def series_to_html(self):  # Series -> one-column frame, blank header
+        df = self.to_frame()
+        df.columns = ['']
+        return df._repr_html_()
+
+
+    def index_to_html(self):  # Index -> HTML table without row labels
+        return self.to_frame(index=False).to_html(index=False)
+
+
+    pd.Series._repr_html_ = series_to_html
+    pd.Index._repr_html_ = index_to_html
+    pd.set_option('display.max_rows', 6)
+    pd.set_option('display.max_columns', 3)
+
+.. Specify version for storing and loading objects to YAML
+.. jupyter-execute::
+    :hide-code:
+
+    __version__ = '1.0.0'
+
+
+Usage
+=====
+
+The basic idea of :mod:`audinterface` is
+to provide easy and standardized interfaces
+to apply a machine learning model,
+or other digital signal processing algorithms
+to audio files.
+The only prerequisite is
+that the algorithm provides a callable
+that takes at least the signal
+as a :class:`numpy.ndarray`
+and the sampling rate as input.
+
+The interface can then apply the algorithm
+on a list of files,
+a folder,
+or an index conforming to the audformat_ database specification.
+Results are always returned containing a `segmented index`_.
+In the following we load three files from the emodb_ database
+and define a list of files,
+a folder,
+and an index.
+
+.. jupyter-execute::
+
+    import audb
+    import os
+
+    media = ['wav/03a01Fa.wav', 'wav/03a01Nc.wav', 'wav/03a01Wa.wav']
+    db = audb.load('emodb', version='1.2.0', media=media, verbose=False)
+
+    files = list(db.files)
+    folder = os.path.dirname(files[0])
+    index = db['emotion'].index
+
+
+Processing interface
+--------------------
+
+Let's assume we want to calculate the root mean square (RMS)
+value in dB.
+We first define the function
+and create an interface for it using :class:`audinterface.Process`.
+
+.. 
jupyter-execute::
+
+    import audinterface
+    import numpy as np
+
+    def rms(signal, sampling_rate):
+        return 20 * np.log10(np.sqrt(np.mean(signal ** 2)))
+
+    interface = audinterface.Process(process_func=rms)
+
+The following three commands
+apply the algorithm
+and all return the same result
+as a :class:`pandas.Series`.
+
+.. jupyter-execute::
+
+    y = interface.process_files(files)
+    y = interface.process_folder(folder)
+    y = interface.process_index(index)
+    y
+
+
+Feature interface
+-----------------
+
+When the result of the processing function has multiple dimensions
+it is recommended to use :class:`audinterface.Feature`,
+which returns a :class:`pandas.DataFrame`
+and assigns names to the dimensions/features.
+
+.. jupyter-execute::
+
+    def features(signal, sampling_rate):
+        return [signal.mean(), signal.std()]
+
+    interface = audinterface.Feature(
+        ['mean', 'std'],
+        process_func=features,
+    )
+
+    df = interface.process_index(index)
+    df
+
+
+Framewise feature interface
+---------------------------
+
+If a processing function does not return
+one set of features for the whole signal,
+but does return features
+in a framewise manner,
+you should specify the ``win_dur``
+and ``hop_dur`` arguments
+of :class:`audinterface.Feature`.
+It's also important that the processing function
+returns the value in the correct shape,
+namely ``(num_channels, num_features, num_frames)``,
+where the first dimension is optional.
+
+.. 
jupyter-execute::
+
+    import librosa
+
+    def features(signal, sampling_rate, win_dur, hop_dur, n_mfcc):
+        hop_length = int(hop_dur * sampling_rate)
+        win_length = int(win_dur * sampling_rate)
+        mfcc = librosa.feature.mfcc(
+            y=signal,
+            sr=sampling_rate,
+            n_mfcc=n_mfcc,  # must match the number of feature names
+            hop_length=hop_length,
+            win_length=win_length,
+        )
+        return mfcc
+
+    win_dur = 0.02
+    hop_dur = 0.01
+    n_mfcc = 13
+    interface = audinterface.Feature(
+        [f'mfcc-{idx}' for idx in range(n_mfcc)],
+        process_func=features,
+        process_func_args={
+            'win_dur': win_dur,
+            'hop_dur': hop_dur,
+            'n_mfcc': n_mfcc,
+        },
+        hop_dur=hop_dur,
+        win_dur=win_dur,
+    )
+    df = interface.process_index(index)
+    df
+
+
+Serializable feature interface
+------------------------------
+
+To use a feature extractor as an input transform
+of a machine learning model
+it is recommended to provide it in a serializable way
+so it can be `stored as part of the model`_.
+One example of such a feature extractor is :class:`opensmile.Smile`.
+
+To create such a feature extractor,
+we create a class that inherits
+from :class:`audinterface.Feature`
+and :class:`audobject.Object`.
+
+.. jupyter-execute::
+
+    import audobject
+
+    class MeanStd(audinterface.Feature, audobject.Object):
+
+        def __init__(self):
+            super().__init__(
+                ['mean', 'std'],
+                process_func=self.features,
+            )
+
+        def features(self, signal, sampling_rate):
+            return [signal.mean(), signal.std()]
+
+    fex = MeanStd()
+    df = fex.process_index(index)
+    df
+
+The advantage of the feature extraction object is
+that we can save it to a YAML file
+and re-instantiate it from there.
+
+.. jupyter-execute::
+
+    fex.to_yaml('mean-std.yaml')
+    fex2 = audobject.from_yaml('mean-std.yaml')
+    df = fex2.process_index(index)
+    df
+
+
+Segmentation interface
+----------------------
+
+When the result of the processing function is an index
+it is recommended to use :class:`audinterface.Segment`,
+which returns a segmented index conforming to audformat_.
+An example for such a processing function +would be a voice activity detection algorithm. + +.. jupyter-execute:: + + import auditok + + def segments(signal, sampling_rate): + + # Convert floating point array to 16bit PCM little-endian + ints = (signal[0, :] * 32767).astype(np.int16) + little_endian = ints.astype('