From a7c5f63da02855b129540171f4237a9a9a85e4a3 Mon Sep 17 00:00:00 2001 From: wtq2255 Date: Sat, 16 Dec 2023 16:02:18 +0800 Subject: [PATCH 1/6] add times params --- python/audioflux/display/display.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/audioflux/display/display.py b/python/audioflux/display/display.py index 3ac17a0..b6cd03d 100644 --- a/python/audioflux/display/display.py +++ b/python/audioflux/display/display.py @@ -279,7 +279,7 @@ def fill_plot(x, y, axes=None, label='', is_legend=True, *, return axes -def fill_wave(data, samplate=32000, axes=None): +def fill_wave(data, samplate=32000, axes=None, times=None): """ Display a wave data @@ -288,11 +288,14 @@ def fill_wave(data, samplate=32000, axes=None): data: np.ndarray [shape=(n,)] Input audio data + samplate: int + Sampling rate of the incoming audio + axes: matplotlib.axes.Axes or None Axes to plot on instead of the default `plt.subplot()`. - samplate: int - Sampling rate of the incoming audio + times: np.ndarray [shape=(n,)] + time array Returns ------- @@ -301,7 +304,8 @@ def fill_wave(data, samplate=32000, axes=None): if data.ndim != 1: raise ValueError(f"data[ndim={data.ndim}] must be a 1D array") - times = np.arange(data.shape[-1]) / samplate + if times is None: + times = np.arange(data.shape[-1]) / samplate return fill_plot(times, data, axes=axes, x_lims=(times[0], times[-1]), is_legend=False, y_blank_threshold=0.15) From f5d60bbdfd5db686fe2c3e7eef0aac7becb7a491 Mon Sep 17 00:00:00 2001 From: wtq2255 Date: Sat, 16 Dec 2023 16:03:04 +0800 Subject: [PATCH 2/6] fix is_no_exp params error --- python/audioflux/feature/spectral.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/audioflux/feature/spectral.py b/python/audioflux/feature/spectral.py index 51370e2..c13b6de 100644 --- a/python/audioflux/feature/spectral.py +++ b/python/audioflux/feature/spectral.py @@ -194,7 +194,7 @@ def flatness(self, m_data_arr): ret_arr = 
revoke_channel(ret_arr, o_channel_shape, 1) return ret_arr - def flux(self, m_data_arr, step=1, p=2, is_positive=False, is_no_exp=True, tp=0): + def flux(self, m_data_arr, step=1, p=2, is_positive=False, is_exp=False, tp=0): """ Compute the spectral flux feature. @@ -219,7 +219,7 @@ def flux(self, m_data_arr, step=1, p=2, is_positive=False, is_no_exp=True, tp=0) is_positive: bool Whether to set negative numbers to 0 - is_no_exp: bool + is_exp: bool Whether to exp tp: int, 0 or 1 @@ -284,7 +284,7 @@ def flux(self, m_data_arr, step=1, p=2, is_positive=False, is_no_exp=True, tp=0) c_int(step), c_float(p), c_int(int(is_positive)), - pointer(c_int(int(is_no_exp))), + pointer(c_int(int(is_exp))), pointer(c_int(tp)), ret_arr) else: @@ -298,7 +298,7 @@ def flux(self, m_data_arr, step=1, p=2, is_positive=False, is_no_exp=True, tp=0) c_int(step), c_float(p), c_int(int(is_positive)), - pointer(c_int(int(is_no_exp))), + pointer(c_int(int(is_exp))), pointer(c_int(tp)), ret_arr[i]) ret_arr = revoke_channel(ret_arr, o_channel_shape, 1) From e204cdec5c0dcc487d6e50b163bc6565e8f226ff Mon Sep 17 00:00:00 2001 From: wtq2255 Date: Sat, 16 Dec 2023 16:08:12 +0800 Subject: [PATCH 3/6] add pitch cep/hps/lhs/ncf/pef/stft/yin algorithm add temproal_db function add cepstrogram --- python/audioflux/__init__.py | 4 + python/audioflux/__version__.py | 2 +- python/audioflux/cepstrogram.py | 227 ++++++++++++++++++ python/audioflux/mir/__init__.py | 8 +- python/audioflux/mir/pitch_cep.py | 163 +++++++++++++ python/audioflux/mir/pitch_hps.py | 171 +++++++++++++ python/audioflux/mir/pitch_lhs.py | 171 +++++++++++++ python/audioflux/mir/pitch_ncf.py | 160 ++++++++++++ python/audioflux/mir/pitch_pef.py | 224 +++++++++++++++++ python/audioflux/mir/pitch_stft.py | 165 +++++++++++++ .../audioflux/mir/{pitch.py => pitch_yin.py} | 90 ++++--- python/audioflux/utils/convert.py | 44 ++++ 12 files changed, 1379 insertions(+), 50 deletions(-) create mode 100644 python/audioflux/cepstrogram.py create mode 100644 
python/audioflux/mir/pitch_cep.py create mode 100644 python/audioflux/mir/pitch_hps.py create mode 100644 python/audioflux/mir/pitch_lhs.py create mode 100644 python/audioflux/mir/pitch_ncf.py create mode 100644 python/audioflux/mir/pitch_pef.py create mode 100644 python/audioflux/mir/pitch_stft.py rename python/audioflux/mir/{pitch.py => pitch_yin.py} (66%) diff --git a/python/audioflux/__init__.py b/python/audioflux/__init__.py index c3bad71..16a5caa 100644 --- a/python/audioflux/__init__.py +++ b/python/audioflux/__init__.py @@ -1,4 +1,5 @@ from .bft import * +from .cepstrogram import * from .dsp import * from .mir import * from .cqt import * @@ -23,3 +24,6 @@ from .feature import * from .core import * from .spectrogram import MelSpectrogram, BarkSpectrogram, ErbSpectrogram +from . import utils +from . import type +from . import display diff --git a/python/audioflux/__version__.py b/python/audioflux/__version__.py index 332a815..e010ffe 100644 --- a/python/audioflux/__version__.py +++ b/python/audioflux/__version__.py @@ -1,3 +1,3 @@ __title__ = 'audioflux' __description__ = 'A library for audio and music analysis, feature extraction.' -__version__ = '0.1.6' +__version__ = '0.1.7' diff --git a/python/audioflux/cepstrogram.py b/python/audioflux/cepstrogram.py new file mode 100644 index 0000000..02d9aa1 --- /dev/null +++ b/python/audioflux/cepstrogram.py @@ -0,0 +1,227 @@ +import numpy as np +from ctypes import Structure, POINTER, pointer, c_int, c_void_p +from audioflux.base import Base +from audioflux.type import WindowType +from audioflux.utils import check_audio, format_channel, revoke_channel, ascontiguous_swapaxex + +__all__ = ['Cepstrogram'] + + +class OpaqueCepstrogram(Structure): + _fields_ = [] + + +class Cepstrogram(Base): + """ + Cepstrogram algorithm + + Parameters + ---------- + radix2_exp: int + ``fft_length=2**radix2_exp`` + + samplate: int + Sampling rate of the incoming audio + + window_type: WindowType + Window type for each frame. 
+ + See: `type.WindowType` + + slide_length: int or None + Window sliding length. + + Examples + -------- + + Read guitar chord audio data + + >>> import audioflux as af + >>> audio_path = af.utils.sample_path('guitar_chord2') + >>> audio_arr, sr = af.read(audio_path) + + Extract Cepstrogram + + >>> from audioflux.type import ReassignType, WindowType + >>> import numpy as np + >>> obj = af.Cepstrogram(radix2_exp=12, samplate=sr) + >>> cepstrums_arr, envelope_arr, details_arr = obj.cepstrogram(audio_arr) + + Show Cepstrogram plot + + >>> import matplotlib.pyplot as plt + >>> from audioflux.display import fill_spec + >>> audio_len = audio_arr.shape[-1] + >>> + >>> fig, ax = plt.subplots() + >>> img = fill_spec(cepstrums_arr, axes=ax, + >>> x_coords=obj.x_coords(audio_len), + >>> y_coords=obj.y_coords(), + >>> x_axis='time', y_axis='log', + >>> title='Cepstrogram - Cepstrums') + >>> fig.colorbar(img, ax=ax) + >>> + >>> fig, ax = plt.subplots() + >>> img = fill_spec(envelope_arr, axes=ax, + >>> x_coords=obj.x_coords(audio_len), + >>> y_coords=obj.y_coords(), + >>> x_axis='time', y_axis='log', + >>> title='Cepstrogram - envelope') + >>> fig.colorbar(img, ax=ax) + >>> + >>> fig, ax = plt.subplots() + >>> img = fill_spec(details_arr, axes=ax, + >>> x_coords=obj.x_coords(audio_len), + >>> y_coords=obj.y_coords(), + >>> x_axis='time', y_axis='log', + >>> title='Cepstrogram - details') + >>> fig.colorbar(img, ax=ax) + >>> + + """ + + def __init__(self, radix2_exp=12, samplate=32000, window_type=WindowType.RECT, slide_length=1024): + super(Cepstrogram, self).__init__(pointer(OpaqueCepstrogram())) + + self.radix2_exp = radix2_exp + self.slide_length = slide_length + self.window_type = window_type + self.samplate = samplate + + self.fft_length = 1 << radix2_exp + + fn = self._lib['cepstrogramObj_new'] + fn.argtypes = [POINTER(POINTER(OpaqueCepstrogram)), + c_int, + POINTER(c_int), + POINTER(c_int)] + fn(self._obj, + c_int(self.radix2_exp), + 
+ pointer(c_int(self.window_type.value)), + pointer(c_int(self.slide_length))) + self._is_created = True + + def cal_time_length(self, data_length): + """ + Calculate the number of frames (time length) for audio data of the given length. + + - ``fft_length = 2 ** radix2_exp`` + - ``(data_length - fft_length) // slide_length + 1`` + + Parameters + ---------- + data_length: int + The length of the data to be calculated. + + Returns + ------- + out: int + """ + fn = self._lib['cepstrogramObj_calTimeLength'] + fn.argtypes = [POINTER(OpaqueCepstrogram), c_int] + return fn(self._obj, c_int(data_length)) + + def cepstrogram(self, data_arr, cep_num=4): + """ + Get cepstrogram data + + Parameters + ---------- + data_arr: np.ndarray [shape=(..., n)] + Input audio data + + cep_num: int, 4~128 + formant estimate number + + Returns + ------- + cepstrums: np.ndarray [shape=(..., fre, time), dtype=(np.float32)] + The matrix of cepstrums + + envelope: np.ndarray [shape=(..., fre, time), dtype=(np.float32)] + The matrix of envelope(formant) + + details: np.ndarray [shape=(..., fre, time), dtype=(np.float32)] + The matrix of details(tone) + """ + data_arr = np.asarray(data_arr, dtype=np.float32, order='C') + check_audio(data_arr, is_mono=False) + + data_len = data_arr.shape[-1] + + fn = self._lib['cepstrogramObj_cepstrogram'] + fn.argtypes = [POINTER(OpaqueCepstrogram), + c_int, + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + c_int, + np.ctypeslib.ndpointer(dtype=np.float32, ndim=2, flags='C_CONTIGUOUS'), + np.ctypeslib.ndpointer(dtype=np.float32, ndim=2, flags='C_CONTIGUOUS'), + np.ctypeslib.ndpointer(dtype=np.float32, ndim=2, flags='C_CONTIGUOUS'), + ] + + time_len = self.cal_time_length(data_len) + c_cep_num = c_int(cep_num) + c_data_len = c_int(data_len) + + if data_arr.ndim == 1: + size = (time_len, self.fft_length // 2 + 1) + m_arr1 = np.zeros(size, dtype=np.float32) # coef + m_arr2 = np.zeros(size, dtype=np.float32) # envelope + m_arr3 = np.zeros(size, dtype=np.float32) # tone +
fn(self._obj, c_cep_num, data_arr, c_data_len, m_arr1, m_arr2, m_arr3) + else: + data_arr, o_channel_shape = format_channel(data_arr, 1) + channel_num = data_arr.shape[0] + + size = (channel_num, time_len, self.fft_length // 2 + 1) + m_arr1 = np.zeros(size, dtype=np.float32) # envelope + tone + m_arr2 = np.zeros(size, dtype=np.float32) # envelope + m_arr3 = np.zeros(size, dtype=np.float32) # tone + for i in range(channel_num): + fn(self._obj, c_cep_num, data_arr[i], c_data_len, + m_arr1[i], m_arr2[i], m_arr3[i]) + m_arr1 = revoke_channel(m_arr1, o_channel_shape, 2) + m_arr2 = revoke_channel(m_arr2, o_channel_shape, 2) + m_arr3 = revoke_channel(m_arr3, o_channel_shape, 2) + m_arr1 = ascontiguous_swapaxex(m_arr1, -2, -1) + m_arr2 = ascontiguous_swapaxex(m_arr2, -2, -1) + m_arr3 = ascontiguous_swapaxex(m_arr3, -2, -1) + return m_arr1, m_arr2, m_arr3 + + def y_coords(self): + """ + Get the Y-axis coordinate + + Returns + ------- + out: np.ndarray [shape=(fre,)] + """ + y_coords = np.linspace(0, self.samplate / 2, int(self.fft_length / 2) + 2) + return y_coords + + def x_coords(self, data_length): + """ + Get the X-axis coordinate + + Parameters + ---------- + data_length: int + The length of the data to be calculated. 
+ + Returns + ------- + out: np.ndarray [shape=(time,)] + """ + if data_length < self.fft_length: + raise ValueError(f'radix2_exp={self.radix2_exp}(fft_length={self.fft_length}) ' + f'is too large for data_length={data_length}') + x_coords = np.linspace(0, data_length / self.samplate, + self.cal_time_length(data_length) + 1) + return x_coords + + def __del__(self): + if self._is_created: + free_fn = self._lib['cepstrogramObj_free'] + free_fn.argtypes = [POINTER(OpaqueCepstrogram)] + free_fn.restype = c_void_p + free_fn(self._obj) diff --git a/python/audioflux/mir/__init__.py b/python/audioflux/mir/__init__.py index 7cf3cbb..033dc57 100644 --- a/python/audioflux/mir/__init__.py +++ b/python/audioflux/mir/__init__.py @@ -1,4 +1,10 @@ from .harmonic_ratio import * from .hpss import * from .onset import * -from .pitch import * +from .pitch_cep import * +from .pitch_hps import * +from .pitch_lhs import * +from .pitch_ncf import * +from .pitch_pef import * +from .pitch_stft import * +from .pitch_yin import * diff --git a/python/audioflux/mir/pitch_cep.py b/python/audioflux/mir/pitch_cep.py new file mode 100644 index 0000000..dfa8f5a --- /dev/null +++ b/python/audioflux/mir/pitch_cep.py @@ -0,0 +1,163 @@ +import numpy as np +from ctypes import Structure, POINTER, pointer, c_int, c_void_p, c_float +from audioflux.base import Base +from audioflux.type import WindowType +from audioflux.utils import check_audio, format_channel, revoke_channel + +__all__ = ["PitchCEP"] + + +class OpaquePitchCEP(Structure): + _fields_ = [] + + +class PitchCEP(Base): + """ + Pitch CEP algorithm + + Parameters + ---------- + samplate: int + Sampling rate of the incoming audio. + + low_fre: float + Lowest frequency. Default is `32.0`. + + high_fre: float + Highest frequency. Default is `2000.0`. + + radix2_exp: int + ``fft_length=2**radix2_exp`` + + slide_length: int + Window sliding length. + + window_type: WindowType + Window type for each frame. 
+ + See: `type.WindowType` + + Examples + -------- + + Read 220Hz audio data + + >>> import audioflux as af + >>> audio_path = af.utils.sample_path('220') + >>> audio_arr, sr = af.read(audio_path) + + Extract pitch + + >>> pitch_obj = af.PitchCEP(samplate=sr) + >>> fre_arr = pitch_obj.pitch(audio_arr) + + Show pitch plot + + >>> import matplotlib.pyplot as plt + >>> from audioflux.display import fill_plot + >>> times = np.arange(fre_arr.shape[-1]) * (pitch_obj.slide_length / sr) + >>> fig, ax = plt.subplots() + >>> ax.set_title('PitchCEP') + >>> fill_plot(times, fre_arr, axes=ax) + >>> ax.set_ylim(0, 300) + """ + + def __init__(self, samplate=32000, low_fre=32.0, high_fre=2000.0, + radix2_exp=12, slide_length=1024, window_type=WindowType.HAMM): + super(PitchCEP, self).__init__(pointer(OpaquePitchCEP())) + + if low_fre >= high_fre: + raise ValueError(f'`low_fre` must be smaller than `high_fre`') + + self.samplate = samplate + self.low_fre = low_fre + self.high_fre = high_fre + self.radix2_exp = radix2_exp + self.slide_length = slide_length + self.window_type = window_type + self.is_continue = False + + fn = self._lib['pitchCEPObj_new'] + fn.argtypes = [POINTER(POINTER(OpaquePitchCEP)), + POINTER(c_int), POINTER(c_float), POINTER(c_float), + POINTER(c_int), POINTER(c_int), POINTER(c_int), + POINTER(c_int)] + + fn(self._obj, + pointer(c_int(self.samplate)), + pointer(c_float(self.low_fre)), + pointer(c_float(self.high_fre)), + pointer(c_int(self.radix2_exp)), + pointer(c_int(self.slide_length)), + pointer(c_int(self.window_type.value)), + pointer(c_int(int(self.is_continue)))) + self._is_created = True + + def cal_time_length(self, data_length): + """ + Calculate the length of a frame from audio data. + + - ``fft_length = 2 ** radix2_exp`` + - ``(data_length - fft_length) // slide_length + 1`` + + Parameters + ---------- + data_length: int + The length of the data to be calculated. 
+ + Returns + ------- + out: int + """ + fn = self._lib['pitchCEPObj_calTimeLength'] + fn.argtypes = [POINTER(OpaquePitchCEP), c_int] + fn.restype = c_int + return fn(self._obj, c_int(data_length)) + + def pitch(self, data_arr): + """ + Compute pitch + + Parameters + ---------- + data_arr: np.ndarray [shape=(..., n)] + Input audio array + + Returns + ------- + fre_arr: np.ndarray [shape=(..., time)] + """ + data_arr = np.asarray(data_arr, dtype=np.float32, order='C') + check_audio(data_arr, is_mono=False) + + fn = self._lib['pitchCEPObj_pitch'] + fn.argtypes = [POINTER(OpaquePitchCEP), + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + c_int, + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + ] + + data_len = data_arr.shape[-1] + time_length = self.cal_time_length(data_len) + + if data_arr.ndim == 1: + fre_arr = np.zeros(time_length, dtype=np.float32) + fn(self._obj, data_arr, c_int(data_len), fre_arr) + else: + data_arr, o_channel_shape = format_channel(data_arr, 1) + channel_num = data_arr.shape[0] + + size = (channel_num, time_length) + fre_arr = np.zeros(size, dtype=np.float32) + for i in range(channel_num): + fn(self._obj, data_arr[i], c_int(data_len), fre_arr[i]) + + fre_arr = revoke_channel(fre_arr, o_channel_shape, 1) + return fre_arr + + def __del__(self): + if self._is_created: + fn = self._lib['pitchCEPObj_free'] + fn.argtypes = [POINTER(OpaquePitchCEP)] + fn.restype = c_void_p + fn(self._obj) diff --git a/python/audioflux/mir/pitch_hps.py b/python/audioflux/mir/pitch_hps.py new file mode 100644 index 0000000..7327a08 --- /dev/null +++ b/python/audioflux/mir/pitch_hps.py @@ -0,0 +1,171 @@ +import numpy as np +from ctypes import Structure, POINTER, pointer, c_int, c_void_p, c_float +from audioflux.base import Base +from audioflux.type import WindowType +from audioflux.utils import check_audio, format_channel, revoke_channel + +__all__ = ["PitchHPS"] + + +class OpaquePitchHPS(Structure): + _fields_ = [] + + 
+class PitchHPS(Base): + """ + Pitch HPS algorithm + + Parameters + ---------- + samplate: int + Sampling rate of the incoming audio. + + low_fre: float + Lowest frequency. Default is `32.0`. + + high_fre: float + Highest frequency. Default is `2000.0`. + + radix2_exp: int + ``fft_length=2**radix2_exp`` + + slide_length: int + Window sliding length. + + window_type: WindowType + Window type for each frame. + + See: `type.WindowType` + + harmonic_count: int + Harmonic count. Default is `5`. + + Examples + -------- + + Read 220Hz audio data + + >>> import audioflux as af + >>> audio_path = af.utils.sample_path('220') + >>> audio_arr, sr = af.read(audio_path) + + Extract pitch + + >>> pitch_obj = af.PitchHPS(samplate=sr) + >>> fre_arr = pitch_obj.pitch(audio_arr) + + Show pitch plot + + >>> import matplotlib.pyplot as plt + >>> from audioflux.display import fill_plot + >>> times = np.arange(fre_arr.shape[-1]) * (pitch_obj.slide_length / sr) + >>> fig, ax = plt.subplots() + >>> ax.set_title('PitchHPS') + >>> fill_plot(times, fre_arr, axes=ax) + >>> ax.set_ylim(0, 300) + """ + + def __init__(self, samplate=32000, low_fre=32.0, high_fre=2000.0, + radix2_exp=12, slide_length=1024, window_type=WindowType.HAMM, + harmonic_count=5): + super(PitchHPS, self).__init__(pointer(OpaquePitchHPS())) + + if low_fre >= high_fre: + raise ValueError(f'`low_fre` must be smaller than `high_fre`') + if harmonic_count <= 0: + raise ValueError(f'`harmonic_count` must be greater than 0.') + + self.samplate = samplate + self.low_fre = low_fre + self.high_fre = high_fre + self.radix2_exp = radix2_exp + self.slide_length = slide_length + self.window_type = window_type + self.harmonic_count = harmonic_count + self.is_continue = False + + fn = self._lib['pitchHPSObj_new'] + fn.argtypes = [POINTER(POINTER(OpaquePitchHPS)), + POINTER(c_int), POINTER(c_float), POINTER(c_float), + POINTER(c_int), POINTER(c_int), POINTER(c_int), + POINTER(c_int), POINTER(c_int)] + + fn(self._obj, + 
pointer(c_int(self.samplate)), + pointer(c_float(self.low_fre)), + pointer(c_float(self.high_fre)), + pointer(c_int(self.radix2_exp)), + pointer(c_int(self.slide_length)), + pointer(c_int(self.window_type.value)), + pointer(c_int(self.harmonic_count)), + pointer(c_int(int(self.is_continue)))) + self._is_created = True + + def cal_time_length(self, data_length): + """ + Calculate the length of a frame from audio data. + + - ``fft_length = 2 ** radix2_exp`` + - ``(data_length - fft_length) // slide_length + 1`` + + Parameters + ---------- + data_length: int + The length of the data to be calculated. + + Returns + ------- + out: int + """ + fn = self._lib['pitchHPSObj_calTimeLength'] + fn.argtypes = [POINTER(OpaquePitchHPS), c_int] + fn.restype = c_int + return fn(self._obj, c_int(data_length)) + + def pitch(self, data_arr): + """ + Compute pitch + + Parameters + ---------- + data_arr: np.ndarray [shape=(..., n)] + Input audio array + + Returns + ------- + fre_arr: np.ndarray [shape=(..., time)] + """ + data_arr = np.asarray(data_arr, dtype=np.float32, order='C') + check_audio(data_arr, is_mono=False) + + fn = self._lib['pitchHPSObj_pitch'] + fn.argtypes = [POINTER(OpaquePitchHPS), + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + c_int, + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + ] + + data_len = data_arr.shape[-1] + time_length = self.cal_time_length(data_len) + + if data_arr.ndim == 1: + fre_arr = np.zeros(time_length, dtype=np.float32) + fn(self._obj, data_arr, c_int(data_len), fre_arr) + else: + data_arr, o_channel_shape = format_channel(data_arr, 1) + channel_num = data_arr.shape[0] + + size = (channel_num, time_length) + fre_arr = np.zeros(size, dtype=np.float32) + for i in range(channel_num): + fn(self._obj, data_arr[i], c_int(data_len), fre_arr[i]) + + fre_arr = revoke_channel(fre_arr, o_channel_shape, 1) + return fre_arr + + def __del__(self): + if self._is_created: + fn = 
self._lib['pitchHPSObj_free'] + fn.argtypes = [POINTER(OpaquePitchHPS)] + fn.restype = c_void_p + fn(self._obj) diff --git a/python/audioflux/mir/pitch_lhs.py b/python/audioflux/mir/pitch_lhs.py new file mode 100644 index 0000000..24da9f8 --- /dev/null +++ b/python/audioflux/mir/pitch_lhs.py @@ -0,0 +1,171 @@ +import numpy as np +from ctypes import Structure, POINTER, pointer, c_int, c_void_p, c_float +from audioflux.base import Base +from audioflux.type import WindowType +from audioflux.utils import check_audio, format_channel, revoke_channel + +__all__ = ["PitchLHS"] + + +class OpaquePitchLHS(Structure): + _fields_ = [] + + +class PitchLHS(Base): + """ + Pitch LHS algorithm + + Parameters + ---------- + samplate: int + Sampling rate of the incoming audio. + + low_fre: float + Lowest frequency. Default is `32.0`. + + high_fre: float + Highest frequency. Default is `2000.0`. + + radix2_exp: int + ``fft_length=2**radix2_exp`` + + slide_length: int + Window sliding length. + + window_type: WindowType + Window type for each frame. + + See: `type.WindowType` + + harmonic_count: int + Harmonic count. Default is `5`. 
+ + Examples + -------- + + Read 220Hz audio data + + >>> import audioflux as af + >>> audio_path = af.utils.sample_path('220') + >>> audio_arr, sr = af.read(audio_path) + + Extract pitch + + >>> pitch_obj = af.PitchLHS(samplate=sr) + >>> fre_arr = pitch_obj.pitch(audio_arr) + + Show pitch plot + + >>> import matplotlib.pyplot as plt + >>> from audioflux.display import fill_plot + >>> times = np.arange(fre_arr.shape[-1]) * (pitch_obj.slide_length / sr) + >>> fig, ax = plt.subplots() + >>> ax.set_title('PitchLHS') + >>> fill_plot(times, fre_arr, axes=ax) + >>> ax.set_ylim(0, 300) + """ + + def __init__(self, samplate=32000, low_fre=32.0, high_fre=2000.0, + radix2_exp=12, slide_length=1024, window_type=WindowType.HAMM, + harmonic_count=5): + super(PitchLHS, self).__init__(pointer(OpaquePitchLHS())) + + if low_fre >= high_fre: + raise ValueError(f'`low_fre` must be smaller than `high_fre`') + if harmonic_count <= 0: + raise ValueError(f'`harmonic_count` must be greater than 0.') + + self.samplate = samplate + self.low_fre = low_fre + self.high_fre = high_fre + self.radix2_exp = radix2_exp + self.slide_length = slide_length + self.window_type = window_type + self.harmonic_count = harmonic_count + self.is_continue = False + + fn = self._lib['pitchLHSObj_new'] + fn.argtypes = [POINTER(POINTER(OpaquePitchLHS)), + POINTER(c_int), POINTER(c_float), POINTER(c_float), + POINTER(c_int), POINTER(c_int), POINTER(c_int), + POINTER(c_int), POINTER(c_int)] + + fn(self._obj, + pointer(c_int(self.samplate)), + pointer(c_float(self.low_fre)), + pointer(c_float(self.high_fre)), + pointer(c_int(self.radix2_exp)), + pointer(c_int(self.slide_length)), + pointer(c_int(self.window_type.value)), + pointer(c_int(self.harmonic_count)), + pointer(c_int(int(self.is_continue)))) + self._is_created = True + + def cal_time_length(self, data_length): + """ + Calculate the length of a frame from audio data. 
+ + - ``fft_length = 2 ** radix2_exp`` + - ``(data_length - fft_length) // slide_length + 1`` + + Parameters + ---------- + data_length: int + The length of the data to be calculated. + + Returns + ------- + out: int + """ + fn = self._lib['pitchLHSObj_calTimeLength'] + fn.argtypes = [POINTER(OpaquePitchLHS), c_int] + fn.restype = c_int + return fn(self._obj, c_int(data_length)) + + def pitch(self, data_arr): + """ + Compute pitch + + Parameters + ---------- + data_arr: np.ndarray [shape=(..., n)] + Input audio array + + Returns + ------- + fre_arr: np.ndarray [shape=(..., time)] + """ + data_arr = np.asarray(data_arr, dtype=np.float32, order='C') + check_audio(data_arr, is_mono=False) + + fn = self._lib['pitchLHSObj_pitch'] + fn.argtypes = [POINTER(OpaquePitchLHS), + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + c_int, + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + ] + + data_len = data_arr.shape[-1] + time_length = self.cal_time_length(data_len) + + if data_arr.ndim == 1: + fre_arr = np.zeros(time_length, dtype=np.float32) + fn(self._obj, data_arr, c_int(data_len), fre_arr) + else: + data_arr, o_channel_shape = format_channel(data_arr, 1) + channel_num = data_arr.shape[0] + + size = (channel_num, time_length) + fre_arr = np.zeros(size, dtype=np.float32) + for i in range(channel_num): + fn(self._obj, data_arr[i], c_int(data_len), fre_arr[i]) + + fre_arr = revoke_channel(fre_arr, o_channel_shape, 1) + return fre_arr + + def __del__(self): + if self._is_created: + fn = self._lib['pitchLHSObj_free'] + fn.argtypes = [POINTER(OpaquePitchLHS)] + fn.restype = c_void_p + fn(self._obj) diff --git a/python/audioflux/mir/pitch_ncf.py b/python/audioflux/mir/pitch_ncf.py new file mode 100644 index 0000000..0c0edf7 --- /dev/null +++ b/python/audioflux/mir/pitch_ncf.py @@ -0,0 +1,160 @@ +import numpy as np +from ctypes import Structure, POINTER, pointer, c_int, c_void_p, c_float +from audioflux.base import Base +from
audioflux.type import WindowType +from audioflux.utils import check_audio, format_channel, revoke_channel, note_to_hz + +__all__ = ["PitchNCF"] + + +class OpaquePitchNCF(Structure): + _fields_ = [] + + +class PitchNCF(Base): + """ + Pitch NCF algorithm + + Parameters + ---------- + samplate: int + Sampling rate of the incoming audio. + + low_fre: float + Lowest frequency. Default is `32.0`. + + high_fre: float + Highest frequency. Default is `2000.0`. + + radix2_exp: int + ``fft_length=2**radix2_exp`` + + slide_length: int + Window sliding length. + + window_type: WindowType + Window type for each frame. + + See: `type.WindowType` + + Examples + -------- + + Read 220Hz audio data + + >>> import audioflux as af + >>> audio_path = af.utils.sample_path('220') + >>> audio_arr, sr = af.read(audio_path) + + Extract pitch + + >>> pitch_obj = af.PitchNCF(samplate=sr) + >>> fre_arr = pitch_obj.pitch(audio_arr) + + Show pitch plot + + >>> import matplotlib.pyplot as plt + >>> from audioflux.display import fill_plot + >>> times = np.arange(fre_arr.shape[-1]) * (pitch_obj.slide_length / sr) + >>> fig, ax = plt.subplots() + >>> ax.set_title('PitchNCF') + >>> fill_plot(times, fre_arr, axes=ax) + >>> ax.set_ylim(0, 300) + """ + + def __init__(self, samplate=32000, low_fre=32.0, high_fre=2000.0, + radix2_exp=12, slide_length=1024, window_type=WindowType.RECT): + super(PitchNCF, self).__init__(pointer(OpaquePitchNCF())) + + self.samplate = samplate + self.low_fre = low_fre + self.high_fre = high_fre + self.radix2_exp = radix2_exp + self.slide_length = slide_length + self.window_type = window_type + self.is_continue = False + + fn = self._lib['pitchNCFObj_new'] + fn.argtypes = [POINTER(POINTER(OpaquePitchNCF)), + POINTER(c_int), POINTER(c_float), POINTER(c_float), + POINTER(c_int), POINTER(c_int), POINTER(c_int), + POINTER(c_int)] + + fn(self._obj, + pointer(c_int(self.samplate)), + pointer(c_float(self.low_fre)), + pointer(c_float(self.high_fre)), + pointer(c_int(self.radix2_exp)), 
+ pointer(c_int(self.slide_length)), + pointer(c_int(self.window_type.value)), + pointer(c_int(int(self.is_continue)))) + self._is_created = True + + def cal_time_length(self, data_length): + """ + Calculate the number of frames (time length) for audio data of the given length. + + - ``fft_length = 2 ** radix2_exp`` + - ``(data_length - fft_length) // slide_length + 1`` + + Parameters + ---------- + data_length: int + The length of the data to be calculated. + + Returns + ------- + out: int + """ + fn = self._lib['pitchNCFObj_calTimeLength'] + fn.argtypes = [POINTER(OpaquePitchNCF), c_int] + fn.restype = c_int + return fn(self._obj, c_int(data_length)) + + def pitch(self, data_arr): + """ + Compute pitch + + Parameters + ---------- + data_arr: np.ndarray [shape=(..., n)] + Input audio array + + Returns + ------- + fre_arr: np.ndarray [shape=(..., time)] + """ + data_arr = np.asarray(data_arr, dtype=np.float32, order='C') + check_audio(data_arr, is_mono=False) + + fn = self._lib['pitchNCFObj_pitch'] + fn.argtypes = [POINTER(OpaquePitchNCF), + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + c_int, + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + ] + + data_len = data_arr.shape[-1] + time_length = self.cal_time_length(data_len) + + if data_arr.ndim == 1: + fre_arr = np.zeros(time_length, dtype=np.float32) + fn(self._obj, data_arr, c_int(data_len), fre_arr) + else: + data_arr, o_channel_shape = format_channel(data_arr, 1) + channel_num = data_arr.shape[0] + + size = (channel_num, time_length) + fre_arr = np.zeros(size, dtype=np.float32) + for i in range(channel_num): + fn(self._obj, data_arr[i], c_int(data_len), fre_arr[i]) + + fre_arr = revoke_channel(fre_arr, o_channel_shape, 1) + return fre_arr + + def __del__(self): + if self._is_created: + fn = self._lib['pitchNCFObj_free'] + fn.argtypes = [POINTER(OpaquePitchNCF)] + fn.restype = c_void_p + fn(self._obj) diff --git a/python/audioflux/mir/pitch_pef.py b/python/audioflux/mir/pitch_pef.py new file
mode 100644 index 0000000..680a340 --- /dev/null +++ b/python/audioflux/mir/pitch_pef.py @@ -0,0 +1,224 @@ +import numpy as np +from ctypes import Structure, POINTER, pointer, c_int, c_void_p, c_float +from audioflux.base import Base +from audioflux.type import WindowType +from audioflux.utils import check_audio, format_channel, revoke_channel + +__all__ = ["PitchPEF"] + + +class OpaquePitchPEF(Structure): + _fields_ = [] + + +class PitchPEF(Base): + """ + Pitch PEF algorithm + + Parameters + ---------- + samplate: int + Sampling rate of the incoming audio. + + low_fre: float + Lowest frequency. Default is `32.0`. + + high_fre: float + Highest frequency. Default is `2000.0`. + + cut_fre: float + Cut frequency. Default is `4000.0`, and must be greater than `high_fre`. + + radix2_exp: int + ``fft_length=2**radix2_exp`` + + slide_length: int + Window sliding length. + + window_type: WindowType + Window type for each frame. + + See: `type.WindowType` + + alpha: float, > 0 + alpha. Default is `10.0`. + + beta: float, 0~1 + beta. Default is `0.5`. + + gamma: float, > 1 + gamma. Default is `1.8`.
+ + Examples + -------- + + Read 220Hz audio data + + >>> import audioflux as af + >>> audio_path = af.utils.sample_path('220') + >>> audio_arr, sr = af.read(audio_path) + + Extract pitch + + >>> pitch_obj = af.PitchPEF(samplate=sr) + >>> fre_arr = pitch_obj.pitch(audio_arr) + + Show pitch plot + + >>> import matplotlib.pyplot as plt + >>> from audioflux.display import fill_plot + >>> times = np.arange(fre_arr.shape[-1]) * (pitch_obj.slide_length / sr) + >>> fig, ax = plt.subplots() + >>> ax.set_title('PitchPEF') + >>> fill_plot(times, fre_arr, axes=ax) + >>> ax.set_ylim(0, 300) + """ + + def __init__(self, samplate=32000, low_fre=32.0, high_fre=2000.0, cut_fre=4000.0, + radix2_exp=12, slide_length=1024, window_type=WindowType.HAMM, + alpha=10.0, beta=0.5, gamma=1.8): + super(PitchPEF, self).__init__(pointer(OpaquePitchPEF())) + + if low_fre >= high_fre: + raise ValueError(f'`low_fre` must be smaller than `high_fre`') + if high_fre >= cut_fre: + raise ValueError(f'`high_fre` must be smaller than `cut_fre`') + if alpha <= 0: + raise ValueError(f'`alpha` must be greater than 0.') + if beta < 0 or beta > 1: + raise ValueError(f'`beta` must be between 0 and 1.') + if gamma <= 1: + raise ValueError(f'`gamma` must be greater than 1.') + + self.samplate = samplate + self.low_fre = low_fre + self.high_fre = high_fre + self.cut_fre = cut_fre + self.radix2_exp = radix2_exp + self.slide_length = slide_length + self.window_type = window_type + self.alpha = alpha + self.beta = beta + self.gamma = gamma + self.is_continue = False + + fn = self._lib['pitchPEFObj_new'] + fn.argtypes = [POINTER(POINTER(OpaquePitchPEF)), POINTER(c_int), + POINTER(c_float), POINTER(c_float), POINTER(c_float), + POINTER(c_int), POINTER(c_int), POINTER(c_int), + POINTER(c_float), POINTER(c_float), POINTER(c_float), + POINTER(c_int)] + + fn(self._obj, + pointer(c_int(self.samplate)), + pointer(c_float(self.low_fre)), + pointer(c_float(self.high_fre)), + pointer(c_float(self.cut_fre)), + 
pointer(c_int(self.radix2_exp)), + pointer(c_int(self.slide_length)), + pointer(c_int(self.window_type.value)), + pointer(c_float(self.alpha)), + pointer(c_float(self.beta)), + pointer(c_float(self.gamma)), + pointer(c_int(int(self.is_continue)))) + self._is_created = True + + def cal_time_length(self, data_length): + """ + Calculate the length of a frame from audio data. + + - ``fft_length = 2 ** radix2_exp`` + - ``(data_length - fft_length) // slide_length + 1`` + + Parameters + ---------- + data_length: int + The length of the data to be calculated. + + Returns + ------- + out: int + """ + fn = self._lib['pitchPEFObj_calTimeLength'] + fn.argtypes = [POINTER(OpaquePitchPEF), c_int] + fn.restype = c_int + return fn(self._obj, c_int(data_length)) + + def set_filter_params(self, alpha, beta, gamma): + """ + Set filter params + + Parameters + ---------- + alpha: float + alpha + + beta: float + beta + + gamma: float + gamma + + """ + if alpha <= 0: + raise ValueError(f'`alpha` must be greater than 0.') + if beta < 0 or beta > 1: + raise ValueError(f'`beta` must be between 0 and 1.') + if gamma <= 1: + raise ValueError(f'`gamma` must be greater than 1.') + + fn = self._lib['pitchPEFObj_setFilterParams'] + fn.argtypes = [POINTER(OpaquePitchPEF), c_float, c_float, c_float] + fn(self._obj, c_float(alpha), c_float(beta), c_float(gamma)) + + self.alpha = alpha + self.beta = beta + self.gamma = gamma + + def pitch(self, data_arr): + """ + Compute pitch + + Parameters + ---------- + data_arr: np.ndarray [shape=(..., n)] + Input audio array + + Returns + ------- + fre_arr: np.ndarray [shape=(..., time)] + """ + data_arr = np.asarray(data_arr, dtype=np.float32, order='C') + check_audio(data_arr, is_mono=False) + + fn = self._lib['pitchPEFObj_pitch'] + fn.argtypes = [POINTER(OpaquePitchPEF), + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + c_int, + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + ] + + data_len = 
data_arr.shape[-1] + time_length = self.cal_time_length(data_len) + + if data_arr.ndim == 1: + fre_arr = np.zeros(time_length, dtype=np.float32) + fn(self._obj, data_arr, c_int(data_len), fre_arr) + else: + data_arr, o_channel_shape = format_channel(data_arr, 1) + channel_num = data_arr.shape[0] + + size = (channel_num, time_length) + fre_arr = np.zeros(size, dtype=np.float32) + for i in range(channel_num): + fn(self._obj, data_arr[i], c_int(data_len), fre_arr[i]) + + fre_arr = revoke_channel(fre_arr, o_channel_shape, 1) + return fre_arr + + def __del__(self): + if self._is_created: + fn = self._lib['pitchPEFObj_free'] + fn.argtypes = [POINTER(OpaquePitchPEF)] + fn.restype = c_void_p + fn(self._obj) diff --git a/python/audioflux/mir/pitch_stft.py b/python/audioflux/mir/pitch_stft.py new file mode 100644 index 0000000..c469230 --- /dev/null +++ b/python/audioflux/mir/pitch_stft.py @@ -0,0 +1,165 @@ +import numpy as np +from ctypes import Structure, POINTER, pointer, c_int, c_void_p, c_float +from audioflux.base import Base +from audioflux.type import WindowType +from audioflux.utils import check_audio, format_channel, revoke_channel + +__all__ = ["PitchSTFT"] + + +class OpaquePitchSTFT(Structure): + _fields_ = [] + + +class PitchSTFT(Base): + """ + Pitch STFT algorithm + + Parameters + ---------- + samplate: int + Sampling rate of the incoming audio. + + low_fre: float + Lowest frequency. Default is `32.0`. + + high_fre: float + Highest frequency. Default is `2000.0`. + + radix2_exp: int + ``fft_length=2**radix2_exp`` + + slide_length: int + Window sliding length. + + window_type: WindowType + Window type for each frame. 
+ + See: `type.WindowType` + + Examples + -------- + + Read 220Hz audio data + + >>> import audioflux as af + >>> audio_path = af.utils.sample_path('220') + >>> audio_arr, sr = af.read(audio_path) + + Extract pitch + + >>> pitch_obj = af.PitchSTFT(samplate=sr) + >>> fre_arr = pitch_obj.pitch(audio_arr) + + Show pitch plot + + >>> import matplotlib.pyplot as plt + >>> from audioflux.display import fill_plot + >>> times = np.arange(fre_arr.shape[-1]) * (pitch_obj.slide_length / sr) + >>> fig, ax = plt.subplots() + >>> ax.set_title('PitchSTFT') + >>> fill_plot(times, fre_arr, axes=ax) + >>> ax.set_ylim(0, 300) + """ + + def __init__(self, samplate=32000, low_fre=32.0, high_fre=2000.0, + radix2_exp=12, slide_length=1024, window_type=WindowType.HAMM): + super(PitchSTFT, self).__init__(pointer(OpaquePitchSTFT())) + + self.samplate = samplate + self.low_fre = low_fre + self.high_fre = high_fre + self.radix2_exp = radix2_exp + self.slide_length = slide_length + self.window_type = window_type + self.is_continue = False + + fn = self._lib['pitchSTFTObj_new'] + fn.argtypes = [POINTER(POINTER(OpaquePitchSTFT)), + POINTER(c_int), POINTER(c_float), POINTER(c_float), + POINTER(c_int), POINTER(c_int), POINTER(c_int), + POINTER(c_int)] + + fn(self._obj, + pointer(c_int(self.samplate)), + pointer(c_float(self.low_fre)), + pointer(c_float(self.high_fre)), + pointer(c_int(self.radix2_exp)), + pointer(c_int(self.slide_length)), + pointer(c_int(self.window_type.value)), + pointer(c_int(int(self.is_continue)))) + self._is_created = True + + def cal_time_length(self, data_length): + """ + Calculate the length of a frame from audio data. + + - ``fft_length = 2 ** radix2_exp`` + - ``(data_length - fft_length) // slide_length + 1`` + + Parameters + ---------- + data_length: int + The length of the data to be calculated. 
+ + Returns + ------- + out: int + """ + fn = self._lib['pitchSTFTObj_calTimeLength'] + fn.argtypes = [POINTER(OpaquePitchSTFT), c_int] + fn.restype = c_int + return fn(self._obj, c_int(data_length)) + + def pitch(self, data_arr): + """ + Compute pitch + + Parameters + ---------- + data_arr: np.ndarray [shape=(..., n)] + Input audio array + + Returns + ------- + fre_arr: np.ndarray [shape=(..., time)] + db_arr: np.ndarray [shape=(..., time)] + """ + data_arr = np.asarray(data_arr, dtype=np.float32, order='C') + check_audio(data_arr, is_mono=False) + + fn = self._lib['pitchSTFTObj_pitch'] + fn.argtypes = [POINTER(OpaquePitchSTFT), + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + c_int, + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + ] + + data_len = data_arr.shape[-1] + time_length = self.cal_time_length(data_len) + + if data_arr.ndim == 1: + fre_arr = np.zeros(time_length, dtype=np.float32) + db_arr = np.zeros(time_length, dtype=np.float32) + fn(self._obj, data_arr, c_int(data_len), fre_arr, db_arr) + else: + data_arr, o_channel_shape = format_channel(data_arr, 1) + channel_num = data_arr.shape[0] + + size = (channel_num, time_length) + fre_arr = np.zeros(size, dtype=np.float32) + db_arr = np.zeros(size, dtype=np.float32) + for i in range(channel_num): + fn(self._obj, data_arr[i], c_int(data_len), fre_arr[i], db_arr[i]) + + fre_arr = revoke_channel(fre_arr, o_channel_shape, 1) + db_arr = revoke_channel(db_arr, o_channel_shape, 1) + return fre_arr, db_arr + + def __del__(self): + if self._is_created: + fn = self._lib['pitchSTFTObj_free'] + fn.argtypes = [POINTER(OpaquePitchSTFT)] + fn.restype = c_void_p + fn(self._obj) diff --git a/python/audioflux/mir/pitch.py b/python/audioflux/mir/pitch_yin.py similarity index 66% rename from python/audioflux/mir/pitch.py rename to python/audioflux/mir/pitch_yin.py index 15536e7..4158760 100644 --- 
a/python/audioflux/mir/pitch.py +++ b/python/audioflux/mir/pitch_yin.py @@ -1,33 +1,29 @@ import numpy as np from ctypes import Structure, POINTER, pointer, c_int, c_void_p, c_float from audioflux.base import Base -from audioflux.type import PitchType -from audioflux.utils import check_audio, format_channel, revoke_channel, note_to_hz +from audioflux.utils import check_audio, format_channel, revoke_channel -__all__ = ["Pitch"] +__all__ = ["PitchYIN"] -class OpaquePitch(Structure): +class OpaquePitchYIN(Structure): _fields_ = [] -class Pitch(Base): +class PitchYIN(Base): """ - Pitch - YIN, STFT, etc algorithm + Pitch YIN algorithm Parameters ---------- - pitch_type: PitchType - Pitch type - samplate: int Sampling rate of the incoming audio. low_fre: float - Lowest frequency. + Lowest frequency. Default is `27.0`. high_fre: float - Highest frequency. + Highest frequency. Default is `2000.0`. radix2_exp: int ``fft_length=2**radix2_exp`` @@ -36,59 +32,53 @@ class Pitch(Base): Window sliding length. auto_length: int - Auto length + Auto correlation length. Default is `2048`. 
Examples -------- - Get a 220Hz's audio file + Read 220Hz audio data >>> import audioflux as af - >>> audio_arr, sr = af.read(af.utils.sample_path('220')) - # >>> audio_arr = audio_arr[:8192] + >>> audio_path = af.utils.sample_path('220') + >>> audio_arr, sr = af.read(audio_path) - Create Pitch object and get frequency + Extract pitch - >>> from audioflux.type import PitchType - >>> obj = af.Pitch(pitch_type=PitchType.YIN) - >>> fre_arr, value_arr1, value_arr2 = obj.pitch(audio_arr) + >>> pitch_obj = af.PitchYIN(samplate=sr) + >>> fre_arr = pitch_obj.pitch(audio_arr) - Display plot + Show pitch plot >>> import matplotlib.pyplot as plt - >>> from audioflux.display import fill_wave, fill_plot - >>> import numpy as np - >>> audio_len = audio_arr.shape[-1] - >>> fig, axes = plt.subplots(nrows=2) - >>> fill_wave(audio_arr, samplate=sr, axes=axes[0]) - >>> - >>> ax = fill_plot(np.arange(len(fre_arr)), fre_arr, label='fre', axes=axes[1]) - >>> ax.set_ylabel('frequency(Hz)') + >>> from audioflux.display import fill_plot + >>> times = np.arange(fre_arr.shape[-1]) * (pitch_obj.slide_length / sr) + >>> fig, ax = plt.subplots() + >>> ax.set_title('PitchYIN') + >>> fill_plot(times, fre_arr, axes=ax) + >>> ax.set_ylim(0, 300) """ - def __init__(self, pitch_type=None, samplate=32000, - low_fre=note_to_hz('A0'), high_fre=note_to_hz('C7'), + def __init__(self, samplate=32000, low_fre=27.0, high_fre=2000.0, radix2_exp=12, slide_length=1024, auto_length=2048): - super(Pitch, self).__init__(pointer(OpaquePitch())) + super(PitchYIN, self).__init__(pointer(OpaquePitchYIN())) - self.pitch_type = pitch_type self.samplate = samplate self.low_fre = low_fre self.high_fre = high_fre self.radix2_exp = radix2_exp self.slide_length = slide_length self.auto_length = auto_length + self.thresh = 0.1 self.is_continue = False - fn = self._lib['pitchObj_new'] - fn.argtypes = [POINTER(POINTER(OpaquePitch)), - POINTER(c_int), POINTER(c_int), - POINTER(c_float), POINTER(c_float), - POINTER(c_int), 
POINTER(c_int), - POINTER(c_int), POINTER(c_int)] + fn = self._lib['pitchYINObj_new'] + fn.argtypes = [POINTER(POINTER(OpaquePitchYIN)), + POINTER(c_int), POINTER(c_float), POINTER(c_float), + POINTER(c_int), POINTER(c_int), POINTER(c_int), + POINTER(c_int)] fn(self._obj, - None if self.pitch_type is None else pointer(c_int(self.pitch_type.value)), pointer(c_int(self.samplate)), pointer(c_float(self.low_fre)), pointer(c_float(self.high_fre)), @@ -106,16 +96,20 @@ def set_thresh(self, thresh): ---------- thresh: float """ - fn = self._lib['pitchObj_setThresh'] - fn.argtypes = [POINTER(OpaquePitch), c_float] + if thresh <= 0.0 or thresh >= 1.0: + raise ValueError(f'`thresh` must be between 0.0 and 1.0.') + + fn = self._lib['pitchYINObj_setThresh'] + fn.argtypes = [POINTER(OpaquePitchYIN), c_float] fn(self._obj, c_float(thresh)) + self.thresh = thresh def cal_time_length(self, data_length): """ Calculate the length of a frame from audio data. - ``fft_length = 2 ** radix2_exp`` - - ``(data_length - fft_length) / slide_length + 1`` + - ``(data_length - fft_length) // slide_length + 1`` Parameters ---------- @@ -126,8 +120,8 @@ def cal_time_length(self, data_length): ------- out: int """ - fn = self._lib['pitchObj_calTimeLength'] - fn.argtypes = [POINTER(OpaquePitch), c_int] + fn = self._lib['pitchYINObj_calTimeLength'] + fn.argtypes = [POINTER(OpaquePitchYIN), c_int] fn.restype = c_int return fn(self._obj, c_int(data_length)) @@ -149,8 +143,8 @@ def pitch(self, data_arr): data_arr = np.asarray(data_arr, dtype=np.float32, order='C') check_audio(data_arr, is_mono=False) - fn = self._lib['pitchObj_pitch'] - fn.argtypes = [POINTER(OpaquePitch), + fn = self._lib['pitchYINObj_pitch'] + fn.argtypes = [POINTER(OpaquePitchYIN), np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), c_int, np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), @@ -184,7 +178,7 @@ def pitch(self, data_arr): def __del__(self): if self._is_created: - fn = 
self._lib['pitchObj_free'] - fn.argtypes = [POINTER(OpaquePitch)] + fn = self._lib['pitchYINObj_free'] + fn.argtypes = [POINTER(OpaquePitchYIN)] fn.restype = c_void_p fn(self._obj) diff --git a/python/audioflux/utils/convert.py b/python/audioflux/utils/convert.py index c20bfc2..5dfbeb7 100644 --- a/python/audioflux/utils/convert.py +++ b/python/audioflux/utils/convert.py @@ -11,6 +11,7 @@ 'mag_to_abs_db', 'log_compress', 'log10_compress', + 'temproal_db', 'delta', 'get_phase', 'note_to_midi', @@ -223,6 +224,49 @@ def log10_compress(X, gamma=1.0): return ret_arr +def temproal_db(X, base=18.): + """ + Calculate the time domain DB + + Parameters + ---------- + X: np.ndarray [shape=(frame,)] + Input array + + base: float + default: 18. + + Returns + ------- + max_Db: float + max Db + avg_Db: float + average Db + percent: float + The current domain is smaller than-based db percentage + """ + X = np.asarray(X, dtype=np.float32, order='C') + if X.ndim != 1: + raise ValueError(f"X[ndim={X.ndim}] must be a 1D array") + + fn = get_fft_lib()['util_temproal'] + fn.argtypes = [ + np.ctypeslib.ndpointer(dtype=np.float32, ndim=1, flags='C_CONTIGUOUS'), + c_int, + c_float, + POINTER(c_float), + POINTER(c_float) + ] + fn.restype = c_float + + avg_Db_p = pointer(c_float(0)) + percent_p = pointer(c_float(0)) + max_Db = fn(X, c_int(X.shape[0]), c_float(base), avg_Db_p, percent_p) + avg_Db = avg_Db_p.contents.value + percent = percent_p.contents.value + return max_Db, avg_Db, percent + + def delta(X, order=9): """ Compute delta features From 96aa94b07ba8b74e4710f22e7c0d3f6143a1da86 Mon Sep 17 00:00:00 2001 From: wtq2255 Date: Sat, 16 Dec 2023 16:10:59 +0800 Subject: [PATCH 4/6] add include --- include/cepstrogram_algorithm.h | 45 +++++++++++++++++++++++++++ include/mir/_pitch_cep.h | 43 ++++++++++++++++++++++++++ include/mir/_pitch_hps.h | 46 ++++++++++++++++++++++++++++ include/mir/_pitch_lhs.h | 47 ++++++++++++++++++++++++++++ include/mir/_pitch_ncf.h | 43 ++++++++++++++++++++++++++ 
include/mir/_pitch_pef.h | 49 ++++++++++++++++++++++++++++++ include/mir/pitch_algorithm.h | 54 --------------------------------- 7 files changed, 273 insertions(+), 54 deletions(-) create mode 100644 include/cepstrogram_algorithm.h create mode 100644 include/mir/_pitch_cep.h create mode 100644 include/mir/_pitch_hps.h create mode 100644 include/mir/_pitch_lhs.h create mode 100644 include/mir/_pitch_ncf.h create mode 100644 include/mir/_pitch_pef.h delete mode 100644 include/mir/pitch_algorithm.h diff --git a/include/cepstrogram_algorithm.h b/include/cepstrogram_algorithm.h new file mode 100644 index 0000000..41d88fc --- /dev/null +++ b/include/cepstrogram_algorithm.h @@ -0,0 +1,45 @@ + + +#ifndef CEPSTROGRAM_ALGORITHM_H +#define CEPSTROGRAM_ALGORITHM_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include "flux_base.h" + +typedef struct OpaqueCepstrogram *CepstrogramObj; + +/*** + radix2Exp 12 + WindowType "rect" + slideLength 1024 +****/ +int cepstrogramObj_new(CepstrogramObj *cepstrogramObj,int radix2Exp,WindowType *windowType,int *slideLength); + +int cepstrogramObj_calTimeLength(CepstrogramObj cepstrogramObj,int dataLength); + +/*** + cepNum 4~128 ,formant estimate number + mDataArr1 cepstrums ,timeLength*(fftLength/2+1) + mDataArr2 envelope(formant) ,timeLength*(fftLength/2+1) + mDataArr3 details(tone) ,timeLength*(fftLength/2+1) +****/ +void cepstrogramObj_cepstrogram(CepstrogramObj cepstrogramObj,int cepNum,float *dataArr,int dataLength, + float *mDataArr1,float *mDataArr2,float *mDataArr3); + +void cepstrogramObj_cepstrogram2(CepstrogramObj cepstrogramObj,int cepNum,float *mRealArr,float *mImageArr,int nLength, + float *mDataArr1,float *mDataArr2,float *mDataArr3); + +void cepstrogramObj_enableDebug(CepstrogramObj cepstrogramObj,int flag); + +void cepstrogramObj_free(CepstrogramObj cepstrogramObj); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/include/mir/_pitch_cep.h b/include/mir/_pitch_cep.h 
new file mode 100644 index 0000000..c4f87dc --- /dev/null +++ b/include/mir/_pitch_cep.h @@ -0,0 +1,43 @@ + + +#ifndef _PITCH_CEP_H +#define _PITCH_CEP_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "../flux_base.h" + +typedef struct OpaquePitchCEP *PitchCEPObj; + +/*** + samplate 32000 + lowFre 32, + highFre 2000 + radix2Exp 12 + WindowType Hamm + slideLength (1< +#include + +#include "../flux_base.h" + +typedef struct OpaquePitchHPS *PitchHPSObj; + +/*** + samplate 32000 + lowFre 32, + highFre 2000 + radix2Exp 12 + WindowType Hamm + slideLength (1<0 + isContinue 0 +****/ +int pitchHPSObj_new(PitchHPSObj *pitchHPSObj, + int *samplate,float *lowFre,float *highFre, + int *radix2Exp,int *slideLength,WindowType *windowType, + int *harmonicCount, + int *isContinue); + +int pitchHPSObj_calTimeLength(PitchHPSObj pitchHPSObj,int dataLength); + +void pitchHPSObj_pitch(PitchHPSObj pitchHPSObj,float *dataArr,int dataLength, + float *freArr); + +void pitchHPSObj_enableDebug(PitchHPSObj pitchHPSObj,int isDebug); +void pitchHPSObj_free(PitchHPSObj pitchHPSObj); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/include/mir/_pitch_lhs.h b/include/mir/_pitch_lhs.h new file mode 100644 index 0000000..5de63a0 --- /dev/null +++ b/include/mir/_pitch_lhs.h @@ -0,0 +1,47 @@ + + +#ifndef _PITCH_LHS_H +#define _PITCH_LHS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "../flux_base.h" + +typedef struct OpaquePitchLHS *PitchLHSObj; + +/*** + samplate 32000 + lowFre 32, + highFre 2000 + + radix2Exp 12 + WindowType hamm + slideLength (1<0 + isContinue 0 +****/ +int pitchLHSObj_new(PitchLHSObj *pitchLHSObj, + int *samplate,float *lowFre,float *highFre, + int *radix2Exp,int *slideLength,WindowType *windowType, + int *harmonicCount, + int *isContinue); + +int pitchLHSObj_calTimeLength(PitchLHSObj pitchLHSObj,int dataLength); + +void pitchLHSObj_pitch(PitchLHSObj pitchLHSObj,float *dataArr,int 
dataLength, + float *freArr); + +void pitchLHSObj_enableDebug(PitchLHSObj pitchLHSObj,int isDebug); +void pitchLHSObj_free(PitchLHSObj pitchLHSObj); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/include/mir/_pitch_ncf.h b/include/mir/_pitch_ncf.h new file mode 100644 index 0000000..0cfa2f6 --- /dev/null +++ b/include/mir/_pitch_ncf.h @@ -0,0 +1,43 @@ + + +#ifndef _PITCH_NCF_H +#define _PITCH_NCF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "../flux_base.h" + +typedef struct OpaquePitchNCF *PitchNCFObj; + +/*** + samplate 32000 + lowFre 32, + highFre 2000 + radix2Exp 12 + WindowType rect + slideLength (1< +#include + +#include "../flux_base.h" + +typedef struct OpaquePitchPEF *PitchPEFObj; + +/*** + samplate 32000 + lowFre 32, + highFre 2000 + cutFre 4000, >highFre + + radix2Exp 12 + WindowType hamm + slideLength (1<0, beta 0.5 0~1, gamma 1.8 >1 + + isContinue 0 +****/ +int pitchPEFObj_new(PitchPEFObj *pitchPEFObj, + int *samplate,float *lowFre,float *highFre,float *cutFre, + int *radix2Exp,int *slideLength,WindowType *windowType, + float *alpha,float *beta,float *gamma, + int *isContinue); + +int pitchPEFObj_calTimeLength(PitchPEFObj pitchPEFObj,int dataLength); +void pitchPEFObj_setFilterParams(PitchPEFObj pitchPEFObj,float alpha,float beta,float gamma); + +void pitchPEFObj_pitch(PitchPEFObj pitchPEFObj,float *dataArr,int dataLength, + float *freArr); + +void pitchPEFObj_enableDebug(PitchPEFObj pitchPEFObj,int isDebug); +void pitchPEFObj_free(PitchPEFObj pitchPEFObj); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/include/mir/pitch_algorithm.h b/include/mir/pitch_algorithm.h deleted file mode 100644 index 4cad8c7..0000000 --- a/include/mir/pitch_algorithm.h +++ /dev/null @@ -1,54 +0,0 @@ - - -#ifndef PITCH_ALGORITHM_H -#define PITCH_ALGORITHM_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -typedef enum{ - Pitch_YIN=0, - Pitch_STFT, - 
Pitch_NCF, - Pitch_PEF, - -} PitchType; - -typedef struct OpaquePitch *PitchObj; - -/*** - type None - samplate 32000 - lowFre 27 - highFre 2000 - - radix2Exp 12 - slideLength (1<0&&thresh<1 -void pitchObj_setThresh(PitchObj pitchObj,float thresh); -int pitchObj_calTimeLength(PitchObj pitchObj,int dataLength); - -void pitchObj_pitch(PitchObj pitchObj,float *dataArr,int dataLength, - float *freArr,float *valueArr1,float *valueArr2); - -void pitchObj_enableDebug(PitchObj pitchObj,int isDebug); -void pitchObj_free(PitchObj pitchObj); - -#ifdef __cplusplus -} -#endif - -#endif \ No newline at end of file From ef8447aa533df7fd0f1124739756e2b998634e9d Mon Sep 17 00:00:00 2001 From: wtq2255 Date: Sat, 16 Dec 2023 16:59:13 +0800 Subject: [PATCH 5/6] update docs --- conda/meta.yaml | 2 +- docs/feature/cepstrogram.rst | 5 +++++ docs/feature/feature.rst | 1 + docs/mir/pitch.rst | 20 +++++++++++++++++++- python/audioflux/mir/pitch_stft.py | 2 +- python/audioflux/mir/pitch_yin.py | 2 +- 6 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 docs/feature/cepstrogram.rst diff --git a/conda/meta.yaml b/conda/meta.yaml index 369517b..080c5ba 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "0.1.6" %} +{% set version = "0.1.7" %} package: name: audioflux diff --git a/docs/feature/cepstrogram.rst b/docs/feature/cepstrogram.rst new file mode 100644 index 0000000..fa3fe7c --- /dev/null +++ b/docs/feature/cepstrogram.rst @@ -0,0 +1,5 @@ +Cepstrogram +=========== + +.. 
autoclass:: audioflux.Cepstrogram + :members: diff --git a/docs/feature/feature.rst b/docs/feature/feature.rst index b1c4c8f..4e3c1e6 100644 --- a/docs/feature/feature.rst +++ b/docs/feature/feature.rst @@ -9,6 +9,7 @@ The feature module contains the following algorithms: spectral xxcc deconv + cepstrogram temporal featureExtractor diff --git a/docs/mir/pitch.rst b/docs/mir/pitch.rst index 4aa991b..42956b2 100644 --- a/docs/mir/pitch.rst +++ b/docs/mir/pitch.rst @@ -1,5 +1,23 @@ Pitch ===== -.. autoclass:: audioflux.Pitch +.. autoclass:: audioflux.PitchCEP + :members: + +.. autoclass:: audioflux.PitchHPS + :members: + +.. autoclass:: audioflux.PitchLHS + :members: + +.. autoclass:: audioflux.PitchNCF + :members: + +.. autoclass:: audioflux.PitchPEF + :members: + +.. autoclass:: audioflux.PitchSTFT + :members: + +.. autoclass:: audioflux.PitchYIN :members: diff --git a/python/audioflux/mir/pitch_stft.py b/python/audioflux/mir/pitch_stft.py index c469230..c5c6486 100644 --- a/python/audioflux/mir/pitch_stft.py +++ b/python/audioflux/mir/pitch_stft.py @@ -49,7 +49,7 @@ class PitchSTFT(Base): Extract pitch >>> pitch_obj = af.PitchSTFT(samplate=sr) - >>> fre_arr = pitch_obj.pitch(audio_arr) + >>> fre_arr, db_arr = pitch_obj.pitch(audio_arr) Show pitch plot diff --git a/python/audioflux/mir/pitch_yin.py b/python/audioflux/mir/pitch_yin.py index 4158760..6c9d192 100644 --- a/python/audioflux/mir/pitch_yin.py +++ b/python/audioflux/mir/pitch_yin.py @@ -46,7 +46,7 @@ class PitchYIN(Base): Extract pitch >>> pitch_obj = af.PitchYIN(samplate=sr) - >>> fre_arr = pitch_obj.pitch(audio_arr) + >>> fre_arr, v1_arr, v2_arr = pitch_obj.pitch(audio_arr) Show pitch plot From 44f670cdbe6eb7f1d51408748f9e3ba72cf4cd52 Mon Sep 17 00:00:00 2001 From: wtq2255 Date: Sat, 16 Dec 2023 16:59:16 +0800 Subject: [PATCH 6/6] update docs --- docs/changelog.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 7ed98a3..2b651da 100644 --- 
a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,15 @@ ChangeLog ========= +v0.1.7 +------ +* New features: + * Add Cepstrogram algorithm. + * Add PitchCEP/PitchHPS/PitchLHS/PitchNCF/PitchPEF/PitchSTFT/PitchYIN algorithms. +* Modified API: + * `audioflux.display.fill_wave` adds the `times` param. +* Fix bug: + * Fix `flux` param: rename `is_no_exp` to `is_exp`. + v0.1.6 ------ * Fix bug: