Skip to content

Commit

Permalink
Merge pull request #83 from interactive-sonification/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
wiccy46 authored Dec 6, 2023
2 parents e6357ea + 2bb479b commit 2ec364c
Show file tree
Hide file tree
Showing 28 changed files with 412 additions and 409 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/pya-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ on:
jobs:
unit-test:
runs-on: ${{ matrix.os }}
timeout-minutes: 120
strategy:
matrix:
os: ["ubuntu-latest", "macos-latest"]
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10"]
steps:
- uses: actions/checkout@v3
- name: Install portaudio Ubuntu
Expand Down
6 changes: 6 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## 0.5.2 (Nov 2023)
* #82, `pyaudio` is now optional: If you plan to use `PyAudioBackend`, install `pya` with `pip install pya[pyaudio]`.
* Fix audio device bug
* #77, Improve code type hint
* #79, Use pyamapping

## 0.5.1 (Dec 2022)
* Now support Python3.10
* Bugfix #67: When the channels argument of Aserver and Arecorder has not been set it was determined by the default device instead of the actual device.
Expand Down
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,15 @@ At this time pya is more suitable for offline rendering than realtime.

## Installation

`pya` requires `portaudio` and its Python wrapper `PyAudio` to play and record audio.
Install using
```
pip install pya
```

However, to play and record audio you need a backend.

- `pip install pya[remote]` for a web based Jupyter backend
- `pip install pya[pyaudio]` for `portaudio` and its Python wrapper `PyAudio`

### Using Conda

Expand Down Expand Up @@ -101,7 +109,7 @@ For Apple ARM Chip, if you failed to install the PyAudio dependency, you can fol
Try `sudo apt-get install portaudio19-dev` or equivalent to your distro, then
```
pip isntall pya
pip install pya
```
### Using PIP (Windows)
Expand Down
4 changes: 2 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ branches:

environment:
matrix:
- PYTHON_VERSION: 3.7
- PYTHON_VERSION: 3.10
MINICONDA: C:\Miniconda3-x64

init:
Expand All @@ -24,7 +24,7 @@ install:
- conda config --append channels conda-forge
- "conda create -q -n test-environment python=%PYTHON_VERSION% ffmpeg coverage --file=requirements_remote.txt --file=requirements_test.txt"
- activate test-environment
- "pip install -r requirements.txt"
- "pip install -r requirements.txt -r requirements_pyaudio.txt"

test_script:
- pytest
1 change: 1 addition & 0 deletions dev/generate_doc
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def generate_tree(out_folder, publish=False, branches=['master', 'develop'], tag
res = os.system(f'git -C {doctree_root} checkout {target} -f' + SILENCE)
if res != 0:
raise RuntimeError(f'Could not checkout {d}. Git returned status code {res}!')
os.system(f'cp {template_folder}/../conf.py {doctree_root}/docs/')
if template_folder:
os.system(f'cp {template_folder}/* {doctree_root}/docs/_templates')

Expand Down
5 changes: 3 additions & 2 deletions examples/pya-examples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
" sys.path.insert(0, cmd_folder)\n",
"\n",
"# ... the part relevant for pya usage starts here\n",
"from pya import *"
"from pya import *\n",
"from pyamapping import dbamp, ampdb"
]
},
{
Expand Down Expand Up @@ -2123,7 +2124,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.14"
"version": "3.10.7"
},
"toc": {
"base_numbering": 1,
Expand Down
69 changes: 39 additions & 30 deletions pya/amfcc.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import numpy as np
from typing import Optional, Union
from warnings import warn
from .helper import next_pow2, signal_to_frame, round_half_up, magspec
from .helper import mel2hz, hz2mel, is_pow2
from .helper import basicplot

import numpy as np
from pyamapping import mel_to_hz, hz_to_mel
from scipy.signal import get_window
from scipy.fftpack import dct

from .helper import next_pow2, signal_to_frame, round_half_up, magspec
from .helper import is_pow2
from .helper import basicplot
import pya.asig
import logging

Expand Down Expand Up @@ -64,10 +68,12 @@ class Amfcc:
An array of the MFCC coefficients, size: nframes x ncep
"""

def __init__(self, x, sr=None, label='', n_per_frame=None,
hopsize=None, nfft=None, window='hann', nfilters=26,
ncep=13, ceplifter=22, preemph=0.95,
append_energy=True, cn=None):
def __init__(self, x: Union[pya.Asig, np.ndarray], sr: Optional[int] = None,
label: str = '', n_per_frame: Optional[int] = None,
hopsize: Optional[int] = None, nfft: Optional[int] = None,
window: str = 'hann', nfilters: int = 26,
ncep: int = 13, ceplifter: int = 22, preemph: float = 0.95,
append_energy: bool = True, cn: Optional[list] = None):
"""Initialize Amfcc object
Parameters
Expand Down Expand Up @@ -103,14 +109,14 @@ def __init__(self, x, sr=None, label='', n_per_frame=None,
append_energy : bool
If true, the zeroth cepstral coefficient is replaced with the log
of the total frame energy.
cn : list
A list of channel name based on the Asig.
cn : list or None
A list of channel names, size should match the channels.
"""
# ----------Prepare attributes -------------------------
# First prepare for parameters
# x represent the audio signal, which can be Asig object or np.array.
self.im = None
if type(x) == pya.asig.Asig:
if isinstance(x, pya.asig.Asig):
self.sr = x.sr
self.x = x.sig
self.label = ''.join([x.label, "_mfccs"])
Expand All @@ -129,9 +135,9 @@ def __init__(self, x, sr=None, label='', n_per_frame=None,
self.duration = np.shape(x)[0] / self.sr
self.label = label
self.channels = 1 if self.x.ndim == 1 else self.x.shape[1]
self.cn = None
self.cn = cn
else:
msg = "x can only be either a numpy.ndarray or pya.Asig object."
msg = "x can only be either a numpy.ndarray or Asig object."
raise TypeError(msg)

# default 25ms length window.
Expand Down Expand Up @@ -211,7 +217,7 @@ def __repr__(self):
return f"Amfcc({self.label}): sr {self.sr}, length: {self.duration} s"

@staticmethod
def preemphasis(x, coeff=0.97):
def preemphasis(x: np.ndarray, coeff: float = 0.97):
"""Pre-emphasis filter to whiten the spectrum.
Pre-emphasis is a way of compensating for the
rapid decaying spectrum of speech.
Expand All @@ -233,7 +239,8 @@ def preemphasis(x, coeff=0.97):
return np.append(x[0], x[1:] - coeff * x[:-1])

@staticmethod
def mel_filterbanks(sr, nfilters=26, nfft=512, lowfreq=0, highfreq=None):
def mel_filterbanks(sr: int, nfilters: int = 26, nfft: int = 512,
lowfreq: float = 0, highfreq: Optional[float] = None):
"""Compute a Mel-filterbank. The filters are stored in the rows,
the columns correspond to fft bins. The filters are returned as
an array of size nfilt * (nfft/2 + 1)
Expand All @@ -246,9 +253,9 @@ def mel_filterbanks(sr, nfilters=26, nfft=512, lowfreq=0, highfreq=None):
The number of filters, default 26
nfft : int
The size of FFT, default 512
lowfreq : int or float
lowfreq : float
The lowest band edge of the mel filters, default 0 Hz
highfreq : int or float
highfreq : float
The highest band edge of the mel filters, default sr // 2
Returns
Expand All @@ -260,12 +267,12 @@ def mel_filterbanks(sr, nfilters=26, nfft=512, lowfreq=0, highfreq=None):
highfreq = highfreq or sr // 2

# compute points evenly spaced in mels
lowmel = hz2mel(lowfreq)
highmel = hz2mel(highfreq)
lowmel = hz_to_mel(lowfreq)
highmel = hz_to_mel(highfreq)
melpoints = np.linspace(lowmel, highmel, nfilters + 2)
# our points are in Hz, but we use fft bins, so we have to convert
# from Hz to fft bin number
bin = np.floor((nfft + 1) * mel2hz(melpoints) / sr)
bin = np.floor((nfft + 1) * mel_to_hz(melpoints) / sr)

filter_banks = np.zeros([nfilters, nfft // 2 + 1])
for j in range(0, nfilters):
Expand All @@ -276,7 +283,7 @@ def mel_filterbanks(sr, nfilters=26, nfft=512, lowfreq=0, highfreq=None):
return filter_banks

@staticmethod
def lifter(cepstra, L=22):
def lifter(cepstra: np.ndarray, L: int = 22):
"""Apply a cepstral lifter to the matrix of cepstra.
This has the effect of increasing the magnitude of
the high frequency DCT coeffs.
Expand Down Expand Up @@ -315,21 +322,23 @@ def lifter(cepstra, L=22):
# values of L <= 0, do nothing
return cepstra

def plot(self, cmap='inferno', show_bar=True,
offset=0, scale=1., xlim=None, ylim=None,
x_as_time=True, nxlabel=8, ax=None, **kwargs):
def plot(self, show_bar: bool = True, offset: int = 0, scale: float = 1.,
xlim: Optional[float] = None, ylim: Optional[float] = None,
x_as_time: bool = True, nxlabel: int = 8, ax=None, **kwargs):
"""Plot Amfcc.features via matshow, x is frames/time, y is the MFCCs
Parameters
----------
figsize : (float, float), optional, default: None
Figure size, width, height in inches, Default = [6.4, 4.8]
cmap : str
colormap for matplotlib. Default is 'inferno'.
show_bar : bool, optional
Default is True, show colorbar.
x_as_time : bool, optional
Default is True, show x axis as time or sample index.
offset: int
The spacing between channels; if it is not set, all channels are overlaid onto each other.
scale: float
Visual scaling to improve visibility
xlim: float, optional
x axis value range limit
ylim: float, optional
y axis value range limit
nxlabel : int, optional
The number of labels on the x axis. Default is 8.
"""
Expand Down
40 changes: 26 additions & 14 deletions pya/arecorder.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# Arecorder class
import time
import logging
import numbers
from warnings import warn
import time
from typing import Optional, Union

import numpy as np
import pyaudio
from . import Asig
from . import Aserver
from .helper import dbamp
from pyamapping import db_to_amp


_LOGGER = logging.getLogger(__name__)
Expand All @@ -30,19 +30,33 @@ class Arecorder(Aserver):
[Asig(''): ... x ... @ 44100Hz = ...
"""

def __init__(self, sr=44100, bs=256, device=None, channels=None, backend=None, **kwargs):
def __init__(self, sr: int = 44100, bs: int = 256, device: Optional[int] = None,
channels: Optional[int] = None, backend=None, **kwargs):
super().__init__(sr=sr, bs=bs, device=device,
backend=backend, **kwargs)
self.record_buffer = []
self.recordings = [] # store recorded Asigs, time stamp in label
self._recording = False
self._record_all = True
self.gains = np.ones(self.channels)
self.tracks = slice(None)
self._device = device or self.backend.get_default_input_device_info()['index']
self.channels = channels or self.backend.get_device_info_by_index(self._device)['maxInputChannels']
self._device = self.backend.get_default_input_device_info()['index'] if device is None else device
self._channels = channels or self.max_in_chn
self.gains = np.ones(self._channels)

@property
def channels(self):
return self._channels

@channels.setter
def channels(self, val: int):
"""
Set the number of channels. Aserver needs reboot.
"""
if val > self.max_in_chn:
raise ValueError(f"AServer: channels {val} > max {self.max_in_chn}")
self._channels = val

def set_tracks(self, tracks, gains):
def set_tracks(self, tracks: Union[list, np.ndarray], gains: Union[list, np.ndarray]):
"""Define the number of track to be recorded and their gains.
parameters
Expand All @@ -58,10 +72,10 @@ def set_tracks(self, tracks, gains):
elif len(tracks) > self.channels or len(gains) > self.channels:
raise AttributeError("argument cannot be larger than channels.")
self.tracks = tracks
self.gains = np.array([dbamp(g) for g in gains], dtype="float32")
self.gains = np.array([db_to_amp(g) for g in gains], dtype="float32")
elif isinstance(tracks, numbers.Number) and isinstance(gains, numbers.Number):
self.tracks = [tracks]
self.gains = dbamp(gains)
self.gains = db_to_amp(gains)
else:
raise TypeError("Arguments need to be both list or both number.")

Expand All @@ -70,8 +84,6 @@ def reset(self):
self.gains = np.ones(self.channels)

def boot(self):
"""boot recorder"""
# when input = True, the channels refers to the input channels.
self.boot_time = time.time()
self.block_time = self.boot_time
# self.block_cnt = 0
Expand All @@ -95,7 +107,7 @@ def _recorder_callback(self, in_data, frame_count, time_info, flag):
self.record_buffer.append(data_float)
# E = 10 * np.log10(np.mean(data_float ** 2)) # energy in dB
# os.write(1, f"\r{E} | {self.block_cnt}".encode())
return None, pyaudio.paContinue
return self.backend.process_buffer(None)

def record(self):
"""Activate recording"""
Expand Down
Loading

0 comments on commit 2ec364c

Please sign in to comment.