Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speech directivity #317

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Prev Previous commit
Next Next commit
Speech directivity
  • Loading branch information
pkendrick committed Jun 4, 2023
commit 771dc040e4c398b83476297aacb6708d8751b5f1
2 changes: 1 addition & 1 deletion pyroomacoustics/acoustics.py
Original file line number Diff line number Diff line change
@@ -415,7 +415,7 @@ def bands_hz2s(bands_hz, Fs, N, transform="dft"):
else:
j += 1

return np.array(bands_s, dtype=np.int)
return np.array(bands_s, dtype=int)


def melscale(f):
6 changes: 3 additions & 3 deletions pyroomacoustics/beamforming.py
Original file line number Diff line number Diff line change
@@ -450,7 +450,7 @@ def record(self, signals, fs):
else:
self.signals = signals

def to_wav(self, filename, mono=False, norm=False, bitdepth=np.float):
def to_wav(self, filename, mono=False, norm=False, bitdepth=float):
"""
Save all the signals to wav files.

@@ -475,7 +475,7 @@ def to_wav(self, filename, mono=False, norm=False, bitdepth=np.float):
else:
signal = self.signals.T # each column is a channel

float_types = [float, np.float, np.float32, np.float64]
float_types = [float, np.float32, np.float64]

if bitdepth in float_types:
bits = None
@@ -858,7 +858,7 @@ def plot_beam_response(self):
f_0 = np.floor(self.fs / 8000.0)
for i in np.arange(1, 5):
yticks[i - 1] = np.argmin(np.abs(freq - 1000.0 * i * f_0))
# yticks = np.array(plt.getp(plt.gca(), 'yticks'), dtype=np.int)
# yticks = np.array(plt.getp(plt.gca(), 'yticks'), dtype=int)
plt.setp(plt.gca(), "yticks", yticks)
plt.setp(plt.gca(), "yticklabels", np.arange(1, 5) * f_0)

133 changes: 123 additions & 10 deletions pyroomacoustics/directivities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import abc
import os
import warnings
from pathlib import Path
import numpy as np
@@ -315,11 +316,11 @@ class SpeechDirectivity(Directivity):
"""
def __init__(self, orientation, pattern_name="running_speech", gain=1.0):
Directivity.__init__(self, orientation)
datafile = Path("./data/speech_directivity.json")
datafile = os.path.join(os.path.dirname(__file__), "data/speech_directivity.json")
with open(datafile, "r") as f:
speech_data = json.load(f)
self.centre_freqs = np.array(speech_data["centre_freqs"])
self.angles = np.array(speech_data["angles"])
self.pattern_angles = np.array(speech_data["angles"])
self.speech_data = np.array(speech_data["data"])
self._gain = gain
self._pattern_name = pattern_name
@@ -330,7 +331,7 @@ def directivity_pattern(self):
return self._pattern_name

def get_response(
self, azimuth, colatitude=None, magnitude=True, frequency=None, degrees=True
self, azimuth, colatitude=None, magnitude=True, frequency=None, degrees=True,
):
"""
Get response for provided angles.
@@ -353,22 +354,134 @@ def get_response(
resp : :py:class:`~numpy.ndarray`
Response at provided angles.
"""
if not degrees:
raise RuntimeError("SpeechDirectivity requires angles in degrees.")
if degrees:
azimuth = np.radians(azimuth)
degrees = False
if frequency is None:
raise RuntimeError("SpeechDirectivity requires frequency input.")
raise RuntimeError("SpeechDirectivity requires a frequency input.")
if not magnitude:
warnings.warn("SpeechDirectivity does not return phase response, magnitiude only.")
if colatitude is not None:
if colatitude is None:
warnings.warn("SpeechDirectivity assumes X-Y plane only.")
# find the freq closest to the avaiable 1/3rd octave band
freq_ind = np.argmin(np.abs(frequency - self.centre_freqs))
azimuth = wrap_degrees(azimuth)
azimuth = np.abs(azimuth) # speech response is mirroed
directivity_at_angle_db = np.interp(self.angles, self.speech_data[freq_ind, :], azimuth)

# get coords on the unit sphere for the input azimuth / colatitude
coord = spher2cart(azimuth=azimuth, colatitude=colatitude, degrees=False)
# use the internal orientation to project the coords of the input angles onto the appropriate direction
cos_theta = np.matmul(self._orientation.unit_vector, coord)
# extract the angle(s) we are computing the gains for
directivity_angle = np.arccos(cos_theta) # speech response is mirrored so angle sign doesnt matter
directivity_angle_deg = directivity_angle / np.pi * 180
directivity_angle_deg = wrap_degrees(directivity_angle_deg)
directivity_angle_deg = np.abs(directivity_angle_deg) # speech response is mirrored
# linear interpolation to estimate gain at requested angle(s)
if isinstance(directivity_angle_deg, (list, np.ndarray)):
directivity_at_angle_db = np.array([np.interp(angle, self.pattern_angles, self.speech_data[freq_ind, :]) for angle in directivity_angle_deg])
else:
directivity_at_angle_db = np.interp(directivity_angle_deg, self.pattern_angles, self.speech_data[freq_ind, :])
directivity_at_angle = np.power(10, directivity_at_angle_db / 20)
return directivity_at_angle

@requires_matplotlib
def plot_response(
self, azimuth, colatitude=None, degrees=True, ax=None, offset=None, frequency=1000
):
"""
Plot directivity response at specified angles.

Parameters
----------
azimuth : array_like
Azimuth values for plotting.
colatitude : array_like, optional
Colatitude values for plotting. If not provided, 2D plot.
degrees : bool
Whether provided values are in degrees (True) or radians (False).
ax : axes object
offset : list
3-D coordinates of the point where the response needs to be plotted.

Return
------
ax : :py:class:`~matplotlib.axes.Axes`
"""
import matplotlib.pyplot as plt

if offset is not None:
x_offset = offset[0]
y_offset = offset[1]
else:
x_offset = 0
y_offset = 0

if degrees:
azimuth = np.radians(azimuth)

if colatitude is not None:

if degrees:
colatitude = np.radians(colatitude)

if ax is None:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection="3d")

if offset is not None:
z_offset = offset[2]
else:
z_offset = 0

spher_coord = all_combinations(azimuth, colatitude)
azi_flat = spher_coord[:, 0]
col_flat = spher_coord[:, 1]

# compute response
resp = self.get_response(
azimuth=azi_flat, colatitude=col_flat, magnitude=True, degrees=False, frequency=frequency,
)
RESP = resp.reshape(len(azimuth), len(colatitude))

# create surface plot, need cartesian coordinates
AZI, COL = np.meshgrid(azimuth, colatitude)
X = RESP.T * np.sin(COL) * np.cos(AZI) + x_offset
Y = RESP.T * np.sin(COL) * np.sin(AZI) + y_offset
Z = RESP.T * np.cos(COL) + z_offset

ax.plot_surface(X, Y, Z)

if ax is None:
ax.set_title(
"{}, azimuth={}, colatitude={}".format(
self.directivity_pattern,
self.get_azimuth(),
self.get_colatitude(),
)
)
else:
ax.set_title("Directivity Plot")

ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_zlabel("z")

else:

if ax is None:
fig = plt.figure()
ax = plt.subplot(111)

# compute response
resp = self.get_response(azimuth=azimuth, magnitude=True, degrees=False, frequency=frequency)
RESP = resp

# create surface plot, need cartesian coordinates
X = RESP.T * np.cos(azimuth) + x_offset
Y = RESP.T * np.sin(azimuth) + y_offset
ax.plot(X, Y)

return ax

def cardioid_func(x, direction, coef, gain=1.0, normalize=True, magnitude=False):
"""
One-shot function for computing cardioid response.
9 changes: 4 additions & 5 deletions pyroomacoustics/doa/doa.py
Original file line number Diff line number Diff line change
@@ -103,7 +103,6 @@ def __getitem__(self, ref):
# we use this to test if an integer is passed
integer = (
int,
np.int,
np.int16,
np.int32,
np.int64,
@@ -212,7 +211,7 @@ def __init__(

self.num_src = self._check_num_src(num_src)
self.sources = np.zeros([self.D, self.num_src])
self.src_idx = np.zeros(self.num_src, dtype=np.int)
self.src_idx = np.zeros(self.num_src, dtype=int)
self.azimuth_recon = None
self.colatitude_recon = None
self.alpha_recon = None
@@ -330,7 +329,7 @@ def locate_sources(
if num_src is not None and num_src != self.num_src:
self.num_src = self._check_num_src(num_src)
self.sources = np.zeros([self.num_src, self.D])
self.src_idx = np.zeros(self.num_src, dtype=np.int)
self.src_idx = np.zeros(self.num_src, dtype=int)
self.angle_of_arrival = None
if X.shape[0] != self.M:
raise ValueError(
@@ -343,12 +342,12 @@ def locate_sources(

# frequency bins on which to apply DOA
if freq_bins is not None:
self.freq_bins = np.array(freq_bins, dtype=np.int)
self.freq_bins = np.array(freq_bins, dtype=int)
elif freq_hz is not None:
self.freq_bins = [int(np.round(f / self.fs * self.nfft)) for f in freq_bins]
else:
freq_range = [int(np.round(f / self.fs * self.nfft)) for f in freq_range]
self.freq_bins = np.arange(freq_range[0], freq_range[1], dtype=np.int)
self.freq_bins = np.arange(freq_range[0], freq_range[1], dtype=int)

self.freq_bins = self.freq_bins[self.freq_bins < self.max_bin]
self.freq_bins = self.freq_bins[self.freq_bins >= 0]
8 changes: 4 additions & 4 deletions pyroomacoustics/doa/tools_fri_doa_plane.py
Original file line number Diff line number Diff line change
@@ -296,13 +296,13 @@ def output_shrink(K, L):
"""
out_len = L - K
if out_len % 2 == 0:
half_out_len = np.int(out_len / 2.0)
half_out_len = int(out_len / 2.0)
mtx_r = np.hstack(
(np.eye(half_out_len), np.zeros((half_out_len, half_out_len)))
)
mtx_i = mtx_r
else:
half_out_len = np.int((out_len + 1) / 2.0)
half_out_len = int((out_len + 1) / 2.0)
mtx_r = np.hstack(
(np.eye(half_out_len), np.zeros((half_out_len, half_out_len - 1)))
)
@@ -322,11 +322,11 @@ def coef_expan_mtx(K):
number of Dirac. The filter size is K + 1
"""
if K % 2 == 0:
D0 = np.eye(np.int(K / 2.0 + 1))
D0 = np.eye(int(K / 2.0 + 1))
D1 = np.vstack((D0, D0[1:, ::-1]))
D2 = np.vstack((D0, -D0[1:, ::-1]))[:, :-1]
else:
D0 = np.eye(np.int((K + 1) / 2.0))
D0 = np.eye(int((K + 1) / 2.0))
D1 = np.vstack((D0, D0[:, ::-1]))
D2 = np.vstack((D0, -D0[:, ::-1]))
return D1, D2
18 changes: 8 additions & 10 deletions pyroomacoustics/room.py
Original file line number Diff line number Diff line change
@@ -584,7 +584,7 @@ def callback_mix(premix, snr=0, sir=0, ref_mic=0, n_src=None, n_tgt=None):
from . import libroom
from .acoustics import OctaveBandsFactory, rt60_eyring, rt60_sabine
from .beamforming import MicrophoneArray
from .directivities import CardioidFamily, source_angle_shoebox
from .directivities import CardioidFamily, source_angle_shoebox, SpeechDirectivity
from .experimental import measure_rt60
from .libroom import Wall, Wall2D
from .parameters import Material, Physics, constants, eps, make_materials
@@ -633,7 +633,7 @@ def sequence_generation(volume, duration, c, fs, max_rate=10000):
times.append(times[-1] + dt)

# convert from continuous to discrete time
indices = (np.array(times) * fs).astype(np.int)
indices = (np.array(times) * fs).astype(int)
seq = np.zeros(indices[-1] + 1)
seq[indices] = np.random.choice([1, -1], size=len(indices))

@@ -1892,13 +1892,13 @@ def add_source(self, position, signal=None, delay=0, directivity=None):

if isinstance(position, SoundSource):
if directivity is not None:
assert isinstance(directivity, CardioidFamily)
assert isinstance(directivity, (CardioidFamily, SpeechDirectivity))
return self.add(SoundSource(position, directivity=directivity))
else:
return self.add(position)
else:
if directivity is not None:
assert isinstance(directivity, CardioidFamily)
assert isinstance(directivity, (CardioidFamily, SpeechDirectivity))
return self.add(
SoundSource(
position, signal=signal, delay=delay, directivity=directivity
@@ -2061,9 +2061,9 @@ def compute_rir(self):
# Do band-wise RIR construction
is_multi_band = self.is_multi_band
bws = self.octave_bands.get_bw() if is_multi_band else [self.fs / 2]
centre_freqs = self.octave_bands.centers if is_multi_band else [self.fs / 4]
rir_bands = []

for b, bw in enumerate(bws):
for b, (bw, centre_freq) in enumerate(zip(bws, centre_freqs)):

ir_loc = np.zeros_like(ir)

@@ -2077,18 +2077,17 @@ def compute_rir(self):
alpha *= self.mic_array.directivity[m].get_response(
azimuth=azimuth,
colatitude=colatitude,
frequency=bw,
frequency=centre_freq,
degrees=False,
)

if self.sources[s].directivity is not None:
alpha *= self.sources[s].directivity.get_response(
azimuth=azimuth_s,
colatitude=colatitude_s,
frequency=bw,
frequency=centre_freq,
degrees=False,
)

# Use the Cython extension for the fractional delays
from .build_rir import fast_rir_builder

@@ -2102,7 +2101,6 @@ def compute_rir(self):

if is_multi_band:
ir_loc = self.octave_bands.analysis(ir_loc, band=b)

ir += ir_loc

# Ray Tracing
2 changes: 1 addition & 1 deletion pyroomacoustics/utilities.py
Original file line number Diff line number Diff line change
@@ -598,7 +598,7 @@ def fractional_delay_filter_bank(delays):
bank_flat = np.zeros(N * filter_length)

# separate delays in integer and fractional parts
di = np.floor(delays).astype(np.int)
di = np.floor(delays).astype(int)
df = delays - di

# broadcasting tricks to compute at once all the locations
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -177,7 +177,7 @@ def build_extensions(self):
# Libroom C extension
ext_modules=ext_modules,
# Necessary to keep the source files
package_data={"pyroomacoustics": ["*.pxd", "*.pyx", "data/materials.json"]},
package_data={"pyroomacoustics": ["*.pxd", "*.pyx", "data/materials.json", "data/speech_directivity.json" ]},
install_requires=[
"Cython",
"numpy",