#!/usr/bin/env python3

# FMA: A Dataset For Music Analysis
# Michaël Defferrard, Kirell Benzi, Pierre Vandergheynst, Xavier Bresson, EPFL LTS2.
#
# All features are extracted using [librosa](https://github.com/librosa/librosa).
# Alternatives:
# * [Essentia](http://essentia.upf.edu) (C++ with Python bindings)
# * [MARSYAS](https://github.com/marsyas/marsyas) (C++ with Python bindings)
# * [RP extract](http://www.ifs.tuwien.ac.at/mir/downloads.html) (Matlab, Java, Python)
# * [jMIR jAudio](http://jmir.sourceforge.net) (Java)
# * [MIRtoolbox](https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials/mirtoolbox) (Matlab)

import os
import multiprocessing
import warnings

import numpy as np
from scipy import stats
import pandas as pd
import librosa
from tqdm import tqdm

import utils
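
# Minimal usage sketch (the path below is an assumption; the environment
# variable and file names come from the code in this script):
#   export AUDIO_DIR=/path/to/audio   # root directory passed to utils.get_audio_path()
#   python features.py                # reads tracks.csv, writes features.csv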


def columns():
    feature_sizes = dict(chroma_stft=12, chroma_cqt=12, chroma_cens=12,
                         tonnetz=6, mfcc=20, rmse=1, zcr=1,
                         spectral_centroid=1, spectral_bandwidth=1,
                         spectral_contrast=7, spectral_rolloff=1)
    moments = ('mean', 'std', 'skew', 'kurtosis', 'median', 'min', 'max')

    columns = []
    for name, size in feature_sizes.items():
        for moment in moments:
            it = ((name, moment, '{:02d}'.format(i+1)) for i in range(size))
            columns.extend(it)

    names = ('feature', 'statistics', 'number')
    columns = pd.MultiIndex.from_tuples(columns, names=names)

    # More efficient to slice if indexes are sorted.
    return columns.sort_values()
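

# Each librosa feature computed below is an array of shape (n_dimensions,
# n_frames); feature_stats() summarizes it over frames (axis=1) with the seven
# statistics declared in columns(), giving one fixed-length row per track.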
def compute_features(tid):

    features = pd.Series(index=columns(), dtype=np.float32, name=tid)

    # Catch warnings as exceptions (audioread leaks file descriptors).
    warnings.filterwarnings('error', module='librosa')

    def feature_stats(name, values):
        features[name, 'mean'] = np.mean(values, axis=1)
        features[name, 'std'] = np.std(values, axis=1)
        features[name, 'skew'] = stats.skew(values, axis=1)
        features[name, 'kurtosis'] = stats.kurtosis(values, axis=1)
        features[name, 'median'] = np.median(values, axis=1)
        features[name, 'min'] = np.min(values, axis=1)
        features[name, 'max'] = np.max(values, axis=1)

    try:
        filepath = utils.get_audio_path(os.environ.get('AUDIO_DIR'), tid)
        x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast

        f = librosa.feature.zero_crossing_rate(x, frame_length=2048, hop_length=512)
        feature_stats('zcr', f)

        cqt = np.abs(librosa.cqt(x, sr=sr, hop_length=512, bins_per_octave=12,
                                 n_bins=7*12, tuning=None))
        assert cqt.shape[0] == 7 * 12
        assert np.ceil(len(x)/512) <= cqt.shape[1] <= np.ceil(len(x)/512)+1

        f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats('chroma_cqt', f)
        f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats('chroma_cens', f)
        f = librosa.feature.tonnetz(chroma=f)
        feature_stats('tonnetz', f)
        del cqt

        stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
        assert stft.shape[0] == 1 + 2048 // 2
        assert np.ceil(len(x)/512) <= stft.shape[1] <= np.ceil(len(x)/512)+1
        del x

        f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
        feature_stats('chroma_stft', f)
        f = librosa.feature.rmse(S=stft)
        feature_stats('rmse', f)
        f = librosa.feature.spectral_centroid(S=stft)
        feature_stats('spectral_centroid', f)
        f = librosa.feature.spectral_bandwidth(S=stft)
        feature_stats('spectral_bandwidth', f)
        f = librosa.feature.spectral_contrast(S=stft, n_bands=6)
        feature_stats('spectral_contrast', f)
        f = librosa.feature.spectral_rolloff(S=stft)
        feature_stats('spectral_rolloff', f)

        mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
        del stft
        f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
        feature_stats('mfcc', f)

    except Exception as e:
        print('{}: {}'.format(tid, repr(e)))

    return features


def main():
    tracks = utils.load('tracks.csv')
    features = pd.DataFrame(index=tracks.index,
                            columns=columns(), dtype=np.float32)

    # Use more workers than available CPUs so the job stays CPU bound rather
    # than I/O bound. Beware of memory usage.
    nb_workers = int(1.5 * len(os.sched_getaffinity(0)))

    # The longest tracks are ~11,000 seconds. Process them first with fewer
    # workers to avoid memory errors.
    table = ((5000, 1), (3000, 3), (2000, 5), (1000, 10), (0, nb_workers))
    for duration, nb_workers in table:
        print('Working with {} processes.'.format(nb_workers))

        tids = tracks[tracks['track', 'duration'] >= duration].index
        tracks.drop(tids, axis=0, inplace=True)

        pool = multiprocessing.Pool(nb_workers)
        it = pool.imap_unordered(compute_features, tids)

        for i, row in enumerate(tqdm(it, total=len(tids))):
            features.loc[row.name] = row
            if i % 1000 == 0:
                save(features, 10)

    save(features, 10)
    test(features, 10)


def save(features, ndigits):

    # Should be done already, just to be sure.
    features.sort_index(axis=0, inplace=True)
    features.sort_index(axis=1, inplace=True)

    features.to_csv('features.csv', float_format='%.{}e'.format(ndigits))


def test(features, ndigits):

    indices = features[features.isnull().any(axis=1)].index
    if len(indices) > 0:
        print('Failed tracks: {}'.format(', '.join(str(i) for i in indices)))

    tmp = utils.load('features.csv')
    np.testing.assert_allclose(tmp.values, features.values, rtol=10**-ndigits)


if __name__ == "__main__":
    main()
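
# A sketch of inspecting the saved features afterwards (hypothetical session,
# assuming utils.load() parses the three-level column header written above):
#   features = utils.load('features.csv')
#   features['mfcc', 'mean']    # per-track mean of each of the 20 MFCCs
#   features['zcr'].describe()  # distribution of the zero-crossing-rate stats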