-
Notifications
You must be signed in to change notification settings - Fork 0
/
MDCT.py
142 lines (113 loc) · 3.74 KB
/
MDCT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import numpy as np
from scipy.signal.windows import kaiser
def mdct4_mono(x): # alpha is the window shape parameter for Kaiser-Bessel Derived window
"""
Perform MDCT (Modified Discrete Cosine Transform) on mono audio signal.
Parameters:
x (np.ndarray): Mono audio signal as a 1D NumPy array. The length must be a multiple of 4.
Returns:
np.ndarray: MDCT coefficients.
"""
if len(x) % 4 != 0:
raise ValueError("MDCT requires the input length to be a multiple of 4.")
N = len(x)
M = N // 2
N4 = N // 4
# Apply pre-rotation
rot = np.roll(x, N4)
rot[:N4] = -rot[:N4]
# Compute MDCT
t = np.arange(0, N4)
w = np.exp(-1j * 2 * np.pi * (t + 1. / 8.) / N)
c = np.take(rot, 2 * t) - np.take(rot, N - 2 * t - 1) \
- 1j * (np.take(rot, M + 2 * t) - np.take(rot, M - 2 * t - 1))
c = (2. / np.sqrt(N)) * w * np.fft.fft(0.5 * c * w, N4)
# Pack coefficients
y = np.zeros(M)
y[2 * t] = np.real(c[t])
y[M - 2 * t - 1] = -np.imag(c[t])
return y
def imdct4_mono(x):
"""
Perform IMDCT (Inverse Modified Discrete Cosine Transform) on MDCT coefficients.
Parameters:
x (np.ndarray): MDCT coefficients as a 1D NumPy array. The length must be even.
Returns:
np.ndarray: Reconstructed mono audio signal.
"""
if len(x) % 2 != 0:
raise ValueError("IMDCT requires the input length to be even.")
N = len(x)
M = N // 2
N2 = N * 2
# Compute IMDCT
t = np.arange(0, M)
w = np.exp(-1j * 2 * np.pi * (t + 1. / 8.) / N2)
c = np.take(x, 2 * t) + 1j * np.take(x, N - 2 * t - 1)
c = 0.5 * w * c
c = np.fft.fft(c, M)
c = ((8 / np.sqrt(N2)) * w) * c
# Prepare rotated output
rot = np.zeros(N2)
rot[2 * t] = np.real(c[t])
rot[N + 2 * t] = np.imag(c[t])
t = np.arange(1, N2, 2)
rot[t] = -rot[N2 - t - 1]
# Post-rotation
y = np.zeros(N2)
t = np.arange(0, 3 * M)
y[t] = rot[t + M]
t = np.arange(3 * M, N2)
y[t] = -rot[t - 3 * M]
return y
def mdct4(x):
N = x.shape[0]
print(N)
if N % 4 != 0:
raise ValueError("MDCT4 only defined for vectors of length multiple of four.")
M = N // 2
N4 = N // 4
rot = np.roll(x, N4)
rot[:N4] = -rot[:N4]
t = np.arange(0, N4)
w = np.exp(-1j * 2 * np.pi * (t + 1. / 8.) / N)
c = np.take(rot, 2 * t) - np.take(rot, N - 2 * t - 1) - 1j * (np.take(rot, M + 2 * t) - np.take(rot, M - 2 * t - 1))
c = (2. / np.sqrt(N)) * w * np.fft.fft(0.5 * c * w, N4)
y = np.zeros(M)
y[2 * t] = np.real(c[t])
y[M - 2 * t - 1] = -np.imag(c[t])
return y
def imdct4(x):
N = x.shape[0]
if N % 2 != 0:
raise ValueError("iMDCT4 only defined for even-length vectors.")
M = N // 2
N2 = N * 2
t = np.arange(0, M)
w = np.exp(-1j * 2 * np.pi * (t + 1. / 8.) / N2)
c = np.take(x, 2 * t) + 1j * np.take(x, N - 2 * t - 1)
c = 0.5 * w * c
c = np.fft.fft(c, M)
c = ((8 / np.sqrt(N2)) * w) * c
rot = np.zeros(N2)
rot[2 * t] = np.real(c[t])
rot[N + 2 * t] = np.imag(c[t])
t = np.arange(1, N2, 2)
rot[t] = -rot[N2 - t - 1]
t = np.arange(0, 3 * M)
y = np.zeros(N2)
y[t] = rot[t + M]
t = np.arange(3 * M, N2)
y[t] = -rot[t - 3 * M]
return y
def mdct(x):
N = len(x)
M = N // 2
# Perform FFT, skipping the cosine window step
return np.fft.rfft(x)[1:M+1] # Return the positive frequencies
def imdct(X):
M = len(X)
N = M * 2
# Reconstruct the signal by applying inverse FFT without the window
x_reconstructed = np.fft.irfft(np.hstack(([0], X, [0]))) # Apply inverse FFT with zero padding
return x_reconstructed[:N] # Return the full signal without windowing