-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cf0ed24
commit 1010f29
Showing
5 changed files
with
101 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import os
import tempfile

import av
|
||
|
||
# Maps each supported response format to the codec name handed to the
# encoder when an output stream is created for that format.
response_format_to_encoder_decoder_map = dict(
    (
        ("mp3", "libmp3lame"),
        ("opus", "libopus"),
        ("aac", "aac"),
        ("flac", "flac"),
        ("wav", "pcm_s16le"),
        ("pcm", "pcm_s16le"),
    )
)
|
||
# Maps each supported response format to the file-name suffix used for the
# temporary output file (note that opus output uses an ".ogg" suffix).
response_format_to_suffix_map = dict(
    (
        ("mp3", ".mp3"),
        ("opus", ".ogg"),
        ("aac", ".aac"),
        ("flac", ".flac"),
        ("wav", ".wav"),
        ("pcm", ".pcm"),
    )
)
|
||
|
||
def convert(
    input_file_path: str,
    response_format: str,
    speed: float = 1,
) -> str:
    """Convert an audio file to ``response_format`` and return the new path.

    The first audio stream of the input is converted and written to a newly
    created temporary file. That file is created with ``delete=False``, so it
    is NOT removed automatically: on success the caller owns it and is
    responsible for deleting it.

    Args:
        input_file_path: Path of the audio file to read.
        response_format: One of the keys of ``response_format_to_suffix_map``
            ("mp3", "opus", "aac", "flac", "wav" or "pcm"). "pcm" produces a
            headerless raw stream; every other format is written through a
            container.
        speed: Positive multiplier applied to the input sample rate to obtain
            the output sample rate.
            NOTE(review): the converters resample to the scaled rate, which
            looks like it changes the sample rate rather than the playback
            tempo -- confirm this is the intended meaning of "speed".

    Returns:
        Path of the temporary file that holds the converted audio.

    Raises:
        ValueError: If ``speed`` is not a positive number or
            ``response_format`` is not a supported format.
        IndexError: If the input has no audio stream.
    """
    # `not speed > 0` (instead of `speed <= 0`) also rejects NaN.
    if not speed > 0:
        raise ValueError(f"speed must be a positive number, got {speed!r}")

    suffix = response_format_to_suffix_map.get(response_format)
    if suffix is None:
        # Previously an unknown format silently produced a "...None" file and
        # failed later with an unrelated encoder error.
        supported = ", ".join(sorted(response_format_to_suffix_map))
        raise ValueError(
            f"Unsupported response_format {response_format!r}; "
            f"expected one of: {supported}"
        )

    # The temporary file is only used to reserve a unique path. Close our
    # handle before the converters reopen the path for writing.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as output_temp_file:
        output_file_path = output_temp_file.name

    try:
        # The context manager closes the input container even if decoding or
        # encoding raises.
        with av.open(input_file_path) as input_container:
            input_stream = input_container.streams.audio[0]

            if response_format == "pcm":
                convert_to_pcm(input_stream, output_file_path, speed)
            else:
                convert_to_format(
                    input_stream, output_file_path, response_format, speed
                )
    except BaseException:
        # With delete=False nobody else removes a half-written file, so clean
        # it up here before propagating the original error.
        try:
            os.remove(output_file_path)
        except OSError:
            pass
        raise

    return output_file_path
|
||
|
||
def convert_to_pcm(input_stream, output_file_path: str, speed: float):
    """Decode ``input_stream`` and write it as headerless 16-bit PCM.

    Raw PCM must not carry any container structure, so the resampled sample
    data is written straight to a plain binary file instead of going through
    an output container.

    Args:
        input_stream: Audio stream to decode; its ``container``, ``layout``
            and ``rate`` attributes are used.
        output_file_path: Path of the file to (over)write with raw samples.
        speed: Positive multiplier applied to the input sample rate to obtain
            the output sample rate.
            NOTE(review): this resamples to the scaled rate, which looks like
            it changes the sample rate rather than the playback tempo --
            confirm this is intended.

    Raises:
        ValueError: If ``speed`` is not a positive number. The check runs
            before the output file is opened, so nothing is created or
            truncated in that case.
    """
    # `not speed > 0` (instead of `speed <= 0`) also rejects NaN.
    if not speed > 0:
        raise ValueError(f"speed must be a positive number, got {speed!r}")

    resampler = av.AudioResampler(
        format="s16",  # 16-bit PCM
        layout=input_stream.layout,
        rate=int(input_stream.rate * speed),
    )

    with open(output_file_path, "wb") as output_file:
        for frame in input_stream.container.decode(input_stream):
            frame.pts = None  # Reset PTS to avoid issues with frame timing
            for resampled_frame in resampler.resample(frame):
                # to_ndarray() exposes the frame's samples as a NumPy array;
                # tobytes() turns them into the raw byte stream to store.
                # NOTE(review): "s16" is a packed format, so the bytes should
                # be channel-interleaved -- confirm against consumers.
                output_file.write(resampled_frame.to_ndarray().tobytes())

        # Passing None flushes the resampler. Without this, samples still
        # buffered inside it never reach the file and the tail of the audio
        # is cut off.
        for resampled_frame in resampler.resample(None):
            if resampled_frame is None:
                # Defensive: a pass-through resampler may echo None back.
                continue
            output_file.write(resampled_frame.to_ndarray().tobytes())
|
||
|
||
def convert_to_format(
    input_stream, output_file_path: str, response_format: str, speed: float
):
    """Re-encode ``input_stream`` into a container file of ``response_format``.

    Decoded frames are resampled to the output stream's sample format, layout
    and rate, encoded with the codec registered for ``response_format`` in
    ``response_format_to_encoder_decoder_map``, and muxed into
    ``output_file_path``.

    Args:
        input_stream: Audio stream to decode; its ``container``, ``rate`` and
            ``channels`` attributes are used.
        output_file_path: Path of the output file to create.
        response_format: Key of ``response_format_to_encoder_decoder_map``.
        speed: Positive multiplier applied to the input sample rate to obtain
            the output stream's sample rate.
            NOTE(review): this resamples to the scaled rate, which looks like
            it changes the sample rate rather than the playback tempo, and
            some encoders may only accept specific rates -- confirm.

    Raises:
        ValueError: If ``speed`` is not a positive number or no encoder is
            registered for ``response_format``.
    """
    # `not speed > 0` (instead of `speed <= 0`) also rejects NaN.
    if not speed > 0:
        raise ValueError(f"speed must be a positive number, got {speed!r}")

    codec_name = response_format_to_encoder_decoder_map.get(response_format)
    if codec_name is None:
        supported = ", ".join(sorted(response_format_to_encoder_decoder_map))
        raise ValueError(
            f"Unsupported response_format {response_format!r}; "
            f"expected one of: {supported}"
        )

    # The context manager closes the output container even when encoding or
    # muxing raises, so the file handle is never leaked.
    with av.open(output_file_path, mode="w") as output_container:
        output_stream = output_container.add_stream(
            codec_name=codec_name,
            rate=int(input_stream.rate * speed),
            channels=input_stream.channels,
        )

        # Feed the encoder frames in exactly the format it was configured for.
        resampler = av.AudioResampler(
            format=output_stream.format,
            layout=output_stream.layout,
            rate=output_stream.rate,
        )

        for frame in input_stream.container.decode(input_stream):
            # Reset PTS to avoid issues with frame timing
            frame.pts = None
            for resampled_frame in resampler.resample(frame):
                for packet in output_stream.encode(resampled_frame):
                    output_container.mux(packet)

        # Passing None flushes the resampler. Without this, samples still
        # buffered inside it are never encoded and the tail of the audio is
        # cut off.
        for resampled_frame in resampler.resample(None):
            if resampled_frame is None:
                # Defensive: encoding None here would flush the encoder early.
                continue
            for packet in output_stream.encode(resampled_frame):
                output_container.mux(packet)

        # Flush encoder
        for packet in output_stream.encode():
            output_container.mux(packet)
This file was deleted.
Oops, something went wrong.