diff --git a/.editorconfig b/.editorconfig index 5d0905a..000ca64 100644 --- a/.editorconfig +++ b/.editorconfig @@ -3,8 +3,11 @@ root = true [*] indent_style = space -indent_size = 2 +indent_size = 4 end_of_line = lf charset = utf-8 trim_trailing_whitespace = false insert_final_newline = false + +[*.{yml,yaml}] +indent_size = 2 diff --git a/ffmpeg_normalize/__main__.py b/ffmpeg_normalize/__main__.py index 159fdde..d747064 100644 --- a/ffmpeg_normalize/__main__.py +++ b/ffmpeg_normalize/__main__.py @@ -235,6 +235,20 @@ def create_parser() -> argparse.ArgumentParser: default=0.0, ) + group_ebu.add_argument( + "--lower-only", + action="store_true", + help=textwrap.dedent( + """\ + Whether the audio should not increase in loudness. + + If the measured loudness from the first pass is lower than the target + loudness then normalization pass will be skipped for the measured audio + source. + """ + ), + ) + group_ebu.add_argument( "--dual-mono", action="store_true", @@ -514,6 +528,7 @@ def _split_options(opts: str) -> list[str]: keep_lra_above_loudness_range_target=cli_args.keep_lra_above_loudness_range_target, true_peak=cli_args.true_peak, offset=cli_args.offset, + lower_only=cli_args.lower_only, dual_mono=cli_args.dual_mono, dynamic=cli_args.dynamic, audio_codec=cli_args.audio_codec, diff --git a/ffmpeg_normalize/_ffmpeg_normalize.py b/ffmpeg_normalize/_ffmpeg_normalize.py index a1d3caa..67ae5f8 100644 --- a/ffmpeg_normalize/_ffmpeg_normalize.py +++ b/ffmpeg_normalize/_ffmpeg_normalize.py @@ -3,6 +3,8 @@ import json import logging import os +import sys +from itertools import chain from typing import TYPE_CHECKING, Literal from tqdm import tqdm @@ -58,6 +60,7 @@ class FFmpegNormalize: keep_lra_above_loudness_range_target (bool, optional): Keep input loudness range above loudness range target. Defaults to False. true_peak (float, optional): True peak. Defaults to -2.0. offset (float, optional): Offset. Defaults to 0.0. + lower_only (bool, optional): Whether the audio should not increase in loudness. Defaults to False. dual_mono (bool, optional): Dual mono. Defaults to False. dynamic (bool, optional): Dynamic. Defaults to False. audio_codec (str, optional): Audio codec. Defaults to "pcm_s16le". @@ -94,6 +97,7 @@ def __init__( keep_lra_above_loudness_range_target: bool = False, true_peak: float = -2.0, offset: float = 0.0, + lower_only: bool = False, dual_mono: bool = False, dynamic: bool = False, audio_codec: str = "pcm_s16le", @@ -164,6 +168,7 @@ def __init__( self.true_peak = check_range(true_peak, -9, 0, name="true_peak") self.offset = check_range(offset, -99, 99, name="offset") + self.lower_only = lower_only # Ensure library user is passing correct types assert isinstance(dual_mono, bool), "dual_mono must be bool" @@ -254,5 +259,6 @@ def run_normalization(self) -> None: _logger.info(f"Normalized file written to {media_file.output_file}") - if self.print_stats and self.stats: - print(json.dumps(self.stats, indent=4)) + if self.print_stats: + json.dump(list(chain.from_iterable(media_file.get_stats() for media_file in self.media_files)), sys.stdout, indent=4) + print() diff --git a/ffmpeg_normalize/_media_file.py b/ffmpeg_normalize/_media_file.py index 279150b..5e3ddf7 100644 --- a/ffmpeg_normalize/_media_file.py +++ b/ffmpeg_normalize/_media_file.py @@ -6,13 +6,13 @@ import shlex from shutil import move, rmtree from tempfile import mkdtemp -from typing import TYPE_CHECKING, Iterator, Literal, TypedDict +from typing import TYPE_CHECKING, Iterable, Iterator, Literal, TypedDict from tqdm import tqdm from ._cmd_utils import DUR_REGEX, NUL, CommandRunner from ._errors import FFmpegNormalizeError -from ._streams import AudioStream, SubtitleStream, VideoStream +from ._streams import AudioStream, SubtitleStream, VideoStream, LoudnessStatisticsWithMetadata if TYPE_CHECKING: from ffmpeg_normalize import FFmpegNormalize @@ -240,11 +240,6 @@ def _first_pass(self) -> None: for _ in fun(): pass - # set initial stats (for dry-runs, this is the only thing we need to do) - self.ffmpeg_normalize.stats = [ - audio_stream.get_stats() for audio_stream in self.streams["audio"].values() - ] - def _get_audio_filter_cmd(self) -> tuple[str, list[str]]: """ Return the audio filter command and output labels needed. @@ -256,10 +251,35 @@ def _get_audio_filter_cmd(self) -> tuple[str, list[str]]: output_labels = [] for audio_stream in self.streams["audio"].values(): - if self.ffmpeg_normalize.normalization_type == "ebu": - normalization_filter = audio_stream.get_second_pass_opts_ebu() + skip_normalization = False + if self.ffmpeg_normalize.lower_only: + if self.ffmpeg_normalize.normalization_type == "ebu": + if ( + audio_stream.loudness_statistics["ebu_pass1"] is not None and + audio_stream.loudness_statistics["ebu_pass1"]["input_i"] < self.ffmpeg_normalize.target_level + ): + skip_normalization = True + elif self.ffmpeg_normalize.normalization_type == "peak": + if ( + audio_stream.loudness_statistics["max"] is not None and + audio_stream.loudness_statistics["max"] < self.ffmpeg_normalize.target_level + ): + skip_normalization = True + elif self.ffmpeg_normalize.normalization_type == "rms": + if ( + audio_stream.loudness_statistics["mean"] is not None and + audio_stream.loudness_statistics["mean"] < self.ffmpeg_normalize.target_level + ): + skip_normalization = True + + if skip_normalization: + _logger.info(f"Stream {audio_stream.stream_id} had measured input loudness lower than target, skipping normalization.") + normalization_filter = "acopy" else: - normalization_filter = audio_stream.get_second_pass_opts_peakrms() + if self.ffmpeg_normalize.normalization_type == "ebu": + normalization_filter = audio_stream.get_second_pass_opts_ebu() + else: + normalization_filter = audio_stream.get_second_pass_opts_peakrms() input_label = f"[0:{audio_stream.stream_id}]" output_label = f"[norm{audio_stream.stream_id}]" @@ -421,16 +441,10 @@ def _second_pass(self) -> Iterator[float]: # in the second pass, we do not normalize stream-by-stream, so we set the stats based on the # overall output (which includes multiple loudnorm stats) if self.ffmpeg_normalize.normalization_type == "ebu": - all_stats = AudioStream.prune_and_parse_loudnorm_output( - output, num_stats=len(self.streams["audio"]) - ) - for idx, audio_stream in enumerate(self.streams["audio"].values()): - audio_stream.set_second_pass_stats(all_stats[idx]) - - # collect all stats for the final report, again (overwrite the input) - self.ffmpeg_normalize.stats = [ - audio_stream.get_stats() for audio_stream in self.streams["audio"].values() - ] + all_stats = AudioStream.prune_and_parse_loudnorm_output(output) + for stream_id, audio_stream in self.streams["audio"].items(): + if stream_id in all_stats: + audio_stream.set_second_pass_stats(all_stats[stream_id]) # warn if self.media_file.ffmpeg_normalize.dynamic == False and any of the second pass stats contain "normalization_type" == "dynamic" if self.ffmpeg_normalize.dynamic is False: @@ -446,3 +460,6 @@ def _second_pass(self) -> Iterator[float]: ) _logger.debug("Normalization finished") + + def get_stats(self) -> Iterable[LoudnessStatisticsWithMetadata]: + return (audio_stream.get_stats() for audio_stream in self.streams["audio"].values()) \ No newline at end of file diff --git a/ffmpeg_normalize/_streams.py b/ffmpeg_normalize/_streams.py index 5a0c490..f7cc2f2 100644 --- a/ffmpeg_normalize/_streams.py +++ b/ffmpeg_normalize/_streams.py @@ -15,6 +15,7 @@ _logger = logging.getLogger(__name__) +_loudnorm_pattern = re.compile(r"\[Parsed_loudnorm_(\d+)") class EbuLoudnessStatistics(TypedDict): input_i: float @@ -320,58 +321,36 @@ def parse_loudnorm_stats(self) -> Iterator[float]: f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}" ) - self.loudness_statistics["ebu_pass1"] = ( - AudioStream.prune_and_parse_loudnorm_output( - output, num_stats=1 - )[0] # only one stream - ) + # only one stream + self.loudness_statistics["ebu_pass1"] = next(iter(AudioStream.prune_and_parse_loudnorm_output(output).values())) @staticmethod def prune_and_parse_loudnorm_output( - output: str, num_stats: int = 1 - ) -> List[EbuLoudnessStatistics]: + output: str + ) -> dict[int, EbuLoudnessStatistics]: """ Prune ffmpeg progress lines from output and parse the loudnorm filter output. There may be multiple outputs if multiple streams were processed. Args: output (str): The output from ffmpeg. - num_stats (int): The number of loudnorm statistics to parse. Returns: list: The EBU loudness statistics. """ pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output) output_lines = [line.strip() for line in pruned_output.split("\n")] - - ret = [] - idx = 0 - while True: - _logger.debug(f"Parsing loudnorm stats for stream {idx}") - loudnorm_stats = AudioStream._parse_loudnorm_output( - output_lines, stream_index=idx - ) - idx += 1 - - if loudnorm_stats is None: - continue - ret.append(loudnorm_stats) - - if len(ret) >= num_stats: - break - - return ret + return AudioStream._parse_loudnorm_output(output_lines) @staticmethod def _parse_loudnorm_output( - output_lines: list[str], stream_index: Optional[int] = None - ) -> Optional[EbuLoudnessStatistics]: + output_lines: list[str] + ) -> dict[int, EbuLoudnessStatistics]: """ Parse the output of a loudnorm filter to get the EBU loudness statistics. Args: output_lines (list[str]): The output lines of the loudnorm filter. - stream_index (int): The stream index, optional to filter out the correct stream. If unset, the first stream is used. Raises: FFmpegNormalizeError: When the output could not be parsed. @@ -379,64 +358,58 @@ def _parse_loudnorm_output( Returns: EbuLoudnessStatistics: The EBU loudness statistics, if found. """ + result = dict[int, EbuLoudnessStatistics]() + stream_index = -1 loudnorm_start = 0 - loudnorm_end = 0 for index, line in enumerate(output_lines): - if line.startswith(f"[Parsed_loudnorm_{stream_index}"): - loudnorm_start = index + 1 - continue - if loudnorm_start and line.startswith("}"): - loudnorm_end = index + 1 - break - - if not (loudnorm_start and loudnorm_end): - if stream_index is not None: - # not an error - return None - - raise FFmpegNormalizeError( - "Could not parse loudnorm stats; no loudnorm-related output found" - ) - - try: - loudnorm_stats = json.loads( - "\n".join(output_lines[loudnorm_start:loudnorm_end]) - ) - - _logger.debug( - f"Loudnorm stats for stream {stream_index} parsed: {json.dumps(loudnorm_stats)}" - ) - - for key in [ - "input_i", - "input_tp", - "input_lra", - "input_thresh", - "output_i", - "output_tp", - "output_lra", - "output_thresh", - "target_offset", - "normalization_type", - ]: - if key not in loudnorm_stats: - continue - if key == "normalization_type": - loudnorm_stats[key] = loudnorm_stats[key].lower() - # handle infinite values - elif float(loudnorm_stats[key]) == -float("inf"): - loudnorm_stats[key] = -99 - elif float(loudnorm_stats[key]) == float("inf"): - loudnorm_stats[key] = 0 - else: - # convert to floats - loudnorm_stats[key] = float(loudnorm_stats[key]) - - return cast(EbuLoudnessStatistics, loudnorm_stats) - except Exception as e: - raise FFmpegNormalizeError( - f"Could not parse loudnorm stats; wrong JSON format in string: {e}" - ) + if stream_index < 0: + if m := _loudnorm_pattern.match(line): + loudnorm_start = index + 1 + stream_index = int(m.group(1)) + else: + if line.startswith("}"): + loudnorm_end = index + 1 + loudnorm_data = "\n".join(output_lines[loudnorm_start:loudnorm_end]) + + try: + loudnorm_stats = json.loads(loudnorm_data) + + _logger.debug( + f"Loudnorm stats for stream {stream_index} parsed: {loudnorm_data}" + ) + + for key in [ + "input_i", + "input_tp", + "input_lra", + "input_thresh", + "output_i", + "output_tp", + "output_lra", + "output_thresh", + "target_offset", + "normalization_type", + ]: + if key not in loudnorm_stats: + continue + if key == "normalization_type": + loudnorm_stats[key] = loudnorm_stats[key].lower() + # handle infinite values + elif float(loudnorm_stats[key]) == -float("inf"): + loudnorm_stats[key] = -99 + elif float(loudnorm_stats[key]) == float("inf"): + loudnorm_stats[key] = 0 + else: + # convert to floats + loudnorm_stats[key] = float(loudnorm_stats[key]) + + result[stream_index] = cast(EbuLoudnessStatistics, loudnorm_stats) + stream_index = -1 + except Exception as e: + raise FFmpegNormalizeError( + f"Could not parse loudnorm stats; wrong JSON format in string: {e}" + ) + return result def get_second_pass_opts_ebu(self) -> str: """ @@ -515,19 +488,19 @@ def get_second_pass_opts_ebu(self) -> str: "lra": self.media_file.ffmpeg_normalize.loudness_range_target, "tp": self.media_file.ffmpeg_normalize.true_peak, "offset": self._constrain( - float(stats["target_offset"]), -99, 99, name="target_offset" + stats["target_offset"], -99, 99, name="target_offset" ), "measured_i": self._constrain( - float(stats["input_i"]), -99, 0, name="input_i" + stats["input_i"], -99, 0, name="input_i" ), "measured_lra": self._constrain( - float(stats["input_lra"]), 0, 99, name="input_lra" + stats["input_lra"], 0, 99, name="input_lra" ), "measured_tp": self._constrain( - float(stats["input_tp"]), -99, 99, name="input_tp" + stats["input_tp"], -99, 99, name="input_tp" ), "measured_thresh": self._constrain( - float(stats["input_thresh"]), -99, 0, name="input_thresh" + stats["input_thresh"], -99, 0, name="input_thresh" ), "linear": "false" if self.media_file.ffmpeg_normalize.dynamic else "true", "print_format": "json",