From 9ed31edb28550793226ffc2f5cac875729946f93 Mon Sep 17 00:00:00 2001 From: Werner Robitza Date: Mon, 13 May 2024 12:06:06 +0200 Subject: [PATCH] warn if dynamic mode is used but linear specified See: https://github.com/slhck/ffmpeg-normalize/issues/254 --- README.md | 3 +- ffmpeg_normalize/__main__.py | 6 +- ffmpeg_normalize/_media_file.py | 46 ++++++++++--- ffmpeg_normalize/_streams.py | 113 +++++++++++++++++++++++++------- test/test.py | 29 +++++--- 5 files changed, 151 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 929df8e..dc9761d 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ Read on for more info. - [Requirements](#requirements) - [ffmpeg](#ffmpeg) - [Installation](#installation) +- [Docker Build](#docker-build) - [Usage](#usage) - [Description](#description) - [Examples](#examples) @@ -237,7 +238,7 @@ Some containers (like MP4) also cannot handle PCM audio. If you want to use such Otherwise, the range is -99 to 0. -- `-p, --print-stats`: Print first pass loudness statistics formatted as JSON to stdout. +- `-p, --print-stats`: Print loudness statistics for both passes formatted as JSON to stdout. ### EBU R128 Normalization diff --git a/ffmpeg_normalize/__main__.py b/ffmpeg_normalize/__main__.py index 1b42ea7..f45e6b4 100644 --- a/ffmpeg_normalize/__main__.py +++ b/ffmpeg_normalize/__main__.py @@ -24,9 +24,7 @@ def create_parser() -> argparse.ArgumentParser: description=textwrap.dedent( """\ ffmpeg-normalize v{} -- command line tool for normalizing audio files - """.format( - __version__ - ) + """.format(__version__) ), # usage="%(prog)s INPUT [INPUT ...] 
[-o OUTPUT [OUTPUT ...]] [options]", formatter_class=argparse.RawTextHelpFormatter, @@ -157,7 +155,7 @@ def create_parser() -> argparse.ArgumentParser: "-p", "--print-stats", action="store_true", - help="Print first pass loudness statistics formatted as JSON to stdout", + help="Print loudness statistics for both passes formatted as JSON to stdout.", ) # group_normalization.add_argument( diff --git a/ffmpeg_normalize/_media_file.py b/ffmpeg_normalize/_media_file.py index 444c561..7de50a3 100644 --- a/ffmpeg_normalize/_media_file.py +++ b/ffmpeg_normalize/_media_file.py @@ -232,12 +232,10 @@ def _first_pass(self) -> None: for _ in fun(): pass - if self.ffmpeg_normalize.print_stats: - stats = [ - audio_stream.get_stats() - for audio_stream in self.streams["audio"].values() - ] - self.ffmpeg_normalize.stats.extend(stats) + # set initial stats (for dry-runs, this is the only thing we need to do) + self.ffmpeg_normalize.stats = [ + audio_stream.get_stats() for audio_stream in self.streams["audio"].values() + ] def _get_audio_filter_cmd(self) -> tuple[str, list[str]]: """ @@ -390,12 +388,14 @@ def _second_pass(self) -> Iterator[float]: temp_file = os.path.join(temp_dir, f"out.{self.output_ext}") cmd.append(temp_file) + cmd_runner = CommandRunner() try: try: - yield from CommandRunner().run_ffmpeg_command(cmd) + yield from cmd_runner.run_ffmpeg_command(cmd) except Exception as e: - cmd_str = " ".join([shlex.quote(c) for c in cmd]) - _logger.error(f"Error while running command {cmd_str}! Error: {e}") + _logger.error( + f"Error while running command {shlex.join(cmd)}! 
Error: {e}" + ) raise e else: _logger.debug( @@ -407,4 +407,32 @@ def _second_pass(self) -> Iterator[float]: rmtree(temp_dir, ignore_errors=True) raise e + output = cmd_runner.get_output() + # in the second pass, we do not normalize stream-by-stream, so we set the stats based on the + # overall output (which includes multiple loudnorm stats) + if self.ffmpeg_normalize.normalization_type == "ebu": + all_stats = AudioStream.prune_and_parse_loudnorm_output( + output, num_stats=len(self.streams["audio"]) + ) + for idx, audio_stream in self.streams["audio"].items(): + audio_stream.set_second_pass_stats(all_stats[idx]) + + # collect all stats for the final report, again (overwrite the input) + self.ffmpeg_normalize.stats = [ + audio_stream.get_stats() for audio_stream in self.streams["audio"].values() + ] + + # warn if self.media_file.ffmpeg_normalize.dynamic == False and any of the second pass stats contain "normalization_type" == "dynamic" + if self.ffmpeg_normalize.dynamic is False: + for audio_stream in self.streams["audio"].values(): + pass2_stats = audio_stream.get_stats()["ebu_pass2"] + if pass2_stats is None: + continue + if pass2_stats["normalization_type"] == "dynamic": + _logger.warning( + "You specified linear normalization, but the loudnorm filter reverted to dynamic normalization. " + "This may lead to unexpected results. " + "Consider your input settings, e.g. choose a lower target level or higher target loudness range." 
+ ) + _logger.debug("Normalization finished") diff --git a/ffmpeg_normalize/_streams.py b/ffmpeg_normalize/_streams.py index 5f33217..5a0c490 100644 --- a/ffmpeg_normalize/_streams.py +++ b/ffmpeg_normalize/_streams.py @@ -4,7 +4,7 @@ import logging import os import re -from typing import TYPE_CHECKING, Iterator, Literal, TypedDict, cast +from typing import TYPE_CHECKING, Iterator, List, Literal, Optional, TypedDict, cast from ._cmd_utils import NUL, CommandRunner, dict_to_filter_opts from ._errors import FFmpegNormalizeError @@ -26,10 +26,12 @@ class EbuLoudnessStatistics(TypedDict): output_lra: float output_thresh: float target_offset: float + normalization_type: str class LoudnessStatistics(TypedDict): - ebu: EbuLoudnessStatistics | None + ebu_pass1: EbuLoudnessStatistics | None + ebu_pass2: EbuLoudnessStatistics | None mean: float | None max: float | None @@ -107,7 +109,8 @@ def __init__( super().__init__(ffmpeg_normalize, media_file, "audio", stream_id) self.loudness_statistics: LoudnessStatistics = { - "ebu": None, + "ebu_pass1": None, + "ebu_pass2": None, "mean": None, "max": None, } @@ -156,12 +159,22 @@ def get_stats(self) -> LoudnessStatisticsWithMetadata: "input_file": self.media_file.input_file, "output_file": self.media_file.output_file, "stream_id": self.stream_id, - "ebu": self.loudness_statistics["ebu"], + "ebu_pass1": self.loudness_statistics["ebu_pass1"], + "ebu_pass2": self.loudness_statistics["ebu_pass2"], "mean": self.loudness_statistics["mean"], "max": self.loudness_statistics["max"], } return stats + def set_second_pass_stats(self, stats: EbuLoudnessStatistics): + """ + Set the EBU loudness statistics for the second pass. + + Args: + stats (dict): The EBU loudness statistics. + """ + self.loudness_statistics["ebu_pass2"] = stats + def get_pcm_codec(self) -> str: """ Get the PCM codec string for the stream. 
@@ -288,6 +301,8 @@ def parse_loudnorm_stats(self) -> Iterator[float]: "-y", "-i", self.media_file.input_file, + "-map", + f"0:{self.stream_id}", "-filter_complex", filter_str, "-vn", @@ -305,30 +320,69 @@ def parse_loudnorm_stats(self) -> Iterator[float]: f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}" ) - output_lines = [line.strip() for line in output.split("\n")] - - self.loudness_statistics["ebu"] = AudioStream._parse_loudnorm_output( - output_lines + self.loudness_statistics["ebu_pass1"] = ( + AudioStream.prune_and_parse_loudnorm_output( + output, num_stats=1 + )[0] # only one stream ) @staticmethod - def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics: + def prune_and_parse_loudnorm_output( + output: str, num_stats: int = 1 + ) -> List[EbuLoudnessStatistics]: + """ + Prune ffmpeg progress lines from output and parse the loudnorm filter output. + There may be multiple outputs if multiple streams were processed. + + Args: + output (str): The output from ffmpeg. + num_stats (int): The number of loudnorm statistics to parse. + + Returns: + list: The EBU loudness statistics. + """ + pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output) + output_lines = [line.strip() for line in pruned_output.split("\n")] + + ret = [] + idx = 0 + while True: + _logger.debug(f"Parsing loudnorm stats for stream {idx}") + loudnorm_stats = AudioStream._parse_loudnorm_output( + output_lines, stream_index=idx + ) + idx += 1 + + if loudnorm_stats is None: + continue + ret.append(loudnorm_stats) + + if len(ret) >= num_stats: + break + + return ret + + @staticmethod + def _parse_loudnorm_output( + output_lines: list[str], stream_index: Optional[int] = None + ) -> Optional[EbuLoudnessStatistics]: """ Parse the output of a loudnorm filter to get the EBU loudness statistics. Args: output_lines (list[str]): The output lines of the loudnorm filter. 
+ stream_index (int): The stream index, optional to filter out the correct stream. If unset, the first stream is used. Raises: FFmpegNormalizeError: When the output could not be parsed. Returns: - EbuLoudnessStatistics: The EBU loudness statistics. + EbuLoudnessStatistics: The EBU loudness statistics, if found. """ loudnorm_start = 0 loudnorm_end = 0 for index, line in enumerate(output_lines): - if line.startswith("[Parsed_loudnorm"): + if line.startswith(f"[Parsed_loudnorm_{stream_index}"): loudnorm_start = index + 1 continue if loudnorm_start and line.startswith("}"): @@ -336,6 +390,10 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics: break if not (loudnorm_start and loudnorm_end): + if stream_index is not None: + # not an error + return None + raise FFmpegNormalizeError( "Could not parse loudnorm stats; no loudnorm-related output found" ) @@ -345,7 +403,9 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics: "\n".join(output_lines[loudnorm_start:loudnorm_end]) ) - _logger.debug(f"Loudnorm stats parsed: {json.dumps(loudnorm_stats)}") + _logger.debug( + f"Loudnorm stats for stream {stream_index} parsed: {json.dumps(loudnorm_stats)}" + ) for key in [ "input_i", @@ -357,9 +417,14 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics: "output_lra", "output_thresh", "target_offset", + "normalization_type", ]: + if key not in loudnorm_stats: + continue + if key == "normalization_type": + loudnorm_stats[key] = loudnorm_stats[key].lower() # handle infinite values - if float(loudnorm_stats[key]) == -float("inf"): + elif float(loudnorm_stats[key]) == -float("inf"): loudnorm_stats[key] = -99 elif float(loudnorm_stats[key]) == float("inf"): loudnorm_stats[key] = 0 @@ -378,17 +443,17 @@ def get_second_pass_opts_ebu(self) -> str: Return second pass loudnorm filter options string for ffmpeg """ - if not self.loudness_statistics["ebu"]: + if not self.loudness_statistics["ebu_pass1"]: raise 
FFmpegNormalizeError( "First pass not run, you must call parse_loudnorm_stats first" ) - if float(self.loudness_statistics["ebu"]["input_i"]) > 0: + if float(self.loudness_statistics["ebu_pass1"]["input_i"]) > 0: _logger.warning( "Input file had measured input loudness greater than zero " - f"({self.loudness_statistics['ebu']['input_i']}), capping at 0" + f"({self.loudness_statistics['ebu_pass1']['input_i']}), capping at 0" ) - self.loudness_statistics["ebu"]["input_i"] = 0 + self.loudness_statistics["ebu_pass1"]["input_i"] = 0 will_use_dynamic_mode = self.media_file.ffmpeg_normalize.dynamic @@ -396,7 +461,7 @@ def get_second_pass_opts_ebu(self) -> str: _logger.debug( "Keeping target loudness range in second pass loudnorm filter" ) - input_lra = self.loudness_statistics["ebu"]["input_lra"] + input_lra = self.loudness_statistics["ebu_pass1"]["input_lra"] if input_lra < 1 or input_lra > 50: _logger.warning( "Input file had measured loudness range outside of [1,50] " @@ -404,12 +469,12 @@ def get_second_pass_opts_ebu(self) -> str: ) self.media_file.ffmpeg_normalize.loudness_range_target = self._constrain( - self.loudness_statistics["ebu"]["input_lra"], 1, 50 + self.loudness_statistics["ebu_pass1"]["input_lra"], 1, 50 ) if self.media_file.ffmpeg_normalize.keep_lra_above_loudness_range_target: if ( - self.loudness_statistics["ebu"]["input_lra"] + self.loudness_statistics["ebu_pass1"]["input_lra"] <= self.media_file.ffmpeg_normalize.loudness_range_target ): _logger.debug( @@ -417,7 +482,7 @@ def get_second_pass_opts_ebu(self) -> str: ) else: self.media_file.ffmpeg_normalize.loudness_range_target = ( - self.loudness_statistics["ebu"]["input_lra"] + self.loudness_statistics["ebu_pass1"]["input_lra"] ) _logger.debug( "Keeping target loudness range in second pass loudnorm filter" @@ -425,11 +490,11 @@ def get_second_pass_opts_ebu(self) -> str: if ( self.media_file.ffmpeg_normalize.loudness_range_target - < self.loudness_statistics["ebu"]["input_lra"] + < 
self.loudness_statistics["ebu_pass1"]["input_lra"] and not will_use_dynamic_mode ): _logger.warning( - f"Input file had loudness range of {self.loudness_statistics['ebu']['input_lra']}. " + f"Input file had loudness range of {self.loudness_statistics['ebu_pass1']['input_lra']}. " f"This is larger than the loudness range target ({self.media_file.ffmpeg_normalize.loudness_range_target}). " "Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. " "Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from " @@ -443,7 +508,7 @@ def get_second_pass_opts_ebu(self) -> str: "Specify -ar/--sample-rate to override it." ) - stats = self.loudness_statistics["ebu"] + stats = self.loudness_statistics["ebu_pass1"] opts = { "i": self.media_file.ffmpeg_normalize.target_level, diff --git a/test/test.py b/test/test.py index e35f50c..11dec8e 100644 --- a/test/test.py +++ b/test/test.py @@ -1,5 +1,6 @@ import json import os +import shlex import shutil import subprocess import sys @@ -14,6 +15,7 @@ def ffmpeg_normalize_call(args: List[str]) -> Tuple[str, str]: cmd = [sys.executable, "-m", "ffmpeg_normalize"] cmd.extend(args) + print(shlex.join(cmd)) try: p = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True @@ -28,6 +30,9 @@ def ffmpeg_normalize_call(args: List[str]) -> Tuple[str, str]: def _get_stats( input_file: str, normalization_type: Literal["ebu", "rms", "peak"] = "ebu" ) -> Dict: + """ + Get the statistics from an existing output file without converting it. 
+ """ stdout, _ = ffmpeg_normalize_call( [input_file, "-f", "-n", "--print-stats", "-nt", normalization_type] ) @@ -192,7 +197,8 @@ def test_peak(self): "input_file": "normalized/test.mkv", "output_file": "normalized/test.mkv", "stream_id": 1, - "ebu": None, + "ebu_pass1": None, + "ebu_pass2": None, "mean": -14.8, "max": -0.0, }, @@ -200,7 +206,8 @@ def test_peak(self): "input_file": "normalized/test.mkv", "output_file": "normalized/test.mkv", "stream_id": 2, - "ebu": None, + "ebu_pass1": None, + "ebu_pass2": None, "mean": -19.3, "max": -0.0, }, @@ -217,7 +224,8 @@ def test_rms(self): "input_file": "normalized/test.mkv", "output_file": "normalized/test.mkv", "stream_id": 1, - "ebu": None, + "ebu_pass1": None, + "ebu_pass2": None, "mean": -15.0, "max": -0.2, }, @@ -225,7 +233,8 @@ def test_rms(self): "input_file": "normalized/test.mkv", "output_file": "normalized/test.mkv", "stream_id": 2, - "ebu": None, + "ebu_pass1": None, + "ebu_pass2": None, "mean": -15.1, "max": 0.0, }, @@ -242,7 +251,7 @@ def test_ebu(self): "input_file": "normalized/test.mkv", "output_file": "normalized/test.mkv", "stream_id": 1, - "ebu": { + "ebu_pass1": { "input_i": -23.00, "input_tp": -10.32, "input_lra": 2.40, @@ -254,6 +263,7 @@ def test_ebu(self): "normalization_type": "dynamic", "target_offset": -0.97, }, + "ebu_pass2": None, "mean": None, "max": None, }, @@ -261,7 +271,7 @@ def test_ebu(self): "input_file": "normalized/test.mkv", "output_file": "normalized/test.mkv", "stream_id": 2, - "ebu": { + "ebu_pass1": { "input_i": -22.98, "input_tp": -10.72, "input_lra": 2.10, @@ -273,6 +283,7 @@ def test_ebu(self): "normalization_type": "dynamic", "target_offset": -0.84, }, + "ebu_pass2": None, "mean": None, "max": None, }, @@ -388,7 +399,7 @@ def test_pre_filters(self): "input_file": "normalized/test2.wav", "output_file": "normalized/test2.mkv", "stream_id": 0, - "ebu": { + "ebu_pass1": { "input_i": -23.01, "input_tp": -10.75, "input_lra": 2.20, @@ -400,6 +411,7 @@ def test_pre_filters(self): 
"normalization_type": "dynamic", "target_offset": -0.84, }, + "ebu_pass2": None, "mean": None, "max": None, } @@ -424,7 +436,7 @@ def test_post_filters(self): "input_file": "normalized/test2.wav", "output_file": "normalized/test2.mkv", "stream_id": 0, - "ebu": { + "ebu_pass1": { "input_i": -35.02, "input_tp": -22.76, "input_lra": 2.20, @@ -436,6 +448,7 @@ def test_post_filters(self): "normalization_type": "dynamic", "target_offset": -0.84, }, + "ebu_pass2": None, "mean": None, "max": None, }