From 9ed31edb28550793226ffc2f5cac875729946f93 Mon Sep 17 00:00:00 2001
From: Werner Robitza <werner.robitza@gmail.com>
Date: Mon, 13 May 2024 12:06:06 +0200
Subject: [PATCH] warn if dynamic mode is used but linear specified

See: https://github.com/slhck/ffmpeg-normalize/issues/254
---
 README.md                       |   3 +-
 ffmpeg_normalize/__main__.py    |   6 +-
 ffmpeg_normalize/_media_file.py |  46 ++++++++++---
 ffmpeg_normalize/_streams.py    | 113 +++++++++++++++++++++++++-------
 test/test.py                    |  29 +++++---
 5 files changed, 151 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index 929df8e..dc9761d 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ Read on for more info.
 - [Requirements](#requirements)
   - [ffmpeg](#ffmpeg)
 - [Installation](#installation)
+- [Docker Build](#docker-build)
 - [Usage](#usage)
 - [Description](#description)
 - [Examples](#examples)
@@ -237,7 +238,7 @@ Some containers (like MP4) also cannot handle PCM audio. If you want to use such
 
     Otherwise, the range is -99 to 0.
 
-- `-p, --print-stats`: Print first pass loudness statistics formatted as JSON to stdout.
+- `-p, --print-stats`: Print loudness statistics for both passes formatted as JSON to stdout.
 
 ### EBU R128 Normalization
 
diff --git a/ffmpeg_normalize/__main__.py b/ffmpeg_normalize/__main__.py
index 1b42ea7..f45e6b4 100644
--- a/ffmpeg_normalize/__main__.py
+++ b/ffmpeg_normalize/__main__.py
@@ -24,9 +24,7 @@ def create_parser() -> argparse.ArgumentParser:
         description=textwrap.dedent(
             """\
             ffmpeg-normalize v{} -- command line tool for normalizing audio files
-            """.format(
-                __version__
-            )
+            """.format(__version__)
         ),
         # usage="%(prog)s INPUT [INPUT ...] [-o OUTPUT [OUTPUT ...]] [options]",
         formatter_class=argparse.RawTextHelpFormatter,
@@ -157,7 +155,7 @@ def create_parser() -> argparse.ArgumentParser:
         "-p",
         "--print-stats",
         action="store_true",
-        help="Print first pass loudness statistics formatted as JSON to stdout",
+        help="Print loudness statistics for both passes formatted as JSON to stdout.",
     )
 
     # group_normalization.add_argument(
diff --git a/ffmpeg_normalize/_media_file.py b/ffmpeg_normalize/_media_file.py
index 444c561..7de50a3 100644
--- a/ffmpeg_normalize/_media_file.py
+++ b/ffmpeg_normalize/_media_file.py
@@ -232,12 +232,10 @@ def _first_pass(self) -> None:
                 for _ in fun():
                     pass
 
-        if self.ffmpeg_normalize.print_stats:
-            stats = [
-                audio_stream.get_stats()
-                for audio_stream in self.streams["audio"].values()
-            ]
-            self.ffmpeg_normalize.stats.extend(stats)
+        # set initial stats (for dry-runs, this is the only thing we need to do)
+        self.ffmpeg_normalize.stats = [
+            audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
+        ]
 
     def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
         """
@@ -390,12 +388,14 @@ def _second_pass(self) -> Iterator[float]:
         temp_file = os.path.join(temp_dir, f"out.{self.output_ext}")
         cmd.append(temp_file)
 
+        cmd_runner = CommandRunner()
         try:
             try:
-                yield from CommandRunner().run_ffmpeg_command(cmd)
+                yield from cmd_runner.run_ffmpeg_command(cmd)
             except Exception as e:
-                cmd_str = " ".join([shlex.quote(c) for c in cmd])
-                _logger.error(f"Error while running command {cmd_str}! Error: {e}")
+                _logger.error(
+                    f"Error while running command {shlex.join(cmd)}! Error: {e}"
+                )
                 raise e
             else:
                 _logger.debug(
@@ -407,4 +407,32 @@ def _second_pass(self) -> Iterator[float]:
             rmtree(temp_dir, ignore_errors=True)
             raise e
 
+        output = cmd_runner.get_output()
+        # in the second pass, we do not normalize stream-by-stream, so we set the stats based on the
+        # overall output (which includes multiple loudnorm stats)
+        if self.ffmpeg_normalize.normalization_type == "ebu":
+            all_stats = AudioStream.prune_and_parse_loudnorm_output(
+                output, num_stats=len(self.streams["audio"])
+            )
+            for idx, audio_stream in self.streams["audio"].items():
+                audio_stream.set_second_pass_stats(all_stats[idx])
+
+        # collect all stats for the final report, again (overwrite the input)
+        self.ffmpeg_normalize.stats = [
+            audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
+        ]
+
+        # warn when linear mode was requested (dynamic is False) but a stream's second pass stats report "normalization_type" == "dynamic"
+        if self.ffmpeg_normalize.dynamic is False:
+            for audio_stream in self.streams["audio"].values():
+                pass2_stats = audio_stream.get_stats()["ebu_pass2"]
+                if pass2_stats is None:
+                    continue
+                if pass2_stats["normalization_type"] == "dynamic":
+                    _logger.warning(
+                        "You specified linear normalization, but the loudnorm filter reverted to dynamic normalization. "
+                        "This may lead to unexpected results. "
+                        "Consider your input settings, e.g. choose a lower target level or higher target loudness range."
+                    )
+
         _logger.debug("Normalization finished")
diff --git a/ffmpeg_normalize/_streams.py b/ffmpeg_normalize/_streams.py
index 5f33217..5a0c490 100644
--- a/ffmpeg_normalize/_streams.py
+++ b/ffmpeg_normalize/_streams.py
@@ -4,7 +4,7 @@
 import logging
 import os
 import re
-from typing import TYPE_CHECKING, Iterator, Literal, TypedDict, cast
+from typing import TYPE_CHECKING, Iterator, List, Literal, Optional, TypedDict, cast
 
 from ._cmd_utils import NUL, CommandRunner, dict_to_filter_opts
 from ._errors import FFmpegNormalizeError
@@ -26,10 +26,12 @@ class EbuLoudnessStatistics(TypedDict):
     output_lra: float
     output_thresh: float
     target_offset: float
+    normalization_type: str
 
 
 class LoudnessStatistics(TypedDict):
-    ebu: EbuLoudnessStatistics | None
+    ebu_pass1: EbuLoudnessStatistics | None
+    ebu_pass2: EbuLoudnessStatistics | None
     mean: float | None
     max: float | None
 
@@ -107,7 +109,8 @@ def __init__(
         super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)
 
         self.loudness_statistics: LoudnessStatistics = {
-            "ebu": None,
+            "ebu_pass1": None,
+            "ebu_pass2": None,
             "mean": None,
             "max": None,
         }
@@ -156,12 +159,22 @@ def get_stats(self) -> LoudnessStatisticsWithMetadata:
             "input_file": self.media_file.input_file,
             "output_file": self.media_file.output_file,
             "stream_id": self.stream_id,
-            "ebu": self.loudness_statistics["ebu"],
+            "ebu_pass1": self.loudness_statistics["ebu_pass1"],
+            "ebu_pass2": self.loudness_statistics["ebu_pass2"],
             "mean": self.loudness_statistics["mean"],
             "max": self.loudness_statistics["max"],
         }
         return stats
 
+    def set_second_pass_stats(self, stats: EbuLoudnessStatistics) -> None:
+        """
+        Store the second-pass EBU loudness statistics under the "ebu_pass2" key.
+
+        Args:
+            stats (EbuLoudnessStatistics): The parsed loudnorm statistics.
+        """
+        self.loudness_statistics["ebu_pass2"] = stats
+
     def get_pcm_codec(self) -> str:
         """
         Get the PCM codec string for the stream.
@@ -288,6 +301,8 @@ def parse_loudnorm_stats(self) -> Iterator[float]:
             "-y",
             "-i",
             self.media_file.input_file,
+            "-map",
+            f"0:{self.stream_id}",
             "-filter_complex",
             filter_str,
             "-vn",
@@ -305,30 +320,69 @@ def parse_loudnorm_stats(self) -> Iterator[float]:
             f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
         )
 
-        output_lines = [line.strip() for line in output.split("\n")]
-
-        self.loudness_statistics["ebu"] = AudioStream._parse_loudnorm_output(
-            output_lines
+        self.loudness_statistics["ebu_pass1"] = (
+            AudioStream.prune_and_parse_loudnorm_output(
+                output, num_stats=1
+            )[0]  # only one stream
         )
 
     @staticmethod
-    def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
+    def prune_and_parse_loudnorm_output(
+        output: str, num_stats: int = 1
+    ) -> List[EbuLoudnessStatistics]:
+        """
+        Prune ffmpeg progress lines from output and parse the loudnorm filter output.
+        There may be multiple outputs if multiple streams were processed.
+
+        Args:
+            output (str): The output from ffmpeg.
+            num_stats (int): The number of loudnorm statistics to parse.
+
+        Raises:
+            FFmpegNormalizeError: When fewer than num_stats statistics are found.
+
+        Returns:
+            list: The EBU loudness statistics, ordered by loudnorm filter index.
+        """
+        pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
+        output_lines = [line.strip() for line in pruned_output.split("\n")]
+        # only probe filter indices present in the output, so the search always ends
+        matches = (re.match(r"\[Parsed_loudnorm_(\d+)", line) for line in output_lines)
+        indices = sorted({int(m.group(1)) for m in matches if m})
+
+        ret = []
+        for idx in indices:
+            _logger.debug(f"Parsing loudnorm stats for stream {idx}")
+            stats = AudioStream._parse_loudnorm_output(output_lines, stream_index=idx)
+            if stats is not None:
+                ret.append(stats)
+            if len(ret) >= num_stats:
+                return ret
+        raise FFmpegNormalizeError(
+            f"Could not parse loudnorm stats; found {len(ret)} of {num_stats} expected"
+        )
+
+    @staticmethod
+    def _parse_loudnorm_output(
+        output_lines: list[str], stream_index: Optional[int] = None
+    ) -> Optional[EbuLoudnessStatistics]:
         """
         Parse the output of a loudnorm filter to get the EBU loudness statistics.
 
         Args:
             output_lines (list[str]): The output lines of the loudnorm filter.
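+            stream_index (int): Index N of the "[Parsed_loudnorm_N" section to parse; None is returned if no such section is found. If unset (None), the prefix becomes "[Parsed_loudnorm_None", which matches no ffmpeg output, so FFmpegNormalizeError is raised.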
 
         Raises:
             FFmpegNormalizeError: When the output could not be parsed.
 
         Returns:
-            EbuLoudnessStatistics: The EBU loudness statistics.
+            EbuLoudnessStatistics: The EBU loudness statistics, if found.
         """
         loudnorm_start = 0
         loudnorm_end = 0
         for index, line in enumerate(output_lines):
-            if line.startswith("[Parsed_loudnorm"):
+            if line.startswith(f"[Parsed_loudnorm_{stream_index}"):
                 loudnorm_start = index + 1
                 continue
             if loudnorm_start and line.startswith("}"):
@@ -336,6 +390,10 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
                 break
 
         if not (loudnorm_start and loudnorm_end):
+            if stream_index is not None:
+                # not an error
+                return None
+
             raise FFmpegNormalizeError(
                 "Could not parse loudnorm stats; no loudnorm-related output found"
             )
@@ -345,7 +403,9 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
                 "\n".join(output_lines[loudnorm_start:loudnorm_end])
             )
 
-            _logger.debug(f"Loudnorm stats parsed: {json.dumps(loudnorm_stats)}")
+            _logger.debug(
+                f"Loudnorm stats for stream {stream_index} parsed: {json.dumps(loudnorm_stats)}"
+            )
 
             for key in [
                 "input_i",
@@ -357,9 +417,14 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
                 "output_lra",
                 "output_thresh",
                 "target_offset",
+                "normalization_type",
             ]:
+                if key not in loudnorm_stats:
+                    continue
+                if key == "normalization_type":
+                    loudnorm_stats[key] = loudnorm_stats[key].lower()
                 # handle infinite values
-                if float(loudnorm_stats[key]) == -float("inf"):
+                elif float(loudnorm_stats[key]) == -float("inf"):
                     loudnorm_stats[key] = -99
                 elif float(loudnorm_stats[key]) == float("inf"):
                     loudnorm_stats[key] = 0
@@ -378,17 +443,17 @@ def get_second_pass_opts_ebu(self) -> str:
         Return second pass loudnorm filter options string for ffmpeg
         """
 
-        if not self.loudness_statistics["ebu"]:
+        if not self.loudness_statistics["ebu_pass1"]:
             raise FFmpegNormalizeError(
                 "First pass not run, you must call parse_loudnorm_stats first"
             )
 
-        if float(self.loudness_statistics["ebu"]["input_i"]) > 0:
+        if float(self.loudness_statistics["ebu_pass1"]["input_i"]) > 0:
             _logger.warning(
                 "Input file had measured input loudness greater than zero "
-                f"({self.loudness_statistics['ebu']['input_i']}), capping at 0"
+                f"({self.loudness_statistics['ebu_pass1']['input_i']}), capping at 0"
             )
-            self.loudness_statistics["ebu"]["input_i"] = 0
+            self.loudness_statistics["ebu_pass1"]["input_i"] = 0
 
         will_use_dynamic_mode = self.media_file.ffmpeg_normalize.dynamic
 
@@ -396,7 +461,7 @@ def get_second_pass_opts_ebu(self) -> str:
             _logger.debug(
                 "Keeping target loudness range in second pass loudnorm filter"
             )
-            input_lra = self.loudness_statistics["ebu"]["input_lra"]
+            input_lra = self.loudness_statistics["ebu_pass1"]["input_lra"]
             if input_lra < 1 or input_lra > 50:
                 _logger.warning(
                     "Input file had measured loudness range outside of [1,50] "
@@ -404,12 +469,12 @@ def get_second_pass_opts_ebu(self) -> str:
                 )
 
             self.media_file.ffmpeg_normalize.loudness_range_target = self._constrain(
-                self.loudness_statistics["ebu"]["input_lra"], 1, 50
+                self.loudness_statistics["ebu_pass1"]["input_lra"], 1, 50
             )
 
         if self.media_file.ffmpeg_normalize.keep_lra_above_loudness_range_target:
             if (
-                self.loudness_statistics["ebu"]["input_lra"]
+                self.loudness_statistics["ebu_pass1"]["input_lra"]
                 <= self.media_file.ffmpeg_normalize.loudness_range_target
             ):
                 _logger.debug(
@@ -417,7 +482,7 @@ def get_second_pass_opts_ebu(self) -> str:
                 )
             else:
                 self.media_file.ffmpeg_normalize.loudness_range_target = (
-                    self.loudness_statistics["ebu"]["input_lra"]
+                    self.loudness_statistics["ebu_pass1"]["input_lra"]
                 )
                 _logger.debug(
                     "Keeping target loudness range in second pass loudnorm filter"
@@ -425,11 +490,11 @@ def get_second_pass_opts_ebu(self) -> str:
 
         if (
             self.media_file.ffmpeg_normalize.loudness_range_target
-            < self.loudness_statistics["ebu"]["input_lra"]
+            < self.loudness_statistics["ebu_pass1"]["input_lra"]
             and not will_use_dynamic_mode
         ):
             _logger.warning(
-                f"Input file had loudness range of {self.loudness_statistics['ebu']['input_lra']}. "
+                f"Input file had loudness range of {self.loudness_statistics['ebu_pass1']['input_lra']}. "
                 f"This is larger than the loudness range target ({self.media_file.ffmpeg_normalize.loudness_range_target}). "
                 "Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
                 "Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
@@ -443,7 +508,7 @@ def get_second_pass_opts_ebu(self) -> str:
                 "Specify -ar/--sample-rate to override it."
             )
 
-        stats = self.loudness_statistics["ebu"]
+        stats = self.loudness_statistics["ebu_pass1"]
 
         opts = {
             "i": self.media_file.ffmpeg_normalize.target_level,
diff --git a/test/test.py b/test/test.py
index e35f50c..11dec8e 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,5 +1,6 @@
 import json
 import os
+import shlex
 import shutil
 import subprocess
 import sys
@@ -14,6 +15,7 @@ def ffmpeg_normalize_call(args: List[str]) -> Tuple[str, str]:
     cmd = [sys.executable, "-m", "ffmpeg_normalize"]
     cmd.extend(args)
 
+    print(shlex.join(cmd))
     try:
         p = subprocess.Popen(
             cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
@@ -28,6 +30,9 @@ def ffmpeg_normalize_call(args: List[str]) -> Tuple[str, str]:
 def _get_stats(
     input_file: str, normalization_type: Literal["ebu", "rms", "peak"] = "ebu"
 ) -> Dict:
+    """
+    Get the statistics from an existing output file without converting it.
+    """
     stdout, _ = ffmpeg_normalize_call(
         [input_file, "-f", "-n", "--print-stats", "-nt", normalization_type]
     )
@@ -192,7 +197,8 @@ def test_peak(self):
                     "input_file": "normalized/test.mkv",
                     "output_file": "normalized/test.mkv",
                     "stream_id": 1,
-                    "ebu": None,
+                    "ebu_pass1": None,
+                    "ebu_pass2": None,
                     "mean": -14.8,
                     "max": -0.0,
                 },
@@ -200,7 +206,8 @@ def test_peak(self):
                     "input_file": "normalized/test.mkv",
                     "output_file": "normalized/test.mkv",
                     "stream_id": 2,
-                    "ebu": None,
+                    "ebu_pass1": None,
+                    "ebu_pass2": None,
                     "mean": -19.3,
                     "max": -0.0,
                 },
@@ -217,7 +224,8 @@ def test_rms(self):
                     "input_file": "normalized/test.mkv",
                     "output_file": "normalized/test.mkv",
                     "stream_id": 1,
-                    "ebu": None,
+                    "ebu_pass1": None,
+                    "ebu_pass2": None,
                     "mean": -15.0,
                     "max": -0.2,
                 },
@@ -225,7 +233,8 @@ def test_rms(self):
                     "input_file": "normalized/test.mkv",
                     "output_file": "normalized/test.mkv",
                     "stream_id": 2,
-                    "ebu": None,
+                    "ebu_pass1": None,
+                    "ebu_pass2": None,
                     "mean": -15.1,
                     "max": 0.0,
                 },
@@ -242,7 +251,7 @@ def test_ebu(self):
                     "input_file": "normalized/test.mkv",
                     "output_file": "normalized/test.mkv",
                     "stream_id": 1,
-                    "ebu": {
+                    "ebu_pass1": {
                         "input_i": -23.00,
                         "input_tp": -10.32,
                         "input_lra": 2.40,
@@ -254,6 +263,7 @@ def test_ebu(self):
                         "normalization_type": "dynamic",
                         "target_offset": -0.97,
                     },
+                    "ebu_pass2": None,
                     "mean": None,
                     "max": None,
                 },
@@ -261,7 +271,7 @@ def test_ebu(self):
                     "input_file": "normalized/test.mkv",
                     "output_file": "normalized/test.mkv",
                     "stream_id": 2,
-                    "ebu": {
+                    "ebu_pass1": {
                         "input_i": -22.98,
                         "input_tp": -10.72,
                         "input_lra": 2.10,
@@ -273,6 +283,7 @@ def test_ebu(self):
                         "normalization_type": "dynamic",
                         "target_offset": -0.84,
                     },
+                    "ebu_pass2": None,
                     "mean": None,
                     "max": None,
                 },
@@ -388,7 +399,7 @@ def test_pre_filters(self):
                     "input_file": "normalized/test2.wav",
                     "output_file": "normalized/test2.mkv",
                     "stream_id": 0,
-                    "ebu": {
+                    "ebu_pass1": {
                         "input_i": -23.01,
                         "input_tp": -10.75,
                         "input_lra": 2.20,
@@ -400,6 +411,7 @@ def test_pre_filters(self):
                         "normalization_type": "dynamic",
                         "target_offset": -0.84,
                     },
+                    "ebu_pass2": None,
                     "mean": None,
                     "max": None,
                 }
@@ -424,7 +436,7 @@ def test_post_filters(self):
                     "input_file": "normalized/test2.wav",
                     "output_file": "normalized/test2.mkv",
                     "stream_id": 0,
-                    "ebu": {
+                    "ebu_pass1": {
                         "input_i": -35.02,
                         "input_tp": -22.76,
                         "input_lra": 2.20,
@@ -436,6 +448,7 @@ def test_post_filters(self):
                         "normalization_type": "dynamic",
                         "target_offset": -0.84,
                     },
+                    "ebu_pass2": None,
                     "mean": None,
                     "max": None,
                 }