slhck · ahmetsait · Nov 18, 2024 · Nov 19, 2024 · Nov 19, 2024 · Nov 19, 2024
diff --git a/.editorconfig b/.editorconfig
@@ -3,8 +3,11 @@ root = true
 
 [*]
 indent_style = space
-indent_size = 2
+indent_size = 4
 end_of_line = lf
 charset = utf-8
 trim_trailing_whitespace = false
 insert_final_newline = false
+
+[*.{yml,yaml}]
+indent_size = 2
diff --git a/ffmpeg_normalize/__main__.py b/ffmpeg_normalize/__main__.py
@@ -235,6 +235,20 @@ def create_parser() -> argparse.ArgumentParser:
         default=0.0,
     )
 
+    group_ebu.add_argument(
+        "--lower-only",
+        action="store_true",
+        help=textwrap.dedent(
+            """\
+        Whether the audio should not increase in loudness.
+
+        If the measured loudness from the first pass is lower than the target
+        loudness then normalization pass will be skipped for the measured audio
+        source.
+        """
+        ),
+    )
+
     group_ebu.add_argument(
         "--dual-mono",
         action="store_true",
@@ -514,6 +528,7 @@ def _split_options(opts: str) -> list[str]:
         keep_lra_above_loudness_range_target=cli_args.keep_lra_above_loudness_range_target,
         true_peak=cli_args.true_peak,
         offset=cli_args.offset,
+        lower_only=cli_args.lower_only,
         dual_mono=cli_args.dual_mono,
         dynamic=cli_args.dynamic,
         audio_codec=cli_args.audio_codec,

diff --git a/ffmpeg_normalize/_ffmpeg_normalize.py b/ffmpeg_normalize/_ffmpeg_normalize.py
@@ -3,6 +3,8 @@
 import json
 import logging
 import os
+import sys
+from itertools import chain
 from typing import TYPE_CHECKING, Literal
 
 from tqdm import tqdm
@@ -58,6 +60,7 @@ class FFmpegNormalize:
         keep_lra_above_loudness_range_target (bool, optional): Keep input loudness range above loudness range target. Defaults to False.
         true_peak (float, optional): True peak. Defaults to -2.0.
         offset (float, optional): Offset. Defaults to 0.0.
+        lower_only (bool, optional): Whether the audio should not increase in loudness. Defaults to False.
         dual_mono (bool, optional): Dual mono. Defaults to False.
         dynamic (bool, optional): Dynamic. Defaults to False.
         audio_codec (str, optional): Audio codec. Defaults to "pcm_s16le".
@@ -94,6 +97,7 @@ def __init__(
         keep_lra_above_loudness_range_target: bool = False,
         true_peak: float = -2.0,
         offset: float = 0.0,
+        lower_only: bool = False,
         dual_mono: bool = False,
         dynamic: bool = False,
         audio_codec: str = "pcm_s16le",
@@ -164,6 +168,7 @@ def __init__(
 
         self.true_peak = check_range(true_peak, -9, 0, name="true_peak")
         self.offset = check_range(offset, -99, 99, name="offset")
+        self.lower_only = lower_only
 
         # Ensure library user is passing correct types
         assert isinstance(dual_mono, bool), "dual_mono must be bool"
@@ -254,5 +259,6 @@ def run_normalization(self) -> None:
 
             _logger.info(f"Normalized file written to {media_file.output_file}")
 
-        if self.print_stats and self.stats:
-            print(json.dumps(self.stats, indent=4))
+        if self.print_stats:
+            json.dump(list(chain.from_iterable(media_file.get_stats() for media_file in self.media_files)), sys.stdout, indent=4)
+            print()
diff --git a/ffmpeg_normalize/_media_file.py b/ffmpeg_normalize/_media_file.py
@@ -6,13 +6,13 @@
 import shlex
 from shutil import move, rmtree
 from tempfile import mkdtemp
-from typing import TYPE_CHECKING, Iterator, Literal, TypedDict
+from typing import TYPE_CHECKING, Iterable, Iterator, Literal, TypedDict
 
 from tqdm import tqdm
 
 from ._cmd_utils import DUR_REGEX, NUL, CommandRunner
 from ._errors import FFmpegNormalizeError
-from ._streams import AudioStream, SubtitleStream, VideoStream
+from ._streams import AudioStream, SubtitleStream, VideoStream, LoudnessStatisticsWithMetadata
 
 if TYPE_CHECKING:
     from ffmpeg_normalize import FFmpegNormalize
@@ -240,11 +240,6 @@ def _first_pass(self) -> None:
                 for _ in fun():
                     pass
 
-        # set initial stats (for dry-runs, this is the only thing we need to do)
-        self.ffmpeg_normalize.stats = [
-            audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
-        ]
-
     def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
         """
         Return the audio filter command and output labels needed.
@@ -256,10 +251,35 @@ def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
         output_labels = []
 
         for audio_stream in self.streams["audio"].values():
-            if self.ffmpeg_normalize.normalization_type == "ebu":
-                normalization_filter = audio_stream.get_second_pass_opts_ebu()
+            skip_normalization = False
+            if self.ffmpeg_normalize.lower_only:
+                if self.ffmpeg_normalize.normalization_type == "ebu":
+                    if (
+                        audio_stream.loudness_statistics["ebu_pass1"] is not None and
+                        audio_stream.loudness_statistics["ebu_pass1"]["input_i"] < self.ffmpeg_normalize.target_level
+                    ):
+                        skip_normalization = True
+                elif self.ffmpeg_normalize.normalization_type == "peak":
+                    if (
+                        audio_stream.loudness_statistics["max"] is not None and
+                        audio_stream.loudness_statistics["max"] < self.ffmpeg_normalize.target_level
+                    ):
+                        skip_normalization = True
+                elif self.ffmpeg_normalize.normalization_type == "rms":
+                    if (
+                        audio_stream.loudness_statistics["mean"] is not None and
+                        audio_stream.loudness_statistics["mean"] < self.ffmpeg_normalize.target_level
+                    ):
+                        skip_normalization = True
+
+            if skip_normalization:
+                _logger.info(f"Stream {audio_stream.stream_id} had measured input loudness lower than target, skipping normalization.")
+                normalization_filter = "acopy"
             else:
-                normalization_filter = audio_stream.get_second_pass_opts_peakrms()
+                if self.ffmpeg_normalize.normalization_type == "ebu":
+                    normalization_filter = audio_stream.get_second_pass_opts_ebu()
+                else:
+                    normalization_filter = audio_stream.get_second_pass_opts_peakrms()
 
             input_label = f"[0:{audio_stream.stream_id}]"
             output_label = f"[norm{audio_stream.stream_id}]"
@@ -421,16 +441,10 @@ def _second_pass(self) -> Iterator[float]:
         # in the second pass, we do not normalize stream-by-stream, so we set the stats based on the
         # overall output (which includes multiple loudnorm stats)
         if self.ffmpeg_normalize.normalization_type == "ebu":
-            all_stats = AudioStream.prune_and_parse_loudnorm_output(
-                output, num_stats=len(self.streams["audio"])
-            )
-            for idx, audio_stream in enumerate(self.streams["audio"].values()):
-                audio_stream.set_second_pass_stats(all_stats[idx])
-
-        # collect all stats for the final report, again (overwrite the input)
-        self.ffmpeg_normalize.stats = [
-            audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
-        ]
+            all_stats = AudioStream.prune_and_parse_loudnorm_output(output)
+            for stream_id, audio_stream in self.streams["audio"].items():
+                if stream_id in all_stats:
+                    audio_stream.set_second_pass_stats(all_stats[stream_id])
 
         # warn if self.media_file.ffmpeg_normalize.dynamic == False and any of the second pass stats contain "normalization_type" == "dynamic"
         if self.ffmpeg_normalize.dynamic is False:
@@ -446,3 +460,6 @@ def _second_pass(self) -> Iterator[float]:
                     )
 
         _logger.debug("Normalization finished")
+
+    def get_stats(self) -> Iterable[LoudnessStatisticsWithMetadata]:
+        return (audio_stream.get_stats() for audio_stream in self.streams["audio"].values())
diff --git a/ffmpeg_normalize/_streams.py b/ffmpeg_normalize/_streams.py
@@ -15,6 +15,7 @@
 
 _logger = logging.getLogger(__name__)
 
+_loudnorm_pattern = re.compile(r"\[Parsed_loudnorm_(\d+)")
 
 class EbuLoudnessStatistics(TypedDict):
     input_i: float
@@ -320,123 +321,95 @@ def parse_loudnorm_stats(self) -> Iterator[float]:
             f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
         )
 
-        self.loudness_statistics["ebu_pass1"] = (
-            AudioStream.prune_and_parse_loudnorm_output(
-                output, num_stats=1
-            )[0]  # only one stream
-        )
+        # only one stream
+        self.loudness_statistics["ebu_pass1"] = next(iter(AudioStream.prune_and_parse_loudnorm_output(output).values()))
 
     @staticmethod
     def prune_and_parse_loudnorm_output(
-        output: str, num_stats: int = 1
-    ) -> List[EbuLoudnessStatistics]:
+        output: str
+    ) -> dict[int, EbuLoudnessStatistics]:
         """
         Prune ffmpeg progress lines from output and parse the loudnorm filter output.
         There may be multiple outputs if multiple streams were processed.
 
         Args:
             output (str): The output from ffmpeg.
-            num_stats (int): The number of loudnorm statistics to parse.
 
         Returns:
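-            list: The EBU loudness statistics.
+            dict[int, EbuLoudnessStatistics]: The EBU loudness statistics, keyed by the index parsed from each "[Parsed_loudnorm_<n>" line.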
         """
         pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
         output_lines = [line.strip() for line in pruned_output.split("\n")]
-
-        ret = []
-        idx = 0
-        while True:
-            _logger.debug(f"Parsing loudnorm stats for stream {idx}")
-            loudnorm_stats = AudioStream._parse_loudnorm_output(
-                output_lines, stream_index=idx
-            )
-            idx += 1
-
-            if loudnorm_stats is None:
-                continue
-            ret.append(loudnorm_stats)
-
-            if len(ret) >= num_stats:
-                break
-
-        return ret
+        return AudioStream._parse_loudnorm_output(output_lines)
 
     @staticmethod
     def _parse_loudnorm_output(
-        output_lines: list[str], stream_index: Optional[int] = None
-    ) -> Optional[EbuLoudnessStatistics]:
+        output_lines: list[str]
+    ) -> dict[int, EbuLoudnessStatistics]:
         """
         Parse the output of a loudnorm filter to get the EBU loudness statistics.
 
         Args:
             output_lines (list[str]): The output lines of the loudnorm filter.
-            stream_index (int): The stream index, optional to filter out the correct stream. If unset, the first stream is used.
 
         Raises:
             FFmpegNormalizeError: When the output could not be parsed.
 
         Returns:
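-            EbuLoudnessStatistics: The EBU loudness statistics, if found.
+            dict[int, EbuLoudnessStatistics]: One statistics entry per loudnorm block found, keyed by its parsed index; empty if none was found.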
         """
+        result = dict[int, EbuLoudnessStatistics]()
+        stream_index = -1
         loudnorm_start = 0
-        loudnorm_end = 0
         for index, line in enumerate(output_lines):
-            if line.startswith(f"[Parsed_loudnorm_{stream_index}"):
-                loudnorm_start = index + 1
-                continue
-            if loudnorm_start and line.startswith("}"):
-                loudnorm_end = index + 1
-                break
-
-        if not (loudnorm_start and loudnorm_end):
-            if stream_index is not None:
-                # not an error
-                return None
-
-            raise FFmpegNormalizeError(
-                "Could not parse loudnorm stats; no loudnorm-related output found"
-            )
-
-        try:
-            loudnorm_stats = json.loads(
-                "\n".join(output_lines[loudnorm_start:loudnorm_end])
-            )
-
-            _logger.debug(
-                f"Loudnorm stats for stream {stream_index} parsed: {json.dumps(loudnorm_stats)}"
-            )
-
-            for key in [
-                "input_i",
-                "input_tp",
-                "input_lra",
-                "input_thresh",
-                "output_i",
-                "output_tp",
-                "output_lra",
-                "output_thresh",
-                "target_offset",
-                "normalization_type",
-            ]:
-                if key not in loudnorm_stats:
-                    continue
-                if key == "normalization_type":
-                    loudnorm_stats[key] = loudnorm_stats[key].lower()
-                # handle infinite values
-                elif float(loudnorm_stats[key]) == -float("inf"):
-                    loudnorm_stats[key] = -99
-                elif float(loudnorm_stats[key]) == float("inf"):
-                    loudnorm_stats[key] = 0
-                else:
-                    # convert to floats
-                    loudnorm_stats[key] = float(loudnorm_stats[key])
-
-            return cast(EbuLoudnessStatistics, loudnorm_stats)
-        except Exception as e:
-            raise FFmpegNormalizeError(
-                f"Could not parse loudnorm stats; wrong JSON format in string: {e}"
-            )
+            if stream_index < 0:
+                if m := _loudnorm_pattern.match(line):
+                    loudnorm_start = index + 1
+                    stream_index = int(m.group(1))
+            else:
+                if line.startswith("}"):
+                    loudnorm_end = index + 1
+                    loudnorm_data = "\n".join(output_lines[loudnorm_start:loudnorm_end])
+
+                    try:
+                        loudnorm_stats = json.loads(loudnorm_data)
+
+                        _logger.debug(
+                            f"Loudnorm stats for stream {stream_index} parsed: {loudnorm_data}"
+                        )
+
+                        for key in [
+                            "input_i",
+                            "input_tp",
+                            "input_lra",
+                            "input_thresh",
+                            "output_i",
+                            "output_tp",
+                            "output_lra",
+                            "output_thresh",
+                            "target_offset",
+                            "normalization_type",
+                        ]:
+                            if key not in loudnorm_stats:
+                                continue
+                            if key == "normalization_type":
+                                loudnorm_stats[key] = loudnorm_stats[key].lower()
+                            # handle infinite values
+                            elif float(loudnorm_stats[key]) == -float("inf"):
+                                loudnorm_stats[key] = -99
+                            elif float(loudnorm_stats[key]) == float("inf"):
+                                loudnorm_stats[key] = 0
+                            else:
+                                # convert to floats
+                                loudnorm_stats[key] = float(loudnorm_stats[key])
+
+                        result[stream_index] = cast(EbuLoudnessStatistics, loudnorm_stats)
+                        stream_index = -1
+                    except Exception as e:
+                        raise FFmpegNormalizeError(
+                            f"Could not parse loudnorm stats; wrong JSON format in string: {e}"
+                        )
+        return result
 
     def get_second_pass_opts_ebu(self) -> str:
         """
@@ -515,19 +488,19 @@ def get_second_pass_opts_ebu(self) -> str:
             "lra": self.media_file.ffmpeg_normalize.loudness_range_target,
             "tp": self.media_file.ffmpeg_normalize.true_peak,
             "offset": self._constrain(
-                float(stats["target_offset"]), -99, 99, name="target_offset"
+                stats["target_offset"], -99, 99, name="target_offset"
             ),
             "measured_i": self._constrain(
-                float(stats["input_i"]), -99, 0, name="input_i"
+                stats["input_i"], -99, 0, name="input_i"
             ),
             "measured_lra": self._constrain(
-                float(stats["input_lra"]), 0, 99, name="input_lra"
+                stats["input_lra"], 0, 99, name="input_lra"
             ),
             "measured_tp": self._constrain(
-                float(stats["input_tp"]), -99, 99, name="input_tp"
+                stats["input_tp"], -99, 99, name="input_tp"
             ),
             "measured_thresh": self._constrain(
-                float(stats["input_thresh"]), -99, 0, name="input_thresh"
+                stats["input_thresh"], -99, 0, name="input_thresh"
             ),
             "linear": "false" if self.media_file.ffmpeg_normalize.dynamic else "true",
             "print_format": "json",