Skip to content

Commit

Permalink
warn if dynamic mode is used but linear specified (#256)
Browse files Browse the repository at this point in the history
See: #254
  • Loading branch information
slhck authored May 13, 2024
1 parent d293324 commit fe96734
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 46 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Read on for more info.
- [Requirements](#requirements)
- [ffmpeg](#ffmpeg)
- [Installation](#installation)
- [Docker Build](#docker-build)
- [Usage](#usage)
- [Description](#description)
- [Examples](#examples)
Expand Down Expand Up @@ -237,7 +238,7 @@ Some containers (like MP4) also cannot handle PCM audio. If you want to use such
Otherwise, the range is -99 to 0.
- `-p, --print-stats`: Print first pass loudness statistics formatted as JSON to stdout.
- `-p, --print-stats`: Print loudness statistics for both passes formatted as JSON to stdout.
### EBU R128 Normalization
Expand Down
6 changes: 2 additions & 4 deletions ffmpeg_normalize/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ def create_parser() -> argparse.ArgumentParser:
description=textwrap.dedent(
"""\
ffmpeg-normalize v{} -- command line tool for normalizing audio files
""".format(
__version__
)
""".format(__version__)
),
# usage="%(prog)s INPUT [INPUT ...] [-o OUTPUT [OUTPUT ...]] [options]",
formatter_class=argparse.RawTextHelpFormatter,
Expand Down Expand Up @@ -157,7 +155,7 @@ def create_parser() -> argparse.ArgumentParser:
"-p",
"--print-stats",
action="store_true",
help="Print first pass loudness statistics formatted as JSON to stdout",
help="Print loudness statistics for both passes formatted as JSON to stdout.",
)

# group_normalization.add_argument(
Expand Down
46 changes: 37 additions & 9 deletions ffmpeg_normalize/_media_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,12 +232,10 @@ def _first_pass(self) -> None:
for _ in fun():
pass

if self.ffmpeg_normalize.print_stats:
stats = [
audio_stream.get_stats()
for audio_stream in self.streams["audio"].values()
]
self.ffmpeg_normalize.stats.extend(stats)
# set initial stats (for dry-runs, this is the only thing we need to do)
self.ffmpeg_normalize.stats = [
audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
]

def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
"""
Expand Down Expand Up @@ -390,12 +388,14 @@ def _second_pass(self) -> Iterator[float]:
temp_file = os.path.join(temp_dir, f"out.{self.output_ext}")
cmd.append(temp_file)

cmd_runner = CommandRunner()
try:
try:
yield from CommandRunner().run_ffmpeg_command(cmd)
yield from cmd_runner.run_ffmpeg_command(cmd)
except Exception as e:
cmd_str = " ".join([shlex.quote(c) for c in cmd])
_logger.error(f"Error while running command {cmd_str}! Error: {e}")
_logger.error(
f"Error while running command {shlex.join(cmd)}! Error: {e}"
)
raise e
else:
_logger.debug(
Expand All @@ -407,4 +407,32 @@ def _second_pass(self) -> Iterator[float]:
rmtree(temp_dir, ignore_errors=True)
raise e

output = cmd_runner.get_output()
# in the second pass, we do not normalize stream-by-stream, so we set the stats based on the
# overall output (which includes multiple loudnorm stats)
if self.ffmpeg_normalize.normalization_type == "ebu":
all_stats = AudioStream.prune_and_parse_loudnorm_output(
output, num_stats=len(self.streams["audio"])
)
for idx, audio_stream in self.streams["audio"].items():
audio_stream.set_second_pass_stats(all_stats[idx])

# collect all stats for the final report, again (overwrite the input)
self.ffmpeg_normalize.stats = [
audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
]

# warn if linear normalization was requested (self.ffmpeg_normalize.dynamic is False) but any stream's second-pass stats report "normalization_type" == "dynamic"
if self.ffmpeg_normalize.dynamic is False:
for audio_stream in self.streams["audio"].values():
pass2_stats = audio_stream.get_stats()["ebu_pass2"]
if pass2_stats is None:
continue
if pass2_stats["normalization_type"] == "dynamic":
_logger.warning(
"You specified linear normalization, but the loudnorm filter reverted to dynamic normalization. "
"This may lead to unexpected results."
"Consider your input settings, e.g. choose a lower target level or higher target loudness range."
)

_logger.debug("Normalization finished")
113 changes: 89 additions & 24 deletions ffmpeg_normalize/_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging
import os
import re
from typing import TYPE_CHECKING, Iterator, Literal, TypedDict, cast
from typing import TYPE_CHECKING, Iterator, List, Literal, Optional, TypedDict, cast

from ._cmd_utils import NUL, CommandRunner, dict_to_filter_opts
from ._errors import FFmpegNormalizeError
Expand All @@ -26,10 +26,12 @@ class EbuLoudnessStatistics(TypedDict):
output_lra: float
output_thresh: float
target_offset: float
normalization_type: str


class LoudnessStatistics(TypedDict):
ebu: EbuLoudnessStatistics | None
ebu_pass1: EbuLoudnessStatistics | None
ebu_pass2: EbuLoudnessStatistics | None
mean: float | None
max: float | None

Expand Down Expand Up @@ -107,7 +109,8 @@ def __init__(
super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)

self.loudness_statistics: LoudnessStatistics = {
"ebu": None,
"ebu_pass1": None,
"ebu_pass2": None,
"mean": None,
"max": None,
}
Expand Down Expand Up @@ -156,12 +159,22 @@ def get_stats(self) -> LoudnessStatisticsWithMetadata:
"input_file": self.media_file.input_file,
"output_file": self.media_file.output_file,
"stream_id": self.stream_id,
"ebu": self.loudness_statistics["ebu"],
"ebu_pass1": self.loudness_statistics["ebu_pass1"],
"ebu_pass2": self.loudness_statistics["ebu_pass2"],
"mean": self.loudness_statistics["mean"],
"max": self.loudness_statistics["max"],
}
return stats

def set_second_pass_stats(self, stats: EbuLoudnessStatistics):
"""
Set the EBU loudness statistics for the second pass.
Args:
stats (dict): The EBU loudness statistics.
"""
self.loudness_statistics["ebu_pass2"] = stats

def get_pcm_codec(self) -> str:
"""
Get the PCM codec string for the stream.
Expand Down Expand Up @@ -288,6 +301,8 @@ def parse_loudnorm_stats(self) -> Iterator[float]:
"-y",
"-i",
self.media_file.input_file,
"-map",
f"0:{self.stream_id}",
"-filter_complex",
filter_str,
"-vn",
Expand All @@ -305,37 +320,80 @@ def parse_loudnorm_stats(self) -> Iterator[float]:
f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
)

output_lines = [line.strip() for line in output.split("\n")]

self.loudness_statistics["ebu"] = AudioStream._parse_loudnorm_output(
output_lines
self.loudness_statistics["ebu_pass1"] = (
AudioStream.prune_and_parse_loudnorm_output(
output, num_stats=1
)[0] # only one stream
)

@staticmethod
def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
def prune_and_parse_loudnorm_output(
output: str, num_stats: int = 1
) -> List[EbuLoudnessStatistics]:
"""
Prune ffmpeg progress lines from output and parse the loudnorm filter output.
There may be multiple outputs if multiple streams were processed.
Args:
output (str): The output from ffmpeg.
num_stats (int): The number of loudnorm statistics to parse.
Returns:
list: The EBU loudness statistics.
"""
pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
output_lines = [line.strip() for line in pruned_output.split("\n")]

ret = []
idx = 0
while True:
_logger.debug(f"Parsing loudnorm stats for stream {idx}")
loudnorm_stats = AudioStream._parse_loudnorm_output(
output_lines, stream_index=idx
)
idx += 1

if loudnorm_stats is None:
continue
ret.append(loudnorm_stats)

if len(ret) >= num_stats:
break

return ret

@staticmethod
def _parse_loudnorm_output(
output_lines: list[str], stream_index: Optional[int] = None
) -> Optional[EbuLoudnessStatistics]:
"""
Parse the output of a loudnorm filter to get the EBU loudness statistics.
Args:
output_lines (list[str]): The output lines of the loudnorm filter.
stream_index (int): The stream index, optional to filter out the correct stream. If unset, the first stream is used.
Raises:
FFmpegNormalizeError: When the output could not be parsed.
Returns:
EbuLoudnessStatistics: The EBU loudness statistics.
EbuLoudnessStatistics: The EBU loudness statistics, if found.
"""
loudnorm_start = 0
loudnorm_end = 0
for index, line in enumerate(output_lines):
if line.startswith("[Parsed_loudnorm"):
if line.startswith(f"[Parsed_loudnorm_{stream_index}"):
loudnorm_start = index + 1
continue
if loudnorm_start and line.startswith("}"):
loudnorm_end = index + 1
break

if not (loudnorm_start and loudnorm_end):
if stream_index is not None:
# not an error
return None

raise FFmpegNormalizeError(
"Could not parse loudnorm stats; no loudnorm-related output found"
)
Expand All @@ -345,7 +403,9 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
"\n".join(output_lines[loudnorm_start:loudnorm_end])
)

_logger.debug(f"Loudnorm stats parsed: {json.dumps(loudnorm_stats)}")
_logger.debug(
f"Loudnorm stats for stream {stream_index} parsed: {json.dumps(loudnorm_stats)}"
)

for key in [
"input_i",
Expand All @@ -357,9 +417,14 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
"output_lra",
"output_thresh",
"target_offset",
"normalization_type",
]:
if key not in loudnorm_stats:
continue
if key == "normalization_type":
loudnorm_stats[key] = loudnorm_stats[key].lower()
# handle infinite values
if float(loudnorm_stats[key]) == -float("inf"):
elif float(loudnorm_stats[key]) == -float("inf"):
loudnorm_stats[key] = -99
elif float(loudnorm_stats[key]) == float("inf"):
loudnorm_stats[key] = 0
Expand All @@ -378,58 +443,58 @@ def get_second_pass_opts_ebu(self) -> str:
Return second pass loudnorm filter options string for ffmpeg
"""

if not self.loudness_statistics["ebu"]:
if not self.loudness_statistics["ebu_pass1"]:
raise FFmpegNormalizeError(
"First pass not run, you must call parse_loudnorm_stats first"
)

if float(self.loudness_statistics["ebu"]["input_i"]) > 0:
if float(self.loudness_statistics["ebu_pass1"]["input_i"]) > 0:
_logger.warning(
"Input file had measured input loudness greater than zero "
f"({self.loudness_statistics['ebu']['input_i']}), capping at 0"
f"({self.loudness_statistics['ebu_pass1']['input_i']}), capping at 0"
)
self.loudness_statistics["ebu"]["input_i"] = 0
self.loudness_statistics["ebu_pass1"]["input_i"] = 0

will_use_dynamic_mode = self.media_file.ffmpeg_normalize.dynamic

if self.media_file.ffmpeg_normalize.keep_loudness_range_target:
_logger.debug(
"Keeping target loudness range in second pass loudnorm filter"
)
input_lra = self.loudness_statistics["ebu"]["input_lra"]
input_lra = self.loudness_statistics["ebu_pass1"]["input_lra"]
if input_lra < 1 or input_lra > 50:
_logger.warning(
"Input file had measured loudness range outside of [1,50] "
f"({input_lra}), capping to allowed range"
)

self.media_file.ffmpeg_normalize.loudness_range_target = self._constrain(
self.loudness_statistics["ebu"]["input_lra"], 1, 50
self.loudness_statistics["ebu_pass1"]["input_lra"], 1, 50
)

if self.media_file.ffmpeg_normalize.keep_lra_above_loudness_range_target:
if (
self.loudness_statistics["ebu"]["input_lra"]
self.loudness_statistics["ebu_pass1"]["input_lra"]
<= self.media_file.ffmpeg_normalize.loudness_range_target
):
_logger.debug(
"Setting loudness range target in second pass loudnorm filter"
)
else:
self.media_file.ffmpeg_normalize.loudness_range_target = (
self.loudness_statistics["ebu"]["input_lra"]
self.loudness_statistics["ebu_pass1"]["input_lra"]
)
_logger.debug(
"Keeping target loudness range in second pass loudnorm filter"
)

if (
self.media_file.ffmpeg_normalize.loudness_range_target
< self.loudness_statistics["ebu"]["input_lra"]
< self.loudness_statistics["ebu_pass1"]["input_lra"]
and not will_use_dynamic_mode
):
_logger.warning(
f"Input file had loudness range of {self.loudness_statistics['ebu']['input_lra']}. "
f"Input file had loudness range of {self.loudness_statistics['ebu_pass1']['input_lra']}. "
f"This is larger than the loudness range target ({self.media_file.ffmpeg_normalize.loudness_range_target}). "
"Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
"Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
Expand All @@ -443,7 +508,7 @@ def get_second_pass_opts_ebu(self) -> str:
"Specify -ar/--sample-rate to override it."
)

stats = self.loudness_statistics["ebu"]
stats = self.loudness_statistics["ebu_pass1"]

opts = {
"i": self.media_file.ffmpeg_normalize.target_level,
Expand Down
Loading

0 comments on commit fe96734

Please sign in to comment.