Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

warn if dynamic mode is used but linear specified #256

Merged
merged 1 commit into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Read on for more info.
- [Requirements](#requirements)
- [ffmpeg](#ffmpeg)
- [Installation](#installation)
- [Docker Build](#docker-build)
- [Usage](#usage)
- [Description](#description)
- [Examples](#examples)
Expand Down Expand Up @@ -237,7 +238,7 @@ Some containers (like MP4) also cannot handle PCM audio. If you want to use such

Otherwise, the range is -99 to 0.

- `-p, --print-stats`: Print first pass loudness statistics formatted as JSON to stdout.
- `-p, --print-stats`: Print loudness statistics for both passes formatted as JSON to stdout.

### EBU R128 Normalization

Expand Down
6 changes: 2 additions & 4 deletions ffmpeg_normalize/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ def create_parser() -> argparse.ArgumentParser:
description=textwrap.dedent(
"""\
ffmpeg-normalize v{} -- command line tool for normalizing audio files
""".format(
__version__
)
""".format(__version__)
),
# usage="%(prog)s INPUT [INPUT ...] [-o OUTPUT [OUTPUT ...]] [options]",
formatter_class=argparse.RawTextHelpFormatter,
Expand Down Expand Up @@ -157,7 +155,7 @@ def create_parser() -> argparse.ArgumentParser:
"-p",
"--print-stats",
action="store_true",
help="Print first pass loudness statistics formatted as JSON to stdout",
help="Print loudness statistics for both passes formatted as JSON to stdout.",
)

# group_normalization.add_argument(
Expand Down
46 changes: 37 additions & 9 deletions ffmpeg_normalize/_media_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,12 +232,10 @@ def _first_pass(self) -> None:
for _ in fun():
pass

if self.ffmpeg_normalize.print_stats:
stats = [
audio_stream.get_stats()
for audio_stream in self.streams["audio"].values()
]
self.ffmpeg_normalize.stats.extend(stats)
# set initial stats (for dry-runs, this is the only thing we need to do)
self.ffmpeg_normalize.stats = [
audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
]

def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
"""
Expand Down Expand Up @@ -390,12 +388,14 @@ def _second_pass(self) -> Iterator[float]:
temp_file = os.path.join(temp_dir, f"out.{self.output_ext}")
cmd.append(temp_file)

cmd_runner = CommandRunner()
try:
try:
yield from CommandRunner().run_ffmpeg_command(cmd)
yield from cmd_runner.run_ffmpeg_command(cmd)
except Exception as e:
cmd_str = " ".join([shlex.quote(c) for c in cmd])
_logger.error(f"Error while running command {cmd_str}! Error: {e}")
_logger.error(
f"Error while running command {shlex.join(cmd)}! Error: {e}"
)
raise e
else:
_logger.debug(
Expand All @@ -407,4 +407,32 @@ def _second_pass(self) -> Iterator[float]:
rmtree(temp_dir, ignore_errors=True)
raise e

output = cmd_runner.get_output()
# in the second pass, we do not normalize stream-by-stream, so we set the stats based on the
# overall output (which includes multiple loudnorm stats)
if self.ffmpeg_normalize.normalization_type == "ebu":
all_stats = AudioStream.prune_and_parse_loudnorm_output(
output, num_stats=len(self.streams["audio"])
)
for idx, audio_stream in self.streams["audio"].items():
audio_stream.set_second_pass_stats(all_stats[idx])

# collect all stats for the final report, again (overwrite the input)
self.ffmpeg_normalize.stats = [
audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
]

# warn if linear normalization was requested (self.ffmpeg_normalize.dynamic is False) but any stream's second-pass stats report "normalization_type" == "dynamic"
if self.ffmpeg_normalize.dynamic is False:
for audio_stream in self.streams["audio"].values():
pass2_stats = audio_stream.get_stats()["ebu_pass2"]
if pass2_stats is None:
continue
if pass2_stats["normalization_type"] == "dynamic":
_logger.warning(
"You specified linear normalization, but the loudnorm filter reverted to dynamic normalization. "
"This may lead to unexpected results."
"Consider your input settings, e.g. choose a lower target level or higher target loudness range."
)

_logger.debug("Normalization finished")
113 changes: 89 additions & 24 deletions ffmpeg_normalize/_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging
import os
import re
from typing import TYPE_CHECKING, Iterator, Literal, TypedDict, cast
from typing import TYPE_CHECKING, Iterator, List, Literal, Optional, TypedDict, cast

from ._cmd_utils import NUL, CommandRunner, dict_to_filter_opts
from ._errors import FFmpegNormalizeError
Expand All @@ -26,10 +26,12 @@ class EbuLoudnessStatistics(TypedDict):
output_lra: float
output_thresh: float
target_offset: float
normalization_type: str


class LoudnessStatistics(TypedDict):
ebu: EbuLoudnessStatistics | None
ebu_pass1: EbuLoudnessStatistics | None
ebu_pass2: EbuLoudnessStatistics | None
mean: float | None
max: float | None

Expand Down Expand Up @@ -107,7 +109,8 @@ def __init__(
super().__init__(ffmpeg_normalize, media_file, "audio", stream_id)

self.loudness_statistics: LoudnessStatistics = {
"ebu": None,
"ebu_pass1": None,
"ebu_pass2": None,
"mean": None,
"max": None,
}
Expand Down Expand Up @@ -156,12 +159,22 @@ def get_stats(self) -> LoudnessStatisticsWithMetadata:
"input_file": self.media_file.input_file,
"output_file": self.media_file.output_file,
"stream_id": self.stream_id,
"ebu": self.loudness_statistics["ebu"],
"ebu_pass1": self.loudness_statistics["ebu_pass1"],
"ebu_pass2": self.loudness_statistics["ebu_pass2"],
"mean": self.loudness_statistics["mean"],
"max": self.loudness_statistics["max"],
}
return stats

def set_second_pass_stats(self, stats: EbuLoudnessStatistics):
    """
    Store the loudnorm statistics measured during the second pass.

    The value is written to the "ebu_pass2" entry of this stream's
    loudness statistics, replacing whatever was stored there before.

    Args:
        stats (EbuLoudnessStatistics): The parsed second-pass EBU statistics.
    """
    second_pass_entry = {"ebu_pass2": stats}
    self.loudness_statistics.update(second_pass_entry)

def get_pcm_codec(self) -> str:
"""
Get the PCM codec string for the stream.
Expand Down Expand Up @@ -288,6 +301,8 @@ def parse_loudnorm_stats(self) -> Iterator[float]:
"-y",
"-i",
self.media_file.input_file,
"-map",
f"0:{self.stream_id}",
"-filter_complex",
filter_str,
"-vn",
Expand All @@ -305,37 +320,80 @@ def parse_loudnorm_stats(self) -> Iterator[float]:
f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
)

output_lines = [line.strip() for line in output.split("\n")]

self.loudness_statistics["ebu"] = AudioStream._parse_loudnorm_output(
output_lines
self.loudness_statistics["ebu_pass1"] = (
AudioStream.prune_and_parse_loudnorm_output(
output, num_stats=1
)[0] # only one stream
)

@staticmethod
def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
def prune_and_parse_loudnorm_output(
    output: str, num_stats: int = 1
) -> List[EbuLoudnessStatistics]:
    """
    Prune ffmpeg progress lines from output and parse the loudnorm filter output.
    There may be multiple outputs if multiple streams were processed.

    Only loudnorm filter indices that actually appear in the output (lines
    starting with "[Parsed_loudnorm_<N>") are probed, in ascending order.
    Parsing stops as soon as ``num_stats`` results have been collected.

    Args:
        output (str): The output from ffmpeg.
        num_stats (int): The number of loudnorm statistics to parse. If this
            is zero or negative, an empty list is returned.

    Raises:
        FFmpegNormalizeError: When fewer than ``num_stats`` loudnorm
            statistics could be found in the output.

    Returns:
        list: The EBU loudness statistics, ordered by loudnorm filter index.
    """
    pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
    output_lines = [line.strip() for line in pruned_output.split("\n")]

    # Collect the filter indices that really occur in the output. Counting up
    # blindly from zero never terminates when the output contains fewer
    # loudnorm blocks than requested (e.g. ffmpeg printed no stats at all).
    candidate_indices = sorted(
        {
            int(match.group(1))
            for match in (
                re.match(r"\[Parsed_loudnorm_(\d+)", line) for line in output_lines
            )
            if match
        }
    )

    ret: List[EbuLoudnessStatistics] = []
    for idx in candidate_indices:
        if len(ret) >= num_stats:
            break

        _logger.debug(f"Parsing loudnorm stats for stream {idx}")
        loudnorm_stats = AudioStream._parse_loudnorm_output(
            output_lines, stream_index=idx
        )
        # The parser returns None when no complete block exists for this index.
        if loudnorm_stats is not None:
            ret.append(loudnorm_stats)

    if len(ret) < num_stats:
        raise FFmpegNormalizeError(
            f"Could not parse loudnorm stats; expected {num_stats} result(s) "
            f"but found {len(ret)}"
        )

    return ret

@staticmethod
def _parse_loudnorm_output(
output_lines: list[str], stream_index: Optional[int] = None
) -> Optional[EbuLoudnessStatistics]:
"""
Parse the output of a loudnorm filter to get the EBU loudness statistics.

Args:
output_lines (list[str]): The output lines of the loudnorm filter.
stream_index (int): The stream index, optional to filter out the correct stream. If unset, the first stream is used.

Raises:
FFmpegNormalizeError: When the output could not be parsed.

Returns:
EbuLoudnessStatistics: The EBU loudness statistics.
EbuLoudnessStatistics: The EBU loudness statistics, if found.
"""
loudnorm_start = 0
loudnorm_end = 0
for index, line in enumerate(output_lines):
if line.startswith("[Parsed_loudnorm"):
if line.startswith(f"[Parsed_loudnorm_{stream_index}"):
loudnorm_start = index + 1
continue
if loudnorm_start and line.startswith("}"):
loudnorm_end = index + 1
break

if not (loudnorm_start and loudnorm_end):
if stream_index is not None:
# not an error
return None

raise FFmpegNormalizeError(
"Could not parse loudnorm stats; no loudnorm-related output found"
)
Expand All @@ -345,7 +403,9 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
"\n".join(output_lines[loudnorm_start:loudnorm_end])
)

_logger.debug(f"Loudnorm stats parsed: {json.dumps(loudnorm_stats)}")
_logger.debug(
f"Loudnorm stats for stream {stream_index} parsed: {json.dumps(loudnorm_stats)}"
)

for key in [
"input_i",
Expand All @@ -357,9 +417,14 @@ def _parse_loudnorm_output(output_lines: list[str]) -> EbuLoudnessStatistics:
"output_lra",
"output_thresh",
"target_offset",
"normalization_type",
]:
if key not in loudnorm_stats:
continue
if key == "normalization_type":
loudnorm_stats[key] = loudnorm_stats[key].lower()
# handle infinite values
if float(loudnorm_stats[key]) == -float("inf"):
elif float(loudnorm_stats[key]) == -float("inf"):
loudnorm_stats[key] = -99
elif float(loudnorm_stats[key]) == float("inf"):
loudnorm_stats[key] = 0
Expand All @@ -378,58 +443,58 @@ def get_second_pass_opts_ebu(self) -> str:
Return second pass loudnorm filter options string for ffmpeg
"""

if not self.loudness_statistics["ebu"]:
if not self.loudness_statistics["ebu_pass1"]:
raise FFmpegNormalizeError(
"First pass not run, you must call parse_loudnorm_stats first"
)

if float(self.loudness_statistics["ebu"]["input_i"]) > 0:
if float(self.loudness_statistics["ebu_pass1"]["input_i"]) > 0:
_logger.warning(
"Input file had measured input loudness greater than zero "
f"({self.loudness_statistics['ebu']['input_i']}), capping at 0"
f"({self.loudness_statistics['ebu_pass1']['input_i']}), capping at 0"
)
self.loudness_statistics["ebu"]["input_i"] = 0
self.loudness_statistics["ebu_pass1"]["input_i"] = 0

will_use_dynamic_mode = self.media_file.ffmpeg_normalize.dynamic

if self.media_file.ffmpeg_normalize.keep_loudness_range_target:
_logger.debug(
"Keeping target loudness range in second pass loudnorm filter"
)
input_lra = self.loudness_statistics["ebu"]["input_lra"]
input_lra = self.loudness_statistics["ebu_pass1"]["input_lra"]
if input_lra < 1 or input_lra > 50:
_logger.warning(
"Input file had measured loudness range outside of [1,50] "
f"({input_lra}), capping to allowed range"
)

self.media_file.ffmpeg_normalize.loudness_range_target = self._constrain(
self.loudness_statistics["ebu"]["input_lra"], 1, 50
self.loudness_statistics["ebu_pass1"]["input_lra"], 1, 50
)

if self.media_file.ffmpeg_normalize.keep_lra_above_loudness_range_target:
if (
self.loudness_statistics["ebu"]["input_lra"]
self.loudness_statistics["ebu_pass1"]["input_lra"]
<= self.media_file.ffmpeg_normalize.loudness_range_target
):
_logger.debug(
"Setting loudness range target in second pass loudnorm filter"
)
else:
self.media_file.ffmpeg_normalize.loudness_range_target = (
self.loudness_statistics["ebu"]["input_lra"]
self.loudness_statistics["ebu_pass1"]["input_lra"]
)
_logger.debug(
"Keeping target loudness range in second pass loudnorm filter"
)

if (
self.media_file.ffmpeg_normalize.loudness_range_target
< self.loudness_statistics["ebu"]["input_lra"]
< self.loudness_statistics["ebu_pass1"]["input_lra"]
and not will_use_dynamic_mode
):
_logger.warning(
f"Input file had loudness range of {self.loudness_statistics['ebu']['input_lra']}. "
f"Input file had loudness range of {self.loudness_statistics['ebu_pass1']['input_lra']}. "
f"This is larger than the loudness range target ({self.media_file.ffmpeg_normalize.loudness_range_target}). "
"Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
"Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
Expand All @@ -443,7 +508,7 @@ def get_second_pass_opts_ebu(self) -> str:
"Specify -ar/--sample-rate to override it."
)

stats = self.loudness_statistics["ebu"]
stats = self.loudness_statistics["ebu_pass1"]

opts = {
"i": self.media_file.ffmpeg_normalize.target_level,
Expand Down
Loading
Loading