Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement --lower-only #271

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ root = true

[*]
indent_style = space
indent_size = 2
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = false
insert_final_newline = false

[*.{yml,yaml}]
indent_size = 2
15 changes: 15 additions & 0 deletions ffmpeg_normalize/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,20 @@ def create_parser() -> argparse.ArgumentParser:
default=0.0,
)

group_ebu.add_argument(
"--lower-only",
action="store_true",
help=textwrap.dedent(
"""\
Whether the audio should not increase in loudness.

If the measured loudness from the first pass is lower than the target
loudness then normalization pass will be skipped for the measured audio
source.
"""
),
)

group_ebu.add_argument(
"--dual-mono",
action="store_true",
Expand Down Expand Up @@ -514,6 +528,7 @@ def _split_options(opts: str) -> list[str]:
keep_lra_above_loudness_range_target=cli_args.keep_lra_above_loudness_range_target,
true_peak=cli_args.true_peak,
offset=cli_args.offset,
lower_only=cli_args.lower_only,
dual_mono=cli_args.dual_mono,
dynamic=cli_args.dynamic,
audio_codec=cli_args.audio_codec,
Expand Down
10 changes: 8 additions & 2 deletions ffmpeg_normalize/_ffmpeg_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import json
import logging
import os
import sys
from itertools import chain
from typing import TYPE_CHECKING, Literal

from tqdm import tqdm
Expand Down Expand Up @@ -58,6 +60,7 @@ class FFmpegNormalize:
keep_lra_above_loudness_range_target (bool, optional): Keep input loudness range above loudness range target. Defaults to False.
true_peak (float, optional): True peak. Defaults to -2.0.
offset (float, optional): Offset. Defaults to 0.0.
lower_only (bool, optional): Whether the audio should not increase in loudness. Defaults to False.
dual_mono (bool, optional): Dual mono. Defaults to False.
dynamic (bool, optional): Dynamic. Defaults to False.
audio_codec (str, optional): Audio codec. Defaults to "pcm_s16le".
Expand Down Expand Up @@ -94,6 +97,7 @@ def __init__(
keep_lra_above_loudness_range_target: bool = False,
true_peak: float = -2.0,
offset: float = 0.0,
lower_only: bool = False,
dual_mono: bool = False,
dynamic: bool = False,
audio_codec: str = "pcm_s16le",
Expand Down Expand Up @@ -164,6 +168,7 @@ def __init__(

self.true_peak = check_range(true_peak, -9, 0, name="true_peak")
self.offset = check_range(offset, -99, 99, name="offset")
self.lower_only = lower_only

# Ensure library user is passing correct types
assert isinstance(dual_mono, bool), "dual_mono must be bool"
Expand Down Expand Up @@ -254,5 +259,6 @@ def run_normalization(self) -> None:

_logger.info(f"Normalized file written to {media_file.output_file}")

if self.print_stats and self.stats:
print(json.dumps(self.stats, indent=4))
if self.print_stats:
json.dump(list(chain.from_iterable(media_file.get_stats() for media_file in self.media_files)), sys.stdout, indent=4)
print()
57 changes: 37 additions & 20 deletions ffmpeg_normalize/_media_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
import shlex
from shutil import move, rmtree
from tempfile import mkdtemp
from typing import TYPE_CHECKING, Iterator, Literal, TypedDict
from typing import TYPE_CHECKING, Iterable, Iterator, Literal, TypedDict

from tqdm import tqdm

from ._cmd_utils import DUR_REGEX, NUL, CommandRunner
from ._errors import FFmpegNormalizeError
from ._streams import AudioStream, SubtitleStream, VideoStream
from ._streams import AudioStream, SubtitleStream, VideoStream, LoudnessStatisticsWithMetadata

if TYPE_CHECKING:
from ffmpeg_normalize import FFmpegNormalize
Expand Down Expand Up @@ -240,11 +240,6 @@ def _first_pass(self) -> None:
for _ in fun():
pass

# set initial stats (for dry-runs, this is the only thing we need to do)
self.ffmpeg_normalize.stats = [
audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
]

def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
"""
Return the audio filter command and output labels needed.
Expand All @@ -256,10 +251,35 @@ def _get_audio_filter_cmd(self) -> tuple[str, list[str]]:
output_labels = []

for audio_stream in self.streams["audio"].values():
if self.ffmpeg_normalize.normalization_type == "ebu":
normalization_filter = audio_stream.get_second_pass_opts_ebu()
skip_normalization = False
if self.ffmpeg_normalize.lower_only:
if self.ffmpeg_normalize.normalization_type == "ebu":
if (
audio_stream.loudness_statistics["ebu_pass1"] is not None and
audio_stream.loudness_statistics["ebu_pass1"]["input_i"] < self.ffmpeg_normalize.target_level
):
skip_normalization = True
elif self.ffmpeg_normalize.normalization_type == "peak":
if (
audio_stream.loudness_statistics["max"] is not None and
audio_stream.loudness_statistics["max"] < self.ffmpeg_normalize.target_level
):
skip_normalization = True
elif self.ffmpeg_normalize.normalization_type == "rms":
if (
audio_stream.loudness_statistics["mean"] is not None and
audio_stream.loudness_statistics["mean"] < self.ffmpeg_normalize.target_level
):
skip_normalization = True

if skip_normalization:
_logger.info(f"Stream {audio_stream.stream_id} had measured input loudness lower than target, skipping normalization.")
normalization_filter = "acopy"
else:
normalization_filter = audio_stream.get_second_pass_opts_peakrms()
if self.ffmpeg_normalize.normalization_type == "ebu":
normalization_filter = audio_stream.get_second_pass_opts_ebu()
else:
normalization_filter = audio_stream.get_second_pass_opts_peakrms()

input_label = f"[0:{audio_stream.stream_id}]"
output_label = f"[norm{audio_stream.stream_id}]"
Expand Down Expand Up @@ -421,16 +441,10 @@ def _second_pass(self) -> Iterator[float]:
# in the second pass, we do not normalize stream-by-stream, so we set the stats based on the
# overall output (which includes multiple loudnorm stats)
if self.ffmpeg_normalize.normalization_type == "ebu":
all_stats = AudioStream.prune_and_parse_loudnorm_output(
output, num_stats=len(self.streams["audio"])
)
for idx, audio_stream in enumerate(self.streams["audio"].values()):
audio_stream.set_second_pass_stats(all_stats[idx])

# collect all stats for the final report, again (overwrite the input)
self.ffmpeg_normalize.stats = [
audio_stream.get_stats() for audio_stream in self.streams["audio"].values()
]
all_stats = AudioStream.prune_and_parse_loudnorm_output(output)
for stream_id, audio_stream in self.streams["audio"].items():
if stream_id in all_stats:
audio_stream.set_second_pass_stats(all_stats[stream_id])

# warn if self.media_file.ffmpeg_normalize.dynamic == False and any of the second pass stats contain "normalization_type" == "dynamic"
if self.ffmpeg_normalize.dynamic is False:
Expand All @@ -446,3 +460,6 @@ def _second_pass(self) -> Iterator[float]:
)

_logger.debug("Normalization finished")

def get_stats(self) -> Iterable[LoudnessStatisticsWithMetadata]:
return (audio_stream.get_stats() for audio_stream in self.streams["audio"].values())
153 changes: 63 additions & 90 deletions ffmpeg_normalize/_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

_logger = logging.getLogger(__name__)

_loudnorm_pattern = re.compile(r"\[Parsed_loudnorm_(\d+)")

class EbuLoudnessStatistics(TypedDict):
input_i: float
Expand Down Expand Up @@ -320,123 +321,95 @@ def parse_loudnorm_stats(self) -> Iterator[float]:
f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
)

self.loudness_statistics["ebu_pass1"] = (
AudioStream.prune_and_parse_loudnorm_output(
output, num_stats=1
)[0] # only one stream
)
# only one stream
self.loudness_statistics["ebu_pass1"] = next(iter(AudioStream.prune_and_parse_loudnorm_output(output).values()))

@staticmethod
def prune_and_parse_loudnorm_output(
output: str, num_stats: int = 1
) -> List[EbuLoudnessStatistics]:
output: str
) -> dict[int, EbuLoudnessStatistics]:
"""
Prune ffmpeg progress lines from output and parse the loudnorm filter output.
There may be multiple outputs if multiple streams were processed.

Args:
output (str): The output from ffmpeg.
num_stats (int): The number of loudnorm statistics to parse.

Returns:
dict[int, EbuLoudnessStatistics]: The EBU loudness statistics, keyed by the index parsed from each loudnorm filter's output.
"""
pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
output_lines = [line.strip() for line in pruned_output.split("\n")]

ret = []
idx = 0
while True:
_logger.debug(f"Parsing loudnorm stats for stream {idx}")
loudnorm_stats = AudioStream._parse_loudnorm_output(
output_lines, stream_index=idx
)
idx += 1

if loudnorm_stats is None:
continue
ret.append(loudnorm_stats)

if len(ret) >= num_stats:
break

return ret
return AudioStream._parse_loudnorm_output(output_lines)

@staticmethod
def _parse_loudnorm_output(
output_lines: list[str], stream_index: Optional[int] = None
) -> Optional[EbuLoudnessStatistics]:
output_lines: list[str]
) -> dict[int, EbuLoudnessStatistics]:
"""
Parse the output of a loudnorm filter to get the EBU loudness statistics.

Args:
output_lines (list[str]): The output lines of the loudnorm filter.
stream_index (int): The stream index, optional to filter out the correct stream. If unset, the first stream is used.

Raises:
FFmpegNormalizeError: When the output could not be parsed.

Returns:
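dict[int, EbuLoudnessStatistics]: The EBU loudness statistics found in the output, keyed by the index parsed from each "[Parsed_loudnorm_<n>" line. Empty if no loudnorm output was found.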
"""
result = dict[int, EbuLoudnessStatistics]()
stream_index = -1
loudnorm_start = 0
loudnorm_end = 0
for index, line in enumerate(output_lines):
if line.startswith(f"[Parsed_loudnorm_{stream_index}"):
loudnorm_start = index + 1
continue
if loudnorm_start and line.startswith("}"):
loudnorm_end = index + 1
break

if not (loudnorm_start and loudnorm_end):
if stream_index is not None:
# not an error
return None

raise FFmpegNormalizeError(
"Could not parse loudnorm stats; no loudnorm-related output found"
)

try:
loudnorm_stats = json.loads(
"\n".join(output_lines[loudnorm_start:loudnorm_end])
)

_logger.debug(
f"Loudnorm stats for stream {stream_index} parsed: {json.dumps(loudnorm_stats)}"
)

for key in [
"input_i",
"input_tp",
"input_lra",
"input_thresh",
"output_i",
"output_tp",
"output_lra",
"output_thresh",
"target_offset",
"normalization_type",
]:
if key not in loudnorm_stats:
continue
if key == "normalization_type":
loudnorm_stats[key] = loudnorm_stats[key].lower()
# handle infinite values
elif float(loudnorm_stats[key]) == -float("inf"):
loudnorm_stats[key] = -99
elif float(loudnorm_stats[key]) == float("inf"):
loudnorm_stats[key] = 0
else:
# convert to floats
loudnorm_stats[key] = float(loudnorm_stats[key])

return cast(EbuLoudnessStatistics, loudnorm_stats)
except Exception as e:
raise FFmpegNormalizeError(
f"Could not parse loudnorm stats; wrong JSON format in string: {e}"
)
if stream_index < 0:
if m := _loudnorm_pattern.match(line):
loudnorm_start = index + 1
stream_index = int(m.group(1))
else:
if line.startswith("}"):
loudnorm_end = index + 1
loudnorm_data = "\n".join(output_lines[loudnorm_start:loudnorm_end])

try:
loudnorm_stats = json.loads(loudnorm_data)

_logger.debug(
f"Loudnorm stats for stream {stream_index} parsed: {loudnorm_data}"
)

for key in [
"input_i",
"input_tp",
"input_lra",
"input_thresh",
"output_i",
"output_tp",
"output_lra",
"output_thresh",
"target_offset",
"normalization_type",
]:
if key not in loudnorm_stats:
continue
if key == "normalization_type":
loudnorm_stats[key] = loudnorm_stats[key].lower()
# handle infinite values
elif float(loudnorm_stats[key]) == -float("inf"):
loudnorm_stats[key] = -99
elif float(loudnorm_stats[key]) == float("inf"):
loudnorm_stats[key] = 0
else:
# convert to floats
loudnorm_stats[key] = float(loudnorm_stats[key])

result[stream_index] = cast(EbuLoudnessStatistics, loudnorm_stats)
stream_index = -1
except Exception as e:
raise FFmpegNormalizeError(
f"Could not parse loudnorm stats; wrong JSON format in string: {e}"
ahmetsait marked this conversation as resolved.
Show resolved Hide resolved
)
return result

def get_second_pass_opts_ebu(self) -> str:
"""
Expand Down Expand Up @@ -515,19 +488,19 @@ def get_second_pass_opts_ebu(self) -> str:
"lra": self.media_file.ffmpeg_normalize.loudness_range_target,
"tp": self.media_file.ffmpeg_normalize.true_peak,
"offset": self._constrain(
float(stats["target_offset"]), -99, 99, name="target_offset"
stats["target_offset"], -99, 99, name="target_offset"
),
"measured_i": self._constrain(
float(stats["input_i"]), -99, 0, name="input_i"
stats["input_i"], -99, 0, name="input_i"
),
"measured_lra": self._constrain(
float(stats["input_lra"]), 0, 99, name="input_lra"
stats["input_lra"], 0, 99, name="input_lra"
),
"measured_tp": self._constrain(
float(stats["input_tp"]), -99, 99, name="input_tp"
stats["input_tp"], -99, 99, name="input_tp"
),
"measured_thresh": self._constrain(
float(stats["input_thresh"]), -99, 0, name="input_thresh"
stats["input_thresh"], -99, 0, name="input_thresh"
),
"linear": "false" if self.media_file.ffmpeg_normalize.dynamic else "true",
"print_format": "json",
Expand Down
Loading