From 67c6cf2fc05435186a8c96601fae078b237655b4 Mon Sep 17 00:00:00 2001
From: yero
Date: Wed, 30 Oct 2024 02:11:39 -0400
Subject: [PATCH] Changes the use pattern of `AudioFrameGenerator`

- No need to call `AddSamples()` with empty frames to signal the end of
  substreams -- calling `Finalize()` does it.
- No need to call `Finalize()` multiple times to flush remaining frames in
  the encoders -- remaining frames are automatically handled by
  `OutputFrames()`.
- Calling `AddSamples()` after `Finalize()`, or calling `Finalize()` multiple
  times, is OK -- such calls simply become no-ops.

PiperOrigin-RevId: 691295143
---
 iamf/cli/iamf_encoder.cc                      |   8 +
 iamf/cli/iamf_encoder.h                       |   4 +
 .../cli/proto_to_obu/audio_frame_generator.cc | 144 +++++++++++++-----
 iamf/cli/proto_to_obu/audio_frame_generator.h |  51 ++++---
 .../tests/audio_frame_generator_test.cc       |  78 +++++++---
 5 files changed, 211 insertions(+), 74 deletions(-)

diff --git a/iamf/cli/iamf_encoder.cc b/iamf/cli/iamf_encoder.cc
index 114a6b9..bad6d2f 100644
--- a/iamf/cli/iamf_encoder.cc
+++ b/iamf/cli/iamf_encoder.cc
@@ -174,6 +174,13 @@ absl::Status IamfEncoder::GetInputTimestamp(int32_t& input_timestamp) {
 void IamfEncoder::AddSamples(const DecodedUleb128 audio_element_id,
                              ChannelLabel::Label label,
                              const std::vector& samples) {
+  if (add_samples_finalized_) {
+    LOG_FIRST_N(WARNING, 3)
+        << "Calling `AddSamples()` after `FinalizeAddSamples()` has no effect; "
+        << samples.size() << " input samples discarded.";
+    return;
+  }
+
   id_to_labeled_samples_[audio_element_id][label] = samples;
 }
 
@@ -213,6 +220,7 @@ absl::Status IamfEncoder::OutputTemporalUnit(
         audio_frame_generator_->AddSamples(audio_element_id, label, samples));
     }
   }
+
   if (add_samples_finalized_) {
     RETURN_IF_NOT_OK(audio_frame_generator_->Finalize());
   }
diff --git a/iamf/cli/iamf_encoder.h b/iamf/cli/iamf_encoder.h
index acc51ca..ff84127 100644
--- a/iamf/cli/iamf_encoder.h
+++ b/iamf/cli/iamf_encoder.h
@@ -120,6 +120,10 @@ class IamfEncoder {
   absl::Status GetInputTimestamp(int32_t& input_timestamp);
 
   /*!\brief Adds audio samples belonging to the same temporal unit.
+   *
+   * It is best not to call this function after `FinalizeAddSamples()`.
+   * Doing so is OK, but the added samples will be ignored and not
+   * encoded.
    *
    * \param audio_element_id ID of the audio element to add samples to.
    * \param label Channel label to add samples to.
diff --git a/iamf/cli/proto_to_obu/audio_frame_generator.cc b/iamf/cli/proto_to_obu/audio_frame_generator.cc
index 3c53aa4..bcf8690 100644
--- a/iamf/cli/proto_to_obu/audio_frame_generator.cc
+++ b/iamf/cli/proto_to_obu/audio_frame_generator.cc
@@ -53,6 +53,7 @@
 #include "iamf/obu/codec_config.h"
 #include "iamf/obu/demixing_info_parameter_data.h"
 #include "iamf/obu/types.h"
+#include "src/google/protobuf/repeated_ptr_field.h"
 
 namespace iamf_tools {
 
@@ -233,23 +234,22 @@ absl::Status InitializeSubstreamData(
 // An audio element may contain many channels, denoted by their labels;
 // this function returns whether all labels have their (same amount of)
 // samples ready.
-bool SamplesReadyForAudioElement(
-    const LabelSamplesMap& label_to_samples,
-    const absl::flat_hash_set<ChannelLabel::Label>& channel_labels) {
-  size_t common_num_samples = 0;
-  for (const auto& label : channel_labels) {
+bool SamplesReadyForAudioElement(const LabelSamplesMap& label_to_samples,
+                                 const absl::flat_hash_set<ChannelLabel::Label>&
+                                     channel_labels_for_audio_element) {
+  std::optional<size_t> common_num_samples;
+  for (const auto& label : channel_labels_for_audio_element) {
     const auto label_to_samples_iter = label_to_samples.find(label);
     if (label_to_samples_iter == label_to_samples.end()) {
       return false;
     }
 
     const auto num_samples = label_to_samples_iter->second.size();
-    if (common_num_samples == 0 && num_samples != 0) {
+    if (!common_num_samples.has_value()) {
       common_num_samples = num_samples;
-      continue;
     }
 
-    if (num_samples != common_num_samples) {
+    if (num_samples != *common_num_samples) {
       return false;
     }
   }
@@ -369,10 +369,14 @@ std::pair GetNumSamplesToTrimForFrame(
                           frame_samples_to_trim_at_end);
 }
 
-absl::Status EncodeFramesForAudioElement(
+// Encode frames for an audio element if samples are ready.
+absl::Status MaybeEncodeFramesForAudioElement(
     const DecodedUleb128 audio_element_id,
     const AudioElementWithData& audio_element_with_data,
-    const DemixingModule& demixing_module, LabelSamplesMap& label_to_samples,
+    const DemixingModule& demixing_module,
+    const absl::flat_hash_set<ChannelLabel::Label>&
+        channel_labels_for_audio_element,
+    LabelSamplesMap& label_to_samples,
     absl::flat_hash_map& substream_id_to_trimming_state,
     ParametersManager& parameters_manager,
@@ -381,6 +385,13 @@
     absl::flat_hash_map& substream_id_to_substream_data,
     GlobalTimingModule& global_timing_module) {
+  if (!SamplesReadyForAudioElement(label_to_samples,
+                                   channel_labels_for_audio_element)) {
+    // Waiting for more samples belonging to the same audio element; return
+    // for now.
+    return absl::OkStatus();
+  }
+
   const CodecConfigObu& codec_config = *audio_element_with_data.codec_config;
 
   // Get some common information about this stream.
@@ -626,6 +637,34 @@ absl::Status ValidateAndApplyUserTrimming(
 
 }  // namespace
 
+AudioFrameGenerator::AudioFrameGenerator(
+    const ::google::protobuf::RepeatedPtrField<
+        iamf_tools_cli_proto::AudioFrameObuMetadata>& audio_frame_metadata,
+    const ::google::protobuf::RepeatedPtrField<
+        iamf_tools_cli_proto::CodecConfigObuMetadata>& codec_config_metadata,
+    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
+        audio_elements,
+    const DemixingModule& demixing_module,
+    ParametersManager& parameters_manager,
+    GlobalTimingModule& global_timing_module)
+    : audio_elements_(audio_elements),
+      demixing_module_(demixing_module),
+      parameters_manager_(parameters_manager),
+      global_timing_module_(global_timing_module),
+      // Set to a state NOT taking samples at first; may be changed to
+      // `kTakingSamples` once `Initialize()` is called.
+      state_(kFlushingRemaining) {
+  for (const auto& audio_frame_obu_metadata : audio_frame_metadata) {
+    audio_frame_metadata_[audio_frame_obu_metadata.audio_element_id()] =
+        audio_frame_obu_metadata;
+  }
+
+  for (const auto& codec_config_obu_metadata : codec_config_metadata) {
+    codec_config_metadata_[codec_config_obu_metadata.codec_config_id()] =
+        codec_config_obu_metadata.codec_config();
+  }
+}
+
 absl::StatusOr AudioFrameGenerator::GetNumberOfSamplesToDelayAtStart(
     const iamf_tools_cli_proto::CodecConfig& codec_config_metadata,
     const CodecConfigObu& codec_config) {
@@ -728,19 +767,32 @@ absl::Status AudioFrameGenerator::Initialize() {
     }
   }
 
+  // If `substream_id_to_substream_data_` is not empty, this generator is
+  // expecting audio substreams and is ready to take audio samples.
+  if (!substream_id_to_substream_data_.empty()) {
+    state_ = kTakingSamples;
+  }
+
   return absl::OkStatus();
 }
 
 bool AudioFrameGenerator::TakingSamples() const {
-  return !substream_id_to_substream_data_.empty();
+  return (state_ == kTakingSamples);
 }
 
 absl::Status AudioFrameGenerator::AddSamples(
     const DecodedUleb128 audio_element_id, ChannelLabel::Label label,
     absl::Span samples) {
-  const auto& audio_element_labels =
+  absl::MutexLock lock(&mutex_);
+  if (state_ != kTakingSamples) {
+    LOG_FIRST_N(WARNING, 3)
+        << "Calling `AddSamples()` after `Finalize()` has no effect.";
+    return absl::OkStatus();
+  }
+
+  const auto& audio_element_labels_iter =
       audio_element_id_to_labels_.find(audio_element_id);
-  if (audio_element_labels == audio_element_id_to_labels_.end()) {
+  if (audio_element_labels_iter == audio_element_id_to_labels_.end()) {
     return absl::InvalidArgumentError(
         absl::StrCat("No audio frame metadata found for Audio Element ID= ",
                      audio_element_id));
@@ -757,36 +809,20 @@ absl::Status AudioFrameGenerator::AddSamples(
   }
   const auto& audio_element_with_data = audio_element_iter->second;
 
-  if (SamplesReadyForAudioElement(labeled_samples,
-                                  audio_element_labels->second)) {
-    absl::MutexLock lock(&mutex_);
-    RETURN_IF_NOT_OK(EncodeFramesForAudioElement(
-        audio_element_id, audio_element_with_data, demixing_module_,
-        labeled_samples, substream_id_to_trimming_state_, parameters_manager_,
-        substream_id_to_encoder_, substream_id_to_substream_data_,
-        global_timing_module_));
-
-    labeled_samples.clear();
-  }
+  RETURN_IF_NOT_OK(MaybeEncodeFramesForAudioElement(
+      audio_element_id, audio_element_with_data, demixing_module_,
+      audio_element_labels_iter->second, labeled_samples,
+      substream_id_to_trimming_state_, parameters_manager_,
+      substream_id_to_encoder_, substream_id_to_substream_data_,
+      global_timing_module_));
 
   return absl::OkStatus();
 }
 
 absl::Status AudioFrameGenerator::Finalize() {
   absl::MutexLock lock(&mutex_);
-  for (auto& [substream_id, encoder] : substream_id_to_encoder_) {
-    auto substream_data_iter =
-        substream_id_to_substream_data_.find(substream_id);
-    if (substream_data_iter == substream_id_to_substream_data_.end()) {
-      continue;
-    }
-
-    // Remove the substream data when there is no more sample to come, and the
-    // encoder can be finalized.
-    if (substream_data_iter->second.samples_obu.empty()) {
-      RETURN_IF_NOT_OK(encoder->Finalize());
-      substream_id_to_substream_data_.erase(substream_data_iter);
-    }
+  if (state_ == kTakingSamples) {
+    state_ = kFinalizedCalled;
   }
 
   return absl::OkStatus();
 }
 
@@ -801,9 +837,43 @@ absl::Status AudioFrameGenerator::OutputFrames(
     std::list& audio_frames) {
   absl::MutexLock lock(&mutex_);
 
+  if (state_ == kFlushingRemaining) {
+    // In this state, there might be some remaining samples queued in the
+    // encoders waiting to be encoded; continue to encode them one frame at a
+    // time.
+    for (const auto& [audio_element_id, audio_element_with_data] :
+         audio_elements_) {
+      RETURN_IF_NOT_OK(MaybeEncodeFramesForAudioElement(
+          audio_element_id, audio_element_with_data, demixing_module_,
+          audio_element_id_to_labels_.at(audio_element_id),
+          id_to_labeled_samples_[audio_element_id],
+          substream_id_to_trimming_state_, parameters_manager_,
+          substream_id_to_encoder_, substream_id_to_substream_data_,
+          global_timing_module_));
+    }
+  } else if (state_ == kFinalizedCalled) {
+    // `Finalize()` has just been called; advance the state so that the
+    // remaining samples will be encoded in the next iteration.
+    state_ = kFlushingRemaining;
+  }
+
+  // Pop encoded audio frames from encoders.
   for (auto substream_id_to_encoder_iter = substream_id_to_encoder_.begin();
        substream_id_to_encoder_iter != substream_id_to_encoder_.end();) {
     auto& [substream_id, encoder] = *substream_id_to_encoder_iter;
+
+    // Remove the substream data when the generator is in the
+    // `kFlushingRemaining` state and the encoder can be finalized.
+    if (state_ == kFlushingRemaining) {
+      auto substream_data_iter =
+          substream_id_to_substream_data_.find(substream_id);
+      if (substream_data_iter != substream_id_to_substream_data_.end() &&
+          substream_data_iter->second.samples_obu.empty()) {
+        RETURN_IF_NOT_OK(encoder->Finalize());
+        substream_id_to_substream_data_.erase(substream_data_iter);
+      }
+    }
+
     if (encoder->FramesAvailable()) {
       RETURN_IF_NOT_OK(encoder->Pop(audio_frames));
       RETURN_IF_NOT_OK(ValidateAndApplyUserTrimming(
diff --git a/iamf/cli/proto_to_obu/audio_frame_generator.h b/iamf/cli/proto_to_obu/audio_frame_generator.h
index fe201fa..2c9ffe0 100644
--- a/iamf/cli/proto_to_obu/audio_frame_generator.h
+++ b/iamf/cli/proto_to_obu/audio_frame_generator.h
@@ -44,20 +44,34 @@ namespace iamf_tools {
  * The generation of audio frames can be done asynchronously, where
  * samples are added on one thread and completed frames are consumed on another.
  *
+ * Under the hood, the generator can be in three states:
+ *   1. `kTakingSamples`: The generator is expecting audio substreams and
+ *      taking samples.
+ *   2. `kFinalizedCalled`: `Finalize()` has been called; no more "real
+ *      samples" are coming, and the generator will soon (starting in the
+ *      next iteration) be flushing the remaining samples.
+ *   3. `kFlushingRemaining`: The generator is flushing the remaining samples
+ *      that are still in the underlying encoders.
+ *
  * The use pattern of this class is:
 *
 * - Initialize (`Initialize()`).
+ *   - (This puts the generator in the `kTakingSamples` state.)
 *
 * Thread 1:
 * - Repeat until no new sample to add (by checking `TakingSamples()`):
 *   - Add samples for each audio element (`AddSamples()`).
 * - Finalize the sample-adding process (`Finalize()`).
+ *   - (This puts the generator in the `kFinalizedCalled` state.)
 *
 * Thread 2:
 * - Repeat until no frame to generate (by checking `GeneratingFrames()`):
 *   - Output generated frames (`OutputFrames()`).
+ *     - If the generator is in the `kFlushingRemaining` state, the frames
+ *       might come from remaining samples in the underlying encoders.
 *     - If the output is empty, wait.
 *     - Otherwise, add the output of this round to the final result.
+ *
 */
 class AudioFrameGenerator {
  public:
@@ -88,21 +102,7 @@ class AudioFrameGenerator {
           audio_elements,
       const DemixingModule& demixing_module,
       ParametersManager& parameters_manager,
-      GlobalTimingModule& global_timing_module)
-      : audio_elements_(audio_elements),
-        demixing_module_(demixing_module),
-        parameters_manager_(parameters_manager),
-        global_timing_module_(global_timing_module) {
-    for (const auto& audio_frame_obu_metadata : audio_frame_metadata) {
-      audio_frame_metadata_[audio_frame_obu_metadata.audio_element_id()] =
-          audio_frame_obu_metadata;
-    }
-
-    for (const auto& codec_config_obu_metadata : codec_config_metadata) {
-      codec_config_metadata_[codec_config_obu_metadata.codec_config_id()] =
-          codec_config_obu_metadata.codec_config();
-    }
-  }
+      GlobalTimingModule& global_timing_module);
 
   /*!\brief Returns the number of samples to delay based on the codec config.
    *
@@ -129,9 +129,7 @@ class AudioFrameGenerator {
 
   /*!\brief Adds samples for an Audio Element and a channel label.
    *
-   * Calling this function with empty input `samples` will signal the
-   * underlying encoder that the a substream has ended. Eventually when all
-   * substreams are ended, `TakingSamples()` will return false.
+   * No effect if the generator is not in the `kTakingSamples` state.
    *
    * \param audio_element_id Audio Element ID that the added samples belong to.
    * \param label Channel label of the added samples.
@@ -144,8 +142,8 @@ class AudioFrameGenerator {
 
   /*!\brief Finalizes the sample-adding process.
    *
-   * This will signal all underlying encoders that there are no more samples
-   * to come.
+   * This puts the generator in the `kFinalizedCalled` state if it is in the
+   * `kTakingSamples` state. No effect if the generator is in other states.
    *
    * \return `absl::OkStatus()` on success. A specific status on failure.
    */
@@ -163,12 +161,24 @@ class AudioFrameGenerator {
    * The output frames all belong to the same temporal unit, sharing the same
    * start and end timestamps.
    *
+   * After `Finalize()` is called, all underlying encoders will be signalled
+   * to encode the remaining samples. Eventually when all substreams are
+   * ended, encoders will be deleted and `GeneratingFrames()` will return
+   * false.
+   *
   * \param audio_frames Output list of audio frames.
   * \return `absl::OkStatus()` on success. A specific status on failure.
   */
  absl::Status OutputFrames(std::list& audio_frames);
 
 private:
+  // State of an audio frame generator.
+  enum GeneratorState {
+    kTakingSamples,
+    kFinalizedCalled,
+    kFlushingRemaining,
+  };
+
  // Mapping from Audio Element ID to audio frame metadata.
  absl::flat_hash_map
      audio_frame_metadata_;
@@ -204,6 +214,7 @@ class AudioFrameGenerator {
  const DemixingModule& demixing_module_;
  ParametersManager& parameters_manager_;
  GlobalTimingModule& global_timing_module_;
+  GeneratorState state_;
 
  // Mutex to protect data accessed in different threads.
  mutable absl::Mutex mutex_;
diff --git a/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc b/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc
index 9f91b6b..f351464 100644
--- a/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc
+++ b/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc
@@ -337,26 +337,22 @@ void AddAllSamplesAndFinalizesExpectOk(
   }
 
   // Push in the user data.
-  for (int frame_count = 0; frame_count < common_num_frames; ++frame_count) {
-    EXPECT_TRUE((audio_frame_generator.TakingSamples()));
+  for (int i = 0; i < common_num_frames; ++i) {
+    // Before `Finalize()` is called, the generator is expected to be taking
+    // samples.
+    EXPECT_TRUE(audio_frame_generator.TakingSamples());
     for (const auto& [label, frames] : label_to_frames) {
-      EXPECT_THAT(audio_frame_generator.AddSamples(audio_element_id, label,
-                                                   frames[frame_count]),
-                  IsOk());
+      EXPECT_THAT(
+          audio_frame_generator.AddSamples(audio_element_id, label, frames[i]),
+          IsOk());
     }
   }
 
-  // Flush out the remaining frames. Several flushes could be required if the
-  // codec delay is longer than a frame duration.
-  while (audio_frame_generator.TakingSamples()) {
-    for (const auto& [label, frames] : label_to_frames) {
-      EXPECT_THAT(audio_frame_generator.AddSamples(audio_element_id, label,
-                                                   kEmptyFrame),
-                  IsOk());
-    }
+  // Call `Finalize()` after the last frame.
+  EXPECT_THAT(audio_frame_generator.Finalize(), IsOk());
 
-    EXPECT_THAT(audio_frame_generator.Finalize(), IsOk());
-  }
+  // Now expect the generator to NOT be taking samples.
+  EXPECT_FALSE(audio_frame_generator.TakingSamples());
 }
 
 // Safe to run simultaneously with `AddAllSamplesAndFinalizesExpectOk`.
@@ -389,8 +385,7 @@ void GenerateAudioFrameWithEightSamplesExpectOk(
       codec_config_obus, audio_elements, demixing_module, global_timing_module,
       parameters_manager, audio_frame_generator);
 
-  // Add only one "real" frame and an empty frame to signal the end of the
-  // stream.
+  // Add one "real" frame.
   const absl::flat_hash_map>>
       label_to_frames = {{ChannelLabel::kL2, {kFrame0L2EightSamples}},
@@ -517,6 +512,55 @@ TEST(AudioFrameGenerator, OneStereoSubstreamOneFrame) {
   ValidateAudioFrames(audio_frames, expected_audio_frames);
 }
 
+TEST(AudioFrameGenerator, AddSamplesAfterFinalizeHasNoEffect) {
+  iamf_tools_cli_proto::UserMetadata user_metadata = {};
+  ConfigureOneStereoSubstreamLittleEndian(user_metadata);
+
+  const absl::flat_hash_map
+      param_definitions = {};
+  absl::flat_hash_map codec_config_obus = {};
+  absl::flat_hash_map audio_elements = {};
+  DemixingModule demixing_module;
+  GlobalTimingModule global_timing_module;
+  // For delayed initialization.
+  std::optional parameters_manager;
+  std::optional audio_frame_generator;
+  InitializeAudioFrameGenerator(user_metadata, param_definitions,
+                                codec_config_obus, audio_elements,
+                                demixing_module, global_timing_module,
+                                parameters_manager, audio_frame_generator);
+
+  // First add one frame of samples and call `Finalize()`.
+  const absl::flat_hash_map>>
+      label_to_frames = {{ChannelLabel::kL2, {kFrame0L2EightSamples}},
+                         {ChannelLabel::kR2, {kFrame0R2EightSamples}}};
+  ASSERT_FALSE(audio_elements.empty());
+  const DecodedUleb128 audio_element_id = audio_elements.begin()->first;
+  AddAllSamplesAndFinalizesExpectOk(audio_element_id, label_to_frames,
+                                    *audio_frame_generator);
+
+  // Expect that one frame is generated.
+  std::list first_round_audio_frames;
+  FlushAudioFrameGeneratorExpectOk(*audio_frame_generator,
+                                   first_round_audio_frames);
+  EXPECT_EQ(first_round_audio_frames.size(), 1);
+
+  // Add another frame, but since `Finalize()` has been called in the last
+  // round, this has no effect. Therefore we expect that no frame is generated.
+  std::list second_round_audio_frames;
+  for (const auto& [label, frames] : label_to_frames) {
+    EXPECT_THAT(audio_frame_generator->AddSamples(audio_element_id, label,
+                                                  frames.front()),
+                IsOk());
+  }
+  FlushAudioFrameGeneratorExpectOk(*audio_frame_generator,
+                                   second_round_audio_frames);
+  EXPECT_TRUE(second_round_audio_frames.empty());
+}
+
 TEST(AudioFrameGenerator, AllowsOutputToHaveHigherBitDepthThanInput) {
   iamf_tools_cli_proto::UserMetadata user_metadata = {};
   ConfigureOneStereoSubstreamLittleEndian(user_metadata);
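For reference, the new call pattern described in the commit message looks roughly like the sketch below. This is illustrative only and not part of the patch: the two phases are shown sequentially rather than on separate threads, the helpers `MoreInputAvailable()`, `NextLabeledSamples()`, and `ConsumeFrames()` are placeholders for the caller's own input/output handling, and the output list element type (`AudioFrameWithData`) is assumed from the surrounding code.

// Sketch of the new `AudioFrameGenerator` use pattern. Placeholder helpers
// stand in for the caller's real input/output plumbing.
absl::Status EncodeAllFrames(AudioFrameGenerator& audio_frame_generator) {
  RETURN_IF_NOT_OK(audio_frame_generator.Initialize());

  // Sample-adding phase ("Thread 1" in the class comment): push samples for
  // each temporal unit, then call `Finalize()` exactly once. No empty frames
  // are needed to mark the end of the substreams.
  while (MoreInputAvailable()) {
    for (const auto& [audio_element_id, label, samples] : NextLabeledSamples()) {
      RETURN_IF_NOT_OK(
          audio_frame_generator.AddSamples(audio_element_id, label, samples));
    }
  }
  RETURN_IF_NOT_OK(audio_frame_generator.Finalize());  // Repeated calls are no-ops.

  // Frame-consuming phase ("Thread 2" in the class comment): `OutputFrames()`
  // keeps producing frames, including those flushed from the encoders after
  // `Finalize()`, until `GeneratingFrames()` reports that everything is done.
  while (audio_frame_generator.GeneratingFrames()) {
    std::list<AudioFrameWithData> audio_frames;
    RETURN_IF_NOT_OK(audio_frame_generator.OutputFrames(audio_frames));
    if (!audio_frames.empty()) {
      ConsumeFrames(audio_frames);
    }
    // A real consumer running on its own thread would wait here when the
    // output is empty instead of spinning.
  }
  return absl::OkStatus();
}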