Skip to content

Commit

Permalink
Changes the use pattern of AudioFrameGenerator
Browse files Browse the repository at this point in the history
- No need to call `AddSamples()` with empty frames to signal the end of
  substreams -- calling `Finalize()` does it.
- No need to call `Finalize()` multiple times to flush remaining frames in
  the encoders -- remaining frames are automatically handled by
  `OutputFrames()`.
- Calling `AddSamples()` after `Finalize()`, or calling `Finalize()` multiple
  times, is OK -- such calls simply become no-ops.

PiperOrigin-RevId: 691295143
  • Loading branch information
yero authored and jwcullen committed Nov 1, 2024
1 parent e021bfe commit 67c6cf2
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 74 deletions.
8 changes: 8 additions & 0 deletions iamf/cli/iamf_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,13 @@ absl::Status IamfEncoder::GetInputTimestamp(int32_t& input_timestamp) {
// Stores `samples` for the given audio element and channel label, replacing
// any samples previously stored under the same (ID, label) pair.
//
// Once `add_samples_finalized_` is set the call is a no-op: a warning is
// logged (at most three times) and the input samples are dropped.
void IamfEncoder::AddSamples(const DecodedUleb128 audio_element_id,
                             ChannelLabel::Label label,
                             const std::vector<InternalSampleType>& samples) {
  if (!add_samples_finalized_) {
    // Still accepting input: overwrite the slot for this label.
    auto& label_to_samples = id_to_labeled_samples_[audio_element_id];
    label_to_samples[label] = samples;
    return;
  }

  // Sample-adding has been finalized; report and discard.
  LOG_FIRST_N(WARNING, 3)
      << "Calling `AddSamples()` after `FinalizeAddSamples()` has no effect; "
      << samples.size() << " input samples discarded.";
}

Expand Down Expand Up @@ -213,6 +220,7 @@ absl::Status IamfEncoder::OutputTemporalUnit(
audio_frame_generator_->AddSamples(audio_element_id, label, samples));
}
}

if (add_samples_finalized_) {
RETURN_IF_NOT_OK(audio_frame_generator_->Finalize());
}
Expand Down
4 changes: 4 additions & 0 deletions iamf/cli/iamf_encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ class IamfEncoder {
absl::Status GetInputTimestamp(int32_t& input_timestamp);

/*!\brief Adds audio samples belonging to the same temporal unit.
*
* Best practice is not to call this function after `FinalizeAddSamples()`.
* Doing so is permitted, but the added samples will be ignored and not
* encoded.
*
* \param audio_element_id ID of the audio element to add samples to.
* \param label Channel label to add samples to.
Expand Down
144 changes: 107 additions & 37 deletions iamf/cli/proto_to_obu/audio_frame_generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include "iamf/obu/codec_config.h"
#include "iamf/obu/demixing_info_parameter_data.h"
#include "iamf/obu/types.h"
#include "src/google/protobuf/repeated_ptr_field.h"

namespace iamf_tools {

Expand Down Expand Up @@ -233,23 +234,22 @@ absl::Status InitializeSubstreamData(
// An audio element may contain many channels, denoted by their labels;
// this function returns whether all labels have their (same amount of)
// samples ready.
bool SamplesReadyForAudioElement(
const LabelSamplesMap& label_to_samples,
const absl::flat_hash_set<ChannelLabel::Label>& channel_labels) {
size_t common_num_samples = 0;
for (const auto& label : channel_labels) {
bool SamplesReadyForAudioElement(const LabelSamplesMap& label_to_samples,
const absl::flat_hash_set<ChannelLabel::Label>&
channel_labels_for_audio_element) {
std::optional<size_t> common_num_samples;
for (const auto& label : channel_labels_for_audio_element) {
const auto label_to_samples_iter = label_to_samples.find(label);
if (label_to_samples_iter == label_to_samples.end()) {
return false;
}

const auto num_samples = label_to_samples_iter->second.size();
if (common_num_samples == 0 && num_samples != 0) {
if (!common_num_samples.has_value()) {
common_num_samples = num_samples;
continue;
}

if (num_samples != common_num_samples) {
if (num_samples != *common_num_samples) {
return false;
}
}
Expand Down Expand Up @@ -369,10 +369,14 @@ std::pair<uint32_t, uint32_t> GetNumSamplesToTrimForFrame(
frame_samples_to_trim_at_end);
}

absl::Status EncodeFramesForAudioElement(
// Encode frames for an audio element if samples are ready.
absl::Status MaybeEncodeFramesForAudioElement(
const DecodedUleb128 audio_element_id,
const AudioElementWithData& audio_element_with_data,
const DemixingModule& demixing_module, LabelSamplesMap& label_to_samples,
const DemixingModule& demixing_module,
const absl::flat_hash_set<ChannelLabel::Label>&
channel_labels_for_audio_element,
LabelSamplesMap& label_to_samples,
absl::flat_hash_map<uint32_t, AudioFrameGenerator::TrimmingState>&
substream_id_to_trimming_state,
ParametersManager& parameters_manager,
Expand All @@ -381,6 +385,13 @@ absl::Status EncodeFramesForAudioElement(
absl::flat_hash_map<uint32_t, SubstreamData>&
substream_id_to_substream_data,
GlobalTimingModule& global_timing_module) {
if (!SamplesReadyForAudioElement(label_to_samples,
channel_labels_for_audio_element)) {
// Waiting for more samples belonging to the same audio element; return
// for now.
return absl::OkStatus();
}

const CodecConfigObu& codec_config = *audio_element_with_data.codec_config;

// Get some common information about this stream.
Expand Down Expand Up @@ -626,6 +637,34 @@ absl::Status ValidateAndApplyUserTrimming(

} // namespace

// Constructs a generator from user metadata and the shared helper modules.
//
// Copies every audio frame metadata entry (keyed by its audio element ID) and
// every codec config (keyed by its codec config ID) into the generator's own
// lookup maps. A later entry with a duplicate key overwrites the earlier one.
AudioFrameGenerator::AudioFrameGenerator(
    const ::google::protobuf::RepeatedPtrField<
        iamf_tools_cli_proto::AudioFrameObuMetadata>& audio_frame_metadata,
    const ::google::protobuf::RepeatedPtrField<
        iamf_tools_cli_proto::CodecConfigObuMetadata>& codec_config_metadata,
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    const DemixingModule& demixing_module,
    ParametersManager& parameters_manager,
    GlobalTimingModule& global_timing_module)
    : audio_elements_(audio_elements),
      demixing_module_(demixing_module),
      parameters_manager_(parameters_manager),
      global_timing_module_(global_timing_module),
      // Start in a state that does NOT accept samples; `Initialize()` may
      // switch the generator to `kTakingSamples`.
      state_(kFlushingRemaining) {
  // Index the audio frame metadata by audio element ID.
  for (const auto& frame_metadata : audio_frame_metadata) {
    const auto audio_element_id = frame_metadata.audio_element_id();
    audio_frame_metadata_[audio_element_id] = frame_metadata;
  }

  // Index the codec configs by codec config ID.
  for (const auto& config_metadata : codec_config_metadata) {
    const auto codec_config_id = config_metadata.codec_config_id();
    codec_config_metadata_[codec_config_id] = config_metadata.codec_config();
  }
}

absl::StatusOr<uint32_t> AudioFrameGenerator::GetNumberOfSamplesToDelayAtStart(
const iamf_tools_cli_proto::CodecConfig& codec_config_metadata,
const CodecConfigObu& codec_config) {
Expand Down Expand Up @@ -728,19 +767,32 @@ absl::Status AudioFrameGenerator::Initialize() {
}
}

// If `substream_id_to_substream_data_` is not empty, meaning this generator
// is expecting audio substreams and is ready to take audio samples.
if (!substream_id_to_substream_data_.empty()) {
state_ = kTakingSamples;
}

return absl::OkStatus();
}

bool AudioFrameGenerator::TakingSamples() const {
return !substream_id_to_substream_data_.empty();
return (state_ == kTakingSamples);
}

absl::Status AudioFrameGenerator::AddSamples(
const DecodedUleb128 audio_element_id, ChannelLabel::Label label,
absl::Span<const InternalSampleType> samples) {
const auto& audio_element_labels =
absl::MutexLock lock(&mutex_);
if (state_ != kTakingSamples) {
LOG_FIRST_N(WARNING, 3)
<< "Calling `AddSamples()` after `Finalize()` has no effect.";
return absl::OkStatus();
}

const auto& audio_element_labels_iter =
audio_element_id_to_labels_.find(audio_element_id);
if (audio_element_labels == audio_element_id_to_labels_.end()) {
if (audio_element_labels_iter == audio_element_id_to_labels_.end()) {
return absl::InvalidArgumentError(
absl::StrCat("No audio frame metadata found for Audio Element ID= ",
audio_element_id));
Expand All @@ -757,36 +809,20 @@ absl::Status AudioFrameGenerator::AddSamples(
}
const auto& audio_element_with_data = audio_element_iter->second;

if (SamplesReadyForAudioElement(labeled_samples,
audio_element_labels->second)) {
absl::MutexLock lock(&mutex_);
RETURN_IF_NOT_OK(EncodeFramesForAudioElement(
audio_element_id, audio_element_with_data, demixing_module_,
labeled_samples, substream_id_to_trimming_state_, parameters_manager_,
substream_id_to_encoder_, substream_id_to_substream_data_,
global_timing_module_));

labeled_samples.clear();
}
RETURN_IF_NOT_OK(MaybeEncodeFramesForAudioElement(
audio_element_id, audio_element_with_data, demixing_module_,
audio_element_labels_iter->second, labeled_samples,
substream_id_to_trimming_state_, parameters_manager_,
substream_id_to_encoder_, substream_id_to_substream_data_,
global_timing_module_));

return absl::OkStatus();
}

absl::Status AudioFrameGenerator::Finalize() {
absl::MutexLock lock(&mutex_);
for (auto& [substream_id, encoder] : substream_id_to_encoder_) {
auto substream_data_iter =
substream_id_to_substream_data_.find(substream_id);
if (substream_data_iter == substream_id_to_substream_data_.end()) {
continue;
}

// Remove the substream data when there is no more sample to come, and the
// encoder can be finalized.
if (substream_data_iter->second.samples_obu.empty()) {
RETURN_IF_NOT_OK(encoder->Finalize());
substream_id_to_substream_data_.erase(substream_data_iter);
}
if (state_ == kTakingSamples) {
state_ = kFinalizedCalled;
}

return absl::OkStatus();
Expand All @@ -801,9 +837,43 @@ absl::Status AudioFrameGenerator::OutputFrames(
std::list<AudioFrameWithData>& audio_frames) {
absl::MutexLock lock(&mutex_);

if (state_ == kFlushingRemaining) {
// In this state, there might be some remaining samples queued in the
// encoders waiting to be encoded; continue to encode them one frame at a
// time.
for (const auto& [audio_element_id, audio_element_with_data] :
audio_elements_) {
RETURN_IF_NOT_OK(MaybeEncodeFramesForAudioElement(
audio_element_id, audio_element_with_data, demixing_module_,
audio_element_id_to_labels_.at(audio_element_id),
id_to_labeled_samples_[audio_element_id],
substream_id_to_trimming_state_, parameters_manager_,
substream_id_to_encoder_, substream_id_to_substream_data_,
global_timing_module_));
}
} else if (state_ == kFinalizedCalled) {
// The `Finalize()` has just been called, advance the state so that the
// remaining samples will be encoded in the next iteration.
state_ = kFlushingRemaining;
}

// Pop encoded audio frames from encoders.
for (auto substream_id_to_encoder_iter = substream_id_to_encoder_.begin();
substream_id_to_encoder_iter != substream_id_to_encoder_.end();) {
auto& [substream_id, encoder] = *substream_id_to_encoder_iter;

// Remove the substream data when the generator is in the
// `kFlushingRemaining` state and the encoder can be finalized.
if (state_ == kFlushingRemaining) {
auto substream_data_iter =
substream_id_to_substream_data_.find(substream_id);
if (substream_data_iter != substream_id_to_substream_data_.end() &&
substream_data_iter->second.samples_obu.empty()) {
RETURN_IF_NOT_OK(encoder->Finalize());
substream_id_to_substream_data_.erase(substream_data_iter);
}
}

if (encoder->FramesAvailable()) {
RETURN_IF_NOT_OK(encoder->Pop(audio_frames));
RETURN_IF_NOT_OK(ValidateAndApplyUserTrimming(
Expand Down
51 changes: 31 additions & 20 deletions iamf/cli/proto_to_obu/audio_frame_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,34 @@ namespace iamf_tools {
* The generation of audio frames can be done asynchronously, where
* samples are added on one thread and completed frames are consumed on another.
*
* Under the hood, the generator can be in three states:
* 1. `kTakingSamples`: The generator is expecting audio substreams and taking
* samples.
* 2. `kFinalizedCalled`: `Finalize()` has been called; no more "real samples"
* are coming, and the generator will soon (starting in
* the next iteration) be flushing the remaining samples.
* 3. `kFlushingRemaining`: The generator is flushing the remaining samples
* that are still in the underlying encoders.
*
* The use pattern of this class is:
*
* - Initialize (`Initialize()`).
* - (This puts the generator in the `kTakingSamples` state.)
*
* Thread 1:
* - Repeat until no new sample to add (by checking `TakingSamples()`):
* - Add samples for each audio element (`AddSamples()`).
* - Finalize the sample-adding process (`Finalize()`).
* - (This puts the generator in the `kFinalizedCalled` state.)
*
* Thread 2:
* - Repeat until no frame to generate (by checking `GeneratingFrames()`):
* - Output generated frames (`OutputFrames()`).
* - If the generator is in the `kFlushingRemaining` state, the frames
* might come from remaining samples in the underlying encoders.
* - If the output is empty, wait.
* - Otherwise, add the output of this round to the final result.
*
*/
class AudioFrameGenerator {
public:
Expand Down Expand Up @@ -88,21 +102,7 @@ class AudioFrameGenerator {
audio_elements,
const DemixingModule& demixing_module,
ParametersManager& parameters_manager,
GlobalTimingModule& global_timing_module)
: audio_elements_(audio_elements),
demixing_module_(demixing_module),
parameters_manager_(parameters_manager),
global_timing_module_(global_timing_module) {
for (const auto& audio_frame_obu_metadata : audio_frame_metadata) {
audio_frame_metadata_[audio_frame_obu_metadata.audio_element_id()] =
audio_frame_obu_metadata;
}

for (const auto& codec_config_obu_metadata : codec_config_metadata) {
codec_config_metadata_[codec_config_obu_metadata.codec_config_id()] =
codec_config_obu_metadata.codec_config();
}
}
GlobalTimingModule& global_timing_module);

/*!\brief Returns the number of samples to delay based on the codec config.
*
Expand All @@ -129,9 +129,7 @@ class AudioFrameGenerator {

/*!\brief Adds samples for an Audio Element and a channel label.
*
* Calling this function with empty input `samples` will signal the
* underlying encoder that the a substream has ended. Eventually when all
* substreams are ended, `TakingSamples()` will return false.
* No effect if the generator is not in the `kTakingSamples` state.
*
* \param audio_element_id Audio Element ID that the added samples belong to.
* \param label Channel label of the added samples.
Expand All @@ -144,8 +142,8 @@ class AudioFrameGenerator {

/*!\brief Finalizes the sample-adding process.
*
* This will signal all underlying encoders that there are no more samples
* to come.
* This puts the generator in the `kFinalizedCalled` state if it is in the
* `kTakingSamples` state. No effect if the generator is in other states.
*
* \return `absl::OkStatus()` on success. A specific status on failure.
*/
Expand All @@ -163,12 +161,24 @@ class AudioFrameGenerator {
* The output frames all belong to the same temporal unit, sharing the same
* start and end timestamps.
*
* After `Finalize()` is called, all underlying encoders will be signalled
* to encode the remaining samples. Eventually, when all substreams have
* ended, encoders will be deleted and `GeneratingFrames()` will return
* false.
*
* \param audio_frames Output list of audio frames.
* \return `absl::OkStatus()` on success. A specific status on failure.
*/
absl::Status OutputFrames(std::list<AudioFrameWithData>& audio_frames);

private:
// State of an audio frame generator.
//
// The enum is unscoped, so the enumerators are referenced unqualified
// (e.g. `state_ == kTakingSamples`) by the implementation.
enum GeneratorState {
// The generator is expecting audio substreams and accepts samples via
// `AddSamples()`.
kTakingSamples,
// `Finalize()` has been called while taking samples; no more input samples
// are coming and flushing starts on the next output iteration.
kFinalizedCalled,
// The generator is flushing the samples that remain in the underlying
// encoders. Samples passed to `AddSamples()` in this state are ignored.
kFlushingRemaining,
};

// Mapping from Audio Element ID to audio frame metadata.
absl::flat_hash_map<DecodedUleb128,
iamf_tools_cli_proto::AudioFrameObuMetadata>
Expand Down Expand Up @@ -204,6 +214,7 @@ class AudioFrameGenerator {
const DemixingModule& demixing_module_;
ParametersManager& parameters_manager_;
GlobalTimingModule& global_timing_module_;
GeneratorState state_;

// Mutex to protect data accessed in different threads.
mutable absl::Mutex mutex_;
Expand Down
Loading

0 comments on commit 67c6cf2

Please sign in to comment.