Do not resize the sample inside ArrangeSamplesToRender()

- Audio element renderers know the frame size in advance, so they can hold a sample buffer that is large enough for a frame.
- In the case of rendering fewer samples than a frame, `num_valid_samples` will track the actual number.

PiperOrigin-RevId: 705103898
AOMediaCodec · Dec 12, 2024 · 4972e15 · 4972e15
1 parent 12e8aeb
commit 4972e15
Show file tree

Hide file tree

Showing 24 changed files with 449 additions and 280 deletions.
diff --git a/iamf/cli/renderer/BUILD b/iamf/cli/renderer/BUILD
@@ -16,11 +16,12 @@ cc_library(
         "//iamf/obu:audio_element",
         "//iamf/obu:mix_presentation",
         "//iamf/obu:types",
+        "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/log",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/strings",
-        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/types:span",
     ],
 )
 
@@ -59,14 +60,15 @@ cc_library(
         "//iamf/obu:audio_element",
         "//iamf/obu:mix_presentation",
         "//iamf/obu:types",
+        "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/base:no_destructor",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/log",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:string_view",
-        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/types:span",
     ],
 )
 
@@ -84,6 +86,7 @@ cc_library(
         "//iamf/obu:audio_element",
         "//iamf/obu:mix_presentation",
         "//iamf/obu:types",
+        "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/base:no_destructor",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/functional:any_invocable",
@@ -92,7 +95,6 @@ cc_library(
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:string_view",
-        "@com_google_absl//absl/synchronization",
         "@com_google_absl//absl/types:span",
     ],
 )
@@ -120,6 +122,7 @@ cc_library(
         "@com_google_absl//absl/status:statusor",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:string_view",
+        "@com_google_absl//absl/types:span",
     ],
 )
 

diff --git a/iamf/cli/renderer/audio_element_renderer_ambisonics_to_channel.cc b/iamf/cli/renderer/audio_element_renderer_ambisonics_to_channel.cc
@@ -22,7 +22,7 @@
 #include "absl/memory/memory.h"
 #include "absl/status/status.h"
 #include "absl/strings/str_cat.h"
-#include "absl/synchronization/mutex.h"
+#include "absl/types/span.h"
 #include "iamf/cli/audio_element_with_data.h"
 #include "iamf/cli/channel_label.h"
 #include "iamf/cli/proto/mix_presentation.pb.h"
@@ -135,7 +135,7 @@ AudioElementRendererAmbisonicsToChannel::CreateFromAmbisonicsConfig(
     const AmbisonicsConfig& ambisonics_config,
     const std::vector<DecodedUleb128>& audio_substream_ids,
     const SubstreamIdLabelsMap& substream_id_to_labels,
-    const Layout& playback_layout) {
+    const Layout& playback_layout, size_t num_samples_per_frame) {
   // Exclude unsupported modes first, and deal with only mono or projection
   // in the rest of the code.
   const auto mode = ambisonics_config.ambisonics_mode;
@@ -194,15 +194,14 @@ AudioElementRendererAmbisonicsToChannel::CreateFromAmbisonicsConfig(
   }
 
   return absl::WrapUnique(new AudioElementRendererAmbisonicsToChannel(
-      static_cast<size_t>(num_output_channels), ambisonics_config,
-      channel_labels, *gains));
+      static_cast<size_t>(num_output_channels), num_samples_per_frame,
+      ambisonics_config, channel_labels, *gains));
 }
 
 absl::Status AudioElementRendererAmbisonicsToChannel::RenderSamples(
-    const std::vector<std::vector<InternalSampleType>>& samples_to_render,
+    absl::Span<const std::vector<InternalSampleType>> samples_to_render,
     std::vector<InternalSampleType>& rendered_samples) {
   // Render the samples.
-  absl::MutexLock lock(&mutex_);
   RETURN_IF_NOT_OK(RenderAmbisonicsToLoudspeakers(
       samples_to_render, ambisonics_config_, gains_, rendered_samples));
 

diff --git a/iamf/cli/renderer/audio_element_renderer_ambisonics_to_channel.h b/iamf/cli/renderer/audio_element_renderer_ambisonics_to_channel.h
@@ -15,7 +15,9 @@
 #include <memory>
 #include <vector>
 
+#include "absl/base/thread_annotations.h"
 #include "absl/status/status.h"
+#include "absl/types/span.h"
 #include "iamf/cli/audio_element_with_data.h"
 #include "iamf/cli/channel_label.h"
 #include "iamf/cli/renderer/audio_element_renderer_base.h"
@@ -50,14 +52,15 @@ class AudioElementRendererAmbisonicsToChannel
    * \param audio_substream_ids Audio substream IDs.
    * \param substream_id_to_labels Mapping of substream IDs to labels.
    * \param playback_layout Layout of the audio element to be rendered.
+   * \param num_samples_per_frame Number of samples per frame.
    * \return Render to use or `nullptr` on failure.
    */
   static std::unique_ptr<AudioElementRendererAmbisonicsToChannel>
   CreateFromAmbisonicsConfig(
       const AmbisonicsConfig& ambisonics_config,
       const std::vector<DecodedUleb128>& audio_substream_ids,
       const SubstreamIdLabelsMap& substream_id_to_labels,
-      const Layout& playback_layout);
+      const Layout& playback_layout, size_t num_samples_per_frame);
 
   /*!\brief Destructor. */
   ~AudioElementRendererAmbisonicsToChannel() override = default;
@@ -73,10 +76,12 @@ class AudioElementRendererAmbisonicsToChannel
    * \param gains Gains matrix.
    */
   AudioElementRendererAmbisonicsToChannel(
-      size_t num_output_channels, const AmbisonicsConfig& ambisonics_config,
+      size_t num_output_channels, size_t num_samples_per_frame,
+      const AmbisonicsConfig& ambisonics_config,
       const std::vector<ChannelLabel::Label>& ordered_labels,
       const std::vector<std::vector<double>>& gains)
-      : AudioElementRendererBase(ordered_labels, num_output_channels),
+      : AudioElementRendererBase(ordered_labels, num_samples_per_frame,
+                                 num_output_channels),
         ambisonics_config_(ambisonics_config),
         gains_(gains) {}
 
@@ -87,8 +92,9 @@ class AudioElementRendererAmbisonicsToChannel
    * \return `absl::OkStatus()` on success. A specific status on failure.
    */
   absl::Status RenderSamples(
-      const std::vector<std::vector<InternalSampleType>>& samples_to_render,
-      std::vector<InternalSampleType>& rendered_samples) override;
+      absl::Span<const std::vector<InternalSampleType>> samples_to_render,
+      std::vector<InternalSampleType>& rendered_samples)
+      ABSL_SHARED_LOCKS_REQUIRED(mutex_) override;
 
   const AmbisonicsConfig ambisonics_config_;
 

diff --git a/iamf/cli/renderer/audio_element_renderer_base.cc b/iamf/cli/renderer/audio_element_renderer_base.cc
@@ -11,11 +11,13 @@
  */
 #include "iamf/cli/renderer/audio_element_renderer_base.h"
 
+#include <cstddef>
 #include <vector>
 
 #include "absl/log/check.h"
 #include "absl/status/status.h"
 #include "absl/synchronization/mutex.h"
+#include "absl/types/span.h"
 #include "iamf/cli/demixing_module.h"
 #include "iamf/cli/renderer/renderer_utils.h"
 #include "iamf/common/macros.h"
@@ -25,35 +27,37 @@ namespace iamf_tools {
 
 AudioElementRendererBase::~AudioElementRendererBase() {}
 
-absl::Status AudioElementRendererBase::Flush(
-    std::vector<InternalSampleType>& rendered_samples) {
+absl::StatusOr<size_t> AudioElementRendererBase::RenderLabeledFrame(
+    const LabeledFrame& labeled_frame) {
   absl::MutexLock lock(&mutex_);
-  rendered_samples.insert(rendered_samples.end(), rendered_samples_.begin(),
-                          rendered_samples_.end());
-  rendered_samples_.clear();
-  return absl::OkStatus();
-}
 
-absl::StatusOr<int> AudioElementRendererBase::RenderLabeledFrame(
-    const LabeledFrame& labeled_frame) {
-  std::vector<std::vector<InternalSampleType>> samples_to_render;
+  size_t num_valid_samples = 0;
   RETURN_IF_NOT_OK(iamf_tools::renderer_utils::ArrangeSamplesToRender(
-      labeled_frame, ordered_labels_, samples_to_render));
+      labeled_frame, ordered_labels_, samples_to_render_, num_valid_samples));
 
   // Render samples in concrete subclasses.
-  mutex_.Lock();
   current_labeled_frame_ = &labeled_frame;
-  mutex_.Unlock();
+
   std::vector<InternalSampleType> rendered_samples(
-      num_output_channels_ * samples_to_render.size(), 0);
-  RETURN_IF_NOT_OK(RenderSamples(samples_to_render, rendered_samples));
+      num_output_channels_ * num_valid_samples, 0);
+  RETURN_IF_NOT_OK(RenderSamples(
+      absl::MakeConstSpan(samples_to_render_).first(num_valid_samples),
+      rendered_samples));
 
   // Copy rendered samples to the output.
-  absl::MutexLock lock(&mutex_);
   rendered_samples_.insert(rendered_samples_.end(), rendered_samples.begin(),
                            rendered_samples.end());
 
-  return samples_to_render.size();
+  return num_valid_samples;
+}
+
+absl::Status AudioElementRendererBase::Flush(
+    std::vector<InternalSampleType>& rendered_samples) {
+  absl::MutexLock lock(&mutex_);
+  rendered_samples.insert(rendered_samples.end(), rendered_samples_.begin(),
+                          rendered_samples_.end());
+  rendered_samples_.clear();
+  return absl::OkStatus();
 }
 
 }  // namespace iamf_tools
diff --git a/iamf/cli/renderer/audio_element_renderer_base.h b/iamf/cli/renderer/audio_element_renderer_base.h
@@ -49,10 +49,10 @@ class AudioElementRendererBase {
   /*!\brief Renders samples stored in labeled frames.
    *
    * \param labeled_frame Labeled frame to render.
-   * \return Number of ticks which will be rendered. A specific status on
+   * \return Number of ticks that will be rendered. A specific status on
    *         failure.
    */
-  absl::StatusOr<int> RenderLabeledFrame(const LabeledFrame& labeled_frame);
+  absl::StatusOr<size_t> RenderLabeledFrame(const LabeledFrame& labeled_frame);
 
   /*!\brief Flushes finished audio frames.
    *
@@ -87,12 +87,18 @@ class AudioElementRendererBase {
   /*!\brief Constructor.
    *
    * \param ordered_labels Ordered list of channel labels to render.
+   * \param num_samples_per_frame Number of samples per frame.
    * \param num_output_channels Number of output channels.
    */
   AudioElementRendererBase(absl::Span<const ChannelLabel::Label> ordered_labels,
+                           const size_t num_samples_per_frame,
                            const size_t num_output_channels)
       : ordered_labels_(ordered_labels.begin(), ordered_labels.end()),
-        num_output_channels_(num_output_channels) {}
+        num_samples_per_frame_(num_samples_per_frame),
+        num_output_channels_(num_output_channels),
+        samples_to_render_(
+            num_samples_per_frame_,
+            std::vector<InternalSampleType>(ordered_labels_.size(), 0)) {}
 
   /*!\brief Renders samples.
    *
@@ -101,14 +107,18 @@ class AudioElementRendererBase {
    * \return `absl::OkStatus()` on success. A specific status on failure.
    */
   virtual absl::Status RenderSamples(
-      const std::vector<std::vector<InternalSampleType>>& samples_to_render,
-      std::vector<InternalSampleType>& rendered_samples) = 0;
+      absl::Span<const std::vector<InternalSampleType>> samples_to_render,
+      std::vector<InternalSampleType>& rendered_samples)
+      ABSL_SHARED_LOCKS_REQUIRED(mutex_) = 0;
 
   const std::vector<ChannelLabel::Label> ordered_labels_;
+  const size_t num_samples_per_frame_ = 0;
   const size_t num_output_channels_;
 
   // Mutex to guard simultaneous access to data members.
   mutable absl::Mutex mutex_;
+  std::vector<std::vector<InternalSampleType>> samples_to_render_
+      ABSL_GUARDED_BY(mutex_);
   std::vector<InternalSampleType> rendered_samples_ ABSL_GUARDED_BY(mutex_);
   bool is_finalized_ ABSL_GUARDED_BY(mutex_) = false;
   const LabeledFrame* current_labeled_frame_ ABSL_GUARDED_BY(mutex_) = nullptr;

diff --git a/iamf/cli/renderer/audio_element_renderer_channel_to_channel.cc b/iamf/cli/renderer/audio_element_renderer_channel_to_channel.cc
@@ -26,7 +26,7 @@
 #include "absl/status/status.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
-#include "absl/synchronization/mutex.h"
+#include "absl/types/span.h"
 #include "iamf/cli/channel_label.h"
 #include "iamf/cli/proto/mix_presentation.pb.h"
 #include "iamf/cli/proto/test_vector_metadata.pb.h"
@@ -117,7 +117,7 @@ absl::StatusOr<absl::string_view> LookupInputKeyFromLoudspeakerLayout(
 std::unique_ptr<AudioElementRendererChannelToChannel>
 AudioElementRendererChannelToChannel::CreateFromScalableChannelLayoutConfig(
     const ScalableChannelLayoutConfig& scalable_channel_layout_config,
-    const Layout& playback_layout) {
+    const Layout& playback_layout, size_t num_samples_per_frame) {
   if (scalable_channel_layout_config.channel_audio_layer_configs.empty()) {
     LOG(ERROR) << "No channel audio layer configs provided.";
     return nullptr;
@@ -162,14 +162,13 @@ AudioElementRendererChannelToChannel::CreateFromScalableChannelLayoutConfig(
 
   return absl::WrapUnique(new AudioElementRendererChannelToChannel(
       *input_key, *output_key, static_cast<size_t>(num_output_channels),
-      *ordered_labels, *gains));
+      num_samples_per_frame, *ordered_labels, *gains));
 }
 
 absl::Status AudioElementRendererChannelToChannel::RenderSamples(
-    const std::vector<std::vector<InternalSampleType>>& samples_to_render,
+    absl::Span<const std::vector<InternalSampleType>> samples_to_render,
     std::vector<InternalSampleType>& rendered_samples) {
   // Render the samples.
-  absl::MutexLock lock(&mutex_);
   RETURN_IF_NOT_OK(RenderChannelLayoutToLoudspeakers(
       samples_to_render, current_labeled_frame_->demixing_params,
       ordered_labels_, input_key_, output_key_, gains_, rendered_samples));

diff --git a/iamf/cli/renderer/audio_element_renderer_channel_to_channel.h b/iamf/cli/renderer/audio_element_renderer_channel_to_channel.h
@@ -16,8 +16,10 @@
 #include <string>
 #include <vector>
 
+#include "absl/base/thread_annotations.h"
 #include "absl/status/status.h"
 #include "absl/strings/string_view.h"
+#include "absl/types/span.h"
 #include "iamf/cli/channel_label.h"
 #include "iamf/cli/renderer/audio_element_renderer_base.h"
 #include "iamf/obu/audio_element.h"
@@ -49,12 +51,13 @@ class AudioElementRendererChannelToChannel : public AudioElementRendererBase {
    * \param scalable_channel_layout_config Config for the scalable channel
    *        layout.
    * \param playback_layout Layout of the audio element to be rendered.
+   * \param num_samples_per_frame Number of samples per frame.
    * \return Render to use or `nullptr` on failure.
    */
   static std::unique_ptr<AudioElementRendererChannelToChannel>
   CreateFromScalableChannelLayoutConfig(
       const ScalableChannelLayoutConfig& scalable_channel_layout_config,
-      const Layout& playback_layout);
+      const Layout& playback_layout, size_t num_samples_per_frame);
 
   /*!\brief Destructor. */
   ~AudioElementRendererChannelToChannel() override = default;
@@ -67,14 +70,16 @@ class AudioElementRendererChannelToChannel : public AudioElementRendererBase {
    * \param input_key Key representing the input loudspeaker layout.
    * \param output_key Key representing the output loudspeaker layout.
    * \param num_output_channels Number of output channels.
+   * \param num_samples_per_frame Number of samples per frame.
    * \param ordered_labels Ordered list of channel labels to render.
    */
   AudioElementRendererChannelToChannel(
       absl::string_view input_key, absl::string_view output_key,
-      size_t num_output_channels,
+      size_t num_output_channels, size_t num_samples_per_frame,
       const std::vector<ChannelLabel::Label>& ordered_labels,
       const std::vector<std::vector<double>>& gains)
-      : AudioElementRendererBase(ordered_labels, num_output_channels),
+      : AudioElementRendererBase(ordered_labels, num_samples_per_frame,
+                                 num_output_channels),
         input_key_(input_key),
         output_key_(output_key),
         gains_(gains) {}
@@ -86,8 +91,9 @@ class AudioElementRendererChannelToChannel : public AudioElementRendererBase {
    * \return `absl::OkStatus()` on success. A specific status on failure.
    */
   absl::Status RenderSamples(
-      const std::vector<std::vector<InternalSampleType>>& samples_to_render,
-      std::vector<InternalSampleType>& rendered_samples) override;
+      absl::Span<const std::vector<InternalSampleType>> samples_to_render,
+      std::vector<InternalSampleType>& rendered_samples)
+      ABSL_SHARED_LOCKS_REQUIRED(mutex_) override;
 
   const std::string input_key_;
   const std::string output_key_;