From bf4742620b37f91dcea2923dc139750f71048a2e Mon Sep 17 00:00:00 2001
From: CrystalP <crystalp@kodi.tv>
Date: Mon, 19 Aug 2024 10:42:35 -0400
Subject: [PATCH 1/2] [xaudio2] wait for queued audio data to play when
 draining the sink

- matches expectations of AE that the buffered data is played and not flushed
- fixes incorrect GetDelay values after Drain() due to difference between MS doc and actual behavior
of voice reset.
- fixes incorrect m_framesInBuffers value when restarting playback of a drained sink.
Confirmed by MS insider see https://stackoverflow.com/questions/65754955/how-to-reset-a-ixaudio2sourcevoices-samplesplayed-counter-after-flushing-sour
---
 xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp | 105 ++++++++++++++----
 xbmc/cores/AudioEngine/Sinks/AESinkXAudio.h   |  34 +++++-
 xbmc/cores/AudioEngine/Utils/AEUtil.cpp       |  22 ++++
 xbmc/cores/AudioEngine/Utils/AEUtil.h         |   4 +
 4 files changed, 140 insertions(+), 25 deletions(-)
diff --git a/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp b/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp
index 473495c16876d..3e399442928e8 100644
--- a/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp
+++ b/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp
@@ -239,18 +239,8 @@ unsigned int CAESinkXAudio::AddPackets(uint8_t **data, unsigned int frames, unsi
   LARGE_INTEGER timerStop;
   LARGE_INTEGER timerFreq;
 #endif
-  size_t dataLenght = frames * m_format.m_frameSize;
 
-  struct buffer_ctx *ctx = new buffer_ctx;
-  ctx->data = new uint8_t[dataLenght];
-  ctx->frames = frames;
-  ctx->sink = this;
-  memcpy(ctx->data, data[0] + offset * m_format.m_frameSize, dataLenght);
-
-  XAUDIO2_BUFFER xbuffer = {};
-  xbuffer.AudioBytes = dataLenght;
-  xbuffer.pAudioData = ctx->data;
-  xbuffer.pContext = ctx;
+  XAUDIO2_BUFFER xbuffer = BuildXAudio2Buffer(data, frames, offset);
 
   if (!m_running) //first time called, pre-fill buffer then start voice
   {
@@ -259,7 +249,7 @@ unsigned int CAESinkXAudio::AddPackets(uint8_t **data, unsigned int frames, unsi
     if (FAILED(hr))
     {
       CLog::LogF(LOGERROR, "voice submit buffer failed due to {}", WASAPIErrToStr(hr));
-      delete ctx;
+      delete xbuffer.pContext;
       return 0;
     }
     hr = m_sourceVoice->Start(0, XAUDIO2_COMMIT_NOW);
@@ -267,7 +257,7 @@ unsigned int CAESinkXAudio::AddPackets(uint8_t **data, unsigned int frames, unsi
     {
       CLog::LogF(LOGERROR, "voice start failed due to {}", WASAPIErrToStr(hr));
       m_isDirty = true; //flag new device or re-init needed
-      delete ctx;
+      delete xbuffer.pContext;
       return INT_MAX;
     }
     m_sinkFrames += frames;
@@ -292,7 +282,7 @@ unsigned int CAESinkXAudio::AddPackets(uint8_t **data, unsigned int frames, unsi
     if (eventAudioCallback != WAIT_OBJECT_0)
     {
       CLog::LogF(LOGERROR, "voice buffer timed out");
-      delete ctx;
+      delete xbuffer.pContext;
       return INT_MAX;
     }
   }
@@ -319,7 +309,7 @@ unsigned int CAESinkXAudio::AddPackets(uint8_t **data, unsigned int frames, unsi
     #ifdef _DEBUG
     CLog::LogF(LOGERROR, "submiting buffer failed due to {}", WASAPIErrToStr(hr));
 #endif
-    delete ctx;
+    delete xbuffer.pContext;
     return INT_MAX;
   }
 
@@ -843,23 +833,49 @@ void CAESinkXAudio::Drain()
   if(!m_sourceVoice)
     return;
 
-  AEDelayStatus status;
-  GetDelay(status);
-
-  KODI::TIME::Sleep(std::chrono::milliseconds(static_cast<int>(status.GetDelay() * 500)));
-
   if (m_running)
   {
     try
     {
+      // Contrary to MS doc, the voice must play a buffer with end of stream flag for the voice
+      // SamplesPlayed counter to be reset.
+      // Per MS doc, Discontinuity() may not take effect until after the entire buffer queue is
+      // consumed, which wouldn't invoke the callback or reset the voice stats.
+      // Solution: submit a 1 sample buffer with end of stream flag and wait for StreamEnd callback
+      // The StreamEnd event is manual reset so that it cannot be missed even if raised before this
+      // code starts waiting for it
+
+      AddEndOfStreamPacket();
+
+      constexpr uint32_t waitSafety = 100; // extra ms wait in case of scheduler issue
+      DWORD waitRc =
+          WaitForSingleObject(m_voiceCallback.m_StreamEndEvent,
+                              m_framesInBuffers * 1000 / m_format.m_sampleRate + waitSafety);
+
+      if (waitRc != WAIT_OBJECT_0)
+      {
+        if (waitRc == WAIT_FAILED)
+        {
+          //! @todo use FormatMessage for a human readable error message
+          CLog::LogF(LOGERROR,
+                     "error WAIT_FAILED while waiting for queued buffers to drain. GetLastError:{}",
+                     GetLastError());
+        }
+        else
+        {
+          CLog::LogF(LOGERROR, "error {} while waiting for queued buffers to drain.", waitRc);
+        }
+      }
+
       m_sourceVoice->Stop();
-      m_sourceVoice->FlushSourceBuffers();
+      ResetEvent(m_voiceCallback.m_StreamEndEvent);
+
       m_sinkFrames = 0;
       m_framesInBuffers = 0;
     }
     catch (...)
     {
-      CLog::Log(LOGDEBUG, "{}: Invalidated source voice - Releasing", __FUNCTION__);
+      CLog::Log(LOGERROR, "{}: Invalidated source voice - Releasing", __FUNCTION__);
     }
   }
   m_running = false;
@@ -887,3 +903,48 @@ bool CAESinkXAudio::IsUSBDevice()
 #endif
   return false;
 }
+
+bool CAESinkXAudio::AddEndOfStreamPacket()
+{
+  constexpr unsigned int frames{1};
+
+  XAUDIO2_BUFFER xbuffer = BuildXAudio2Buffer(nullptr, frames, 0);
+  xbuffer.Flags = XAUDIO2_END_OF_STREAM;
+
+  HRESULT hr = m_sourceVoice->SubmitSourceBuffer(&xbuffer);
+
+  if (hr != S_OK)
+  {
+    CLog::LogF(LOGERROR, "SubmitSourceBuffer failed due to {:X}", hr);
+    delete xbuffer.pContext;
+    return false;
+  }
+
+  m_sinkFrames += frames;
+  m_framesInBuffers += frames;
+  return true;
+}
+
+XAUDIO2_BUFFER CAESinkXAudio::BuildXAudio2Buffer(uint8_t** data,
+                                                 unsigned int frames,
+                                                 unsigned int offset)
+{
+  const unsigned int dataLength{frames * m_format.m_frameSize};
+
+  struct buffer_ctx* ctx = new buffer_ctx;
+  ctx->data = new uint8_t[dataLength];
+  ctx->frames = frames;
+  ctx->sink = this;
+
+  if (data)
+    memcpy(ctx->data, data[0] + offset * m_format.m_frameSize, dataLength);
+  else
+    CAEUtil::GenerateSilence(m_format.m_dataFormat, m_format.m_frameSize, ctx->data, frames);
+
+  XAUDIO2_BUFFER xbuffer{};
+  xbuffer.AudioBytes = dataLength;
+  xbuffer.pAudioData = ctx->data;
+  xbuffer.pContext = ctx;
+
+  return xbuffer;
+}
diff --git a/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.h b/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.h
index 6af787273272e..86158ca925b29 100644
--- a/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.h
+++ b/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.h
@@ -66,14 +66,27 @@ class CAESinkXAudio : public IAESink
         mBufferEnd.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE));
         if (!mBufferEnd)
         {
-          throw std::exception("CreateEvent");
+          throw std::exception("CreateEventEx BufferEnd");
         }
+        if (NULL == (m_StreamEndEvent = CreateEventEx(nullptr, nullptr, CREATE_EVENT_MANUAL_RESET,
+                                                      EVENT_MODIFY_STATE | SYNCHRONIZE)))
+        {
+          throw std::exception("CreateEventEx StreamEnd");
+        }
+      }
+      virtual ~VoiceCallback()
+      {
+        if (m_StreamEndEvent != NULL)
+          CloseHandle(m_StreamEndEvent);
       }
-      virtual ~VoiceCallback() { }
 
       STDMETHOD_(void, OnVoiceProcessingPassStart) (UINT32) override {}
       STDMETHOD_(void, OnVoiceProcessingPassEnd)() override {}
-      STDMETHOD_(void, OnStreamEnd)() override {}
+      STDMETHOD_(void, OnStreamEnd)() override
+      {
+        if (m_StreamEndEvent != NULL)
+          SetEvent(m_StreamEndEvent);
+      }
       STDMETHOD_(void, OnBufferStart)(void*) override {}
       STDMETHOD_(void, OnBufferEnd)(void* context) override
       {
@@ -95,10 +108,25 @@ class CAESinkXAudio : public IAESink
         }
       };
       std::unique_ptr<void, handle_closer> mBufferEnd;
+      HANDLE m_StreamEndEvent{0};
     };
 
     bool InitializeInternal(std::string deviceId, AEAudioFormat &format);
     bool IsUSBDevice();
+    /*!
+     * \brief Add a 1 frame long buffer with the end of stream flag to the voice.
+     * \return true for success, false for failure
+     */
+    bool AddEndOfStreamPacket();
+    /*!
+     * \brief Create a XAUDIO2_BUFFER with a struct buffer_ctx in pContext member, which must
+     * be deleted either manually or by XAudio2 BufferEnd callbak to avoid memory leaks.
+     * \param data data of the frames to copy. if null, the new buffer will contain silence.
+     * \param frames number of frames
+     * \param offset offset from the start in the data buffer.
+     * \return the new buffer
+     */
+    XAUDIO2_BUFFER BuildXAudio2Buffer(uint8_t** data, unsigned int frames, unsigned int offset);
 
     Microsoft::WRL::ComPtr<IXAudio2> m_xAudio2;
     IXAudio2MasteringVoice* m_masterVoice;
diff --git a/xbmc/cores/AudioEngine/Utils/AEUtil.cpp b/xbmc/cores/AudioEngine/Utils/AEUtil.cpp
index 81a5f1995a291..3932760575d95 100644
--- a/xbmc/cores/AudioEngine/Utils/AEUtil.cpp
+++ b/xbmc/cores/AudioEngine/Utils/AEUtil.cpp
@@ -607,3 +607,25 @@ int CAEUtil::GetAVChannelIndex(enum AEChannel aechannel, uint64_t layout)
   av_channel_layout_uninit(&ch_layout);
   return idx;
 }
+
+void CAEUtil::GenerateSilence(AEDataFormat format,
+                              unsigned int frameSize,
+                              void* buffer,
+                              unsigned int frames)
+
+{
+  const unsigned int dataLength{frames * frameSize};
+
+  switch (format)
+  {
+    case AV_SAMPLE_FMT_U8:
+    case AV_SAMPLE_FMT_U8P:
+      memset(buffer, 0x80, dataLength);
+      break;
+
+    default:
+      // binary representation of 0 in all other formats regardless of number of bits per sample
+      // is a concatenation of 0 bytes.
+      memset(buffer, 0, dataLength);
+  }
+}
diff --git a/xbmc/cores/AudioEngine/Utils/AEUtil.h b/xbmc/cores/AudioEngine/Utils/AEUtil.h
index 65a846e04402d..e47137ab95af7 100644
--- a/xbmc/cores/AudioEngine/Utils/AEUtil.h
+++ b/xbmc/cores/AudioEngine/Utils/AEUtil.h
@@ -178,4 +178,8 @@ class CAEUtil
   static uint64_t GetAVChannelMask(enum AEChannel aechannel);
   static enum AVChannel GetAVChannel(enum AEChannel aechannel);
   static int GetAVChannelIndex(enum AEChannel aechannel, uint64_t layout);
+  static void GenerateSilence(AEDataFormat format,
+                              unsigned int frameSize,
+                              void* buffer,
+                              unsigned int frames);
 };

From b3b7410f68aa3177924068951c3bf4e9cf731203 Mon Sep 17 00:00:00 2001
From: CrystalP <crystalp@kodi.tv>
Date: Sun, 25 Aug 2024 12:07:36 -0400
Subject: [PATCH 2/2] [xaudio2] wait for buffer release before destroying the
 source voice.

Stop and FlushSourceBuffers are async and callbacks on the queued buffers don't seem to be called when destroying the voice, with a leak of the buffers' memory.
---
 xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp b/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp
index 3e399442928e8..ab7f46adce5cc 100644
--- a/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp
+++ b/xbmc/cores/AudioEngine/Sinks/AESinkXAudio.cpp
@@ -163,12 +163,26 @@ void CAESinkXAudio::Deinitialize()
     {
       m_sourceVoice->Stop();
       m_sourceVoice->FlushSourceBuffers();
+
+      // Stop and FlushSourceBuffers are async, wait for queued buffers to be released by XAudio2.
+      // callbacks don't seem to be called otherwise, with memory leakage.
+      XAUDIO2_VOICE_STATE state{};
+      do
+      {
+        if (WAIT_OBJECT_0 != WaitForSingleObject(m_voiceCallback.mBufferEnd.get(), 500))
+        {
+          CLog::LogF(LOGERROR, "timeout waiting for buffer flush - possible buffer memory leak");
+          break;
+        }
+        m_sourceVoice->GetState(&state, 0);
+      } while (state.BuffersQueued > 0);
+
       m_sinkFrames = 0;
       m_framesInBuffers = 0;
     }
     catch (...)
     {
-      CLog::Log(LOGDEBUG, "{}: Invalidated source voice - Releasing", __FUNCTION__);
+      CLog::Log(LOGERROR, "{}: Invalidated source voice - Releasing", __FUNCTION__);
     }
   }
   m_running = false;