diff --git a/Source/AvfEncoder.mm b/Source/AvfEncoder.mm index f1a7c97..dc60308 100755 --- a/Source/AvfEncoder.mm +++ b/Source/AvfEncoder.mm @@ -721,10 +721,17 @@ void NaluToAnnexB(std::span Data,size_t LengthSize,std::function()); - OutputPacket.mInputMeta = FrameMeta; + OutputPacket.mEncodeMeta = FrameMeta; std::copy( Data.begin(), Data.end(), std::back_inserter(*OutputPacket.mData) ); OnOutputPacket(OutputPacket); } diff --git a/Source/TDecoder.h b/Source/TDecoder.h index bdd1287..2ba3d22 100644 --- a/Source/TDecoder.h +++ b/Source/TDecoder.h @@ -34,7 +34,7 @@ namespace PopH264 class PacketMeta_t; class TInputNaluPacket; typedef uint32_t FrameNumber_t; // this could be an index, or it could be time. Should essentially be a frame identifier - constexpr FrameNumber_t FrameNumberInvalid = 0; + constexpr FrameNumber_t FrameNumberInvalid = 0; // gr; should probably switch to uint32_t::max // just shorthand names for cleaner constructors typedef std::function OnDecodedFrame_t; diff --git a/Source/TEncoder.cpp b/Source/TEncoder.cpp index 838ff15..9ae3b49 100644 --- a/Source/TEncoder.cpp +++ b/Source/TEncoder.cpp @@ -16,7 +16,7 @@ void PopH264::TEncoder::OnOutputPacket(TPacket& Packet) auto OutputPacket = [&](std::span Data) { TPacket NextPacket; - NextPacket.mInputMeta = Packet.mInputMeta; + NextPacket.mEncodeMeta = Packet.mEncodeMeta; NextPacket.mData.reset(new std::vector()); std::copy( Data.begin(), Data.end(), std::back_inserter( *NextPacket.mData ) ); mOnOutputPacket(NextPacket); @@ -45,31 +45,29 @@ void PopH264::TEncoder::OnFinished() -size_t PopH264::TEncoder::PushFrameMeta(const std::string& Meta) +PopH264::FrameNumber_t PopH264::TEncoder::PushFrameMeta(const std::string& Meta) { - TEncoderFrameMeta FrameMeta; - FrameMeta.mFrameNumber = mFrameCount; - FrameMeta.mMeta = Meta; - mFrameMetas.PushBack(FrameMeta); + auto NewFrameNumber = mFrameCount; mFrameCount++; - return FrameMeta.mFrameNumber; + + TEncoderFrameMeta FrameMeta; + FrameMeta.mInputMeta = Meta; + 
FrameMeta.mPushTime = EventTime_t::clock::now(); + mFrameMetas.insert({ NewFrameNumber, FrameMeta }); + + return NewFrameNumber; } -std::string PopH264::TEncoder::GetFrameMeta(size_t FrameNumber) +PopH264::TEncoderFrameMeta PopH264::TEncoder::GetFrameMeta(FrameNumber_t FrameNumber) { - for (auto i = 0; i < mFrameMetas.GetSize(); i++) + auto Match = mFrameMetas.find(FrameNumber); + if ( Match == mFrameMetas.end() ) { - auto& FrameMeta = mFrameMetas[i]; - if (FrameMeta.mFrameNumber != FrameNumber) - continue; - - // gr: for now, sometimes we get multiple packets for one frame, so we can't discard them all - //auto Meta = mFrameMetas.PopAt(i); - auto Meta = mFrameMetas[i]; - return Meta.mMeta; + std::stringstream Error; + Error << "No frame meta matching frame number " << FrameNumber; + throw std::runtime_error(Error.str()); } - - std::stringstream Error; - Error << "No frame meta matching frame number " << FrameNumber; - throw std::runtime_error(Error.str()); + + auto& Meta = Match->second; + return Meta; } diff --git a/Source/TEncoder.h b/Source/TEncoder.h index 0b83c87..bc2cfe9 100644 --- a/Source/TEncoder.h +++ b/Source/TEncoder.h @@ -5,6 +5,7 @@ #include "SoyTime.h" #include #include +#include "TDecoderInstance.h" // EventTime_t class SoyPixelsImpl; @@ -16,28 +17,49 @@ namespace PopH264 class TEncoderFrameMeta; } + +// as packets are popped asynchronously to input, we need to keep meta +// associated with frames. We use an arbitrary number for each frame (presentation +// time); we can also store other encoder per-frame meta here (timing) +class PopH264::TEncoderFrameMeta +{ +public: + std::string mInputMeta; // meta provided by user to keep with frame + EventTime_t mPushTime = EventTime_t::min(); + EventTime_t mEncodedTime = EventTime_t::min(); + + // write encoded time if it hasn't been set + void OnEncoded() + { + // already set + if ( mEncodedTime != EventTime_t::min() ) + return; + mEncodedTime = EventTime_t::clock::now(); + } + + std::chrono::milliseconds 
GetEncodeDurationMs() + { + if ( mPushTime == EventTime_t::min() || mEncodedTime == EventTime_t::min() ) + return std::chrono::milliseconds(0); + auto Delta = mEncodedTime - mPushTime; + return std::chrono::duration_cast( Delta ); + } +}; + + class PopH264::TPacket { public: std::span GetData() { return mData ? std::span( mData->data(), mData->size() ) : std::span(); } + std::string_view GetInputMeta() { return mEncodeMeta.mInputMeta; } public: std::shared_ptr> mData; - std::string mInputMeta; // original input meta json + TEncoderFrameMeta mEncodeMeta; // includes original input meta bool mEndOfStream = false; std::string mError; }; -// as packets are popped asynchronously to input, we need to keep meta -// associated with frames we use an arbritry number for frame (presentation -// time) -class PopH264::TEncoderFrameMeta -{ -public: - size_t mFrameNumber = 0; - std::string mMeta; -}; - class PopH264::TEncoder { @@ -57,12 +79,12 @@ class PopH264::TEncoder void OnFinished(); // returns frame number used as PTS and stores meta - size_t PushFrameMeta(const std::string& Meta); + FrameNumber_t PushFrameMeta(const std::string& Meta); // gr: SOME frames will yield multiple packets (eg SPS & PPS) so some we need to keep around... 
// gotta work out a way to figure out what we can discard - std::string GetFrameMeta(size_t FrameNumber); + TEncoderFrameMeta GetFrameMeta(FrameNumber_t FrameNumber); - bool HasEncodingFinished() { return mHasOutputEndOfStream || mHasOutputError; } + bool HasEncodingFinished() { return mHasOutputEndOfStream || mHasOutputError; } private: std::function mOnOutputPacket; @@ -70,6 +92,6 @@ class PopH264::TEncoder bool mHasOutputError = false; std::mutex mFrameMetaLock; - size_t mFrameCount = 0; - Array mFrameMetas; + FrameNumber_t mFrameCount = 0; + std::unordered_map mFrameMetas; }; diff --git a/Source/TEncoderInstance.cpp b/Source/TEncoderInstance.cpp index 3a40cda..7d6a766 100755 --- a/Source/TEncoderInstance.cpp +++ b/Source/TEncoderInstance.cpp @@ -309,7 +309,7 @@ void PopH264::TEncoderInstance::PeekPacket(json11::Json::object& Meta) auto& Packet0 = mPackets[0]; Packet = Packet0; } - + // write meta auto DataSize = Packet.mData ? Packet.mData->size() : 0; Meta[POPH264_ENCODEDFRAME_DATASIZE] = static_cast(DataSize); @@ -320,12 +320,17 @@ void PopH264::TEncoderInstance::PeekPacket(json11::Json::object& Meta) if ( Packet.mEndOfStream ) Meta[POPH264_ENCODEDFRAME_ENDOFSTREAM] = Packet.mEndOfStream; - if ( !Packet.mInputMeta.empty() ) + auto EncodeDuration = Packet.mEncodeMeta.GetEncodeDurationMs(); + if ( EncodeDuration.count() != 0 ) + Meta[POPH264_ENCODEDFRAME_ENCODEDURATIONMS] = static_cast(EncodeDuration.count()); + + auto InputMeta = Packet.GetInputMeta(); + if ( !InputMeta.empty() ) { using namespace json11; // we're expecting json, so make it an object std::string ParseError; - auto MetaObject = Json::parse( Packet.mInputMeta, ParseError ); + auto MetaObject = Json::parse( std::string(InputMeta), ParseError ); // this shouldn't come up, as we've already parsed it on input, but just in case if (!ParseError.empty()) @@ -382,7 +387,10 @@ void PopH264::TEncoderInstance::OnNewPacket(TPacket& Packet) } { - std::lock_guard Lock(mPacketsLock); + std::scoped_lock 
Lock(mPacketsLock); + + // gr: if a packet encode duration wasn't written, write one here with a warning, so the low level encoder is reminded to add it + Packet.mEncodeMeta.OnEncoded(); mPackets.PushBack(Packet); } diff --git a/Source_CSharp/PopH264.cs b/Source_CSharp/PopH264.cs index 3acc459..3f48102 100644 --- a/Source_CSharp/PopH264.cs +++ b/Source_CSharp/PopH264.cs @@ -585,7 +585,7 @@ public struct PoppedFrameMeta public int DataSize; // bytes public bool EndOfStream; public EncodedFrameMeta Meta; // all the meta sent to PopH264_EncoderPushFrame - public int? EncodeDurationMs; // time it took to encode + public int EncodeDurationMs; // time it took to encode public int? DelayDurationMs; // time spent in queue before encoding (lag) public int OutputQueueCount; // time spent in queue before encoding (lag) public string EncoderName; // low level encoder name