For thread devices throttle the response to BlockQuery by an interval… (

#37533) * For thread devices throttle the response to BlockQuery by an interval specified in kBdxThrottleIntervalInMsecs so that we don't overload the network with frequent BDX messages * Restyle * Apply suggestions from code review Co-authored-by: Boris Zbarsky <[email protected]> * Add user defaults support to allow the BDX throttle interval for thread device to be configurable - Address review comments --------- Co-authored-by: Boris Zbarsky <[email protected]>
project-chip · Feb 13, 2025 · ecb3c14 · ecb3c14
1 parent 92f9f0b
commit ecb3c14
Show file tree

Hide file tree

Showing 6 changed files with 96 additions and 13 deletions.
diff --git a/src/darwin/Framework/CHIP/MTRDeviceController_Concrete.h b/src/darwin/Framework/CHIP/MTRDeviceController_Concrete.h
@@ -223,6 +223,11 @@ NS_ASSUME_NONNULL_BEGIN
                             queue:(dispatch_queue_t)queue
                        completion:(void (^)(NSURL * _Nullable url, NSError * _Nullable error))completion;
 
+/**
+ * Returns YES if the MTRDevice corrresponding to the given node ID is known to be a thread device, NO otherwise.
+ */
+- (BOOL)definitelyUsesThreadForDevice:(chip::NodeId)nodeID;
+
 /**
  * Will return chip::kUndefinedFabricIndex if we do not have a fabric index.
  */

diff --git a/src/darwin/Framework/CHIP/MTRDeviceController_Concrete.mm b/src/darwin/Framework/CHIP/MTRDeviceController_Concrete.mm
@@ -1429,33 +1429,42 @@ - (BOOL)checkIsRunning:(NSError * __autoreleasing *)error
     return NO;
 }
 
-- (void)getSessionForNode:(chip::NodeId)nodeID completion:(MTRInternalDeviceConnectionCallback)completion
+- (BOOL)definitelyUsesThreadForDevice:(chip::NodeId)nodeID
 {
-    // TODO: Figure out whether the synchronization here makes sense.  What
-    // happens if this call happens mid-suspend or mid-resume?
-    if (self.suspended) {
-        MTR_LOG_ERROR("%@ suspended: can't get session for node %016llX-%016llx (%llu)", self, self.compressedFabricID.unsignedLongLongValue, nodeID, nodeID);
-        // TODO: Can we do a better error here?
-        completion(nullptr, chip::NullOptional, [MTRError errorForCHIPErrorCode:CHIP_ERROR_INCORRECT_STATE], nil);
-        return;
+    if (!chip::IsOperationalNodeId(nodeID)) {
+        return NO;
     }
 
-    // Get the corresponding MTRDevice object to determine if the case/subscription pool is to be used
+    // Get the corresponding MTRDevice object for the node id
     MTRDevice * device = [self deviceForNodeID:@(nodeID)];
 
     // TODO: Can we not just assume this isKindOfClass test is true?  Would be
     // really nice if we had compile-time checking for this somehow...
     if (![device isKindOfClass:MTRDevice_Concrete.class]) {
         MTR_LOG_ERROR("%@ somehow has %@ instead of MTRDevice_Concrete for node ID 0x%016llX (%llu)", self, device, nodeID, nodeID);
-        completion(nullptr, chip::NullOptional, [MTRError errorForCHIPErrorCode:CHIP_ERROR_INCORRECT_STATE], nil);
-        return;
+        return NO;
     }
 
     auto * concreteDevice = static_cast<MTRDevice_Concrete *>(device);
 
+    BOOL usesThread = [concreteDevice deviceUsesThread];
+    return usesThread;
+}
+
+- (void)getSessionForNode:(chip::NodeId)nodeID completion:(MTRInternalDeviceConnectionCallback)completion
+{
+    // TODO: Figure out whether the synchronization here makes sense.  What
+    // happens if this call happens mid-suspend or mid-resume?
+    if (self.suspended) {
+        MTR_LOG_ERROR("%@ suspended: can't get session for node %016llX-%016llx (%llu)", self, self.compressedFabricID.unsignedLongLongValue, nodeID, nodeID);
+        // TODO: Can we do a better error here?
+        completion(nullptr, chip::NullOptional, [MTRError errorForCHIPErrorCode:CHIP_ERROR_INCORRECT_STATE], nil);
+        return;
+    }
+
     // In the case that this device is known to use thread, queue this with subscription attempts as well, to
     // help with throttling Thread traffic.
-    if ([concreteDevice deviceUsesThread]) {
+    if ([self definitelyUsesThreadForDevice:nodeID]) {
         MTRAsyncWorkItem * workItem = [[MTRAsyncWorkItem alloc] initWithQueue:dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0)];
         [workItem setReadyHandler:^(id _Nonnull context, NSInteger retryCount, MTRAsyncWorkCompletionBlock _Nonnull workItemCompletion) {
             MTRInternalDeviceConnectionCallback completionWrapper = ^(chip::Messaging::ExchangeManager * _Nullable exchangeManager,

diff --git a/src/darwin/Framework/CHIP/MTROTAImageTransferHandler.h b/src/darwin/Framework/CHIP/MTROTAImageTransferHandler.h
@@ -74,6 +74,10 @@ class MTROTAImageTransferHandler : public chip::bdx::AsyncResponder {
     MTROTAImageTransferHandlerWrapper * mOTAImageTransferHandlerWrapper;
 
     bool mNeedToCallTransferSessionEnd = false;
+
+    bool mIsPeerNodeAKnownThreadDevice = NO;
+
+    chip::System::Clock::Milliseconds32 mBDXThrottleIntervalForThreadDevicesInMSecs;
 };
 
 NS_ASSUME_NONNULL_END
diff --git a/src/darwin/Framework/CHIP/MTROTAImageTransferHandler.mm b/src/darwin/Framework/CHIP/MTROTAImageTransferHandler.mm
@@ -22,6 +22,9 @@
 #import "MTROTAUnsolicitedBDXMessageHandler.h"
 #import "NSStringSpanConversion.h"
 
+#include <chrono>
+#include <platform/Darwin/UserDefaults.h>
+
 using namespace chip;
 using namespace chip::bdx;
 using namespace chip::app;
@@ -31,6 +34,14 @@
 // Timeout for the BDX transfer session. The OTA Spec mandates this should be >= 5 minutes.
 constexpr System::Clock::Timeout kBdxTimeout = System::Clock::Seconds16(5 * 60);
 
+// For thread devices, we may want to throttle sending Blocks in response to BlockQuery messages
+// to avoid spamming the network with too many BDX messages.  For now, default to the old 50ms
+// polling interval we used to have.
+// To override the throttle interval,
+// use ` defaults write <domain> BDXThrottleIntervalForThreadDevicesInMSecs <throttleIntervalinMsecs>`
+// See UserDefaults.mm for details.
+constexpr auto kBdxThrottleDefaultIntervalInMsecs = System::Clock::Milliseconds32(50);
+
 constexpr bdx::TransferRole kBdxRole = bdx::TransferRole::kSender;
 
 // An ARC-managed object that lets us do weak references to a MTROTAImageTransferHandler
@@ -78,6 +89,11 @@ - (instancetype)initWithMTROTAImageTransferHandler:(MTROTAImageTransferHandler *
     VerifyOrReturnError(mDelegate != nil, CHIP_ERROR_INCORRECT_STATE);
     VerifyOrReturnError(mDelegateNotificationQueue != nil, CHIP_ERROR_INCORRECT_STATE);
 
+    mIsPeerNodeAKnownThreadDevice = [controller definitelyUsesThreadForDevice:mPeer.GetNodeId()];
+
+    uint16_t throttleInterval = chip::Platform::GetUserDefaultBDXThrottleIntervalForThreadInMSecs().value_or(kBdxThrottleDefaultIntervalInMsecs.count());
+    mBDXThrottleIntervalForThreadDevicesInMSecs = System::Clock::Milliseconds32(throttleInterval);
+
     BitFlags<bdx::TransferControlFlags> flags(bdx::TransferControlFlags::kReceiverDrive);
 
     return AsyncResponder::Init(mSystemLayer, exchangeCtx, kBdxRole, flags, kMaxBdxBlockSize, kBdxTimeout);
@@ -233,6 +249,11 @@ - (instancetype)initWithMTROTAImageTransferHandler:(MTROTAImageTransferHandler *
 {
     assertChipStackLockedByCurrentThread();
 
+    // For thread devices, we need to throttle sending the response to BlockQuery, if the query is processed, before kBdxThrottleDefaultIntervalInMsecs
+    // has elapsed to prevent the BDX messages spamming up the network. Get the timestamp at which we start processing the BlockQuery message.
+
+    auto startBlockQueryHandlingTimestamp = System::SystemClock().GetMonotonicTimestamp();
+
     auto blockSize = @(mTransfer.GetTransferBlockSize());
     auto blockIndex = @(mTransfer.GetNextBlockNum());
 
@@ -241,7 +262,7 @@ - (instancetype)initWithMTROTAImageTransferHandler:(MTROTAImageTransferHandler *
 
     MTROTAImageTransferHandlerWrapper * __weak weakWrapper = mOTAImageTransferHandlerWrapper;
 
-    auto completionHandler = ^(NSData * _Nullable data, BOOL isEOF) {
+    auto respondWithBlock = ^(NSData * _Nullable data, BOOL isEOF) {
         [controller
             asyncDispatchToMatterQueue:^() {
                 assertChipStackLockedByCurrentThread();
@@ -283,6 +304,30 @@ - (instancetype)initWithMTROTAImageTransferHandler:(MTROTAImageTransferHandler *
         return CHIP_ERROR_INCORRECT_STATE;
     }
 
+    void (^completionHandler)(NSData * _Nullable data, BOOL isEOF) = nil;
+
+    // If the peer node is a Thread device, check how much time has elapsed since we started processing the BlockQuery.
+    // If the time elapsed is greater than kBdxThrottleDefaultIntervalInMsecs, call the completion handler to respond with a Block right away.
+    // If time elapsed is less than kBdxThrottleDefaultIntervalInMsecs, dispatch the completion handler to respond with a Block after kBdxThrottleDefaultIntervalInMsecs has elapsed.
+
+    if (mIsPeerNodeAKnownThreadDevice) {
+        completionHandler = ^(NSData * _Nullable data, BOOL isEOF) {
+            auto timeElapsed = System::SystemClock().GetMonotonicTimestamp() - startBlockQueryHandlingTimestamp;
+
+            if (timeElapsed >= mBDXThrottleIntervalForThreadDevicesInMSecs) {
+                respondWithBlock(data, isEOF);
+            } else {
+                auto timeRemaining = std::chrono::duration_cast<std::chrono::nanoseconds>(mBDXThrottleIntervalForThreadDevicesInMSecs - timeElapsed);
+                dispatch_time_t time = dispatch_time(DISPATCH_TIME_NOW, (int64_t) (timeRemaining.count()));
+                dispatch_after(time, delegateQueue, ^{
+                    respondWithBlock(data, isEOF);
+                });
+            }
+        };
+    } else {
+        completionHandler = respondWithBlock;
+    }
+
     dispatch_async(delegateQueue, ^{
         if ([strongDelegate respondsToSelector:@selector(handleBDXQueryForNodeID:
                                                                       controller:blockSize:blockIndex:bytesToSkip:completion:)]) {

diff --git a/src/platform/Darwin/UserDefaults.h b/src/platform/Darwin/UserDefaults.h
@@ -24,5 +24,7 @@ namespace Platform {
 
 std::optional<uint16_t> GetUserDefaultDnssdSRPTimeoutInMSecs();
 
+std::optional<uint16_t> GetUserDefaultBDXThrottleIntervalForThreadInMSecs();
+
 } // namespace Platform
 } // namespace chip
diff --git a/src/platform/Darwin/UserDefaults.mm b/src/platform/Darwin/UserDefaults.mm
@@ -23,6 +23,8 @@
 
 static NSString * const kSRPTimeoutInMsecsUserDefaultKey = @"SRPTimeoutInMSecsOverride";
 
+static NSString * const kBDXThrottleIntervalInMsecsUserDefaultKey = @"BDXThrottleIntervalForThreadDevicesInMSecs";
+
 namespace chip {
 namespace Platform {
 
@@ -38,5 +40,21 @@
         return std::nullopt;
     }
 
+    std::optional<uint16_t> GetUserDefaultBDXThrottleIntervalForThreadInMSecs()
+    {
+        NSUserDefaults * defaults = [NSUserDefaults standardUserDefaults];
+        NSInteger bdxThrottleIntervalInMsecs = [defaults integerForKey:kBDXThrottleIntervalInMsecsUserDefaultKey];
+
+        if (bdxThrottleIntervalInMsecs > 0 && CanCastTo<uint16_t>(bdxThrottleIntervalInMsecs)) {
+            uint16_t intervalInMsecs = static_cast<uint16_t>(bdxThrottleIntervalInMsecs);
+            ChipLogProgress(BDX, "Got a user default value for BDX Throttle Interval for Thread devices - %d msecs", intervalInMsecs);
+            return std::make_optional(intervalInMsecs);
+        }
+
+        // For now return NullOptional if value returned in bdxThrottleIntervalInMsecs is 0, since that either means the key was not found or value was zero.
+        // Since 0 is not a feasible value for this interval for now, we will treat that as not being set.
+        return std::nullopt;
+    }
+
 } // namespace Platform
 } // namespace chip