Skip to content

Commit

Permalink
Add test for stateful model and fix output backings issue (pytorch#5294)
Browse files Browse the repository at this point in the history
Summary:
Fixes a couple of issues
-  Prewarms stateful model
- The prediction was run twice if output backings were used.
- Adds unit test for stateful model

Pull Request resolved: pytorch#5294

Reviewed By: kirklandsign

Differential Revision: D62551117

Pulled By: cccclai

fbshipit-source-id: 48835c3e841914ac11c1ea7a9246c548f2b16035
  • Loading branch information
cymbalrush authored and facebook-github-bot committed Sep 12, 2024
1 parent c5c69a9 commit 08c8c6e
Show file tree
Hide file tree
Showing 9 changed files with 154 additions and 32 deletions.
6 changes: 1 addition & 5 deletions backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@
#import <CoreML/CoreML.h>
#import <vector>

#if !defined(MODEL_STATE_IS_SUPPORTED) && __has_include(<CoreML/MLModel+MLState.h>)
#define MODEL_STATE_IS_SUPPORTED 1
#endif

NS_ASSUME_NONNULL_BEGIN

@class ETCoreMLAsset;
Expand Down Expand Up @@ -45,7 +41,7 @@ __attribute__((objc_subclassing_restricted))
@property (strong, readonly, nonatomic) MLModel* mlModel;

/// The model state.
@property (strong, readonly, nonatomic) id state API_AVAILABLE(macos(15.0), ios(18.0), tvos(18.0), watchos(11.0));
@property (strong, readonly, nonatomic, nullable) id state;

/// The asset from which the model is loaded.
@property (strong, readonly, nonatomic) ETCoreMLAsset* asset;
Expand Down
41 changes: 29 additions & 12 deletions backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,19 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
return get_multi_array_constraints_by_name(description.outputDescriptionsByName);
}

#if MODEL_STATE_IS_SUPPORTED
API_AVAILABLE(macos(15.0), ios(18.0), tvos(18.0), watchos(11.0))
// Zeroes the backing bytes of the model-state buffer named `feature_name`.
// Fetches the state's MLMultiArray, then overwrites its entire mutable byte
// range with 0.  Used so a warm-up prediction does not leave stale values
// in the model's state.
// NOTE(review): the fill ignores `strides` and assumes the buffer's `size`
// bytes are contiguous — confirm this holds for MLState-backed arrays.
void reset_state_for_feature_name(NSString *feature_name, MLState *state) {
[state getMultiArrayForStateNamed:feature_name handler:^(MLMultiArray *buffer) {
[buffer getMutableBytesWithHandler:^(void *mutableBytes, NSInteger size, NSArray<NSNumber *> * __unused strides) {
uint8_t *start = reinterpret_cast<uint8_t *>(mutableBytes);
uint8_t *end = start + size;
std::fill(start, end, uint8_t(0));
}];
}];
}
#endif

}

#pragma mark - ETCoreMLModel
Expand Down Expand Up @@ -282,7 +295,6 @@ MultiArray buffer(mutableBytes, MultiArray::MemoryLayout(to_multiarray_data_type
- (nullable id<MLFeatureProvider>)predictionFromFeatures:(id<MLFeatureProvider>)input
options:(MLPredictionOptions *)options
error:(NSError **)error {

#if MODEL_STATE_IS_SUPPORTED
if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
if (self.state != nil) {
Expand All @@ -294,28 +306,33 @@ MultiArray buffer(mutableBytes, MultiArray::MemoryLayout(to_multiarray_data_type
}
#endif

return [self.mlModel predictionFromFeatures:input
options:options
error:error];
id<MLFeatureProvider> result = [self.mlModel predictionFromFeatures:input
options:options
error:error];

return result;
}

- (BOOL)prewarmAndReturnError:(NSError* __autoreleasing*)error {
BOOL prewarm = YES;
#if MODEL_STATE_IS_SUPPORTED
if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
prewarm = (self.mlModel.modelDescription.stateDescriptionsByName.count == 0);
}
#endif

NSError *localError = nil;
BOOL result = prewarm ? [self.mlModel prewarmAndReturnError:&localError] : NO;
BOOL result = [self.mlModel prewarmUsingState:self.state error:error];
if (!result) {
ETCoreMLLogError(localError,
"%@: Failed to prewarm model with identifier = %@",
NSStringFromClass(self.class),
self.identifier);
}

#if MODEL_STATE_IS_SUPPORTED
if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
NSDictionary<NSString *, MLFeatureDescription *> *stateDescriptions = self.mlModel.modelDescription.stateDescriptionsByName;
[stateDescriptions enumerateKeysAndObjectsUsingBlock:^(NSString *featureName, MLFeatureDescription * __unused obj, BOOL * __unused stop) {
reset_state_for_feature_name(featureName, (MLState *) self.state);
}];
}
#endif


if (error) {
*error = localError;
}
Expand Down
15 changes: 7 additions & 8 deletions backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
Original file line number Diff line number Diff line change
Expand Up @@ -669,16 +669,15 @@ - (void)addPrewarmedAsset:(ETCoreMLAsset *)asset {
error:&localError];
// Try without output backings.
if (!modelOutputs && predictionOptions.outputBackings.count > 0) {
localError = nil;
executor.ignoreOutputBackings = YES;
localError = nil;
modelOutputs = [executor executeModelWithInputs:inputFeatures
predictionOptions:predictionOptions
loggingOptions:loggingOptions
eventLogger:eventLogger
error:&localError];
}

modelOutputs = [executor executeModelWithInputs:inputFeatures
predictionOptions:predictionOptions
loggingOptions:loggingOptions
eventLogger:eventLogger
error:&localError];


if (error) {
*error = localError;
}
Expand Down
6 changes: 5 additions & 1 deletion backends/apple/coreml/runtime/delegate/MLModel_Prewarm.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,20 @@

#import <CoreML/CoreML.h>

#if !defined(MODEL_STATE_IS_SUPPORTED) && __has_include(<CoreML/MLModel+MLState.h>)
#define MODEL_STATE_IS_SUPPORTED 1
#endif

NS_ASSUME_NONNULL_BEGIN

@interface MLModel (Prewarm)

/// Pre-warms the model by running a prediction with zeroed-out inputs.
///
/// @param state The model state.
/// @param error On failure, error is filled with the failure information.
/// @retval `YES` if the prediction succeeded otherwise `NO`.
- (BOOL)prewarmAndReturnError:(NSError* __autoreleasing*)error;
- (BOOL)prewarmUsingState:(nullable id)state error:(NSError* __autoreleasing*)error;

@end

Expand Down
16 changes: 14 additions & 2 deletions backends/apple/coreml/runtime/delegate/MLModel_Prewarm.mm
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,28 @@ + (MLMultiArray *)zeroedMultiArrayWithShape:(NSArray<NSNumber *> *)shape

@implementation MLModel (Prewarm)

- (BOOL)prewarmAndReturnError:(NSError * __autoreleasing *)error {
- (BOOL)prewarmUsingState:(nullable id)state error:(NSError * __autoreleasing *)error {
@autoreleasepool {
id<MLFeatureProvider> inputs = ::get_zeroed_inputs(self, error);
if (!inputs) {
return NO;
}

id<MLFeatureProvider> outputs = [self predictionFromFeatures:inputs error:error];

id<MLFeatureProvider> outputs = nil;
if (state != nil) {
#if MODEL_STATE_IS_SUPPORTED
if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
outputs = [self predictionFromFeatures:inputs usingState:(MLState *)state error:error];
return outputs != nil;
}
#endif
}

outputs = [self predictionFromFeatures:inputs error:error];
return outputs != nil;
}
}


@end
16 changes: 13 additions & 3 deletions backends/apple/coreml/runtime/test/CoreMLBackendDelegateTests.mm
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#import <executorch/runtime/platform/runtime.h>
#import <string>

#import "MLModel_Prewarm.h"

static constexpr size_t kRuntimeMemorySize = 50 * 1024U * 1024U; // 50 MB

using namespace torch::executor;
Expand Down Expand Up @@ -184,20 +186,28 @@ - (void)executeModelAtURL:(NSURL *)modelURL nLoads:(NSUInteger)nLoads nExecution
- (void)testAddProgramExecute {
NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"pte"];
XCTAssertNotNil(modelURL);
[self executeModelAtURL:modelURL nLoads:5 nExecutions:2];
[self executeModelAtURL:modelURL nLoads:1 nExecutions:2];
}

- (void)testMulProgramExecute {
NSURL *modelURL = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"pte"];
XCTAssertNotNil(modelURL);
[self executeModelAtURL:modelURL nLoads:5 nExecutions:2];
[self executeModelAtURL:modelURL nLoads:1 nExecutions:2];
}

- (void)testMV3ProgramExecute {
NSURL *modelURL = [[self class] bundledResourceWithName:@"mv3_coreml_all" extension:@"pte"];
XCTAssertNotNil(modelURL);
[self executeModelAtURL:modelURL nLoads:5 nExecutions:2];
[self executeModelAtURL:modelURL nLoads:1 nExecutions:2];
}

#if MODEL_STATE_IS_SUPPORTED
/// Loads the exported stateful model (`state_coreml_all.pte`) once and runs
/// it twice, exercising the Core ML state path.  Compiled only when the SDK
/// provides MLState support (MODEL_STATE_IS_SUPPORTED).
- (void)testStateProgramExecute {
NSURL *modelURL = [[self class] bundledResourceWithName:@"state_coreml_all" extension:@"pte"];
XCTAssertNotNil(modelURL);
[self executeModelAtURL:modelURL nLoads:1 nExecutions:2];
}
#endif

- (void)executeMultipleModelsConcurrently:(NSArray<NSURL *> *)modelURLs
nLoads:(NSUInteger)nLoads
Expand Down
77 changes: 77 additions & 0 deletions backends/apple/coreml/runtime/test/export_stateful_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright © 2024 Apple Inc. All rights reserved.
#
# Please refer to the license found in the LICENSE file in the root directory of the source tree.

import os
from pathlib import Path

import coremltools as ct
import executorch.exir as exir

import torch

from executorch.backends.apple.coreml.compiler import CoreMLBackend
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
from torch.export import export


class StatefulModel(torch.nn.Module):
    """Tiny attention-like module whose key cache is mutable module state.

    A zero-initialized ``cache`` buffer of shape ``(max_seq_len,
    embedding_dim)`` is registered on the module and mutated in place on
    every forward call, so the exported program carries an explicit state
    mutation.
    """

    def __init__(
        self,
        embedding_dim: int,
        max_seq_len: int,
    ):
        super().__init__()
        # Persistent float32 key cache, zero-filled at construction.
        initial_cache = torch.zeros((max_seq_len, embedding_dim), dtype=torch.float32)
        self.register_buffer("cache", initial_cache)

    def forward(
        self,
        q: torch.Tensor,
        k_val: torch.Tensor,
        input_pos: torch.Tensor,
    ):
        # In-place write of k_val into the cache rows selected by input_pos.
        # The aten op is called directly so the mutation is captured
        # explicitly when the module is traced/exported.
        updated_keys = torch.ops.aten.index_put_(self.cache, [input_pos, None], k_val)
        # Attention-style scores: (max_seq_len, embedding_dim) x (embedding_dim, q_rows).
        return updated_keys.mm(q.transpose(0, 1))


def main() -> None:
    """Export StatefulModel as a Core ML-delegated ExecuTorch program.

    Traces the model, lowers it to the edge dialect, delegates it to the
    Core ML backend with the iOS 18 deployment target used here, and writes
    the serialized program to ``<this dir>/models/state_coreml_all.pte``.
    """
    embedding_dim = 3
    max_seq_len = 2

    # Core ML lowering configuration for the delegated partition.
    compile_specs = CoreMLBackend.generate_compile_specs(
        compute_precision=ct.precision.FLOAT16,
        compute_unit=ct.ComputeUnit.ALL,
        minimum_deployment_target=ct.target.iOS18,
    )
    partitioner = CoreMLPartitioner(
        skip_ops_for_coreml_delegation=None,
        compile_specs=compile_specs,
    )

    # Trace with representative inputs: (q, k_val, input_pos).
    model = StatefulModel(embedding_dim=embedding_dim, max_seq_len=max_seq_len)
    sample_inputs = (
        torch.randn((1, embedding_dim)),
        torch.randn((1, embedding_dim)),
        torch.tensor([0]),
    )
    exported_program = export(model, sample_inputs)

    # Lower to edge dialect, delegate to Core ML, then serialize.
    edge_manager = exir.to_edge(exported_program)
    delegated_manager = edge_manager.to_backend(partitioner)
    executorch_program = delegated_manager.to_executorch(
        config=exir.ExecutorchBackendConfig(extract_delegate_segments=True)
    )

    # Write the .pte next to this script, under models/ (dir must already
    # be creatable: parents=False).
    output_dir = Path(os.path.dirname(os.path.realpath(__file__))) / "models"
    output_dir.mkdir(parents=False, exist_ok=True)
    output_path = output_dir / "state_coreml_all.pte"
    with open(output_path.resolve(), "wb") as out_file:
        out_file.write(executorch_program.buffer)


if __name__ == "__main__":
    main()  # pragma: no cover
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
objects = {

/* Begin PBXBuildFile section */
8307EB8A2C9262060011AE6D /* state_coreml_all.pte in Resources */ = {isa = PBXBuildFile; fileRef = 8307EB892C9262060011AE6D /* state_coreml_all.pte */; };
83BB78A02C65DA7300274ED7 /* ETCoreMLModelDebugInfo.mm in Sources */ = {isa = PBXBuildFile; fileRef = 83BB789F2C65DA7300274ED7 /* ETCoreMLModelDebugInfo.mm */; };
83BB78BF2C66AAAE00274ED7 /* add_mul_coreml_all.bin in Resources */ = {isa = PBXBuildFile; fileRef = 83BB78BD2C66AAAE00274ED7 /* add_mul_coreml_all.bin */; };
83BB78C02C66AAAE00274ED7 /* add_mul_coreml_all.pte in Resources */ = {isa = PBXBuildFile; fileRef = 83BB78BE2C66AAAE00274ED7 /* add_mul_coreml_all.pte */; };
Expand Down Expand Up @@ -120,6 +121,7 @@
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
8307EB892C9262060011AE6D /* state_coreml_all.pte */ = {isa = PBXFileReference; lastKnownFileType = file; name = state_coreml_all.pte; path = ../test/models/state_coreml_all.pte; sourceTree = "<group>"; };
83BB789E2C65DA7300274ED7 /* ETCoreMLModelDebugInfo.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = ETCoreMLModelDebugInfo.h; path = ../sdk/ETCoreMLModelDebugInfo.h; sourceTree = "<group>"; };
83BB789F2C65DA7300274ED7 /* ETCoreMLModelDebugInfo.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; name = ETCoreMLModelDebugInfo.mm; path = ../sdk/ETCoreMLModelDebugInfo.mm; sourceTree = "<group>"; };
83BB78BD2C66AAAE00274ED7 /* add_mul_coreml_all.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = add_mul_coreml_all.bin; path = ../test/models/add_mul_coreml_all.bin; sourceTree = "<group>"; };
Expand Down Expand Up @@ -607,6 +609,7 @@
C98551982AD2542D009143F9 /* mv3_coreml_all.pte */,
83BB78BD2C66AAAE00274ED7 /* add_mul_coreml_all.bin */,
83BB78BE2C66AAAE00274ED7 /* add_mul_coreml_all.pte */,
8307EB892C9262060011AE6D /* state_coreml_all.pte */,
);
name = models;
sourceTree = "<group>";
Expand Down Expand Up @@ -677,6 +680,7 @@
C985519E2AD2542D009143F9 /* mv3_coreml_all.pte in Resources */,
C98551A02AD2542D009143F9 /* add_coreml_all.bin in Resources */,
C98551A22AD2542D009143F9 /* mul_coreml_all.pte in Resources */,
8307EB8A2C9262060011AE6D /* state_coreml_all.pte in Resources */,
C98551A32AD2542D009143F9 /* add_coreml_all.pte in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
Expand Down
5 changes: 4 additions & 1 deletion backends/apple/coreml/scripts/generate_test_models.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@ cd "$EXECUTORCH_ROOT_PATH"

mkdir "$COREML_DIR_PATH/runtime/test/models/"
#Generate models
echo "Executorch: Generating test models"
cd "$EXECUTORCH_ROOT_PATH"

# Export each example model to a .pte (and its processed .bin) and move both
# into the Core ML runtime test fixtures directory.
MODELS=("add" "add_mul" "mul" "mv3")
for MODEL in "${MODELS[@]}"
do
echo "Executorch: Generating $MODEL model"
# TODO: Don't use the script in examples directory.
python3 -m examples.apple.coreml.scripts.export --model_name "$MODEL" --save_processed_bytes
# -f: overwrite fixtures left over from a previous run.
mv -f "$MODEL""_coreml_all.pte" "$COREML_DIR_PATH/runtime/test/models"
mv -f "$MODEL""_coreml_all.bin" "$COREML_DIR_PATH/runtime/test/models"
done

echo "Executorch: Generating stateful model"
python3 "$SCRIPT_DIR_PATH/../runtime/test/export_stateful_model.py"

0 comments on commit 08c8c6e

Please sign in to comment.