diff --git a/MLPNeuralNet.podspec b/MLPNeuralNet.podspec
index 8a6c78e..9e12f34 100644
--- a/MLPNeuralNet.podspec
+++ b/MLPNeuralNet.podspec
@@ -1,11 +1,11 @@
 Pod::Spec.new do |s|
   s.name = "MLPNeuralNet"
-  s.version = "1.0.10"
+  s.version = "1.0.11"
   s.summary = "Fast multilayer perceptron neural net for iOS and Mac OS X"
   s.license = { :type => 'BSD' }
   s.author = { "Mykola Pavlov" => "me@nikolaypavlov.com" }
-  s.source = { :git => "https://github.com/nikolaypavlov/MLPNeuralNet.git", :tag => "1.0.10" }
+  s.source = { :git => "https://github.com/alexobednikov/MLPNeuralNet.git", :tag => "1.0.11" }
   s.homepage = "http://nikolaypavlov.github.io/MLPNeuralNet/"
   s.screenshots = "http://nikolaypavlov.github.io/MLPNeuralNet/images/500px-Artificial_neural_network.png"

diff --git a/MLPNeuralNet/MLPNeuralNet.h b/MLPNeuralNet/MLPNeuralNet.h
index e04e473..bc975a7 100644
--- a/MLPNeuralNet/MLPNeuralNet.h
+++ b/MLPNeuralNet/MLPNeuralNet.h
@@ -40,21 +40,34 @@ typedef enum {
   MLPNone,
 } MLPActivationFunction;
 
+typedef enum {
+  // Dense is the default
+  MLPLayerDense,
+  MLPLayerBatchNormalization,
+} MLPLayerType;
+
 @interface MLPNeuralNet : NSObject
 
 @property (readonly, nonatomic) NSUInteger numberOfLayers;
 @property (readonly, nonatomic) NSUInteger featureVectorSize; // in bytes
 @property (readonly, nonatomic) NSUInteger predictionVectorSize; // in bytes
 @property (readonly, nonatomic) MLPOutput outputMode;
+// sequence of MLPLayerType identifiers; by default all are MLPLayerDense
+@property (readonly, nonatomic) NSMutableArray *layersTypes;
 @property (nonatomic) MLPActivationFunction hiddenActivationFunction;
 @property (nonatomic) MLPActivationFunction outputActivationFunction;
 @property (nonatomic) MLPActivationFunction activationFunction;
 
-// Designated initializer
+// Designated initializers
 - (id)initWithLayerConfig:(NSArray *)layerConfig // of NSNumbers
                   weights:(NSData *)weights // of double
                outputMode:(MLPOutput)outputMode;
 
+- (id)initWithLayerConfigAndLayerType:(NSArray *)layerConfig // of NSNumbers
+                              weights:(NSData *)weights // of double
+                           layerTypes:(NSMutableArray *)layerTypes // of MLPLayerType
+                           outputMode:(MLPOutput)outputMode;
+
 // Predicts new examples by feature-vector and copies the prediction into specified buffer
 // Vector and prediction buffers should be allocated to work with double precision
 - (void)predictByFeatureVector:(NSData *)vector intoPredictionVector:(NSMutableData *)prediction;
@@ -64,6 +77,8 @@ typedef enum {
 - (void)predictByFeatureMatrix:(NSData *)matrix intoPredictionMatrix:(NSMutableData *)prediction;
 
 // Number of weights required for the neural net of this configuration
-+ (NSInteger)countWeights:(NSArray *)layerConfig;
++ (NSInteger)countWeights:(NSArray *)layerConfig
+              layersTypes:(NSMutableArray *)layersTypes;
++ (NSInteger)countWeights:(NSArray *)layerConfig __deprecated_msg("use countWeights:layersTypes: instead");
 
 @end
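Review note: a minimal usage sketch of the new layer-typed API declared above. The config, layer types, and weight count follow countWeights:layersTypes:; the zero weights are placeholders so the sketch compiles, not values from a trained model (a real model also needs non-zero running_std entries, since prediction divides by them).

    #import "MLPNeuralNet.h"

    // 2 inputs -> dense(3) -> batch norm(3) -> dense(2)
    NSArray *config = @[@2, @3, @3, @2];
    NSMutableArray *types = [NSMutableArray arrayWithObjects:@(MLPLayerDense),
                             @(MLPLayerBatchNormalization), @(MLPLayerDense), nil];

    // (2+1)*3 dense + 4*3 batch norm + (3+1)*2 dense = 29 doubles
    NSMutableData *weights = [NSMutableData dataWithLength:29 * sizeof(double)];

    MLPNeuralNet *net = [[MLPNeuralNet alloc] initWithLayerConfigAndLayerType:config
                                                                      weights:weights
                                                                   layerTypes:types
                                                                   outputMode:MLPClassification];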
diff --git a/MLPNeuralNet/MLPNeuralNet.m b/MLPNeuralNet/MLPNeuralNet.m
index aa97e60..e44d97d 100644
--- a/MLPNeuralNet/MLPNeuralNet.m
+++ b/MLPNeuralNet/MLPNeuralNet.m
@@ -13,15 +13,12 @@
 #define ReLU_THR 0.0
 
 typedef struct {
-  // Number of rows
-  NSInteger nrow;
-  // Number of columns
-  NSInteger ncol;
+  NSInteger nrow; // Number of rows
+  NSInteger ncol; // Number of columns
   double *weightMatrix;
 } MLPLayer;
 
 @interface MLPNeuralNet () {
-
   NSMutableData *hiddenFeatures;
   NSMutableData *buffer;
   // MLPLayer structures
@@ -40,17 +37,24 @@ - (id)initWithLayerConfig:(NSArray *)layerConfig
                outputMode:(MLPOutput)outputMode {
   self = [super init];
   if (self) {
-    if ([self.class countWeights:layerConfig] != weights.length / sizeof(double)) {
+    _numberOfLayers = layerConfig.count;
+
+    // set the MLPLayerDense identifier for every layer except the input layer
+    _layersTypes = [[NSMutableArray alloc] initWithCapacity:_numberOfLayers];
+    for (NSUInteger n = 0; n < _numberOfLayers - 1; ++n) {
+      [_layersTypes addObject:[NSNumber numberWithInteger:MLPLayerDense]];
+    }
+
+    if ([self.class countWeights:layerConfig layersTypes:_layersTypes] != weights.length / sizeof(double)) {
       @throw [NSException exceptionWithName:NSInternalInconsistencyException
                                      reason:@"Number of weights doesn't match to configuration"
                                    userInfo:nil];
     }
-    _numberOfLayers = layerConfig.count;
     _featureVectorSize = [layerConfig[0] unsignedIntegerValue] * sizeof(double);
     _predictionVectorSize = [layerConfig.lastObject unsignedIntegerValue] * sizeof(double);
     _outputMode = outputMode;
-    
+
     // Allocate buffers of the maximum possible vector size, there should be a place for bias unit also.
     unsigned maxVectorLength = [[layerConfig valueForKeyPath:@"@max.self"] unsignedIntValue] + BIAS_UNIT;
     hiddenFeatures = [NSMutableData dataWithLength:maxVectorLength * sizeof(double)];
@@ -60,7 +64,7 @@
     // the input layer, so the total size is equal to number of layers - 1.
     arrayOfLayers = [NSMutableData dataWithLength:(_numberOfLayers - 1) * sizeof(MLPLayer)];
 
-    // Allocate memory for the wigth matrices and initialize them.
+    // Allocate memory for the weight matrices and initialize them.
     MLPLayer *layer = (MLPLayer *)arrayOfLayers.bytes;
     double *wts = (double *)weights.bytes;
     int crossLayerOffset = 0; // An offset between the weight matrices of different layers
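Review note: the dense weight layout that this initializer (and the new one below) unpacks is row-major, with the bias weight in column 0 of each row, matching the bias unit that prediction writes into row 0 of the feature buffer. For a hypothetical 2-input, 3-unit dense layer (nrow = 3, ncol = 3), the weight of output unit 1 for input 2 sits at:

    // index = col + row * ncol = 2 + 1 * 3 = 5
    double w = layer[j].weightMatrix[2 + 1 * 3];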
@@ -91,6 +95,82 @@
   return self;
 }
 
+- (id)initWithLayerConfigAndLayerType:(NSArray *)layerConfig // of NSNumbers
+                              weights:(NSData *)weights // of double
+                           layerTypes:(NSMutableArray *)layerTypes // of MLPLayerType
+                           outputMode:(MLPOutput)outputMode {
+  self = [super init];
+  if (self) {
+    _layersTypes = layerTypes;
+    if ([self.class countWeights:layerConfig layersTypes:_layersTypes] != weights.length / sizeof(double)) {
+      @throw [NSException exceptionWithName:NSInternalInconsistencyException
+                                     reason:@"Number of weights doesn't match to configuration"
+                                   userInfo:nil];
+    }
+
+    _numberOfLayers = layerConfig.count;
+    _featureVectorSize = [layerConfig[0] unsignedIntegerValue] * sizeof(double);
+    _predictionVectorSize = [layerConfig.lastObject unsignedIntegerValue] * sizeof(double);
+    _outputMode = outputMode;
+
+    // Allocate buffers of the maximum possible vector size, there should be a place for bias unit also.
+    unsigned maxVectorLength = [[layerConfig valueForKeyPath:@"@max.self"] unsignedIntValue] + BIAS_UNIT;
+    hiddenFeatures = [NSMutableData dataWithLength:maxVectorLength * sizeof(double)];
+    buffer = [NSMutableData dataWithLength:maxVectorLength * sizeof(double)];
+
+    // Allocate memory for layers. Note that we don't need a matrix for
+    // the input layer, so the total size is equal to number of layers - 1.
+    arrayOfLayers = [NSMutableData dataWithLength:(_numberOfLayers - 1) * sizeof(MLPLayer)];
+
+    // Allocate memory for the weight matrices and initialize them.
+    MLPLayer *layer = (MLPLayer *)arrayOfLayers.bytes;
+    double *wts = (double *)weights.bytes;
+    int crossLayerOffset = 0; // An offset between the weight matrices of different layers
+    for (int layer_index = 0; layer_index < _numberOfLayers - 1; layer_index++) { // Recall we don't need a matrix for the input layer
+      if ([_layersTypes[layer_index] isEqualToNumber:@(MLPLayerDense)]) {
+        // If the network has X units in layer j and Y units in layer j+1, then the weight matrix
+        // for layer j will be of dimension: [ Y x (X+1) ]
+        layer[layer_index].nrow = [layerConfig[layer_index+1] unsignedIntegerValue];
+        layer[layer_index].ncol = [layerConfig[layer_index] unsignedIntegerValue] + 1; // +1 for BIAS_UNIT
+        layer[layer_index].weightMatrix = calloc(layer[layer_index].nrow * layer[layer_index].ncol, sizeof(double));
+        NSAssert(layer[layer_index].weightMatrix != NULL, @"Out of memory for weight matrices");
+
+        int totalOffset = 0;
+        for (int row = 0; row < layer[layer_index].nrow; row++) {
+          for (int col = 0; col < layer[layer_index].ncol; col++) {
+            // Simulate the matrix using row-major ordering
+            int crossRowOffset = (col + row * (int)layer[layer_index].ncol);
+            // Now matrix[offset] corresponds to M[row, col]
+            totalOffset = crossRowOffset + crossLayerOffset;
+            layer[layer_index].weightMatrix[crossRowOffset] = wts[totalOffset];
+          }
+        }
+
+        crossLayerOffset = totalOffset + 1; // Adjust offset to the next layer
+      } else if ([_layersTypes[layer_index] isEqualToNumber:@(MLPLayerBatchNormalization)]) {
+        layer[layer_index].ncol = layer[layer_index].nrow = [layerConfig[layer_index] unsignedIntegerValue];
+        layer[layer_index].weightMatrix = calloc(layer[layer_index].nrow * 4, sizeof(double));
+        NSAssert(layer[layer_index].weightMatrix != NULL, @"Out of memory for weight matrices");
+
+        // the order is gamma, beta, running_mean, running_std
+        for (int row_index = 0; row_index < layer[layer_index].nrow * 4; ++row_index) {
+          layer[layer_index].weightMatrix[row_index] = wts[crossLayerOffset + row_index];
+        }
+        crossLayerOffset += layer[layer_index].nrow * 4;
+      } else {
+        NSString *error = [NSString stringWithFormat:@"Unsupported layer identifier. Got: %@",
+                           _layersTypes[layer_index]];
+        @throw [NSException exceptionWithName:NSInternalInconsistencyException reason:error userInfo:nil];
+      }
+    }
+  }
+
+  return self;
+}
+
 - (id)init {
   @throw [NSException exceptionWithName:@"MLPNeuralNet init"
                                  reason:@"Use designated initializer, not init"
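Review note: with the gamma/beta/running_mean/running_std layout above, each unit's batch-normalization inference is y = gamma * (x - running_mean) / running_std + beta, the standard formulation; storing running_std rather than the variance presumably means the exporter bakes sqrt(variance + epsilon) in up front. A scalar sketch of what the vDSP sequence in the next hunk computes per feature (function name hypothetical):

    static double bn_forward(double x, double gamma, double beta, double mean, double std) {
      // standardize, then scale and shift: mirrors the vsub -> vdiv -> vmul -> vadd sequence below
      return gamma * ((x - mean) / std) + beta;
    }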
@@ -153,25 +233,50 @@ - (void)predictByFeatureMatrix:(NSData *)matrix intoPredictionMatrix:(NSMutableD
   vDSP_mtransD((double *)matrix.bytes, 1, &features[numExamples], 1, numFeatures, numExamples);
 
   // Forward propagation algorithm
-  for (int j = 0; j < self.numberOfLayers - 1; j++) {
-
-    // 1. Calculate hidden features for current layer j
-    vDSP_mmulD(layer[j].weightMatrix, 1, features, 1, &buf[numExamples], 1, layer[j].nrow, numExamples, layer[j].ncol);
-
-    // 2. Add the bias unit in row 0 and propagate features to the next level
-    vDSP_vfillD(&bias, &buf[0], 1, numExamples);
-
-    memcpy(features, buf, (layer[j].nrow + BIAS_UNIT) * numExamples * sizeof(double));
+  for (int layer_index = 0; layer_index < self.numberOfLayers - 1; layer_index++) {
+    if ([_layersTypes[layer_index] isEqualToNumber:@(MLPLayerBatchNormalization)]) {
+      // apply BN per example
+      for (int example_index = 0; example_index < numExamples; ++example_index) {
+        // standardize x = (x - mean) / std
+        double *current_example_features_start = &features[numExamples] + example_index;
+        vDSP_vsubD(layer[layer_index].weightMatrix + 2 * layer[layer_index].ncol, 1,
+                   current_example_features_start, numExamples,
+                   current_example_features_start, numExamples,
+                   layer[layer_index].ncol);
+        vDSP_vdivD(layer[layer_index].weightMatrix + 3 * layer[layer_index].ncol, 1,
+                   current_example_features_start, numExamples,
+                   current_example_features_start, numExamples,
+                   layer[layer_index].ncol);
+
+        // apply BN compensation x = gamma * x + beta
+        vDSP_vmulD(layer[layer_index].weightMatrix, 1,
+                   current_example_features_start, numExamples,
+                   current_example_features_start, numExamples,
+                   layer[layer_index].ncol);
+        vDSP_vaddD(layer[layer_index].weightMatrix + layer[layer_index].ncol, 1,
+                   current_example_features_start, numExamples,
+                   current_example_features_start, numExamples,
+                   layer[layer_index].ncol);
+      }
+    } else if ([_layersTypes[layer_index] isEqualToNumber:@(MLPLayerDense)]) {
+      // 1. Calculate hidden features for the current layer
+      vDSP_mmulD(layer[layer_index].weightMatrix, 1, features, 1, &buf[numExamples],
+                 1, layer[layer_index].nrow, numExamples, layer[layer_index].ncol);
+
+      // 2. Add the bias unit in row 0 and propagate features to the next level
+      vDSP_vfillD(&bias, &buf[0], 1, numExamples);
+      memcpy(features, buf, (layer[layer_index].nrow + BIAS_UNIT) * numExamples * sizeof(double));
+    }
 
-    // 3. Apply activation function, e.g. logistic func: http://en.wikipedia.org/wiki/Logistic_function
-    if (self.outputMode == MLPClassification) {
-      int feature_len = (int)(layer[j].nrow * numExamples);
+    // 3. Apply activation function
+    if (self.outputMode == MLPClassification && [_layersTypes[layer_index] isEqualToNumber:@(MLPLayerDense)]) {
+      int feature_len = (int)(layer[layer_index].nrow * numExamples);
       double one = 1.0;
       double mone = -1.0;
       double relu_threshold = ReLU_THR;
 
       MLPActivationFunction activation =
-        (j < self.numberOfLayers - 2) ? self.hiddenActivationFunction : self.outputActivationFunction;
+        (layer_index < self.numberOfLayers - 2) ? self.hiddenActivationFunction : self.outputActivationFunction;
 
       switch (activation) {
         case MLPSigmoid:
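Review note: vDSP_mtransD transposes the input up front, so the feature buffer holds (units + bias) rows by numExamples columns, with the bias in row 0. That is why the per-example BN code above starts at &features[numExamples] (skipping the bias row) plus example_index, and walks with stride numExamples: consecutive features of one example are a whole row apart. In index form (names hypothetical):

    // feature i of example e, with BIAS_UNIT = 1 accounting for the bias row
    double x = features[(i + BIAS_UNIT) * numExamples + e];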
@@ -190,16 +295,15 @@ - (void)predictByFeatureMatrix:(NSData *)matrix intoPredictionMatrix:(NSMutableD
           break;
 
         case MLPSoftmax: {
-          // subtract maximum input to avoid overflow.
-          double max_input = 0;
-          vDSP_maxvD(&features[numExamples], 1, &max_input, feature_len);
-          max_input *= -1;
-          vDSP_vsaddD(&features[numExamples], 1, &max_input, &features[numExamples], 1, feature_len);
-          vvexp(&features[numExamples], &features[numExamples], &feature_len);
-          double sum_exp = 0;
-          vDSP_sveD(&features[numExamples], 1, &sum_exp, feature_len);
-          vDSP_vsdivD(&features[numExamples], 1, &sum_exp, &features[numExamples], 1, feature_len);
+
+          for (int example_index = 0; example_index < numExamples; ++example_index) {
+            double *current_example_features_start = &features[numExamples] + example_index;
+
+            double sum_exp = 0;
+            vDSP_sveD(current_example_features_start, numExamples, &sum_exp, feature_len);
+            vDSP_vsdivD(current_example_features_start, numExamples, &sum_exp, current_example_features_start, numExamples, feature_len);
+          }
           break;
         }
         case MLPNone:
@@ -239,6 +343,29 @@ - (NSString *)description {
   return [NSString stringWithFormat:@"a %@ network with %d weigths", networkArch, numberOfWeights];
 }
 
++ (NSInteger)countWeights:(NSArray *)layerConfig
+              layersTypes:(NSMutableArray *)layersTypes {
+
+  NSAssert(layersTypes.count + 1 == layerConfig.count, @"Found inconsistency in layers shapes and layers types");
+
+  NSInteger count = 0;
+  for (int layer_index = 0; layer_index < layerConfig.count - 1; layer_index++) {
+    if ([layersTypes[layer_index] isEqualToNumber:@(MLPLayerDense)]) {
+      count += ([layerConfig[layer_index] unsignedIntValue] + 1) * [layerConfig[layer_index + 1] unsignedIntValue];
+    } else if ([layersTypes[layer_index] isEqualToNumber:@(MLPLayerBatchNormalization)]) {
+      count += 4 * [layerConfig[layer_index] unsignedIntValue];
+    } else {
+      NSString *error = [NSString stringWithFormat:@"Unsupported layer identifier. Got: %@",
+                         layersTypes[layer_index]];
+      NSAssert(false, error);
+      break;
+    }
+  }
+
+  return count;
+}
+
 + (NSInteger)countWeights:(NSArray *)layerConfig {
   NSInteger count = 0;
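Review note: the countWeights:layersTypes: arithmetic, worked for the configurations exercised in the tests below. A dense layer from X to Y units costs (X + 1) * Y doubles for weights plus bias; a batch-norm layer of width N costs 4 * N (gamma, beta, running_mean, running_std):

    // all dense, config [2, 3, 2, 1]:
    //   (2+1)*3 + (3+1)*2 + (2+1)*1 = 9 + 8 + 3 = 20
    // dense / batch norm / dense, config [2, 3, 2, 1]:
    //   (2+1)*3 + 4*3 + (2+1)*1 = 9 + 12 + 3 = 24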
diff --git a/MLPNeuralNetTests/MLPNeuralNetTests.m b/MLPNeuralNetTests/MLPNeuralNetTests.m
index 9facb4d..c66126e 100644
--- a/MLPNeuralNetTests/MLPNeuralNetTests.m
+++ b/MLPNeuralNetTests/MLPNeuralNetTests.m
@@ -46,6 +46,11 @@ @interface MLPNeuralNetTests : XCTestCase {
   NSArray *layersForReLUSoftmaxModel;
   MLPNeuralNet *modelWithReLUSoftmax;
 
+  NSData *wtsForModelWithBatchNorm;
+  NSArray *layersForModelWithBatchNorm;
+  NSMutableArray *layerTypesForModelWithBatchNorm;
+  MLPNeuralNet *modelForModelWithBatchNorm;
+
   NSData *vector;
   NSMutableData *prediction;
   double *assessment;
@@ -118,9 +123,10 @@ - (void)setUp {
   modelWithReLUSig.hiddenActivationFunction = MLPReLU;
   modelWithReLUSig.outputActivationFunction = MLPSigmoid;
 
-  double wtsForeLUSoftmax[] = {0.0, 1.1093217, -0.29420424, 0.0, 0.40102676, 0.048761927,
-    0.0, 0.18262321, 0.16701823, 0.0, -0.014809706, 0.81076205};
-  wtsForReLUSoftmaxModel = [NSData dataWithBytes:wtsForeLUSoftmax length:sizeof(wtsForeLUSoftmax)];
+  double wtsForReLUSoftmax[] = {-0.00333056, -0.29518637, 0.26010591, 0.00627716, -0.63008577,
+    0.5226832, 0.02341191, 0.89141166, -0.66637737, -0.02341191,
+    -0.19588685, -0.01236533};
+  wtsForReLUSoftmaxModel = [NSData dataWithBytes:wtsForReLUSoftmax length:sizeof(wtsForReLUSoftmax)];
   layersForReLUSoftmaxModel = [NSArray arrayWithObjects:@2, @2, @2, nil];
   modelWithReLUSoftmax = [[MLPNeuralNet alloc] initWithLayerConfig:layersForReLUSoftmaxModel
                                                            weights:wtsForReLUSoftmaxModel
@@ -128,6 +134,25 @@ - (void)setUp {
   modelWithReLUSoftmax.hiddenActivationFunction = MLPReLU;
   modelWithReLUSoftmax.outputActivationFunction = MLPSoftmax;
 
+  double wtsForModelWithBN[] = {-0.02911046, 0.96149528, -0.30875102, -0.07312316, -0.53453773,
+    -0.95400345, -0.04936157, -0.60033119, 1.01297891, 0.8972764,
+    0.91061068, 0.95709789, 0.0074056, 0.00735936, -0.00107296,
+    0.38664621, 0.41322696, 0.35796061, 0.53616369, 0.51914299,
+    0.56763041, 0.00775698, 0.4242098, 0.59139675, -0.1122655,
+    -0.00775698, -0.75524676, -0.39608201, -0.15460265};
+
+  wtsForModelWithBatchNorm = [NSData dataWithBytes:wtsForModelWithBN length:sizeof(wtsForModelWithBN)];
+  // 2 = input shape; 3 = #neurons in dense layer; 3 = BatchNorm shape; 2 = output shape
+  layersForModelWithBatchNorm = [NSArray arrayWithObjects:@2, @3, @3, @2, nil];
+  layerTypesForModelWithBatchNorm = [NSMutableArray arrayWithObjects:@(MLPLayerDense), @(MLPLayerBatchNormalization), @(MLPLayerDense), nil];
+  modelForModelWithBatchNorm = [[MLPNeuralNet alloc] initWithLayerConfigAndLayerType:layersForModelWithBatchNorm
+                                                                             weights:wtsForModelWithBatchNorm
+                                                                          layerTypes:layerTypesForModelWithBatchNorm
+                                                                          outputMode:MLPClassification];
+  modelForModelWithBatchNorm.hiddenActivationFunction = MLPReLU;
+  modelForModelWithBatchNorm.outputActivationFunction = MLPSoftmax;
+
   double features[] = {
     1, 1,
     1, 0,
@@ -344,7 +369,14 @@ - (void)testModelSettingsWithDifferentActivations {
 
 - (void)testNumberOfWeigthsByLayerConfig {
   NSArray *cfg = @[@2, @3, @2, @1];
-  XCTAssertEqual([MLPNeuralNet countWeights:cfg], (NSInteger)20);
+  NSMutableArray *layer_types = [NSMutableArray arrayWithObjects:@(MLPLayerDense), @(MLPLayerDense), @(MLPLayerDense), nil];
+  XCTAssertEqual([MLPNeuralNet countWeights:cfg
+                                layersTypes:layer_types], (NSInteger)20);
+
+  NSArray *cfg2 = @[@2, @3, @2, @1];
+  NSMutableArray *layer_types2 = [NSMutableArray arrayWithObjects:@(MLPLayerDense), @(MLPLayerBatchNormalization), @(MLPLayerDense), nil];
+  XCTAssertEqual([MLPNeuralNet countWeights:cfg2
+                                layersTypes:layer_types2], (NSInteger)24);
 }
 
 #pragma mark - Exception tests
@@ -377,15 +409,26 @@ - (void)testIncorrectNumberOfWeights {
 
 - (void)testSoftmaxOutputLayer {
   double features[] = {-1, 10};
-  vector = [NSData dataWithBytes:features length:sizeof(features)];
-  NSMutableData* predictionM2 = [NSMutableData dataWithLength:sizeof(double)*2];
+  vector = [NSData dataWithBytes:features length:sizeof(features)];
+  NSMutableData *predictionM2 = [NSMutableData dataWithLength:sizeof(double) * 2];
   double* assessmentM2 = (double *)predictionM2.bytes;
 
   [modelWithReLUSoftmax predictByFeatureMatrix:vector intoPredictionMatrix:predictionM2];
-  XCTAssertEqualWithAccuracy(assessmentM2[0], 0.48606774, 0.0001);
-  XCTAssertEqualWithAccuracy(assessmentM2[1], 0.51393223, 0.0001);
+  XCTAssertEqualWithAccuracy(assessmentM2[0], 0.3447236, 0.0001);
+  XCTAssertEqualWithAccuracy(assessmentM2[1], 0.6552764, 0.0001);
+}
+
+- (void)testBatchNormalizationLayer {
+  double features[] = {-1, 10};
+  vector = [NSData dataWithBytes:features length:sizeof(features)];
+
+  NSMutableData *predictionM2 = [NSMutableData dataWithLength:sizeof(double) * 2];
+  double *assessmentM2 = (double *)predictionM2.bytes;
+
+  [modelForModelWithBatchNorm predictByFeatureMatrix:vector intoPredictionMatrix:predictionM2];
+  XCTAssertEqualWithAccuracy(assessmentM2[0], 0.32947651, 0.0001);
+  XCTAssertEqualWithAccuracy(assessmentM2[1], 0.67052352, 0.0001);
 }
 
 @end
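Review note: a possible extra assertion for the batch-norm path, reusing the fixture from setUp above; this test is a suggestion, not part of the patch. Since the classification output is normalized by its sum, the two predicted values should add up to 1:

    - (void)testBatchNormOutputSumsToOne {
      double features[] = {-1, 10};
      NSData *input = [NSData dataWithBytes:features length:sizeof(features)];
      NSMutableData *output = [NSMutableData dataWithLength:2 * sizeof(double)];

      [modelForModelWithBatchNorm predictByFeatureMatrix:input intoPredictionMatrix:output];
      const double *p = (const double *)output.bytes;
      XCTAssertEqualWithAccuracy(p[0] + p[1], 1.0, 1e-6);
    }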