diff --git a/MLPNeuralNet.podspec b/MLPNeuralNet.podspec
index 8a6c78e..9e12f34 100644
--- a/MLPNeuralNet.podspec
+++ b/MLPNeuralNet.podspec
@@ -1,11 +1,11 @@
 Pod::Spec.new do |s|
   s.name = "MLPNeuralNet"
-  s.version = "1.0.10"
+  s.version = "1.0.11"
   s.summary = "Fast multilayer perceptron neural net for iOS and Mac OS X"
   s.license = { :type => 'BSD' }
   s.author = { "Mykola Pavlov" => "me@nikolaypavlov.com" }
-  s.source = { :git => "https://github.com/nikolaypavlov/MLPNeuralNet.git", :tag => "1.0.10" }
+  s.source = { :git => "https://github.com/alexobednikov/MLPNeuralNet.git", :tag => "1.0.11" }
   s.homepage = "http://nikolaypavlov.github.io/MLPNeuralNet/"
   s.screenshots = "http://nikolaypavlov.github.io/MLPNeuralNet/images/500px-Artificial_neural_network.png"

diff --git a/MLPNeuralNet/MLPNeuralNet.h b/MLPNeuralNet/MLPNeuralNet.h
index e04e473..bc975a7 100644
--- a/MLPNeuralNet/MLPNeuralNet.h
+++ b/MLPNeuralNet/MLPNeuralNet.h
@@ -40,21 +40,34 @@ typedef enum {
   MLPNone,
 } MLPActivationFunction;
 
+typedef enum {
+  // Dense is the default
+  MLPLayerDense,
+  MLPLayerBatchNormalization,
+} MLPLayerType;
+
 @interface MLPNeuralNet : NSObject
 
 @property (readonly, nonatomic) NSUInteger numberOfLayers;
 @property (readonly, nonatomic) NSUInteger featureVectorSize; // in bytes
 @property (readonly, nonatomic) NSUInteger predictionVectorSize; // in bytes
 @property (readonly, nonatomic) MLPOutput outputMode;
+// sequence of MLPLayerType identifiers; by default all are MLPLayerDense
+@property (readonly, nonatomic) NSMutableArray *layersTypes;
 @property (nonatomic) MLPActivationFunction hiddenActivationFunction;
 @property (nonatomic) MLPActivationFunction outputActivationFunction;
 @property (nonatomic) MLPActivationFunction activationFunction;
 
-// Designated initializer
+// Designated initializers
 - (id)initWithLayerConfig:(NSArray *)layerConfig // of NSNumbers
                   weights:(NSData *)weights // of double
                outputMode:(MLPOutput)outputMode;
 
+- (id)initWithLayerConfigAndLayerType:(NSArray *)layerConfig // of NSNumbers
+                              weights:(NSData *)weights // of double
+                           layerTypes:(NSMutableArray *)layerTypes // of MLPLayerType
+                           outputMode:(MLPOutput)outputMode;
+
 // Predicts new examples by feature-vector and copies the prediction into specified buffer
 // Vector and prediction buffers should be allocated to work with double precision
 - (void)predictByFeatureVector:(NSData *)vector intoPredictionVector:(NSMutableData *)prediction;
@@ -64,6 +77,8 @@ typedef enum {
 - (void)predictByFeatureMatrix:(NSData *)matrix intoPredictionMatrix:(NSMutableData *)prediction;
 
 // Number of weights required for the neural net of this configuration
-+ (NSInteger)countWeights:(NSArray *)layerConfig;
++ (NSInteger)countWeights:(NSArray *)layerConfig
+              layersTypes:(NSMutableArray *)layersTypes;
++ (NSInteger)countWeights:(NSArray *)layerConfig __deprecated_msg("use countWeights:layersTypes: instead");
 
 @end
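Review note: a minimal usage sketch of the new layer-typed API declared above. The config, layer types, and weight count follow countWeights:layersTypes:; the zero weights are placeholders so the sketch compiles, not values from a trained model (a real model also needs non-zero running_std entries, since prediction divides by them).

    #import "MLPNeuralNet.h"

    // 2 inputs -> dense(3) -> batch norm(3) -> dense(2)
    NSArray *config = @[@2, @3, @3, @2];
    NSMutableArray *types = [NSMutableArray arrayWithObjects:@(MLPLayerDense),
                             @(MLPLayerBatchNormalization), @(MLPLayerDense), nil];

    // (2+1)*3 dense + 4*3 batch norm + (3+1)*2 dense = 29 doubles
    NSMutableData *weights = [NSMutableData dataWithLength:29 * sizeof(double)];

    MLPNeuralNet *net = [[MLPNeuralNet alloc] initWithLayerConfigAndLayerType:config
                                                                      weights:weights
                                                                   layerTypes:types
                                                                   outputMode:MLPClassification];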
diff --git a/MLPNeuralNet/MLPNeuralNet.m b/MLPNeuralNet/MLPNeuralNet.m
index aa97e60..e44d97d 100644
--- a/MLPNeuralNet/MLPNeuralNet.m
+++ b/MLPNeuralNet/MLPNeuralNet.m
@@ -13,15 +13,12 @@
 #define ReLU_THR 0.0
 
 typedef struct {
-  // Number of rows
-  NSInteger nrow;
-  // Number of columns
-  NSInteger ncol;
+  NSInteger nrow; // Number of rows
+  NSInteger ncol; // Number of columns
   double *weightMatrix;
 } MLPLayer;
 
 @interface MLPNeuralNet () {
-
   NSMutableData *hiddenFeatures;
   NSMutableData *buffer;
   // MLPLayer structures
@@ -40,17 +37,24 @@ - (id)initWithLayerConfig:(NSArray *)layerConfig
                outputMode:(MLPOutput)outputMode {
   self = [super init];
   if (self) {
-    if ([self.class countWeights:layerConfig] != weights.length / sizeof(double)) {
+    _numberOfLayers = layerConfig.count;
+
+    // set the MLPLayerDense identifier for every layer except the input layer
+    _layersTypes = [[NSMutableArray alloc] initWithCapacity:_numberOfLayers];
+    for (NSUInteger n = 0; n < _numberOfLayers - 1; ++n) {
+      [_layersTypes addObject:[NSNumber numberWithInteger:MLPLayerDense]];
+    }
+
+    if ([self.class countWeights:layerConfig layersTypes:_layersTypes] != weights.length / sizeof(double)) {
       @throw [NSException exceptionWithName:NSInternalInconsistencyException
                                      reason:@"Number of weights doesn't match to configuration"
                                    userInfo:nil];
     }
-    _numberOfLayers = layerConfig.count;
     _featureVectorSize = [layerConfig[0] unsignedIntegerValue] * sizeof(double);
     _predictionVectorSize = [layerConfig.lastObject unsignedIntegerValue] * sizeof(double);
     _outputMode = outputMode;
-    
+
     // Allocate buffers of the maximum possible vector size, there should be a place for bias unit also.
     unsigned maxVectorLength = [[layerConfig valueForKeyPath:@"@max.self"] unsignedIntValue] + BIAS_UNIT;
     hiddenFeatures = [NSMutableData dataWithLength:maxVectorLength * sizeof(double)];
@@ -60,7 +64,7 @@
     // the input layer, so the total size is equal to number of layers - 1.
     arrayOfLayers = [NSMutableData dataWithLength:(_numberOfLayers - 1) * sizeof(MLPLayer)];
 
-    // Allocate memory for the wigth matrices and initialize them.
+    // Allocate memory for the weight matrices and initialize them.
     MLPLayer *layer = (MLPLayer *)arrayOfLayers.bytes;
     double *wts = (double *)weights.bytes;
     int crossLayerOffset = 0; // An offset between the weight matrices of different layers
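Review note: the dense weight layout that this initializer (and the new one below) unpacks is row-major, with the bias weight in column 0 of each row, matching the bias unit that prediction writes into row 0 of the feature buffer. For a hypothetical 2-input, 3-unit dense layer (nrow = 3, ncol = 3), the weight of output unit 1 for input 2 sits at:

    // index = col + row * ncol = 2 + 1 * 3 = 5
    double w = layer[j].weightMatrix[2 + 1 * 3];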
@@ -91,6 +95,82 @@
   return self;
 }
 
+- (id)initWithLayerConfigAndLayerType:(NSArray *)layerConfig // of NSNumbers
+                              weights:(NSData *)weights // of double
+                           layerTypes:(NSMutableArray *)layerTypes // of MLPLayerType
+                           outputMode:(MLPOutput)outputMode {
+  self = [super init];
+  if (self) {
+    _layersTypes = layerTypes;
+    if ([self.class countWeights:layerConfig layersTypes:_layersTypes] != weights.length / sizeof(double)) {
+      @throw [NSException exceptionWithName:NSInternalInconsistencyException
+                                     reason:@"Number of weights doesn't match to configuration"
+                                   userInfo:nil];
+    }
+
+    _numberOfLayers = layerConfig.count;
+    _featureVectorSize = [layerConfig[0] unsignedIntegerValue] * sizeof(double);
+    _predictionVectorSize = [layerConfig.lastObject unsignedIntegerValue] * sizeof(double);
+    _outputMode = outputMode;
+
+    // Allocate buffers of the maximum possible vector size, there should be a place for bias unit also.
+    unsigned maxVectorLength = [[layerConfig valueForKeyPath:@"@max.self"] unsignedIntValue] + BIAS_UNIT;
+    hiddenFeatures = [NSMutableData dataWithLength:maxVectorLength * sizeof(double)];
+    buffer = [NSMutableData dataWithLength:maxVectorLength * sizeof(double)];
+
+    // Allocate memory for layers. Note that we don't need a matrix for
+    // the input layer, so the total size is equal to number of layers - 1.
+    arrayOfLayers = [NSMutableData dataWithLength:(_numberOfLayers - 1) * sizeof(MLPLayer)];
+
+    // Allocate memory for the weight matrices and initialize them.
+    MLPLayer *layer = (MLPLayer *)arrayOfLayers.bytes;
+    double *wts = (double *)weights.bytes;
+    int crossLayerOffset = 0; // An offset between the weight matrices of different layers
+    for (int layer_index = 0; layer_index < _numberOfLayers - 1; layer_index++) { // Recall we don't need a matrix for the input layer
+      if ([_layersTypes[layer_index] isEqualToNumber:@(MLPLayerDense)]) {
+        // If the network has X units in layer j and Y units in layer j+1, then the weight matrix
+        // for layer j will be of dimension: [ Y x (X+1) ]
+        layer[layer_index].nrow = [layerConfig[layer_index+1] unsignedIntegerValue];
+        layer[layer_index].ncol = [layerConfig[layer_index] unsignedIntegerValue] + 1; // +1 for BIAS_UNIT
+        layer[layer_index].weightMatrix = calloc(layer[layer_index].nrow * layer[layer_index].ncol, sizeof(double));
+        NSAssert(layer[layer_index].weightMatrix != NULL, @"Out of memory for weight matrices");
+
+        int totalOffset = 0;
+        for (int row = 0; row < layer[layer_index].nrow; row++) {
+          for (int col = 0; col < layer[layer_index].ncol; col++) {
+            // Simulate the matrix using row-major ordering
+            int crossRowOffset = (col + row * (int)layer[layer_index].ncol);
+            // Now matrix[offset] corresponds to M[row, col]
+            totalOffset = crossRowOffset + crossLayerOffset;
+            layer[layer_index].weightMatrix[crossRowOffset] = wts[totalOffset];
+          }
+        }
+
+        crossLayerOffset = totalOffset + 1; // Adjust offset to the next layer
+      } else if ([_layersTypes[layer_index] isEqualToNumber:@(MLPLayerBatchNormalization)]) {
+        layer[layer_index].ncol = layer[layer_index].nrow = [layerConfig[layer_index] unsignedIntegerValue];
+        layer[layer_index].weightMatrix = calloc(layer[layer_index].nrow * 4, sizeof(double));
+        NSAssert(layer[layer_index].weightMatrix != NULL, @"Out of memory for weight matrices");
+
+        // the order is gamma, beta, running_mean, running_std
+        for (int row_index = 0; row_index < layer[layer_index].nrow * 4; ++row_index) {
+          layer[layer_index].weightMatrix[row_index] = wts[crossLayerOffset + row_index];
+        }
+        crossLayerOffset += layer[layer_index].nrow * 4;
+      } else {
+        NSString *error = [NSString stringWithFormat:@"Unsupported layer identifier. Got: %@",
+                           _layersTypes[layer_index]];
+        @throw [NSException exceptionWithName:NSInternalInconsistencyException reason:error userInfo:nil];
+      }
+    }
+  }
+
+  return self;
+}
+
 - (id)init {
   @throw [NSException exceptionWithName:@"MLPNeuralNet init"
                                  reason:@"Use designated initializer, not init"
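Review note: with the gamma/beta/running_mean/running_std layout above, each unit's batch-normalization inference is y = gamma * (x - running_mean) / running_std + beta, the standard formulation; storing running_std rather than the variance presumably means the exporter bakes sqrt(variance + epsilon) in up front. A scalar sketch of what the vDSP sequence in the next hunk computes per feature (function name hypothetical):

    static double bn_forward(double x, double gamma, double beta, double mean, double std) {
      // standardize, then scale and shift: mirrors the vsub -> vdiv -> vmul -> vadd sequence below
      return gamma * ((x - mean) / std) + beta;
    }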
@@ -153,25 +233,50 @@ - (void)predictByFeatureMatrix:(NSData *)matrix intoPredictionMatrix:(NSMutableD
   vDSP_mtransD((double *)matrix.bytes, 1, &features[numExamples], 1, numFeatures, numExamples);
 
   // Forward propagation algorithm
-  for (int j = 0; j < self.numberOfLayers - 1; j++) {
-
-    // 1. Calculate hidden features for current layer j
-    vDSP_mmulD(layer[j].weightMatrix, 1, features, 1, &buf[numExamples], 1, layer[j].nrow, numExamples, layer[j].ncol);
-
-    // 2. Add the bias unit in row 0 and propagate features to the next level
-    vDSP_vfillD(&bias, &buf[0], 1, numExamples);
-
-    memcpy(features, buf, (layer[j].nrow + BIAS_UNIT) * numExamples * sizeof(double));
+  for (int layer_index = 0; layer_index < self.numberOfLayers - 1; layer_index++) {
+    if ([_layersTypes[layer_index] isEqualToNumber:@(MLPLayerBatchNormalization)]) {
+      // apply BN per example
+      for (int example_index = 0; example_index < numExamples; ++example_index) {
+        // standardize x = (x - mean) / std
+        double *current_example_features_start = &features[numExamples] + example_index;
+        vDSP_vsubD(layer[layer_index].weightMatrix + 2 * layer[layer_index].ncol, 1,
+                   current_example_features_start, numExamples,
+                   current_example_features_start, numExamples,
+                   layer[layer_index].ncol);
+        vDSP_vdivD(layer[layer_index].weightMatrix + 3 * layer[layer_index].ncol, 1,
+                   current_example_features_start, numExamples,
+                   current_example_features_start, numExamples,
+                   layer[layer_index].ncol);
+
+        // apply BN compensation x = gamma * x + beta
+        vDSP_vmulD(layer[layer_index].weightMatrix, 1,
+                   current_example_features_start, numExamples,
+                   current_example_features_start, numExamples,
+                   layer[layer_index].ncol);
+        vDSP_vaddD(layer[layer_index].weightMatrix + layer[layer_index].ncol, 1,
+                   current_example_features_start, numExamples,
+                   current_example_features_start, numExamples,
+                   layer[layer_index].ncol);
+      }
+    } else if ([_layersTypes[layer_index] isEqualToNumber:@(MLPLayerDense)]) {
+      // 1. Calculate hidden features for the current layer
+      vDSP_mmulD(layer[layer_index].weightMatrix, 1, features, 1, &buf[numExamples],
+                 1, layer[layer_index].nrow, numExamples, layer[layer_index].ncol);
+
+      // 2. Add the bias unit in row 0 and propagate features to the next level
+      vDSP_vfillD(&bias, &buf[0], 1, numExamples);
+      memcpy(features, buf, (layer[layer_index].nrow + BIAS_UNIT) * numExamples * sizeof(double));
+    }
 
-    // 3. Apply activation function, e.g. logistic func: http://en.wikipedia.org/wiki/Logistic_function
-    if (self.outputMode == MLPClassification) {
-      int feature_len = (int)(layer[j].nrow * numExamples);
+    // 3. Apply activation function
+    if (self.outputMode == MLPClassification && [_layersTypes[layer_index] isEqualToNumber:@(MLPLayerDense)]) {
+      int feature_len = (int)(layer[layer_index].nrow * numExamples);
       double one = 1.0;
       double mone = -1.0;
       double relu_threshold = ReLU_THR;
 
       MLPActivationFunction activation =
-        (j < self.numberOfLayers - 2) ? self.hiddenActivationFunction : self.outputActivationFunction;
+        (layer_index < self.numberOfLayers - 2) ? self.hiddenActivationFunction : self.outputActivationFunction;
 
       switch (activation) {
         case MLPSigmoid:
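Review note: vDSP_mtransD transposes the input up front, so the feature buffer holds (units + bias) rows by numExamples columns, with the bias in row 0. That is why the per-example BN code above starts at &features[numExamples] (skipping the bias row) plus example_index, and walks with stride numExamples: consecutive features of one example are a whole row apart. In index form (names hypothetical):

    // feature i of example e, with BIAS_UNIT = 1 accounting for the bias row
    double x = features[(i + BIAS_UNIT) * numExamples + e];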
@@ -190,16 +295,15 @@ - (void)predictByFeatureMatrix:(NSData *)matrix intoPredictionMatrix:(NSMutableD
           break;
 
         case MLPSoftmax: {
-          // subtract maximum input to avoid overflow.
-          double max_input = 0;
-          vDSP_maxvD(&features[numExamples], 1, &max_input, feature_len);
-          max_input *= -1;
-          vDSP_vsaddD(&features[numExamples], 1, &max_input, &features[numExamples], 1, feature_len);
-          vvexp(&features[numExamples], &features[numExamples], &feature_len);
-          double sum_exp = 0;
-          vDSP_sveD(&features[numExamples], 1, &sum_exp, feature_len);
-          vDSP_vsdivD(&features[numExamples], 1, &sum_exp, &features[numExamples], 1, feature_len);
+
+          for (int example_index = 0; example_index < numExamples; ++example_index) {
+            double *current_example_features_start = &features[numExamples] + example_index;
+
+            double sum_exp = 0;
+            vDSP_sveD(current_example_features_start, numExamples, &sum_exp, feature_len);
+            vDSP_vsdivD(current_example_features_start, numExamples, &sum_exp, current_example_features_start, numExamples, feature_len);
+          }
           break;
         }
         case MLPNone:
@@ -239,6 +343,29 @@ - (NSString *)description {
   return [NSString stringWithFormat:@"a %@ network with %d weigths", networkArch, numberOfWeights];
 }
 
++ (NSInteger)countWeights:(NSArray *)layerConfig
+              layersTypes:(NSMutableArray *)layersTypes {
+
+  NSAssert(layersTypes.count + 1 == layerConfig.count, @"Found inconsistency in layers shapes and layers types");
+
+  NSInteger count = 0;
+  for (int layer_index = 0; layer_index < layerConfig.count - 1; layer_index++) {
+    if ([layersTypes[layer_index] isEqualToNumber:@(MLPLayerDense)]) {
+      count += ([layerConfig[layer_index] unsignedIntValue] + 1) * [layerConfig[layer_index + 1] unsignedIntValue];
+    } else if ([layersTypes[layer_index] isEqualToNumber:@(MLPLayerBatchNormalization)]) {
+      count += 4 * [layerConfig[layer_index] unsignedIntValue];
+    } else {
+      NSString *error = [NSString stringWithFormat:@"Unsupported layer identifier. Got: %@",
+                         layersTypes[layer_index]];
+      NSAssert(false, error);
+      break;
+    }
+  }
+
+  return count;
+}
+
 + (NSInteger)countWeights:(NSArray *)layerConfig {
   NSInteger count = 0;
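Review note: the countWeights:layersTypes: arithmetic, worked for the configurations exercised in the tests below. A dense layer from X to Y units costs (X + 1) * Y doubles for weights plus bias; a batch-norm layer of width N costs 4 * N (gamma, beta, running_mean, running_std):

    // all dense, config [2, 3, 2, 1]:
    //   (2+1)*3 + (3+1)*2 + (2+1)*1 = 9 + 8 + 3 = 20
    // dense / batch norm / dense, config [2, 3, 2, 1]:
    //   (2+1)*3 + 4*3 + (2+1)*1 = 9 + 12 + 3 = 24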
diff --git a/MLPNeuralNetTests/MLPNeuralNetTests.m b/MLPNeuralNetTests/MLPNeuralNetTests.m
index 9facb4d..c66126e 100644
--- a/MLPNeuralNetTests/MLPNeuralNetTests.m
+++ b/MLPNeuralNetTests/MLPNeuralNetTests.m
@@ -46,6 +46,11 @@ @interface MLPNeuralNetTests : XCTestCase {
   NSArray *layersForReLUSoftmaxModel;
   MLPNeuralNet *modelWithReLUSoftmax;
 
+  NSData *wtsForModelWithBatchNorm;
+  NSArray *layersForModelWithBatchNorm;
+  NSMutableArray *layerTypesForModelWithBatchNorm;
+  MLPNeuralNet *modelForModelWithBatchNorm;
+
   NSData *vector;
   NSMutableData *prediction;
   double *assessment;
@@ -118,9 +123,10 @@ - (void)setUp {
   modelWithReLUSig.hiddenActivationFunction = MLPReLU;
   modelWithReLUSig.outputActivationFunction = MLPSigmoid;
 
-  double wtsForeLUSoftmax[] = {0.0, 1.1093217, -0.29420424, 0.0, 0.40102676, 0.048761927,
-    0.0, 0.18262321, 0.16701823, 0.0, -0.014809706, 0.81076205};
-  wtsForReLUSoftmaxModel = [NSData dataWithBytes:wtsForeLUSoftmax length:sizeof(wtsForeLUSoftmax)];
+  double wtsForReLUSoftmax[] = {-0.00333056, -0.29518637, 0.26010591, 0.00627716, -0.63008577,
+    0.5226832, 0.02341191, 0.89141166, -0.66637737, -0.02341191,
+    -0.19588685, -0.01236533};
+  wtsForReLUSoftmaxModel = [NSData dataWithBytes:wtsForReLUSoftmax length:sizeof(wtsForReLUSoftmax)];
   layersForReLUSoftmaxModel = [NSArray arrayWithObjects:@2, @2, @2, nil];
   modelWithReLUSoftmax = [[MLPNeuralNet alloc] initWithLayerConfig:layersForReLUSoftmaxModel
                                                            weights:wtsForReLUSoftmaxModel
@@ -128,6 +134,25 @@ - (void)setUp {
   modelWithReLUSoftmax.hiddenActivationFunction = MLPReLU;
   modelWithReLUSoftmax.outputActivationFunction = MLPSoftmax;
 
+  double wtsForModelWithBN[] = {-0.02911046, 0.96149528, -0.30875102, -0.07312316, -0.53453773,
+    -0.95400345, -0.04936157, -0.60033119, 1.01297891, 0.8972764,
+    0.91061068, 0.95709789, 0.0074056, 0.00735936, -0.00107296,
+    0.38664621, 0.41322696, 0.35796061, 0.53616369, 0.51914299,
+    0.56763041, 0.00775698, 0.4242098, 0.59139675, -0.1122655,
+    -0.00775698, -0.75524676, -0.39608201, -0.15460265};
+
+  wtsForModelWithBatchNorm = [NSData dataWithBytes:wtsForModelWithBN length:sizeof(wtsForModelWithBN)];
+  // 2 = input shape; 3 = #neurons in dense layer; 3 = BatchNorm shape; 2 = output shape
+  layersForModelWithBatchNorm = [NSArray arrayWithObjects:@2, @3, @3, @2, nil];
+  layerTypesForModelWithBatchNorm = [NSMutableArray arrayWithObjects:@(MLPLayerDense), @(MLPLayerBatchNormalization), @(MLPLayerDense), nil];
+  modelForModelWithBatchNorm = [[MLPNeuralNet alloc] initWithLayerConfigAndLayerType:layersForModelWithBatchNorm
+                                                                             weights:wtsForModelWithBatchNorm
+                                                                          layerTypes:layerTypesForModelWithBatchNorm
+                                                                          outputMode:MLPClassification];
+  modelForModelWithBatchNorm.hiddenActivationFunction = MLPReLU;
+  modelForModelWithBatchNorm.outputActivationFunction = MLPSoftmax;
+
   double features[] = {
     1, 1,
     1, 0,
@@ -344,7 +369,14 @@ - (void)testModelSettingsWithDifferentActivations {
 
 - (void)testNumberOfWeigthsByLayerConfig {
   NSArray *cfg = @[@2, @3, @2, @1];
-  XCTAssertEqual([MLPNeuralNet countWeights:cfg], (NSInteger)20);
+  NSMutableArray *layer_types = [NSMutableArray arrayWithObjects:@(MLPLayerDense), @(MLPLayerDense), @(MLPLayerDense), nil];
+  XCTAssertEqual([MLPNeuralNet countWeights:cfg
+                                layersTypes:layer_types], (NSInteger)20);
+
+  NSArray *cfg2 = @[@2, @3, @2, @1];
+  NSMutableArray *layer_types2 = [NSMutableArray arrayWithObjects:@(MLPLayerDense), @(MLPLayerBatchNormalization), @(MLPLayerDense), nil];
+  XCTAssertEqual([MLPNeuralNet countWeights:cfg2
+                                layersTypes:layer_types2], (NSInteger)24);
 }
 
 #pragma mark - Exception tests
@@ -377,15 +409,26 @@ - (void)testIncorrectNumberOfWeights {
 
 - (void)testSoftmaxOutputLayer {
   double features[] = {-1, 10};
-  vector = [NSData dataWithBytes:features length:sizeof(features)];
-  NSMutableData* predictionM2 = [NSMutableData dataWithLength:sizeof(double)*2];
+  vector = [NSData dataWithBytes:features length:sizeof(features)];
+  NSMutableData *predictionM2 = [NSMutableData dataWithLength:sizeof(double) * 2];
   double* assessmentM2 = (double *)predictionM2.bytes;
 
   [modelWithReLUSoftmax predictByFeatureMatrix:vector intoPredictionMatrix:predictionM2];
-  XCTAssertEqualWithAccuracy(assessmentM2[0], 0.48606774, 0.0001);
-  XCTAssertEqualWithAccuracy(assessmentM2[1], 0.51393223, 0.0001);
+  XCTAssertEqualWithAccuracy(assessmentM2[0], 0.3447236, 0.0001);
+  XCTAssertEqualWithAccuracy(assessmentM2[1], 0.6552764, 0.0001);
+}
+
+- (void)testBatchNormalizationLayer {
+  double features[] = {-1, 10};
+  vector = [NSData dataWithBytes:features length:sizeof(features)];
+
+  NSMutableData *predictionM2 = [NSMutableData dataWithLength:sizeof(double) * 2];
+  double *assessmentM2 = (double *)predictionM2.bytes;
+
+  [modelForModelWithBatchNorm predictByFeatureMatrix:vector intoPredictionMatrix:predictionM2];
+  XCTAssertEqualWithAccuracy(assessmentM2[0], 0.32947651, 0.0001);
+  XCTAssertEqualWithAccuracy(assessmentM2[1], 0.67052352, 0.0001);
 }
 
 @end
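Review note: a possible extra assertion for the batch-norm path, reusing the fixture from setUp above; this test is a suggestion, not part of the patch. Since the classification output is normalized by its sum, the two predicted values should add up to 1:

    - (void)testBatchNormOutputSumsToOne {
      double features[] = {-1, 10};
      NSData *input = [NSData dataWithBytes:features length:sizeof(features)];
      NSMutableData *output = [NSMutableData dataWithLength:2 * sizeof(double)];

      [modelForModelWithBatchNorm predictByFeatureMatrix:input intoPredictionMatrix:output];
      const double *p = (const double *)output.bytes;
      XCTAssertEqualWithAccuracy(p[0] + p[1], 1.0, 1e-6);
    }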