cgnorthcutt · rwightman · Apr 12, 2019 · Apr 12, 2019 · Apr 16, 2019 · Apr 22, 2019
diff --git a/README.md b/README.md
@@ -4,9 +4,7 @@ Benchmarks for **every** pre-trained model in PyTorch and Keras-Tensorflow. Benc
 
 ## Why this is helpful
 
-Combining Keras and PyTorch benchmarks into a single framework lets researchers decide which platform is best for a given model. For example `resnet` architectures perform better in PyTorch and `inception` architectures perform better in Keras (see below). These benchmarks serve as a standard from which to start new projects or debug current implementations. 
-
-For researchers exploring Keras and PyTorch models, these benchmarks serve as a standard from which to start new projects or debug current implementations. 
+Combining Keras and PyTorch benchmarks into a single framework lets researchers decide which platform is best for a given model. For example, `resnet` architectures perform better in PyTorch and `inception` architectures perform better in Keras (see below). These benchmarks serve as a standard from which to start new projects or debug current implementations.
 
 Many researchers struggle with reproducible accuracy benchmarks of pre-trained Keras (Tensorflow) models on ImageNet. Examples of issues are [here1](https://github.com/keras-team/keras/issues/10040), [here2](https://github.com/keras-team/keras/issues/10979), [here3](http://blog.datumbox.com/the-batch-normalization-layer-of-keras-is-broken/), [here4](https://github.com/keras-team/keras/issues/8672), and [here5](https://github.com/keras-team/keras/issues/7848). 
 

diff --git a/imagenet_keras_get_predictions.py b/imagenet_keras_get_predictions.py
@@ -16,6 +16,7 @@
 import numpy as np
 import os
 import sys
+from PIL import Image
 
 # Use PyTorch/torchvision for dataloading (more reliable/faster)
 from torchvision import datasets
@@ -149,11 +150,11 @@ def main(args = parser.parse_args()):
     # Create output directory if it does not exist
     if not os.path.exists(args.output_dir):
         os.makedirs(args.output_dir)
-        
+
     # Grab imagenet data
     val_dataset = datasets.ImageFolder(args.val_dir)
     img_paths, labels = (list(t) for t in zip(*val_dataset.imgs))
-        
+
     # Run forward pass inference on all models for all examples in val set.
     models = keras_models if args.model is None else [args.model]
     for model in models:
@@ -176,6 +177,23 @@ def main(args = parser.parse_args()):
 
 # In[9]:
 
+def crop_center(img, target_size):
+    # Cut a target_size window, assumed in network order (H, W), centred on img.
+    img_w, img_h = img.size
+    crop_h, crop_w = target_size[0], target_size[1]
+    x0 = img_w // 2 - crop_w // 2
+    y0 = img_h // 2 - crop_h // 2
+    box = (x0, y0, x0 + crop_w, y0 + crop_h)
+    return img.crop(box)
+
+
+def shortest_edge_scale(img, target_size, scale):
+    # Bilinear resize of each side by target / scale / short side; target_size assumed (H, W).
+    width, height = img.size
+    pairs = ((width, target_size[1]), (height, target_size[0]))
+    new_size = tuple(int(side * goal / scale) // min(width, height) for side, goal in pairs)
+    return img.resize(new_size, resample=Image.BILINEAR)
+
 
 def process_model(
     model_name, 
@@ -201,16 +219,31 @@ def process_model(
 
     # Create Keras model
     model = Model(weights='imagenet')
-    
+
     # Preprocessing and Forward pass through validation set.
     probs = []
+    inputs = []
+    batch_size = 64
     for i, img_path in enumerate(img_paths):
-        if i % 32 == 0:
+        img = image.load_img(img_path, target_size=None)
+        img = shortest_edge_scale(img, img_size, 0.875)
+        img = crop_center(img, img_size)
+        img = np.expand_dims(image.img_to_array(img), axis=0)
+        inputs.append(img)
+
+        current_batch = 0
+        if i % (batch_size + 1) == 0:
+            current_batch = batch_size
+        elif i == len(img_paths) - 1:
+            current_batch = len(img_paths) % batch_size
+
+        if current_batch:
+            inputs = np.concatenate(inputs, axis=0)
+            probs.append(model.predict_on_batch(preprocess_model(inputs)))
             print("\r{} completed: {:.2%}".format(model_name, i / len(img_paths)), end="")
             sys.stdout.flush()
-        img = image.load_img(img_path, target_size=img_size)
-        img = np.expand_dims(image.img_to_array(img), axis=0)            
-        probs.append(model.predict(preprocess_model(img)))
+            inputs = []
+
     probs = np.vstack(probs)
     if save_all_probs:
         np.save(wfn_base + "probs.npy", probs.astype(np.float16))

diff --git a/imagenet_pytorch_get_predictions.py b/imagenet_pytorch_get_predictions.py
@@ -92,7 +92,7 @@ def main(args = parser.parse_args()):
     dataloaders = {}
     for img_size in [224, 299]:
         val_transform = transforms.Compose([
-            transforms.Resize(256),
+            transforms.Resize(img_size // 0.875),
             transforms.CenterCrop(img_size),
             transforms.ToTensor(),
             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
@@ -132,7 +132,7 @@ def process_model(
     model = model.to(device)
     wfn_base = os.path.join(out_dir, model_name + "_pytorch_imagenet_")
     probs, labels = [], []
-    loader = dataloaders[299] if model_name is "inception_v3" else dataloaders[224]
+    loader = dataloaders[299] if model_name == "inception_v3" else dataloaders[224]
 
     # Inference, with no gradient changing
     model.eval() # set model to inference mode (not train mode)