fix

TropComplique · Aug 24, 2018 · 545ec4f · 545ec4f
1 parent 9c4f6cb
commit 545ec4f
Show file tree

Hide file tree

Showing 7 changed files with 28 additions and 39 deletions.
diff --git a/README.md b/README.md
@@ -24,6 +24,7 @@ Examples of face detections:
 But you can improve its performance if you upscale images before feeding them to the network.
 2. You can see how anchor densification works in `visualize_densified_anchor_boxes.ipynb`.
 3. You can see how my data augmentation works in `test_input_pipeline.ipynb`.
+4. The speed on a CPU is **~30 ms/image** (image size is 1024x768).
 
 ## How to train
 

diff --git a/checkpoint-step-240000 (1).tar.gz b/checkpoint-step-240000 (1).tar.gz
diff --git a/predict_for_FDDB.ipynb b/predict_for_FDDB.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -28,14 +28,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "IMAGES_DIR = '/home/gpu2/hdd/dan/FDDB/originalPics/'\n",
     "ANNOTATIONS_PATH = '/home/gpu2/hdd/dan/FDDB/FDDB-folds/'\n",
     "RESULT_DIR = 'result/'\n",
-    "MODEL_PATH = 'model-step-240000.pb'"
+    "MODEL_PATH = 'model.pb'"
    ]
   },
   {
@@ -47,7 +47,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -59,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -71,7 +71,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -83,7 +83,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -95,7 +95,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -120,7 +120,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -129,17 +129,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 2845/2845 [00:15<00:00, 181.58it/s]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "predictions = []\n",
     "for n in tqdm(images_to_use):\n",
@@ -152,7 +144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -175,17 +167,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 2845/2845 [00:00<00:00, 16142.70it/s]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "for n in tqdm(images_to_use):\n",
     "    p = os.path.join(RESULT_DIR, 'images', n + '.jpg')\n",

diff --git a/save.py b/save.py
@@ -32,7 +32,7 @@
 
 def serving_input_receiver_fn():
     images = tf.placeholder(dtype=tf.uint8, shape=[None, HEIGHT, WIDTH, 3], name='image_tensor')
-    features = {'images': tf.transpose(tf.to_float(images)*(1.0/255.0), perm=[0, 3, 1, 2])}
+    features = {'images': tf.to_float(images)*(1.0/255.0)}
     return tf.estimator.export.ServingInputReceiver(features, {'images': images})
 
 

diff --git a/src/anchor_generator.py b/src/anchor_generator.py
@@ -41,7 +41,7 @@ def __call__(self, image_features, image_size):
         """
         Arguments:
             image_features: a list of float tensors where the ith tensor
-                has shape [batch, channels_i, height_i, width_i].
+                has shape [batch, height_i, width_i, channels_i].
             image_size: a tuple of integers (int tensors with shape []) (width, height).
         Returns:
             a float tensor with shape [num_anchor, 4],
@@ -50,9 +50,9 @@ def __call__(self, image_features, image_size):
         feature_map_shape_list = []
         for feature_map in image_features:
 
-            height_i, width_i = feature_map.shape.as_list()[2:]
+            height_i, width_i = feature_map.shape.as_list()[1:3]
             if height_i is None or width_i is None:
-                height_i, width_i = tf.shape(feature_map)[2], tf.shape(feature_map)[3]
+                height_i, width_i = tf.shape(feature_map)[1], tf.shape(feature_map)[2]
 
             feature_map_shape_list.append((height_i, width_i))
         image_width, image_height = image_size

diff --git a/src/detector.py b/src/detector.py
@@ -301,5 +301,5 @@ def _add_box_predictions(self, feature_maps):
                 y = tf.reshape(y, [batch_size, height_i, width_i, num_predictions_per_location, 2])
                 class_predictions_with_background[i] = tf.reshape(y, tf.stack([batch_size, num_anchors_on_feature_map, 2]))
 
-            self.box_encodings = tf.concat(box_encodings, axis=3)
-            self.class_predictions_with_background = tf.concat(class_predictions_with_background, axis=3)
+            self.box_encodings = tf.concat(box_encodings, axis=1)
+            self.class_predictions_with_background = tf.concat(class_predictions_with_background, axis=1)
diff --git a/try_detector.ipynb b/try_detector.ipynb
@@ -16,6 +16,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import os\n",
+    "os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'\n",
+    "\n",
     "import numpy as np\n",
     "from PIL import Image, ImageDraw\n",
     "import os\n",
@@ -31,7 +34,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "MODEL_PATH = 'model-step-240000.pb'\n",
+    "MODEL_PATH = 'model.pb'\n",
     "face_detector = FaceDetector(MODEL_PATH, gpu_memory_fraction=0.25, visible_device_list='0')"
    ]
   },
@@ -48,7 +51,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "path = '/home/gpu2/hdd/dan/WIDER/train/images/48_Parachutist_Paratrooper_Parachutist_Paratrooper_48_733.jpg'\n",
+    "path = '/home/gpu2/hdd/dan/WIDER/WIDER_train/images/48--Parachutist_Paratrooper/48_Parachutist_Paratrooper_Parachutist_Paratrooper_48_972.jpg'\n",
+    "\n",
     "image_array = cv2.imread(path)\n",
     "image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)\n",
     "image = Image.fromarray(image_array)\n",