diff --git a/HandPose/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/HandPose/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 000000000..2fd64429b --- /dev/null +++ b/HandPose/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/HandPose/CMakeLists.txt b/HandPose/CMakeLists.txt new file mode 100644 index 000000000..ddf474e34 --- /dev/null +++ b/HandPose/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 2.8.12) + +PROJECT(handPose) + +find_package( OpenCV REQUIRED ) + +include_directories( ${OpenCV_INCLUDE_DIRS}) + +MACRO(add_example name) + ADD_EXECUTABLE(${name} ${name}.cpp) + TARGET_LINK_LIBRARIES(${name} ${OpenCV_LIBS}) +ENDMACRO() + + +add_example(handPoseImage) +add_example(handPoseVideo) diff --git a/HandPose/getModels.sh b/HandPose/getModels.sh new file mode 100755 index 000000000..c444e94bd --- /dev/null +++ b/HandPose/getModels.sh @@ -0,0 +1,9 @@ +# ------------------------- BODY, FACE AND HAND MODELS ------------------------- +# Downloading body pose (COCO and MPI), face and hand models +OPENPOSE_URL="http://posefs1.perception.cs.cmu.edu/OpenPose/models/" +HAND_FOLDER="hand/" + +# "------------------------- HAND MODELS -------------------------" +# Hand +HAND_MODEL=$HAND_FOLDER"pose_iter_102000.caffemodel" +wget -c ${OPENPOSE_URL}${HAND_MODEL} -P ${HAND_FOLDER} diff --git a/HandPose/hand.jpg b/HandPose/hand.jpg new file mode 100644 index 000000000..26ec755a3 Binary files /dev/null and b/HandPose/hand.jpg differ diff --git a/HandPose/hand/pose_deploy.prototxt b/HandPose/hand/pose_deploy.prototxt new file mode 100644 index 000000000..3554c3a02 --- /dev/null +++ b/HandPose/hand/pose_deploy.prototxt @@ -0,0 +1,1756 @@ +input: "image" +input_dim: 1 # Original: 2 +input_dim: 3 # It crashes if not left to 3 +input_dim: 1 # Original: 368 +input_dim: 1 # Original: 368 +layer { + name: "conv1_1" + type: "Convolution" + bottom: 
"image" + top: "conv1_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1_stage1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1_stage1" + top: "conv2_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "pool2_stage1" + type: "Pooling" + bottom: "conv2_2" + top: "pool2_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" 
+ bottom: "pool2_stage1" + top: "conv3_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "conv3_4" + type: "Convolution" + bottom: "conv3_3" + top: "conv3_4" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu3_4" + type: "ReLU" + bottom: "conv3_4" + top: "conv3_4" +} +layer { + name: "pool3_stage1" + type: "Pooling" + bottom: "conv3_4" + top: "pool3_stage1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3_stage1" + top: "conv4_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + 
convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + name: "conv4_4" + type: "Convolution" + bottom: "conv4_3" + top: "conv4_4" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu4_4" + type: "ReLU" + bottom: "conv4_4" + top: "conv4_4" +} +layer { + name: "conv5_1" + type: "Convolution" + bottom: "conv4_4" + top: "conv5_1" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + name: "conv5_2" + type: "Convolution" + bottom: 
"conv5_1" + top: "conv5_2" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + name: "conv5_3_CPM" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu5_4_stage1_3" + type: "ReLU" + bottom: "conv5_3_CPM" + top: "conv5_3_CPM" +} +layer { + name: "conv6_1_CPM" + type: "Convolution" + bottom: "conv5_3_CPM" + top: "conv6_1_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "relu6_4_stage1_1" + type: "ReLU" + bottom: "conv6_1_CPM" + top: "conv6_1_CPM" +} +layer { + name: "conv6_2_CPM" + type: "Convolution" + bottom: "conv6_1_CPM" + top: "conv6_2_CPM" + param { + lr_mult: 1.0 + decay_mult: 1 + } + param { + lr_mult: 2.0 + decay_mult: 0 + } + convolution_param { + num_output: 22 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "concat_stage2" + type: "Concat" + bottom: "conv6_2_CPM" + bottom: "conv5_3_CPM" + top: "concat_stage2" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage2" + type: "Convolution" + bottom: "concat_stage2" + top: "Mconv1_stage2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + 
decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_2_stage2_1" + type: "ReLU" + bottom: "Mconv1_stage2" + top: "Mconv1_stage2" +} +layer { + name: "Mconv2_stage2" + type: "Convolution" + bottom: "Mconv1_stage2" + top: "Mconv2_stage2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_3_stage2_2" + type: "ReLU" + bottom: "Mconv2_stage2" + top: "Mconv2_stage2" +} +layer { + name: "Mconv3_stage2" + type: "Convolution" + bottom: "Mconv2_stage2" + top: "Mconv3_stage2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_4_stage2_3" + type: "ReLU" + bottom: "Mconv3_stage2" + top: "Mconv3_stage2" +} +layer { + name: "Mconv4_stage2" + type: "Convolution" + bottom: "Mconv3_stage2" + top: "Mconv4_stage2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_5_stage2_4" + type: "ReLU" + bottom: "Mconv4_stage2" + top: "Mconv4_stage2" +} +layer { + name: "Mconv5_stage2" + type: "Convolution" + bottom: "Mconv4_stage2" + top: "Mconv5_stage2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + 
weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_6_stage2_5" + type: "ReLU" + bottom: "Mconv5_stage2" + top: "Mconv5_stage2" +} +layer { + name: "Mconv6_stage2" + type: "Convolution" + bottom: "Mconv5_stage2" + top: "Mconv6_stage2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_7_stage2_6" + type: "ReLU" + bottom: "Mconv6_stage2" + top: "Mconv6_stage2" +} +layer { + name: "Mconv7_stage2" + type: "Convolution" + bottom: "Mconv6_stage2" + top: "Mconv7_stage2" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 22 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "concat_stage3" + type: "Concat" + bottom: "Mconv7_stage2" + bottom: "conv5_3_CPM" + top: "concat_stage3" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage3" + type: "Convolution" + bottom: "concat_stage3" + top: "Mconv1_stage3" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_2_stage3_1" + type: "ReLU" + bottom: "Mconv1_stage3" + top: "Mconv1_stage3" +} +layer { + name: "Mconv2_stage3" + type: "Convolution" + bottom: "Mconv1_stage3" + top: "Mconv2_stage3" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + 
std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_3_stage3_2" + type: "ReLU" + bottom: "Mconv2_stage3" + top: "Mconv2_stage3" +} +layer { + name: "Mconv3_stage3" + type: "Convolution" + bottom: "Mconv2_stage3" + top: "Mconv3_stage3" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_4_stage3_3" + type: "ReLU" + bottom: "Mconv3_stage3" + top: "Mconv3_stage3" +} +layer { + name: "Mconv4_stage3" + type: "Convolution" + bottom: "Mconv3_stage3" + top: "Mconv4_stage3" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_5_stage3_4" + type: "ReLU" + bottom: "Mconv4_stage3" + top: "Mconv4_stage3" +} +layer { + name: "Mconv5_stage3" + type: "Convolution" + bottom: "Mconv4_stage3" + top: "Mconv5_stage3" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_6_stage3_5" + type: "ReLU" + bottom: "Mconv5_stage3" + top: "Mconv5_stage3" +} +layer { + name: "Mconv6_stage3" + type: "Convolution" + bottom: "Mconv5_stage3" + top: "Mconv6_stage3" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: 
"Mrelu1_7_stage3_6" + type: "ReLU" + bottom: "Mconv6_stage3" + top: "Mconv6_stage3" +} +layer { + name: "Mconv7_stage3" + type: "Convolution" + bottom: "Mconv6_stage3" + top: "Mconv7_stage3" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 22 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "concat_stage4" + type: "Concat" + bottom: "Mconv7_stage3" + bottom: "conv5_3_CPM" + top: "concat_stage4" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage4" + type: "Convolution" + bottom: "concat_stage4" + top: "Mconv1_stage4" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_2_stage4_1" + type: "ReLU" + bottom: "Mconv1_stage4" + top: "Mconv1_stage4" +} +layer { + name: "Mconv2_stage4" + type: "Convolution" + bottom: "Mconv1_stage4" + top: "Mconv2_stage4" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_3_stage4_2" + type: "ReLU" + bottom: "Mconv2_stage4" + top: "Mconv2_stage4" +} +layer { + name: "Mconv3_stage4" + type: "Convolution" + bottom: "Mconv2_stage4" + top: "Mconv3_stage4" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_4_stage4_3" + type: "ReLU" + 
bottom: "Mconv3_stage4" + top: "Mconv3_stage4" +} +layer { + name: "Mconv4_stage4" + type: "Convolution" + bottom: "Mconv3_stage4" + top: "Mconv4_stage4" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_5_stage4_4" + type: "ReLU" + bottom: "Mconv4_stage4" + top: "Mconv4_stage4" +} +layer { + name: "Mconv5_stage4" + type: "Convolution" + bottom: "Mconv4_stage4" + top: "Mconv5_stage4" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_6_stage4_5" + type: "ReLU" + bottom: "Mconv5_stage4" + top: "Mconv5_stage4" +} +layer { + name: "Mconv6_stage4" + type: "Convolution" + bottom: "Mconv5_stage4" + top: "Mconv6_stage4" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_7_stage4_6" + type: "ReLU" + bottom: "Mconv6_stage4" + top: "Mconv6_stage4" +} +layer { + name: "Mconv7_stage4" + type: "Convolution" + bottom: "Mconv6_stage4" + top: "Mconv7_stage4" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 22 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "concat_stage5" + type: "Concat" + bottom: "Mconv7_stage4" + bottom: "conv5_3_CPM" + top: "concat_stage5" + concat_param { + 
axis: 1 + } +} +layer { + name: "Mconv1_stage5" + type: "Convolution" + bottom: "concat_stage5" + top: "Mconv1_stage5" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_2_stage5_1" + type: "ReLU" + bottom: "Mconv1_stage5" + top: "Mconv1_stage5" +} +layer { + name: "Mconv2_stage5" + type: "Convolution" + bottom: "Mconv1_stage5" + top: "Mconv2_stage5" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_3_stage5_2" + type: "ReLU" + bottom: "Mconv2_stage5" + top: "Mconv2_stage5" +} +layer { + name: "Mconv3_stage5" + type: "Convolution" + bottom: "Mconv2_stage5" + top: "Mconv3_stage5" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_4_stage5_3" + type: "ReLU" + bottom: "Mconv3_stage5" + top: "Mconv3_stage5" +} +layer { + name: "Mconv4_stage5" + type: "Convolution" + bottom: "Mconv3_stage5" + top: "Mconv4_stage5" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_5_stage5_4" + type: "ReLU" + bottom: "Mconv4_stage5" + top: "Mconv4_stage5" +} +layer { + name: "Mconv5_stage5" + type: "Convolution" + bottom: 
"Mconv4_stage5" + top: "Mconv5_stage5" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_6_stage5_5" + type: "ReLU" + bottom: "Mconv5_stage5" + top: "Mconv5_stage5" +} +layer { + name: "Mconv6_stage5" + type: "Convolution" + bottom: "Mconv5_stage5" + top: "Mconv6_stage5" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_7_stage5_6" + type: "ReLU" + bottom: "Mconv6_stage5" + top: "Mconv6_stage5" +} +layer { + name: "Mconv7_stage5" + type: "Convolution" + bottom: "Mconv6_stage5" + top: "Mconv7_stage5" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 22 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "concat_stage6" + type: "Concat" + bottom: "Mconv7_stage5" + bottom: "conv5_3_CPM" + top: "concat_stage6" + concat_param { + axis: 1 + } +} +layer { + name: "Mconv1_stage6" + type: "Convolution" + bottom: "concat_stage6" + top: "Mconv1_stage6" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_2_stage6_1" + type: "ReLU" + bottom: "Mconv1_stage6" + top: "Mconv1_stage6" +} +layer { + name: "Mconv2_stage6" + type: "Convolution" + bottom: "Mconv1_stage6" + top: "Mconv2_stage6" 
+ param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_3_stage6_2" + type: "ReLU" + bottom: "Mconv2_stage6" + top: "Mconv2_stage6" +} +layer { + name: "Mconv3_stage6" + type: "Convolution" + bottom: "Mconv2_stage6" + top: "Mconv3_stage6" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_4_stage6_3" + type: "ReLU" + bottom: "Mconv3_stage6" + top: "Mconv3_stage6" +} +layer { + name: "Mconv4_stage6" + type: "Convolution" + bottom: "Mconv3_stage6" + top: "Mconv4_stage6" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_5_stage6_4" + type: "ReLU" + bottom: "Mconv4_stage6" + top: "Mconv4_stage6" +} +layer { + name: "Mconv5_stage6" + type: "Convolution" + bottom: "Mconv4_stage6" + top: "Mconv5_stage6" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 3 + kernel_size: 7 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_6_stage6_5" + type: "ReLU" + bottom: "Mconv5_stage6" + top: "Mconv5_stage6" +} +layer { + name: "Mconv6_stage6" + type: "Convolution" + bottom: "Mconv5_stage6" + top: "Mconv6_stage6" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + 
convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} +layer { + name: "Mrelu1_7_stage6_6" + type: "ReLU" + bottom: "Mconv6_stage6" + top: "Mconv6_stage6" +} +layer { + name: "Mconv7_stage6" + type: "Convolution" + bottom: "Mconv6_stage6" +# top: "Mconv7_stage6" + top: "net_output" + param { + lr_mult: 4.0 + decay_mult: 1 + } + param { + lr_mult: 8.0 + decay_mult: 0 + } + convolution_param { + num_output: 22 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + dilation: 1 + } +} + diff --git a/HandPose/handPoseImage.cpp b/HandPose/handPoseImage.cpp new file mode 100644 index 000000000..7b05bc7e0 --- /dev/null +++ b/HandPose/handPoseImage.cpp @@ -0,0 +1,103 @@ +#include +#include +#include +#include + +using namespace std; +using namespace cv; +using namespace cv::dnn; + + +const int POSE_PAIRS[20][2] = +{ + {0,1}, {1,2}, {2,3}, {3,4}, // thumb + {0,5}, {5,6}, {6,7}, {7,8}, // index + {0,9}, {9,10}, {10,11}, {11,12}, // middle + {0,13}, {13,14}, {14,15}, {15,16}, // ring + {0,17}, {17,18}, {18,19}, {19,20} // small +}; + +string protoFile = "hand/pose_deploy.prototxt"; +string weightsFile = "hand/pose_iter_102000.caffemodel"; + +int nPoints = 22; + +int main(int argc, char **argv) +{ + + cout << "USAGE : ./handPoseImage " << endl; + + string imageFile = "hand.jpg"; + // Take arguments from commmand line + if (argc == 2) + { + imageFile = argv[1]; + } + + int inWidth = 368; + int inHeight = 368; + float thresh = 0.01; + + Mat frame = imread(imageFile); + Mat frameCopy = frame.clone(); + int frameWidth = frame.cols; + int frameHeight = frame.rows; + + double t = (double) cv::getTickCount(); + Net net = readNetFromCaffe(protoFile, weightsFile); + + Mat inpBlob = blobFromImage(frame, 1.0 / 255, Size(inWidth, inHeight), Scalar(0, 0, 0), false, false); + + net.setInput(inpBlob); + + 
Mat output = net.forward(); + + int H = output.size[2]; + int W = output.size[3]; + + // find the position of the body parts + vector points(nPoints); + for (int n=0; n < nPoints; n++) + { + // Probability map of corresponding body's part. + Mat probMap(H, W, CV_32F, output.ptr(0,n)); + resize(probMap, probMap, Size(frameWidth, frameHeight)); + + Point maxLoc; + double prob; + minMaxLoc(probMap, 0, &prob, 0, &maxLoc); + if (prob > thresh) + { + circle(frameCopy, cv::Point((int)maxLoc.x, (int)maxLoc.y), 8, Scalar(0,255,255), -1); + cv::putText(frameCopy, cv::format("%d", n), cv::Point((int)maxLoc.x, (int)maxLoc.y), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 0, 255), 2); + + } + points[n] = maxLoc; + } + + int nPairs = sizeof(POSE_PAIRS)/sizeof(POSE_PAIRS[0]); + + for (int n = 0; n < nPairs; n++) + { + // lookup 2 connected body/hand parts + Point2f partA = points[POSE_PAIRS[n][0]]; + Point2f partB = points[POSE_PAIRS[n][1]]; + + if (partA.x<=0 || partA.y<=0 || partB.x<=0 || partB.y<=0) + continue; + + line(frame, partA, partB, Scalar(0,255,255), 8); + circle(frame, partA, 8, Scalar(0,0,255), -1); + circle(frame, partB, 8, Scalar(0,0,255), -1); + } + + t = ((double)cv::getTickCount() - t)/cv::getTickFrequency(); + cout << "Time Taken = " << t << endl; + imshow("Output-Keypoints", frameCopy); + imshow("Output-Skeleton", frame); + imwrite("Output-Skeleton.jpg", frame); + + waitKey(); + + return 0; +} diff --git a/HandPose/handPoseImage.py b/HandPose/handPoseImage.py new file mode 100644 index 000000000..3de3598bf --- /dev/null +++ b/HandPose/handPoseImage.py @@ -0,0 +1,74 @@ +import cv2 +import time +import numpy as np + +protoFile = "hand/pose_deploy.prototxt" +weightsFile = "hand/pose_iter_102000.caffemodel" +nPoints = 22 +POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ] + + +frame = cv2.imread("hand.jpg") +frameCopy = np.copy(frame) +frameWidth = frame.shape[1] 
+frameHeight = frame.shape[0] +threshold = 0.1 + +net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile) + +t = time.time() +# input image dimensions for the network +inWidth = 368 +inHeight = 368 +inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), + (0, 0, 0), swapRB=False, crop=False) + +net.setInput(inpBlob) + +output = net.forward() +print("time taken by network : {:.3f}".format(time.time() - t)) + +H = output.shape[2] +W = output.shape[3] + +# Empty list to store the detected keypoints +points = [] + +for i in range(nPoints): + # confidence map of corresponding body's part. + probMap = output[0, i, :, :] + probMap = cv2.resize(probMap, (frameWidth, frameHeight)) + + # Find global maxima of the probMap. + minVal, prob, minLoc, point = cv2.minMaxLoc(probMap) + + if prob > threshold : + cv2.circle(frameCopy, (int(point[0]), int(point[1])), 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED) + cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA) + + # Add the point to the list if the probability is greater than the threshold + points.append((int(point[0]), int(point[1]))) + else : + points.append(None) + +# Draw Skeleton +for pair in POSE_PAIRS: + partA = pair[0] + partB = pair[1] + + if points[partA] and points[partB]: + cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2) + cv2.circle(frame, points[partA], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED) + cv2.circle(frame, points[partB], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED) + + +cv2.imshow('Output-Keypoints', frameCopy) +cv2.imshow('Output-Skeleton', frame) + + +cv2.imwrite('Output-Keypoints.jpg', frameCopy) +cv2.imwrite('Output-Skeleton.jpg', frame) + +print("Total time taken : {:.3f}".format(time.time() - t)) + +cv2.waitKey(0) diff --git a/HandPose/handPoseVideo.cpp b/HandPose/handPoseVideo.cpp new file mode 100644 index 000000000..aee5a435e --- /dev/null +++ 
b/HandPose/handPoseVideo.cpp
@@ -0,0 +1,128 @@
+// Hand keypoint detection on frames from camera 0 using the OpenCV DNN module.
+#include <opencv2/dnn.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/videoio.hpp>
+#include <iostream>
+#include <string>
+#include <vector>
+
+using namespace std;
+using namespace cv;
+using namespace cv::dnn;
+
+// Keypoint index pairs joined by a skeleton segment: four segments per finger.
+const int POSE_PAIRS[20][2] =
+{
+    {0,1}, {1,2}, {2,3}, {3,4},         // thumb
+    {0,5}, {5,6}, {6,7}, {7,8},         // index
+    {0,9}, {9,10}, {10,11}, {11,12},    // middle
+    {0,13}, {13,14}, {14,15}, {15,16},  // ring
+    {0,17}, {17,18}, {18,19}, {19,20}   // small
+};
+
+string protoFile = "hand/pose_deploy.prototxt";
+string weightsFile = "hand/pose_iter_102000.caffemodel";
+
+// Number of network output channels scanned for a peak. POSE_PAIRS only uses
+// indices 0-20, so the last channel never contributes to the skeleton.
+// NOTE(review): presumably that channel is a background map - TODO confirm.
+int nPoints = 22;
+
+int main(int argc, char **argv)
+{
+    int inWidth = 368;
+    int inHeight = 368;
+    // Minimum peak confidence for a keypoint to be drawn and used in the skeleton.
+    float thresh = 0.01f;
+
+    cv::VideoCapture cap(0);
+
+    if (!cap.isOpened())
+    {
+        cerr << "Unable to connect to camera" << endl;
+        return 1;
+    }
+
+    Mat frame, frameCopy;
+    int frameWidth = static_cast<int>(cap.get(CAP_PROP_FRAME_WIDTH));
+    int frameHeight = static_cast<int>(cap.get(CAP_PROP_FRAME_HEIGHT));
+
+    VideoWriter video("Output-Skeleton.avi", VideoWriter::fourcc('M','J','P','G'), 10, Size(frameWidth, frameHeight));
+
+    Net net = readNetFromCaffe(protoFile, weightsFile);
+
+    // Sentinel stored for keypoints whose peak confidence is not above thresh.
+    const Point noPoint(-1, -1);
+
+    while (true)
+    {
+        double t = (double) cv::getTickCount();
+
+        cap >> frame;
+        // A failed grab leaves the frame empty; stop rather than feed it to the network.
+        if (frame.empty())
+            break;
+
+        frameCopy = frame.clone();
+        Mat inpBlob = blobFromImage(frame, 1.0 / 255, Size(inWidth, inHeight), Scalar(0, 0, 0), false, false);
+
+        net.setInput(inpBlob);
+
+        Mat output = net.forward();
+
+        int H = output.size[2];
+        int W = output.size[3];
+
+        // find the position of the hand keypoints; every slot starts as "not detected"
+        vector<Point> points(nPoints, noPoint);
+        for (int n=0; n < nPoints; n++)
+        {
+            // Probability map of keypoint n, wrapped around the network output without copying.
+            Mat probMap(H, W, CV_32F, output.ptr(0,n));
+            resize(probMap, probMap, Size(frameWidth, frameHeight));
+
+            Point maxLoc;
+            double prob;
+            minMaxLoc(probMap, 0, &prob, 0, &maxLoc);
+            if (prob > thresh)
+            {
+                circle(frameCopy, cv::Point((int)maxLoc.x, (int)maxLoc.y), 8, Scalar(0,255,255), -1);
+                cv::putText(frameCopy, cv::format("%d", n), cv::Point((int)maxLoc.x, (int)maxLoc.y), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 0, 255), 2);
+                // Keep the peak only when it is confident enough, so weak peaks never reach the skeleton.
+                points[n] = maxLoc;
+            }
+        }
+
+        int nPairs = sizeof(POSE_PAIRS)/sizeof(POSE_PAIRS[0]);
+
+        for (int n = 0; n < nPairs; n++)
+        {
+            // lookup 2 connected hand parts
+            const Point& partA = points[POSE_PAIRS[n][0]];
+            const Point& partB = points[POSE_PAIRS[n][1]];
+
+            // Skip the segment when either end still holds the "not detected" sentinel.
+            if (partA.x < 0 || partA.y < 0 || partB.x < 0 || partB.y < 0)
+                continue;
+
+            line(frame, partA, partB, Scalar(0,255,255), 8);
+            circle(frame, partA, 8, Scalar(0,0,255), -1);
+            circle(frame, partB, 8, Scalar(0,0,255), -1);
+        }
+
+        t = ((double)cv::getTickCount() - t)/cv::getTickFrequency();
+        cv::putText(frame, cv::format("time taken = %.2f sec", t), cv::Point(50, 50), cv::FONT_HERSHEY_COMPLEX, .8, cv::Scalar(255, 50, 0), 2);
+        // imshow("Output-Keypoints", frameCopy);
+        imshow("Output-Skeleton", frame);
+        video.write(frame);
+        char key = waitKey(1);
+        if (key==27)
+            break;
+    }
+    // When everything done, release the video capture and write object
+    cap.release();
+    video.release();
+
+    return 0;
+}
diff --git a/HandPose/handPoseVideo.py b/HandPose/handPoseVideo.py
new file mode 100644
index 000000000..7e6526b38
--- /dev/null
+++ b/HandPose/handPoseVideo.py
@@ -0,0 +1,107 @@
+import cv2
+import time
+import numpy as np
+
+
+protoFile = "hand/pose_deploy.prototxt"
+weightsFile = "hand/pose_iter_102000.caffemodel"
+# Number of output channels scanned for a peak; POSE_PAIRS only uses indices 0-20.
+nPoints = 22
+POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
+
+inWidth = 368
+inHeight = 368
+threshold = 0.1
+
+
+input_source = 0
+cap = cv2.VideoCapture(input_source)
+# The first frame is read only to learn the frame size for the video writer.
+hasFrame, frame = cap.read()
+if not hasFrame:
+    # Without a frame there is no shape to size the writer with, so stop here.
+    raise SystemExit("Unable to read a frame from input source {}".format(input_source))
+
+vid_writer = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 15, (frame.shape[1],frame.shape[0]))
+
+net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
+k = 0
+while 1:
+    k+=1
+    t = time.time()
+    hasFrame, frame = cap.read()
+    if not hasFrame:
+        cv2.waitKey()
+        break
+    # Copy only after the check above, once the frame is known to be valid.
+    frameCopy = np.copy(frame)
+
+    print("imread = {}".format(time.time() - t))
+
+    frameWidth = frame.shape[1]
+    frameHeight = frame.shape[0]
+
+    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
+                              (0, 0, 0), swapRB=False, crop=False)
+
+    print("Blob = {}".format(time.time() - t))
+
+    net.setInput(inpBlob)
+
+    print("setInput = {}".format(time.time() - t))
+
+    output = net.forward()
+
+    print("forward = {}".format(time.time() - t))
+
+    # Empty list to store the detected keypoints
+    points = []
+
+    for i in range(nPoints):
+        # confidence map of the corresponding hand part.
+        probMap = output[0, i, :, :]
+        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
+
+        # Find global maxima of the probMap.
+        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
+
+        if prob > threshold :
+            cv2.circle(frameCopy, (int(point[0]), int(point[1])), 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
+            cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
+
+            # Add the point to the list if the probability is greater than the threshold
+            points.append((int(point[0]), int(point[1])))
+        else :
+            points.append(None)
+
+    print("keypoints = {}".format(time.time() - t))
+
+    # Draw Skeleton
+    for pair in POSE_PAIRS:
+        partA = pair[0]
+        partB = pair[1]
+
+        if points[partA] and points[partB]:
+            cv2.line(frame, points[partA], points[partB], (0, 255, 255), 3, lineType=cv2.LINE_AA)
+            cv2.circle(frame, points[partA], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
+            cv2.circle(frame, points[partB], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
+
+    print("skeleton = {}".format(time.time() - t))
+
+    # cv2.putText(frame, "time taken = {:.2f} sec".format(time.time() - t), (50, 50), cv2.FONT_HERSHEY_COMPLEX, .8, (255, 50, 0), 2, lineType=cv2.LINE_AA)
+    cv2.putText(frame, "Hand Pose using OpenCV", (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 50, 0), 2, lineType=cv2.LINE_AA)
+    # cv2.imshow('Output-Keypoints', frameCopy)
+    cv2.imshow('Output-Skeleton', frame)
+    # cv2.imwrite("video_output/{:03d}.jpg".format(k), frame)
+    key = cv2.waitKey(1)
+    if key == 27:
+        break
+
+    print("total = {}".format(time.time() - t))
+
+    vid_writer.write(frame)
+
+vid_writer.release()
+# Free the capture device and close the display window on exit.
+cap.release()
+cv2.destroyAllWindows()