forked from hexiang10/facial-expression-recognition
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel_CNN_test.py
173 lines (138 loc) · 5.3 KB
/
model_CNN_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# -*- coding: utf-8 -*-
from statistics import StatisticsError, mode

import cv2
import numpy as np
import torch
import torch.nn as nn
def preprocess_input(images):
    """Scale pixel intensities from the 0-255 range into the 0-1 range.

    No mean is subtracted; every value is simply divided by 255.0. The input
    is not modified: the division produces a new object.

    Args:
        images: A single image or a batch of images of any shape (anything
            that supports true division by a float, e.g. a NumPy array).

    Returns:
        The input divided by 255.0, with the same shape as ``images``.
    """
    return images / 255.0
def gaussian_weights_init(m):
    """Re-initialise the weights of convolution layers from N(0, 0.04**2).

    Designed to be passed to ``nn.Module.apply``, which calls it once for
    every sub-module. A module is treated as a convolution when its class
    name contains the substring ``"Conv"``; every other module is left
    untouched. Only ``weight`` is re-drawn, the bias is not changed.

    Args:
        m: The module to (possibly) initialise in place.
    """
    if 'Conv' in type(m).__name__:
        # nn.init works under no_grad on the parameter itself, which avoids
        # mutating the tensor through the discouraged ``.data`` attribute.
        nn.init.normal_(m.weight, mean=0.0, std=0.04)
class FaceCNN(nn.Module):
    """Convolutional classifier for single-channel face crops.

    The network is three identical-shaped stages (3x3 convolution ->
    batch norm -> RReLU -> 2x2 max-pool) that widen the channels
    1 -> 64 -> 128 -> 256, followed by a fully connected head.

    The first linear layer expects ``256 * 6 * 6`` features. Each stage
    halves the spatial size, so the input must be 48x48
    (48 -> 24 -> 12 -> 6).

    Attribute names (``conv1``, ``conv2``, ``conv3``, ``fc``) and the layer
    order inside each ``nn.Sequential`` determine the ``state_dict`` keys, so
    they must not change if previously saved weights are to be loaded.

    Args:
        num_classes: Number of output scores. Defaults to 7, matching the
            seven expression labels used by this script.
    """

    def __init__(self, num_classes=7):
        super().__init__()

        # Feature extractor: three conv/pool stages.
        self.conv1 = self._conv_block(1, 64)
        self.conv2 = self._conv_block(64, 128)
        self.conv3 = self._conv_block(128, 256)

        # Overwrite the default convolution weights with a narrow Gaussian.
        for stage in (self.conv1, self.conv2, self.conv3):
            stage.apply(gaussian_weights_init)

        # Classifier head.
        self.fc = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.RReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=1024),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=1024, out_features=256),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=256, out_features=num_classes),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one conv -> batch norm -> RReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=out_channels),
            nn.RReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Compute class scores for a batch of face images.

        Args:
            x: Float tensor of shape ``(batch, 1, 48, 48)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding the raw output
            of the last linear layer (no softmax is applied).
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.size(0), -1)
        return self.fc(x)
# Haar-cascade face detector that ships with OpenCV, and the trained
# expression classifier.
detection_model_path = 'model/haarcascade_frontalface_default.xml'
classification_model_path = 'model/model_cnn.pkl'

# Load the face detection model.
face_detection = cv2.CascadeClassifier(detection_model_path)

# Load the expression recognition model (presumably a pickled FaceCNN; the
# class must be importable under the name it was saved with).
# map_location='cpu': the loop below feeds CPU tensors and calls .numpy() on
# the output, so the weights must live on the CPU even if they were saved
# from a GPU.
# NOTE(review): torch.load unpickles arbitrary Python objects - only load
# model files from a trusted source. Recent PyTorch releases default to
# weights_only=True, which rejects fully pickled modules; if loading fails
# there, pass weights_only=False for a trusted file.
emotion_classifier = torch.load(classification_model_path, map_location='cpu')
# Switch Dropout / BatchNorm / RReLU to their deterministic inference
# behaviour; a pickled module keeps whatever training flag it was saved with.
emotion_classifier.eval()

# Number of most recent predictions used to smooth the displayed label.
frame_window = 10
# Expression labels, indexed by the classifier's output position.
emotion_labels = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sad', 5: 'surprise', 6: 'neutral'}
# Sliding window of recent predictions. It is shared by every face in the
# frame, so with several faces the smoothed label mixes their predictions.
emotion_window = []

# Open the camera; index 0 is normally the built-in webcam.
video_capture = cv2.VideoCapture(0)
# To run on a video file instead:
# video_capture = cv2.VideoCapture("video/example_dsh.mp4")
font = cv2.FONT_HERSHEY_SIMPLEX

cv2.startWindowThread()
cv2.namedWindow('window_frame')

try:
    while True:
        # Read one frame. Stop cleanly when nothing is delivered (camera
        # unavailable or end of a video file) instead of crashing on None.
        ok, frame = video_capture.read()
        if not ok or frame is None:
            break

        # Mirror horizontally for a selfie-style view. copy() turns the
        # negative-stride view into a contiguous array OpenCV can draw on.
        frame = frame[:, ::-1, :].copy()

        # Face detection and classification both work on the grayscale image.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect every face in the current frame.
        faces = face_detection.detectMultiScale(gray, 1.3, 5)

        for (x, y, w, h) in faces:
            # Draw a 2-pixel box around the face.
            cv2.rectangle(frame, (x, y), (x + w, y + h), (84, 255, 159), 2)

            # Crop the face and resize it to the 48x48 network input.
            face = gray[y:y + h, x:x + w]
            try:
                face = cv2.resize(face, (48, 48))
            except cv2.error:
                # Degenerate crop that OpenCV cannot resize: skip this face.
                continue

            # (48, 48) -> (1, 1, 48, 48): add batch and channel dimensions,
            # then scale pixel values from 0-255 to 0-1.
            face = preprocess_input(face[np.newaxis, np.newaxis, :, :])
            face_tensor = torch.from_numpy(face).float()

            # Run the trained model; no gradients are needed for inference.
            with torch.no_grad():
                scores = emotion_classifier(face_tensor)
            emotion_arg = int(np.argmax(scores.numpy()))
            emotion = emotion_labels[emotion_arg]

            # Keep at most frame_window recent predictions.
            emotion_window.append(emotion)
            if len(emotion_window) > frame_window:
                emotion_window.pop(0)

            try:
                # Most frequent label in the window.
                emotion_mode = mode(emotion_window)
            except StatisticsError:
                # Python < 3.8 raises when there is no unique mode.
                continue

            # Write the label above the face box.
            cv2.putText(frame, emotion_mode, (x, y - 30), font, .7, (0, 0, 255), 1, cv2.LINE_AA)

        try:
            # Show the annotated frame.
            cv2.imshow('window_frame', frame)
        except cv2.error:
            continue

        # Press q to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    video_capture.release()
    cv2.destroyAllWindows()