"""
Facial Emotions Recognition System
Author: Wajahat Ullah
This file can be used for inference using the pretrained model, which
incorporated CLIP encoder to extract features and then classifies facial
expression using a fully connected layer. This model achieved about 65%
accuracy on the test data.
"""
import cv2
import torch
from torchvision import transforms
from PIL import Image

from model import FERModel

PRETRAINED_MODEL = 'models/fer2013_model.pth'

def load_model(model_path, num_classes=7):
    """Load the custom-defined FERModel from a checkpoint.

    Args:
        model_path (str): path to the pretrained model weights
        num_classes (int, optional): number of classes in the dataset.
            Defaults to 7.

    Returns:
        FERModel: the model in evaluation mode
    """
    model = FERModel(num_classes)
    # Load on CPU so the script also runs on machines without a GPU.
    state_dict = torch.load(model_path,
                            map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.eval()
    return model

def preprocess_image(image):
    """Preprocess a face crop so the FERModel can run inference on it.

    Args:
        image (ndarray): a BGR face crop as produced by OpenCV

    Returns:
        Tensor: preprocessed image batch of shape (1, 3, 224, 224)
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        # Collapse to grayscale but keep 3 channels, matching the
        # 3-channel input the encoder expects.
        transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    # OpenCV delivers BGR; PIL expects RGB.
    image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    return transform(image).unsqueeze(0)  # add a batch dimension

def main():
    """Program entry point."""
    model = load_model(PRETRAINED_MODEL)
    # Detect faces using OpenCV's pretrained Haar cascade.
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
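    # The cascade file is assumed to sit next to this script (OpenCV also
    # ships a copy under cv2.data.haarcascades). If it cannot be loaded,
    # detectMultiScale would silently find no faces, so fail fast instead.
    if face_cascade.empty():
        raise IOError('Could not load haarcascade_frontalface_default.xml')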
    # Class labels, in the order used by the FER2013 dataset.
    emotions = ['angry', 'disgust', 'fear',
                'happy', 'sad', 'surprise', 'neutral']
    # Inference on the webcam video feed.
    cap = cv2.VideoCapture(0)
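    # Device index 0 is an assumption; adjust it if another capture
    # device should be used.
    if not cap.isOpened():
        raise IOError('Cannot open webcam (device index 0)')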
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Haar cascades operate on grayscale images.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 10)
        for (x, y, w, h) in faces:
            face = frame[y:y+h, x:x+w]
            face_tensor = preprocess_image(face)
            with torch.no_grad():
                output = model(face_tensor)
            _, predicted = torch.max(output, 1)
            emotion = emotions[predicted.item()]
            # Draw a rectangle around the face and label it with the
            # predicted emotion.
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
            cv2.putText(frame, emotion, (x, y-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        cv2.imshow('Facial Emotion Recognition', frame)
        # Press 'q' to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
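# Usage sketch (assuming models/fer2013_model.pth and the Haar cascade
# file are in place):
#     python main.py
# A window showing the annotated webcam feed opens; press 'q' to quit.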