-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict.py
74 lines (60 loc) · 2.52 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
import cv2
import torch
import torch.nn.functional as F
from model import SimpleNet
class DigitRecognizer:
    """Interactive OpenCV canvas that classifies a hand-drawn digit.

    The user paints with the left mouse button on a white square canvas.
    On request the drawing is shrunk to the network's input size, binarised,
    centred and fed to a ``SimpleNet`` model restored from ``model_path``.
    """

    WINDOW_NAME = "Digit Recognizer"
    CANVAS_SIZE = 280    # side length of the drawing surface, in pixels
    INPUT_SIZE = 28      # side length of the image handed to the model
    BRUSH_RADIUS = 10    # radius of the filled circle painted per mouse event
    INK_THRESHOLD = 128  # grey levels <= this value are treated as ink

    def __init__(self, model_path='model.pth'):
        """Create a blank canvas and load the model weights.

        Args:
            model_path: Path of the saved ``state_dict`` to restore.
        """
        self.canvas = np.full(
            (self.CANVAS_SIZE, self.CANVAS_SIZE), 255, dtype=np.uint8
        )
        self.model_path = model_path
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self.load_model()

    def load_model(self):
        """Build a ``SimpleNet``, restore its weights and put it in eval mode.

        Returns:
            The model, placed on ``self.device``.
        """
        model = SimpleNet().to(self.device)
        # map_location remaps the stored tensors onto the device chosen above;
        # without it a checkpoint saved on a GPU cannot be loaded on a
        # CPU-only machine.
        state_dict = torch.load(
            self.model_path, map_location=self.device, weights_only=True
        )
        model.load_state_dict(state_dict)
        model.eval()
        return model

    def draw(self, event, x, y, flags, param):
        """Mouse callback: paint a black dot while the left button is down.

        Args:
            event: OpenCV mouse event code.
            x: Cursor column on the canvas.
            y: Cursor row on the canvas.
            flags: OpenCV bitmask of currently held buttons/modifiers.
            param: Unused; present because the callback signature requires it.
        """
        if event == cv2.EVENT_LBUTTONDOWN or (flags & cv2.EVENT_FLAG_LBUTTON):
            cv2.circle(self.canvas, (x, y), self.BRUSH_RADIUS, (0,), -1)

    def _has_ink(self) -> bool:
        """Return True when at least one canvas pixel counts as ink."""
        return bool((self.canvas <= self.INK_THRESHOLD).any())

    def preprocess_image(self):
        """Turn the canvas into a normalised model input.

        The canvas is resized, inverted to white-on-black, shifted so the
        bounding box of the drawing is centred, and scaled to ``[-1, 1]``.

        Returns:
            A float32 tensor of shape ``(1, 1, INPUT_SIZE, INPUT_SIZE)`` on
            ``self.device``. A canvas without ink yields an all ``-1.0``
            tensor (pure background) instead of raising.
        """
        size = self.INPUT_SIZE

        if not self._has_ink():
            # A blank canvas has nothing to centre. This is the same result
            # the pipeline below would give for an all-white image, without
            # handing an empty point set to cv2.boundingRect.
            blank = torch.full((1, 1, size, size), -1.0, dtype=torch.float32)
            return blank.to(self.device)

        img = cv2.resize(self.canvas, (size, size))
        _, img = cv2.threshold(
            img, self.INK_THRESHOLD, 255, cv2.THRESH_BINARY_INV
        )

        coords = cv2.findNonZero(img)
        # findNonZero returns None when no foreground pixel is left; skip the
        # centring step in that case rather than crash in boundingRect.
        if coords is not None:
            x, y, w, h = cv2.boundingRect(coords)
            cx, cy = x + w // 2, y + h // 2
            half = size // 2
            shift = np.float32([[1, 0, half - cx], [0, 1, half - cy]])
            img = cv2.warpAffine(img, shift, (size, size))

        img = img.astype(np.float32) / 255.0
        img = (img - 0.5) / 0.5  # map [0, 1] to [-1, 1]
        return torch.tensor(img).unsqueeze(0).unsqueeze(0).to(self.device)

    def predict_digit(self):
        """Classify the current canvas.

        Returns:
            ``(digit, certainty)`` where ``digit`` is the index of the largest
            softmax output and ``certainty`` is that probability in percent.
        """
        img = self.preprocess_image()
        with torch.no_grad():
            output = self.model(img)
            probabilities = F.softmax(output, dim=1)
            certainty, predicted_digit = torch.max(probabilities, 1)
        return predicted_digit.item(), certainty.item() * 100

    def reset_canvas(self):
        """Clear the canvas back to white, in place."""
        self.canvas[:] = 255

    def run(self):
        """Run the UI loop until the user quits.

        Keys: ``q`` or Esc quits; ``p`` or Enter predicts the drawn digit,
        prints the result and clears the canvas.
        """
        cv2.namedWindow(self.WINDOW_NAME)
        cv2.setMouseCallback(self.WINDOW_NAME, self.draw)
        try:
            while True:
                cv2.imshow(self.WINDOW_NAME, self.canvas)
                key = cv2.waitKey(1) & 0xFF
                if key in (ord("q"), 27):
                    break
                if key in (ord("p"), 13):
                    if not self._has_ink():
                        print("Canvas is empty - draw a digit first.")
                        continue
                    digit, certainty = self.predict_digit()
                    print(f"Predicted digit: {digit} (certainty: {certainty:.2f}%)")
                    self.reset_canvas()
                    cv2.imshow(self.WINDOW_NAME, self.canvas)
                    cv2.waitKey(500)
        finally:
            # Always release the window, even if prediction raises.
            cv2.destroyAllWindows()
def main():
    """Script entry point: build a recognizer and start its UI loop."""
    DigitRecognizer().run()


if __name__ == '__main__':
    main()