-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfinal_waldo.py
200 lines (168 loc) · 9.13 KB
/
final_waldo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# CMSC 495 Current Trends and Projects
# Fall 2020
# Contributors: John Kaiser, Orry Newell, Cody Ambrose, Rodney French
# Our Capstone project was to research different Computer Vision algorithms and techniques to produce an image
# detection program that would find a target image in a larger 'puzzle' image. We used Where's Waldo puzzles as test
# data to find Waldo and his friends in various scenes from the book.
from tkinter import *
from tkinter.ttk import *
from pathlib import Path
from PIL import Image, ImageTk
import numpy as np
import cv2
def get_scene(scene):
    """Return the filesystem path (as a string) of the puzzle image for *scene*.

    Looks the display name up in the module-level ``puzzle_dict`` and joins
    it onto the ``puzzle_images`` directory.
    """
    filename = puzzle_dict[scene]
    return str(puzzle_images / filename)
def get_character(character):
    """Return the filesystem path (as a string) of the target image for *character*.

    Looks the display name up in the module-level ``target_dict`` and joins
    it onto the ``target_images`` directory.
    """
    filename = target_dict[character]
    return str(target_images / filename)
# the my_preview method is called when the 'Preview' button is pressed
def my_preview(mst, character_dd, scene_dd, scene_label, character_label, zoomed_label):
    """Display the currently selected scene and character images in the GUI.

    mst             -- the Tk root window; also used to keep PhotoImage
                       references alive (Tk discards images that are
                       garbage-collected)
    character_dd    -- StringVar holding the selected character name
    scene_dd        -- StringVar holding the selected scene name
    scene_label     -- Label widget that shows the puzzle image
    character_label -- Label widget that shows the target image
    zoomed_label    -- Label widget for the zoomed-in match (hidden here)
    """
    print("Searching for {} in {}!".format(character_dd.get(), scene_dd.get()))
    scene = scene_dd.get()
    character = character_dd.get()
    # openCV read in the scene image selected from dropdown menu
    scene_image = cv2.imread(str(puzzle_images / puzzle_dict[scene]))
    # resize the scene image to 80% of the user's screen width
    scene_image = resize_with_aspect_ratio(scene_image, height=int(mst.winfo_screenheight()*0.8))
    # process the image and return it, set it to master.show_scene for tkinter GUI
    # (attaching to mst keeps a reference so Tk does not discard the image)
    mst.show_scene = process_image(scene_image)
    target_image = cv2.imread(str(target_images / target_dict[character]))
    target_image = resize_with_aspect_ratio(target_image, width=100)
    mst.show_character = process_image(target_image)
    # configure scene_label and character_label to show both images
    scene_label.configure(image=mst.show_scene)
    character_label.configure(image=mst.show_character)
    # remove the zoomed in feature when Preview is used
    zoomed_label.grid_remove()
# search_image method is called when clicking the 'Search' button
def search_image(mst, target_pic, puzzle_pic, scene_label, character_label, zoomed_label, search_b):
    """Find the selected character in the selected puzzle and display the result.

    Runs multi-scale template matching: the puzzle is resized over a range of
    scales, both images are adaptively thresholded, and cv2.matchTemplate with
    TM_CCOEFF is scored at each scale; the best match across all scales wins.
    The puzzle is then darkened except for the matched region, a green box is
    drawn around it, and a zoomed-in crop is shown under the target image.

    target_pic -- StringVar with the selected character name
    puzzle_pic -- StringVar with the selected scene name
    search_b   -- the Search button (its text toggles to show progress)
    The remaining parameters are the Tk root and the image Labels to update.
    """
    search_b.configure(text="Processing...")
    # force Tk to repaint now; without this the "Processing..." text would
    # never appear because the event loop is blocked during the search
    mst.update_idletasks()
    # read in the 2 images (template image to search for and the larger image to search)
    target = cv2.imread(get_character(target_pic.get()))
    puzzle = cv2.imread(get_scene(puzzle_pic.get()))
    # convert both images to grayscale
    target_gray = cv2.cvtColor(target, cv2.COLOR_BGR2GRAY)
    puzzle_gray = cv2.cvtColor(puzzle, cv2.COLOR_BGR2GRAY)
    # adaptive thresholding on the target gave better template-matching
    # results than Canny edge detection
    target_edge = cv2.adaptiveThreshold(target_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    # height and width of the target template
    (height, width) = target.shape[:2]
    found = None  # best (score, location, ratio) seen so far
    # np.linspace(start, stop, number) returns 'number' evenly spaced values;
    # the [::-1] slice reverses so we start at the largest scale (2.0)
    for scale in np.linspace(0.5, 2.0, 10)[::-1]:
        # resize the grayscale puzzle image for this scale
        resized = resize_with_aspect_ratio(puzzle_gray, width=int(puzzle.shape[1] * scale))
        # ratio maps coordinates found in the resized image back to the original
        ratio = puzzle.shape[1] / float(resized.shape[1])
        # once the puzzle is smaller than the template, matching is impossible
        if resized.shape[0] < height or resized.shape[1] < width:
            break
        # apply the same adaptive thresholding to the resized puzzle
        edged = cv2.adaptiveThreshold(resized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
        # perform template matching; TM_CCOEFF was chosen because it gave the
        # best results of the methods tried
        result = cv2.matchTemplate(edged, target_edge, cv2.TM_CCOEFF)
        # with TM_CCOEFF, maxLoc from minMaxLoc() is the best match position
        (minVal, maxVal, minLoc, maxLoc) = cv2.minMaxLoc(result)
        # keep the highest-scoring match across all scales
        if found is None or maxVal > found[0]:
            found = (maxVal, maxLoc, ratio)
    if found is None:
        # every scale was smaller than the template, so nothing was compared;
        # restore the button instead of crashing on the unpack below
        search_b.configure(text="Search")
        return
    (maxVal, maxLoc, ratio) = found
    # scale the match coordinates back into the original puzzle image
    (startX, startY) = (int(maxLoc[0] * ratio), int(maxLoc[1] * ratio))
    (endX, endY) = (int((maxLoc[0] + width) * ratio), int((maxLoc[1] + height) * ratio))
    # region of interest is the section of puzzle with the best match
    roi = puzzle[startY:endY, startX:endX]
    # darken everything except the ROI so the located target stands out
    mask = np.zeros(puzzle.shape, dtype="uint8")
    puzzle = cv2.addWeighted(puzzle, 0.25, mask, 0.75, 0)
    puzzle[startY:endY, startX:endX] = roi
    # draw a green box around the target (ROI)
    puzzle = cv2.rectangle(puzzle, (startX, startY), (endX, endY), (0, 255, 0), 3)
    # expand the ROI by 100 px for a zoomed-in view; clamp the start indices
    # at 0 because negative indices wrap around in numpy slicing and would
    # produce an empty/garbled crop for matches near the top or left edge
    zoomed = puzzle[max(0, startY - 100):endY + 100, max(0, startX - 100):endX + 100]
    # resize the annotated puzzle to 80% of the screen height
    resize = resize_with_aspect_ratio(puzzle, height=int(mst.winfo_screenheight() * 0.8))
    # process the 3 images for display (references kept on mst so Tk does
    # not garbage-collect them) and push them into the labels
    mst.show_scene = process_image(resize)
    target = resize_with_aspect_ratio(target, width=100)
    mst.show_character = process_image(target)
    mst.show_zoomed = process_image(zoomed)
    scene_label.configure(image=mst.show_scene)
    character_label.configure(image=mst.show_character)
    # place the zoomed view under the target image, stuck to the bottom
    zoomed_label.configure(image=mst.show_zoomed)
    zoomed_label.grid(row=2, column=4, sticky=S, pady=2)
    search_b.configure(text="Search")
def process_image(image):
    """Convert an OpenCV BGR image into a Tk-displayable PhotoImage.

    OpenCV stores images as BGR numpy arrays; Tk needs an ImageTk.PhotoImage,
    so convert BGR -> RGB, wrap in a PIL Image, then wrap in a PhotoImage.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb)
    return ImageTk.PhotoImage(pil_image)
def open_wheres_waldo(mst):
    """Build the Where's Waldo GUI inside the Tk root window *mst*.

    Creates two dropdowns (scene and character), Preview/Search buttons,
    and three image labels: the puzzle scene, the target character, and a
    placeholder for the zoomed-in match shown after a search.
    """
    mst.title("Where's Waldo")
    screen_width = mst.winfo_screenwidth()
    screen_height = mst.winfo_screenheight()
    # size the window to fill the screen and lock it (no resizing)
    mst.geometry(f'{screen_width}x{screen_height}')
    mst.resizable(0, 0)
    # scene dropdown, defaulting to 'City'
    scene_dd = StringVar(mst)
    scene_dd.set('City')
    scene_text = Label(mst, text='Choose scene: ')
    s_dd = OptionMenu(mst, scene_dd, *puzzle_dict.keys())
    scene_text.grid(row=0, column=0, sticky=E, padx=10, pady=2)
    s_dd.grid(row=0, column=1, sticky=W, pady=2)
    # character dropdown, defaulting to 'Waldo'
    character_dd = StringVar(mst)
    character_dd.set('Waldo')
    character_text = Label(mst, text='Choose character: ')
    c_dd = OptionMenu(mst, character_dd, *target_dict.keys())
    character_text.grid(row=1, column=0, sticky=E, padx=10, pady=2)
    c_dd.grid(row=1, column=1, sticky=W, pady=2)
    # Preview shows the raw images; Search runs the template matching.
    # Lambdas capture the widgets created below (bound at call time).
    preview_b = Button(mst, text="Preview", command=lambda: my_preview(mst, character_dd, scene_dd, scene_label,
                                                                      character_label, zoomed_label), width=20)
    preview_b.grid(row=0, column=2, sticky=W, pady=2)
    search_b = Button(mst, text='Search', command=lambda: search_image(mst, character_dd, scene_dd, scene_label,
                                                                      character_label, zoomed_label, search_b), width=20)
    search_b.grid(row=1, column=2, sticky=W, pady=2)
    # show the default 'City' scene on startup, scaled to 80% of screen height
    # (reference kept on mst so Tk does not garbage-collect the image)
    default_image = cv2.imread(str(puzzle_images / puzzle_dict['City']))
    default_image = resize_with_aspect_ratio(default_image, height=int(screen_height * 0.8))
    mst.default = process_image(default_image)
    scene_label = Label(mst, image=mst.default)
    scene_label.grid(row=2, column=0, columnspan=3, rowspan=1, sticky=W, pady=2)
    # show the default 'Waldo' target image at 100 px wide
    waldo_image = cv2.imread(str(target_images / target_dict['Waldo']))
    waldo_image = resize_with_aspect_ratio(waldo_image, width=100)
    mst.waldo = process_image(waldo_image)
    character_label = Label(mst, image=mst.waldo)
    character_label.grid(row=2, column=4, sticky=NW, pady=2)
    # empty label that search_image later fills with the zoomed-in match
    zoomed_label = Label(mst)
    zoomed_label.grid(row=2, column=4, sticky=S, pady=2)
def resize_with_aspect_ratio(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize *image* to the given width OR height, preserving aspect ratio.

    Exactly one of *width*/*height* is used: if both are given, *width* wins;
    if neither is given, the image is returned unchanged.  *inter* is the
    OpenCV interpolation method passed through to cv2.resize.
    """
    (original_h, original_w) = image.shape[:2]
    if width is None and height is None:
        return image
    if width is not None:
        scale = width / float(original_w)
        new_size = (width, int(original_h * scale))
    else:
        scale = height / float(original_h)
        new_size = (int(original_w * scale), height)
    return cv2.resize(image, new_size, interpolation=inter)
def main():
    """Create the Tk root window, build the GUI, and enter the event loop."""
    root = Tk()
    open_wheres_waldo(root)
    root.mainloop()
if __name__ == '__main__':
    # display name -> image filename for each searchable character
    # (the ''->'' entry gives the dropdowns a blank option)
    target_dict = {'': '', 'Waldo': 'waldo_face.jpeg', 'Wenda': 'wenda.jpg', 'Wizard': 'wizard.jpg',
                   'Odlaw': 'odlaw.jpg'}
    # display name -> image filename for each puzzle scene
    puzzle_dict = {'': '', 'Beach': 'puzzle3.jpeg', 'City': 'puzzle2.jpeg', 'Zoo': 'zoo.jpeg',
                   'Ski Resort': 'ski.jpeg', 'Train Station': 'train.jpeg', 'Museum': 'museum.jpeg'}
    # image directories, relative to the working directory
    puzzle_images = Path('Puzzle Images')
    target_images = Path('Target Images')
    main()