# yolo-arbitrary-square-crop-min-224.py
import cv2
import numpy as np
import os
# Define input and output directories
input_dir = "COCO_train/train2014"
output_dir = "COCO_yolo"
valid_extensions = (".jpg", ".jpeg", ".png", ".bmp")

# Detection and crop settings
confidence_threshold = 0.1  # a detection is used only if its best class score exceeds this
min_crop_size = 224  # preferred minimum side length of the square crop, in pixels


def square_crop_origin(center_x, center_y, box_w, box_h, img_w, img_h,
                       min_size=min_crop_size):
    """Return ``(x, y, size)`` for a square crop around a detection.

    The side length is the larger of the box's width and height, raised to
    ``min_size`` when the box is smaller, and then capped at the image's
    width and height so the square always fits inside the image. The square
    is centred on ``(center_x, center_y)`` and shifted back inside the image
    when it would overhang an edge.

    Args:
        center_x: Box centre, x coordinate in pixels.
        center_y: Box centre, y coordinate in pixels.
        box_w: Box width in pixels.
        box_h: Box height in pixels.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        min_size: Preferred minimum side length of the crop in pixels.

    Returns:
        A tuple ``(x, y, size)`` where ``(x, y)`` is the top-left corner and
        ``size`` the side length. ``size`` is smaller than ``min_size`` only
        when the image itself is smaller than ``min_size`` in some dimension.
    """
    # Capping at the image dimensions keeps ``img_w - size`` and
    # ``img_h - size`` non-negative; a negative start index would otherwise be
    # interpreted by NumPy as "count from the end" and yield a wrong crop.
    size = min(max(box_w, box_h, min_size), img_w, img_h)
    x = min(max(int(center_x - size / 2), 0), img_w - size)
    y = min(max(int(center_y - size / 2), 0), img_h - size)
    return x, y, size


def collect_best_detections(outs, classes, filename):
    """Pick the highest-scoring detection of each class from the network output.

    Each detection row is read as: values 0-3 are the box (centre x, centre y,
    width, height) and values from index 5 onward are per-class scores.

    Args:
        outs: Sequence of 2-D arrays returned by ``net.forward``.
        classes: Class names, indexed by class id.
        filename: Name of the image being processed (used for logging only).

    Returns:
        Dict mapping class name to ``(confidence, box)`` where ``box`` holds
        the first four values of the winning detection row.
    """
    best = {}
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence <= confidence_threshold:
                continue
            class_name = classes[class_id]
            print(f"Detected {class_name} in {filename} with confidence {confidence}")
            # The output file name depends only on class and image, so keep a
            # single detection per class: the most confident one, rather than
            # whichever happened to come last.
            kept = best.get(class_name)
            if kept is None or confidence > kept[0]:
                best[class_name] = (confidence, detection[:4])
    return best


def process_image(net, output_layers, classes, filename):
    """Detect objects in one image and write one square crop per detected class.

    Args:
        net: Network returned by ``cv2.dnn.readNet``.
        output_layers: Names of the layers passed to ``net.forward``.
        classes: Class names, indexed by class id.
        filename: File name of the image inside ``input_dir``.

    Returns:
        True if the image was read and processed, False if it was skipped
        because it could not be read.
    """
    image = cv2.imread(os.path.join(input_dir, filename))
    if image is None:
        # cv2.imread returns None instead of raising for unreadable files.
        print(f"Skipping {filename}: could not be read as an image")
        return False
    height, width = image.shape[:2]

    # Detect objects (0.00392 is approximately 1/255; the positional True is
    # blobFromImage's swapRB argument).
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    for class_name, (_, box) in collect_best_detections(outs, classes, filename).items():
        # Box values are scaled by the image size to get pixel coordinates.
        center_x = int(box[0] * width)
        center_y = int(box[1] * height)
        w = int(box[2] * width)
        h = int(box[3] * height)
        x, y, size = square_crop_origin(center_x, center_y, w, h, width, height)
        cropped_img = image[y:y + size, x:x + size]
        output_path = os.path.join(output_dir, class_name + "_" + filename)
        cv2.imwrite(output_path, cropped_img, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
    return True


def main():
    """Crop detected objects from every image in ``input_dir`` into ``output_dir``."""
    # Load the YOLO model
    net = cv2.dnn.readNet("yolov7.weights", "yolov7.cfg")
    layer_names = net.getLayerNames()
    # The indices from getUnconnectedOutLayers are used 1-based (hence i - 1).
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

    # Load class labels
    with open("coco.names", "r") as f:
        classes = [line.strip() for line in f]

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Process each image in the input directory
    for filename in os.listdir(input_dir):
        # Compare in lower case so that e.g. "IMG.JPG" is not skipped.
        if not filename.lower().endswith(valid_extensions):
            continue
        if process_image(net, output_layers, classes, filename):
            print(f"Processed {filename}")
    print("Processing complete.")


if __name__ == "__main__":
    main()