test.py
import argparse

import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    fbeta_score,
    confusion_matrix,
)
from torch.utils.data import DataLoader
from tqdm import tqdm

from data.DufercoDataset import DufercoDataset
from data.transforms import test_transforms
from models.EfficientNet import EfficientNetBinaryClassifier


def load_dataloaders(args):
    test_dataset = DufercoDataset(
        args.data_config_path,
        split='test',
        transform=test_transforms
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False
    )
    return test_loader


def evaluate_model(model, test_loader, device, beta=0.5):
    model.eval()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            images, labels = images.to(device), labels.to(device)
            labels = labels.unsqueeze(1).to(torch.float32)

            outputs = model(images)
            # Threshold at 0.5, assuming the model outputs sigmoid probabilities
            predicted = (outputs > 0.5).float()

            all_labels.extend(labels.cpu().numpy().astype(int))
            all_predictions.extend(predicted.cpu().numpy().astype(int))

    # Convert lists to numpy arrays for metric calculations
    all_labels = np.array(all_labels).flatten()
    all_predictions = np.array(all_predictions).flatten()

    # Calculate evaluation metrics
    accuracy = accuracy_score(all_labels, all_predictions)
    precision = precision_score(all_labels, all_predictions)
    recall = recall_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)
    fbeta = fbeta_score(all_labels, all_predictions, beta=beta)
    confusion = confusion_matrix(all_labels, all_predictions)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"F-beta Score (beta={beta}): {fbeta:.4f}")
    print(f"Confusion Matrix:\n{confusion}")


def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Evaluating on: {device}")

    # Load test data
    test_loader = load_dataloaders(args)

    # Load model
    model = EfficientNetBinaryClassifier()
    model = nn.DataParallel(model)  # Wrap for multi-GPU support if necessary
    # map_location lets a GPU-saved checkpoint load on a CPU-only machine
    checkpoint = torch.load(args.model_path, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    model = model.to(device)

    # Evaluate model
    evaluate_model(model, test_loader, device, beta=args.beta)
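
# NOTE (assumption): the checkpoint at --model_path is expected to be a dict
# with a "model_state_dict" entry whose keys match the DataParallel-wrapped
# model above (i.e. prefixed with "module."). A compatible checkpoint could be
# written during training with something along these lines:
#
#     torch.save({"model_state_dict": model.state_dict()}, "checkpoint.pth")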


def argument_parser():
    parser = argparse.ArgumentParser(description="Evaluate EfficientNet on Duferco test dataset")
    parser.add_argument('--data_config_path',
                        type=str,
                        required=True,
                        help='Path to dataset JSON')
    parser.add_argument('--batch_size',
                        type=int,
                        default=16,
                        help='Batch size')
    parser.add_argument('--model_path',
                        type=str,
                        required=True,
                        help='Path to the trained model checkpoint')
    parser.add_argument('--beta',
                        type=float,
                        default=0.5,
                        help='Beta value for F-beta score')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = argument_parser()
    main(args)
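

# Example invocation (the paths below are illustrative placeholders, not files
# shipped with this repository):
#
#     python test.py \
#         --data_config_path configs/duferco_dataset.json \
#         --model_path checkpoints/efficientnet_best.pth \
#         --batch_size 16 \
#         --beta 0.5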