-
Notifications
You must be signed in to change notification settings - Fork 4
/
split_manual.py
91 lines (74 loc) · 2.56 KB
/
split_manual.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
from glob import glob
from pathlib import Path
import shutil
from tqdm import tqdm
from lib_utils_cvat2yolo import is_txt_file_empty
def get_file_paths_lists_for_subset(subset_folder_pth, img_extention, lbl_extention):
    """Pair every image in a subset folder with its expected label path.

    Args:
        subset_folder_pth: Folder that holds the images of one subset.
        img_extention: Image extension without the leading dot (e.g. ``"png"``).
        lbl_extention: Label extension without the leading dot (e.g. ``"txt"``).

    Returns:
        A list of ``(image_path, label_path)`` tuples sorted by image path.
        The list is empty when the folder does not exist. Label paths are
        derived from the image paths by swapping the extension; the label
        files themselves are not checked for existence.
    """
    # Local import: the file only imports the ``glob`` function at top level.
    from glob import escape

    if not os.path.exists(subset_folder_pth):
        return []

    # Escape the folder part so glob metacharacters in its name (e.g. "[")
    # are matched literally. Sort because glob returns files in arbitrary
    # order, which would make downstream processing non-reproducible.
    folder_pattern = escape(os.fspath(subset_folder_pth))
    img_file_pths = sorted(glob(f"{folder_pattern}/*.{img_extention}"))

    # Swap only the trailing extension. A plain str.replace would also rewrite
    # ".<img_extention>" wherever it occurs earlier in the path (for example
    # inside a directory name) and point at a non-existent location.
    lbl_file_pths = [
        f"{os.path.splitext(img_pth)[0]}.{lbl_extention}" for img_pth in img_file_pths
    ]
    return list(zip(img_file_pths, lbl_file_pths))
def _copy_img_lbl_pair(img, lbl, out_folder, subset):
    """Copy one image/label pair into the ``subset`` folders under ``out_folder``."""
    shutil.copy(lbl, os.path.join(out_folder, "labels", subset, os.path.basename(lbl)))
    shutil.copy(img, os.path.join(out_folder, "images", subset, os.path.basename(img)))


def manualsplit(
    out_folder,
    train_folder_pth,
    val_folder_pth,
    test_folder_pth,
    img_extention,
    percentage_empty,
    lbl_extention="txt",
):
    """Copy pre-split train/val/test folders into one output folder tree.

    Files are copied to ``out_folder/images/<subset>`` and
    ``out_folder/labels/<subset>`` where ``<subset>`` is ``train``, ``val``
    or ``test``. The val and test subsets are copied in full. In the train
    subset the number of pairs whose label file is empty (as reported by
    ``is_txt_file_empty``) is capped; surplus empty pairs are skipped.

    Args:
        out_folder: Root folder that receives the copied files.
        train_folder_pth: Source folder of the train subset.
        val_folder_pth: Source folder of the validation subset.
        test_folder_pth: Source folder of the test subset.
        img_extention: Image extension without the leading dot.
        percentage_empty: Maximum share of empty-label pairs kept in the
            train subset, in percent of the total number of train pairs.
        lbl_extention: Label extension without the leading dot.
    """
    img_lbl_file_pth_train = get_file_paths_lists_for_subset(
        train_folder_pth, img_extention, lbl_extention
    )
    img_lbl_file_pth_val = get_file_paths_lists_for_subset(
        val_folder_pth, img_extention, lbl_extention
    )
    img_lbl_file_pth_test = get_file_paths_lists_for_subset(
        test_folder_pth, img_extention, lbl_extention
    )

    # shutil.copy does not create missing parent folders, so make sure the
    # whole destination tree exists before the first copy.
    for subset in ("train", "val", "test"):
        for kind in ("labels", "images"):
            os.makedirs(os.path.join(out_folder, kind, subset), exist_ok=True)

    print("Creating train dataset")
    N_files_train = len(img_lbl_file_pth_train)
    acceptable_N_of_empty_files = int(N_files_train * percentage_empty / 100)
    count_of_empty_files = 0
    for img, lbl in tqdm(img_lbl_file_pth_train):
        if is_txt_file_empty(lbl):
            count_of_empty_files += 1
            # Only empty-label pairs beyond the allowed quota are dropped;
            # pairs with annotations are always copied.
            if count_of_empty_files > acceptable_N_of_empty_files:
                continue
        _copy_img_lbl_pair(img, lbl, out_folder, "train")

    print("Creating val dataset")
    for img, lbl in tqdm(img_lbl_file_pth_val):
        _copy_img_lbl_pair(img, lbl, out_folder, "val")

    print("Creating test dataset")
    for img, lbl in tqdm(img_lbl_file_pth_test):
        _copy_img_lbl_pair(img, lbl, out_folder, "test")
if __name__ == "__main__":
    # Script entry point: split the sample dataset into "out_man", keeping
    # at most 10 percent empty-label pairs in the train subset.
    manualsplit(
        out_folder="out_man",
        train_folder_pth="my_dataset_full/obj_Train_data",
        val_folder_pth="my_dataset_full/obj_Validation_data",
        test_folder_pth="my_dataset_full/obj_Test_data",
        img_extention="png",
        percentage_empty=10,
        lbl_extention="txt",
    )