-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#1 2-28 / RAW DATA has been uploaded in Submission_codes.
- Loading branch information
1 parent
a23809d
commit cd0055a
Showing
11 changed files
with
440 additions
and
154 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import os | ||
import shutil | ||
|
||
|
||
# 0. shutil ships with the Python standard library, so nothing has to be installed.
# 0. Running this file as-is creates a backup folder and fixes the mislabeled
#    entries inside that backup. After that you can move on to ready_to_data.py.
# 0. For further corrections, read on and edit the class, methods and id lists below.
# NOTE: the mask swap and the gender flip below are toggles. Running this file
#       again against an existing backup undoes them, so delete the backup
#       folder first if you need to start over.
try:
    shutil.copytree("/opt/ml/input", "/opt/ml/backup/input")
except FileExistsError:
    # Only an already-existing backup is expected here; any other failure
    # (for example a missing source folder) must surface instead of being
    # reported as "already copied".
    print("복사한 폴더가 이미 있어요.")

# 1. Root of the per-person image directories (working on the backup copy is recommended).
images_path = '/opt/ml/backup/input/data/train/images'
|
||
# 2. Class that rewrites attributes encoded in file and directory names.
class ChangeAttr(object):
    """Correct mislabeled samples by renaming entries under an images root.

    Per-person directory names are treated as ``<index>_<gender>_<region>_<age>``
    (split on ``_``). A directory is selected when its ``<index>`` token is
    contained in the list handed to one of the ``change_*`` methods.
    """

    def __init__(self, images_path):
        """List the per-person directories found directly under ``images_path``.

        Entries whose name contains ``._`` are skipped.
        """
        # Remember the root so every method works on the directory that was
        # actually listed, not on a module-level global of the same name.
        self.images_path = images_path
        image_directory_names = [i for i in os.listdir(images_path) if '._' not in i]
        image_directory_names.sort()
        self.image_directory_names = image_directory_names

    def _select_directories(self, error_list):
        """Return the known directory names whose index token is in ``error_list``."""
        wanted = set(error_list)
        # Compare the whole token before the first '_' so that ids longer than
        # six characters (e.g. '001498-1') can be matched as well.
        return [name for name in self.image_directory_names if name.split('_')[0] in wanted]

    def _rename_directory(self, old_name, new_name):
        """Rename one per-person directory and keep the cached name list in sync."""
        before = os.path.join(self.images_path, old_name)
        after = os.path.join(self.images_path, new_name)
        print(before)
        print(after)
        os.rename(before, after)
        # Without this, a later call would still look for the old name on disk.
        position = self.image_directory_names.index(old_name)
        self.image_directory_names[position] = new_name

    def change_incorrect_mask_and_normal(self, mask_error_list: list):
        """Swap ``incorrect_mask.jpg`` and ``normal.jpg`` in the selected directories.

        Args:
            mask_error_list: index strings of the directories to fix.

        Raises:
            FileNotFoundError: if either file is missing; nothing is renamed
                in that directory in this case.
        """
        for i in self._select_directories(mask_error_list):
            change_path = os.path.join(self.images_path, i)
            incorrect_file = os.path.join(change_path, 'incorrect_mask.jpg')
            temporary_file = os.path.join(change_path, 'A1.txt')
            normal_file = os.path.join(change_path, 'normal.jpg')

            # Check up front so a missing file cannot leave the directory
            # half-swapped with a stray temporary file in it.
            for required in (incorrect_file, normal_file):
                if not os.path.exists(required):
                    raise FileNotFoundError(required)

            # Three-step swap through a temporary name.
            os.rename(incorrect_file, temporary_file)
            os.rename(normal_file, incorrect_file)
            os.rename(temporary_file, normal_file)

            print(f"{i}'s file names have been changed.")

    def change_gender(self, gender_error_list: list):
        """Flip ``male`` <-> ``female`` in the names of the selected directories.

        Args:
            gender_error_list: index strings of the directories to fix.
        """
        for i in self._select_directories(gender_error_list):
            index_gender_region_age_list = i.split('_')
            if index_gender_region_age_list[1] == 'female':
                index_gender_region_age_list[1] = 'male'
            elif index_gender_region_age_list[1] == 'male':
                index_gender_region_age_list[1] = 'female'
            renamed = '_'.join(index_gender_region_age_list)
            self._rename_directory(i, renamed)

            print(f"{renamed}'s gender has been changed.")

    def change_age(self, age_error_list: list, age_which_would_be_changed: int):
        """Overwrite the age token in the names of the selected directories.

        Unlike the mask swap and the gender flip, this is not reversible by
        calling it again: to undo it, record the previous age and apply it
        back, or restore the backup.

        Args:
            age_error_list: index strings of the directories to fix.
            age_which_would_be_changed: the age to write into the name.
        """
        for i in self._select_directories(age_error_list):
            index_gender_region_age_list = i.split('_')
            index_gender_region_age_list[3] = str(age_which_would_be_changed)
            renamed = '_'.join(index_gender_region_age_list)
            self._rename_directory(i, renamed)
            print(f"{renamed}'s age has been changed.")
|
||
# Indices of the directories to fix. The key is the index at the front of each
# per-person directory name, given as a str.
mask_error_list = ['000020', '004418', '005227']
female_error_list = [
    '000225', '000664', '000767', '001509', '003113',
    '003223', '004281', '006359', '006360', '006361',
    '006362', '006363', '006364', '006424', '000667',
    '000725', '000736', '000817', '003780', '006504',
]
male_error_list = ['001498-1', '004432', '005223']
age29_error_list = ['001009', '001064', '001637', '001666', '001852']
age61_error_list = ['004348']

# Apply the fixes: mask swap first, then both gender lists, then the age overrides.
CA = ChangeAttr(images_path)
CA.change_incorrect_mask_and_normal(mask_error_list)
for _gender_fix_ids in (female_error_list, male_error_list):
    CA.change_gender(_gender_fix_ids)
for _fixed_age, _age_fix_ids in ((29, age29_error_list), (61, age61_error_list)):
    CA.change_age(_age_fix_ids, _fixed_age)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
import os | ||
import random | ||
from PIL import Image | ||
|
||
|
||
# Decides whether (and how) to hold out a test split. Takes the images root and
# a sampling rate, and exposes the resulting directory-name lists as attributes.
# This module only declares the class and function; they are called from the
# file that consumes the data (test_data.py has an example).
class Sampling(object):
    """Split the per-person directory names into train and test lists.

    Attributes:
        train_image_directory_names: names kept for training.
        test_image_directory_names: names held out for testing; an empty list
            when ``sampling_size_rate`` is 0.
    """

    def __init__(self, images_path, sampling_size_rate=0):
        """Build the split.

        Args:
            images_path: directory whose entries are listed; entries whose
                name contains ``._`` are skipped.
            sampling_size_rate: 0 keeps everything for training. Otherwise
                every ``int(1 / sampling_size_rate)``-th name, starting at a
                random offset, goes to the test list.

        Raises:
            ValueError: if ``sampling_size_rate`` is non-zero and not in (0, 1].
        """
        image_directory_names = sorted(
            i for i in os.listdir(images_path) if '._' not in i
        )

        # Define both attributes on every path so callers never hit an
        # AttributeError when no test split was requested.
        self.test_image_directory_names = []
        if sampling_size_rate == 0:
            self.train_image_directory_names = image_directory_names
            return

        if not 0 < sampling_size_rate <= 1:
            raise ValueError(
                f"sampling_size_rate must be 0 or in (0, 1], got {sampling_size_rate!r}"
            )

        # Systematic sampling: pick a random start, then take every step-th name.
        step = int(1 / sampling_size_rate)
        first_number_choise = random.randrange(0, step)

        train_names = []
        for position, name in enumerate(image_directory_names):
            if position % step == first_number_choise:
                self.test_image_directory_names.append(name)
            else:
                train_names.append(name)
        self.train_image_directory_names = train_names
|
||
|
||
# Produces the images and label lists. Takes the images root and a list of
# directory names such as the ones exposed by the Sampling class.
def refine_data(images_path, image_directory_names: list):
    """Open every image under the given directories and derive its labels.

    Directory names are split on ``_`` into four parts, of which the second is
    the gender and the fourth the age; the file name up to the first ``.`` is
    the mask state.

    Args:
        images_path: root directory containing the per-person directories.
        image_directory_names: names of the directories to read.

    Returns:
        A tuple ``(image_list, mask_class, gender_class, age_class,
        mixed_class)`` of equally ordered lists: opened images, mask code
        (0 mask, 1 incorrect_mask, 2 normal), gender code (0 male, 1 female),
        age bucket (0 for <30, 1 for 30-59, 2 for >=60) and the combined
        class in the range 0-17.

    Raises:
        KeyError: if a gender token or file stem is not one of the known keys.
    """
    gender_codes = {
        'male': 0,
        'female': 1,
    }
    mask_codes = {
        'mask1': 0,
        'mask2': 0,
        'mask3': 0,
        'mask4': 0,
        'mask5': 0,
        'incorrect_mask': 1,
        'normal': 2,
    }

    def open_images(directory_names: list):
        """Collect the file paths below each directory and open them."""
        file_paths = []
        for name in directory_names:
            directory = images_path + "/" + name + "/"
            for entry in os.listdir(directory):
                if '._' not in entry:
                    file_paths.append(directory + entry)
        # NOTE(review): per the Pillow docs Image.open is lazy and keeps the
        # file open until the pixel data is loaded — confirm that is
        # acceptable for the number of files opened here.
        images = [Image.open(path) for path in file_paths]
        return images, file_paths

    def age_bucket(years: int) -> int:
        """Map an age in years to its bucket: <30 -> 0, 30-59 -> 1, >=60 -> 2."""
        if x_below(years, 30):
            return 0
        return 1 if x_below(years, 60) else 2

    def x_below(value: int, limit: int) -> bool:
        """Return True when ``value`` is strictly below ``limit``."""
        return value < limit

    def label_lists(file_paths: list):
        """Derive the mask, gender and age codes from each file path."""
        parts = [path.split('/') for path in file_paths]
        directory_names = [p[-2] for p in parts]
        file_names = [p[-1] for p in parts]

        genders = []
        ages = []
        for _, gender_token, _, age_token in (d.split('_') for d in directory_names):
            genders.append(gender_token)
            ages.append(int(age_token))

        mask_names = [f.split('.')[0] for f in file_names]

        gender_labels = [gender_codes[g] for g in genders]
        mask_labels = [mask_codes[m] for m in mask_names]
        age_labels = [age_bucket(a) for a in ages]
        return mask_labels, gender_labels, age_labels

    def combine(mask: list, gender: list, age: list) -> list:
        """Fold the three codes into one class id: mask * 6 + gender * 3 + age."""
        return [m * 6 + g * 3 + a for m, g, a in zip(mask, gender, age)]

    image_list, image_file_path = open_images(image_directory_names)
    mask_class, gender_class, age_class = label_lists(image_file_path)
    mixed_class = combine(mask_class, gender_class, age_class)
    return image_list, mask_class, gender_class, age_class, mixed_class
|
||
|
||
if __name__ == '__main__':
    # This module only declares Sampling and refine_data; nothing runs here.
    # Usage sketch (refine_data takes the images root as its first argument):
    #   S = Sampling(images_path, sampling_size_rate)
    #   test_data = refine_data(images_path, S.test_image_directory_names)
    #   train_data = refine_data(images_path, S.train_image_directory_names)
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from ready_to_data import Sampling, refine_data | ||
|
||
|
||
images_path = '/opt/ml/backup/input/data/train/images'
sampling_size_rate = 0.2

# The class and the function take the images root and the sampling rate.

# Sample first, then load each split as shown below.
# With a sampling rate of 0, test_image_directory_names is not created.
S = Sampling(images_path, sampling_size_rate)
(
    test_image_list,
    test_mask_class,
    test_gender_class,
    test_age_class,
    test_mixed_class,
) = refine_data(images_path, S.test_image_directory_names)
(
    train_image_list,
    train_mask_class,
    train_gender_class,
    train_age_class,
    train_mixed_class,
) = refine_data(images_path, S.train_image_directory_names)

# Print a few of the results as a quick check.
print(set(test_mask_class))
print(test_image_list[:5])
print(set(train_mixed_class))
# This is how the data can be loaded.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.