-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup.py
139 lines (110 loc) · 5.02 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import pandas as pd
import os
import requests
from zipfile import ZipFile
from tempfile import mkdtemp
import wget
import shutil
import sys, argparse
def run_download_data(download_emozionalmente=True, download_emovo=False):
    """Download the requested datasets and set up the expected layout in ``./data``.

    When *download_emozionalmente* is true, the metadata and audio archives
    are fetched from Dropbox, extracted into ``./data/`` and
    ``generate_data_files()`` is run to build the audio4analysis metadata.
    When *download_emovo* is true, the EMOVO archive is fetched, extracted
    into ``./data/`` and ``process_emovo_data()`` is run on ``./data/EMOVO/``.

    Downloaded archives are deleted once extraction has finished, including
    when extraction fails, so a retry does not accumulate stale zip files.

    Args:
        download_emozionalmente: Fetch and prepare the Emozionalmente data.
        download_emovo: Fetch and prepare the EMOVO data.
    """
    # TODO: make the dataset sources more generalizable
    os.makedirs('./data', exist_ok=True)

    if download_emozionalmente:
        os.makedirs('./data/audio4analysis/', exist_ok=True)

        archives = []  # every zip that reached the disk, for cleanup
        try:
            metadata_file = wget.download('https://www.dropbox.com/s/gi1iwc3xwwl0a4z/metadata.zip?dl=1')
            archives.append(metadata_file)
            audio_file = wget.download('https://www.dropbox.com/s/tlbxkdabow9w03i/audio.zip?dl=1')
            archives.append(audio_file)
            print('Downloaded the metadata and audio zip files')

            print('Extracting the audio zip file. This may take a few minutes')
            with ZipFile(audio_file, 'r') as zip_ref:
                zip_ref.extractall('./data/')
            print('Successfully extracted the audio zip file')

            with ZipFile(metadata_file, 'r') as zip_ref:
                zip_ref.extractall('./data/')
            print('Successfully extracted the metadata zip file')
        finally:
            # Remove the archives even if a download or extraction failed.
            for archive in archives:
                if os.path.exists(archive):
                    os.remove(archive)
        print('removed the zip files')

        print('Generating data files in audio4analysis. This may take a few minutes')
        generate_data_files()

    if download_emovo:
        emovo_file = wget.download('https://drive.google.com/u/0/uc?id=1SUtaKeA-LYnKaD3qv87Y5wYgihJiNJAo&export=download&confirm=1')
        try:
            print('Downloaded emovo zip')
            print('Extracting the emovo zip file. This may take a few minutes')
            with ZipFile(emovo_file, 'r') as zip_ref:
                zip_ref.extractall('./data/')
        finally:
            # Do not leave the archive behind when extraction fails.
            if os.path.exists(emovo_file):
                os.remove(emovo_file)

        process_emovo_data('./data/EMOVO/')
        # process_emovo_data writes metadata.csv (the old message said test.csv).
        print('processed emovo data files to generate a metadata.csv')
def process_emovo_data(emovo_dir):
    """Write ``metadata.csv`` describing every recording of an EMOVO corpus.

    Walks the six actor folders (``f1``-``f3`` and ``m1``-``m3``) under
    *emovo_dir* and derives, for each recording, the speaker gender, the
    emotion label (from the three-letter file-name prefix) and a numeric
    actor id. One tab-separated row per recording is written to
    ``<emovo_dir>/metadata.csv`` with the columns ``gender``, ``emotion``,
    ``actor`` and ``path``.

    Entries that are not regular files, or whose name does not start with a
    known emotion code (for example ``.DS_Store``), are skipped. Files are
    visited in sorted order so the output is reproducible.

    Args:
        emovo_dir: Root directory of the extracted EMOVO dataset.

    Raises:
        FileNotFoundError: If one of the actor folders does not exist.
    """
    actors = ['f1', 'f2', 'f3', 'm1', 'm2', 'm3']
    # Three-letter file-name prefix -> emotion label.
    emotions = {
        'dis': 'disgust',
        'gio': 'joy',
        'neu': 'neutrality',
        'pau': 'fear',
        'rab': 'anger',
        'sor': 'surprise',
        'tri': 'sadness',
    }
    columns = ['gender', 'emotion', 'actor', 'path']

    data = []
    for actor in actors:
        directory = os.path.join(emovo_dir, actor)
        is_male = actor[0] == 'm'
        gender = 'male' if is_male else 'female'  # no other values in this dataset
        # Female actors get ids 1-3, male actors 4-6.
        person = 3 * is_male + int(actor[1])

        for f in sorted(os.listdir(directory)):
            emotion = emotions.get(f[0:3])
            if emotion is None or not os.path.isfile(os.path.join(directory, f)):
                continue  # not an EMOVO recording
            data.append({
                'gender': gender,
                'emotion': emotion,
                'actor': str(person),
                'path': os.path.join(emovo_dir, f'{actor}/{f}'),
            })

    # Explicit columns keep the header present even when nothing was found.
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(os.path.join(emovo_dir, 'metadata.csv'), sep="\t", encoding="utf-8", index=False)
def generate_data_files():
    """Build ``data/audio4analysis/metadata.csv`` from the downloaded metadata.

    Reads ``data/metadata/users.csv`` and ``data/metadata/samples.csv``,
    attaches each sample's speaker ``gender`` and ``age`` (looked up by the
    sample's ``actor`` in the users' ``username`` column) and adds the
    columns ``new_file_name``, ``path`` and ``class_id``. The result is
    written as a tab-separated file.

    Raises:
        KeyError: If a sample refers to an actor that is not listed in
            ``users.csv``.
    """
    users_df = pd.read_csv("data/metadata/users.csv")
    samples_df = pd.read_csv("data/metadata/samples.csv")

    # One lookup table keyed by username instead of re-filtering users_df for
    # every sample. Keeping the first duplicate mirrors taking the first match.
    users = users_df.drop_duplicates(subset='username', keep='first').set_index('username')

    known = samples_df['actor'].isin(users.index)
    if not known.all():
        missing = samples_df.loc[~known, 'actor'].unique().tolist()
        raise KeyError(f"actors missing from data/metadata/users.csv: {missing}")

    gender = samples_df['actor'].map(users['gender'])
    age = samples_df['actor'].map(users['age'])
    new_file_name = (
        gender + '____' + samples_df['actor'] + '____'
        + samples_df['emotion_expressed'] + '____' + samples_df['file_name']
    )

    # Column order below determines the column order of the output file.
    samples_df['gender'] = gender
    samples_df['age'] = age
    samples_df['new_file_name'] = new_file_name
    # NOTE(review): audio files are not copied or renamed here, so 'path'
    # presumably relies on the files already existing under this name - confirm.
    samples_df['path'] = 'data/audio4analysis/' + new_file_name
    samples_df['class_id'] = samples_df['emotion_expressed']

    # Allow running this step on its own, without the download step.
    os.makedirs('data/audio4analysis', exist_ok=True)
    samples_df.to_csv('data/audio4analysis/metadata.csv', sep="\t", encoding="utf-8", index=False)
def main(argv=None):
    """Parse the command line and download the selected datasets.

    ``--include_emovo 1`` downloads everything, ``--only_emovo 1`` downloads
    only EMOVO, and anything else downloads only the Emozionalmente data.
    ``--include_emovo`` takes precedence when both flags are set to 1.

    Values other than 0 or 1 are reported before the download starts and are
    otherwise treated as if the flag had not been given.

    Args:
        argv: Optional list of command-line arguments. ``None`` lets argparse
            read ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--only_emovo", type=int, help="Download only emovo, uses 0 or 1")
    parser.add_argument("--include_emovo", type=int, help="Download all data including emovo, uses 0 or 1")
    args = parser.parse_args(argv)

    # Warn up front: the download can take minutes, and a warning printed
    # after it is easy to miss.
    if args.only_emovo not in (None, 0, 1):
        print('Expected only 0 and 1 for --only_emovo')
    if args.include_emovo not in (None, 0, 1):
        print('Expected only 0 and 1 for --include_emovo')

    if args.include_emovo == 1:
        run_download_data(True, True)
    elif args.only_emovo == 1:
        run_download_data(False, True)
    else:
        run_download_data(True, False)
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()