-
Notifications
You must be signed in to change notification settings - Fork 2
/
ratio_gen.py
56 lines (49 loc) · 1.69 KB
/
ratio_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python3
import os
import sys
import pandas as pd
import zipfile
from multiprocessing import Pool
def data_collector(values):
    """Measure the raw and ZIP-deflated size of a single file.

    The file is compressed into a temporary single-member archive written
    next to it (``<stem>.zip``); the archive is always removed again, even
    when compression fails.

    Args:
        values: Two-item sequence ``[directory, filename]``.

    Returns:
        dict with the keys
        ``name``  - filename without its final extension,
        ``size``  - size of the file in bytes,
        ``csize`` - size in bytes of the deflate-compressed ZIP archive,
        ``type``  - final extension without the leading dot.

    Raises:
        OSError: if the file cannot be read or the archive cannot be written.
    """
    directory, filename = values[0], values[1]
    # splitext only strips the LAST extension, so "a.b.bytes" keeps the
    # name "a.b" and the type "bytes" (a plain split(".") would mangle both).
    stem, extension = os.path.splitext(filename)
    # Work with explicit paths instead of os.chdir(): changing the working
    # directory is process-wide and breaks relative directories on reuse.
    source_path = os.path.join(directory, filename)
    archive_path = os.path.join(directory, f'{stem}.zip')

    data = {}
    try:
        with zipfile.ZipFile(archive_path, "w") as zipper:
            # arcname keeps the stored member name equal to the bare
            # filename, so the archive size does not depend on `directory`.
            zipper.write(source_path, arcname=filename,
                         compress_type=zipfile.ZIP_DEFLATED)
        data['name'] = stem
        data['size'] = os.path.getsize(source_path)
        data['csize'] = os.path.getsize(archive_path)
        data['type'] = extension.lstrip('.')
    finally:
        # Never leave the scratch archive behind, whatever happened above.
        try:
            os.remove(archive_path)
        except FileNotFoundError:
            pass
    return data
# Column order of the resulting table / CSV ('name' becomes the index).
RATIO_COLUMNS = [
    'name',
    "ratio(bytes/asm)",
    "ratio(c_bytes/c_asm)",
    "size(bytes)",
    "size(c_bytes)",
    "size(asm)",
    "size(c_asm)",
]


def build_ratio_rows(records):
    """Pair the 'bytes' and 'asm' records by name and compute size ratios.

    Args:
        records: Iterable of dicts with the keys ``name``, ``size``,
            ``csize`` and ``type`` (``"bytes"`` or ``"asm"``; records of
            any other type are ignored).

    Returns:
        List of row dicts (keys as in ``RATIO_COLUMNS``), sorted by name.
        A ratio whose denominator is 0 is reported as NaN.

    Raises:
        ValueError: if some name has a 'bytes' record but no 'asm' record,
            or the other way round.
    """
    records = list(records)
    # Pair by name rather than by list position: the records come from
    # os.listdir(), whose ordering is arbitrary and may differ per directory.
    bytes_by_name = {r['name']: r for r in records if r['type'] == "bytes"}
    asm_by_name = {r['name']: r for r in records if r['type'] == "asm"}

    unmatched = sorted(bytes_by_name.keys() ^ asm_by_name.keys())
    if unmatched:
        raise ValueError(
            "files without a bytes/asm counterpart: " + ", ".join(unmatched)
        )

    def ratio(numerator, denominator):
        # An empty asm file would otherwise abort the run with
        # ZeroDivisionError.
        return numerator / denominator if denominator else float("nan")

    rows = []
    for name in sorted(bytes_by_name):
        b = bytes_by_name[name]
        a = asm_by_name[name]
        rows.append({
            'name': name,
            "ratio(bytes/asm)": ratio(b['size'], a['size']),
            "ratio(c_bytes/c_asm)": ratio(b['csize'], a['csize']),
            "size(bytes)": b['size'],
            "size(c_bytes)": b['csize'],
            "size(asm)": a['size'],
            "size(c_asm)": a['csize'],
        })
    return rows


def main():
    """Measure every Dataset/bytes and Dataset/asm file and write the ratios.

    Reads ``Dataset/bytes/*.bytes`` and ``Dataset/asm/*.asm`` below the
    current working directory, collects raw/compressed sizes in a pool of
    4 worker processes, prints the resulting table and stores it as
    ``results/ratio_data.csv``. Exits with status 1 if the two directories
    do not contain matching file names.
    """
    o_path = os.getcwd()
    byt_path = os.path.join(o_path, "Dataset", "bytes")
    asm_path = os.path.join(o_path, "Dataset", "asm")

    # endswith() matches the real extension; a substring test would also
    # pick up names such as "x.bytes.bak".
    values = [[byt_path, i] for i in sorted(os.listdir(byt_path))
              if i.endswith(".bytes")]
    values += [[asm_path, i] for i in sorted(os.listdir(asm_path))
               if i.endswith(".asm")]

    with Pool(4) as p:
        records = p.map(data_collector, values)

    try:
        struct_dat = build_ratio_rows(records)
    except ValueError as err:
        print("Error")
        print(err, file=sys.stderr)
        sys.exit(1)

    # Passing the columns explicitly keeps set_index() working even when
    # no file pair was found (empty frame).
    frame = pd.DataFrame(struct_dat, columns=RATIO_COLUMNS)
    frame = frame.set_index('name')
    print(frame)

    results_dir = os.path.join(o_path, "results")
    os.makedirs(results_dir, exist_ok=True)  # to_csv does not create it
    frame.to_csv(os.path.join(results_dir, 'ratio_data.csv'),
                 sep=',', encoding='utf-8', index=True)


if __name__ == '__main__':
    main()