-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathanonymizer.py
123 lines (105 loc) · 3.48 KB
/
anonymizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""
run clustering_based_k_anon with given parameters
"""
# !/usr/bin/env python
# coding=utf-8
from clustering_based_k_anon import clustering_based_k_anon
from utils.read_adult_data import read_data as read_adult
from utils.read_adult_data import read_tree as read_adult_tree
import sys
import copy
import pdb
import random
import cProfile
# Debug switch: when true, the default (no command-line argument) run of
# get_result_one is executed under cProfile instead of being called directly.
__DEBUG = True
def extend_result(val):
    """
    Flatten a list value into a single comma-separated string.

    Any value that is not a list is handed back unchanged.
    """
    return ','.join(val) if isinstance(val, list) else val
def write_to_file(result):
    """
    Dump the anonymized records to result/anonymized.data.

    Every record becomes one output line: each of its values is passed
    through extend_result and the resulting fields are joined with ';'.
    """
    with open("result/anonymized.data", "w") as out_file:
        for record in result:
            fields = [extend_result(value) for value in record]
            out_file.write(';'.join(fields) + '\n')
def get_result_one(att_trees, data, type_alg, k=10):
    """
    Run clustering_based_k_anon a single time (k defaults to 10).

    The anonymized records are written out through write_to_file, then the
    first two entries of the evaluation result are printed as NCP and
    running time.
    """
    print("K=%d" % k)
    # Snapshot the input before the run; presumably the algorithm may modify
    # `data` in place -- confirm against clustering_based_k_anon.
    snapshot = copy.deepcopy(data)
    anonymized, evaluation = clustering_based_k_anon(
        att_trees, data, type_alg, k)
    write_to_file(anonymized)
    data = copy.deepcopy(snapshot)
    ncp_text = "NCP %0.2f" % evaluation[0]
    print(ncp_text + "%")
    time_text = "Running time %0.2f" % evaluation[1]
    print(time_text + "seconds")
def get_result_n(att_trees, data, type_alg, k=10, n=10):
    """
    Run clustering_based_k_anon n times and print the averaged metrics.

    Parameters:
        att_trees, data, type_alg: forwarded unchanged to
            clustering_based_k_anon.
        k: the k value forwarded to the algorithm (default 10).
        n: number of repetitions to average over (default 10).

    Prints the average of eval_result[0] as NCP and the average of
    eval_result[1] as running time.

    Raises:
        ValueError: if n is smaller than 1 (an average over zero runs is
            undefined).
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))
    print("K=%d" % k)
    data_back = copy.deepcopy(data)
    total_ncp = 0.0
    total_time = 0.0
    for _ in range(n):
        _result, eval_result = clustering_based_k_anon(
            att_trees, data, type_alg, k)
        # Hand the next repetition a fresh copy of the input; presumably the
        # algorithm may modify `data` in place -- confirm against
        # clustering_based_k_anon.
        data = copy.deepcopy(data_back)
        total_ncp += eval_result[0]
        total_time += eval_result[1]
    avg_ncp = total_ncp / n
    # Average the accumulated running time (the NCP average was previously
    # divided a second time here, which reported a wrong running time).
    avg_time = total_time / n
    print("Run %d times" % n)
    print("NCP %0.2f" % avg_ncp + "%")
    print("Running time %0.2f" % avg_time + " seconds")
def get_result_k(att_trees, data, type_alg):
    """
    Vary k while keeping the rest of the setup and the dataset fixed.

    Runs clustering_based_k_anon for k = 50, 100, ..., 500, prints the NCP
    and running time of every run, and finally prints both series rounded
    to two decimals.
    """
    pristine = copy.deepcopy(data)
    ncp_series = []
    time_series = []
    for k in range(50, 501, 50):
        print('#' * 30)
        print("K=%d" % k)
        _unused, evaluation = clustering_based_k_anon(
            att_trees, data, type_alg, k)
        # Start the next k from a fresh copy of the input.
        data = copy.deepcopy(pristine)
        ncp_text = "NCP %0.2f" % evaluation[0]
        print(ncp_text + "%")
        ncp_series.append(round(evaluation[0], 2))
        time_text = "Running time %0.2f" % evaluation[1]
        print(time_text + "seconds")
        time_series.append(round(evaluation[1], 2))
    print("All NCP", ncp_series)
    print("All Running time", time_series)
if __name__ == '__main__':
    # Command-line entry point.
    #   (no argument)  one run with the default k (profiled when __DEBUG)
    #   <integer>      one run with that value of k
    #   k              sweep over several values of k
    #   n [count]      repeat the run `count` times (default 10) and average
    #
    # Everything stays at module level on purpose: cProfile.run() executes
    # its statement string in the __main__ namespace, so ATT_TREES, DATA and
    # TYPE_ALG must be module globals.
    print("Using Adult Dataset")
    DATA = read_adult()
    ATT_TREES = read_adult_tree()
    TYPE_ALG = 'oka'
    FLAG = sys.argv[1] if len(sys.argv) > 1 else ''

    def _print_usage():
        """Print the command-line help shown for unparsable arguments."""
        print("Usage: python anonymizer [k | n] number of iterations")
        print("k: varying k")
        print("example: python anonymizer.py 200")
        print("example: python anonymizer.py n 10")

    if FLAG == 'k':
        get_result_k(ATT_TREES, DATA, TYPE_ALG)
    elif FLAG == 'n':
        # Honour the optional repetition count advertised by the usage text
        # ("anonymizer.py n 10"); fall back to 10 when it is omitted.
        try:
            INPUT_N = int(sys.argv[2]) if len(sys.argv) > 2 else 10
            if INPUT_N < 1:
                raise ValueError("iteration count must be positive")
        except ValueError:
            _print_usage()
        else:
            get_result_n(ATT_TREES, DATA, TYPE_ALG, n=INPUT_N)
    elif FLAG == '':
        if __DEBUG:
            cProfile.run('get_result_one(ATT_TREES, DATA, TYPE_ALG)')
        else:
            get_result_one(ATT_TREES, DATA, TYPE_ALG)
    else:
        # Only the argument parsing is guarded, so that a ValueError raised
        # inside the algorithm itself is not misreported as a usage error.
        try:
            INPUT_K = int(FLAG)
        except ValueError:
            _print_usage()
        else:
            get_result_one(ATT_TREES, DATA, TYPE_ALG, INPUT_K)
    # anonymized dataset is stored in result
    print('Anonymized data is stored at result/anonymized.data.')
    print("Finish Cluster based K-Anon!!")