-
Notifications
You must be signed in to change notification settings - Fork 0
/
plaintext_sensitivity_analysis.py
124 lines (102 loc) · 4.81 KB
/
plaintext_sensitivity_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 24 19:11:52 2018
@author: [email protected]
"""
import re
import json
import hashlib
import matplotlib.pyplot as plt
import seaborn as sns
def keep_known_words(candidates, vocabulary):
    """Return the entries of *candidates* that are keys of *vocabulary*.

    A new list is built (order preserved) instead of calling ``list.remove``
    on the list being iterated: removing during iteration makes the iterator
    skip the element that follows each removal, so unknown words could
    survive the filter and fail later on lookup.
    """
    return [word for word in candidates if word in vocabulary]


def digest_similarity(base_vector, other_vector):
    """Return the fraction of equal bits between two SHA-256 digests.

    Each argument must be an iterable of strings (it is passed to
    ``''.join``); the joined text is UTF-8 encoded and hashed.  The result is
    ``1 - hamming_distance / 256``, so identical inputs give ``1.0``.

    A fresh hash object is created for every call.  Reusing one object and
    calling ``update`` repeatedly hashes the concatenation of everything fed
    so far, which would make each result depend on all earlier comparisons.
    """
    base_hex = hashlib.sha256(''.join(base_vector).encode('utf-8')).hexdigest()
    other_hex = hashlib.sha256(''.join(other_vector).encode('utf-8')).hexdigest()
    # XOR leaves a 1 bit wherever the two 256-bit digests disagree.
    differing_bits = bin(int(base_hex, 16) ^ int(other_hex, 16)).count('1')
    return 1 - differing_bits / 256


def group_similarities(vocabulary, reference_word, related_words):
    """Compare *reference_word* against every word in *related_words*.

    Both the reference word and each related word are looked up in
    *vocabulary*; a missing key raises ``KeyError``.  Returns one
    ``digest_similarity`` value per related word, in order.
    """
    reference_vector = vocabulary[reference_word]
    return [digest_similarity(reference_vector, vocabulary[word])
            for word in related_words]


if __name__ == '__main__':
    # Load the Chinese table.
    with open('pre_processed_table200d_2epoch.json', 'r', encoding='utf-8') as f:
        wordDict = json.load(f)
    # Load the English table.
    with open('pre_processed_table_ks.json', 'r', encoding='utf-8') as f:
        wordDict1 = json.load(f)

    similarList = []

    testword1 = '群众'
    testword2 = '男人'
    testword3 = '女人'
    testwords1 = ['大众','万众','民众','公众','众生','千夫']
    testwords2 = ['男人家','爱人','女婿','先生','壮汉','汉','汉子','士','人夫','男士','老公',
                  '那口子','官人','丈夫','男子汉','男子','男儿','当家的','光身汉','须眉','夫']
    testwords3 = ['妇道','太太','妇','老婆','娘','娘子','女人家','红装','妻子','女儿','媳妇儿',
                  '老婆子','内','家','婆姨','妻','女性','巾帼','婆娘','家庭妇女','娘儿们','女子',
                  '小娘子','妻妾','老小','娘子军','农妇','女郎','才女','妻室','妇女','半边天',
                  '内助','贤内助','石女','爱妻','爱人','家里','妇人','女士','女','老伴','夫人']
    testword4 = 'people'
    testword5 = 'male'
    testword6 = 'female'
    testwords4 = ['persons','humans','individuals','folk','human beings','humanity','mankind','mortals','the human race']
    testwords5 = ['masculine','manly','macho','virile','manlike']
    testwords6 = ['Woman','girl','lady','lass','shelia','charlie','chook']

    # Drop every candidate word that has no entry in its table.
    testwords1 = keep_known_words(testwords1, wordDict)
    testwords2 = keep_known_words(testwords2, wordDict)
    testwords3 = keep_known_words(testwords3, wordDict)
    testwords4 = keep_known_words(testwords4, wordDict1)
    testwords5 = keep_known_words(testwords5, wordDict1)
    testwords6 = keep_known_words(testwords6, wordDict1)

    # Chinese: compare each reference word's hash with its related words'.
    similarList.extend(group_similarities(wordDict, testword1, testwords1))
    similarList.extend(group_similarities(wordDict, testword2, testwords2))
    similarList.extend(group_similarities(wordDict, testword3, testwords3))

    # English: same comparison against the English table.
    similarList.extend(group_similarities(wordDict1, testword4, testwords4))
    similarList.extend(group_similarities(wordDict1, testword5, testwords5))
    similarList.extend(group_similarities(wordDict1, testword6, testwords6))