-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathevaluationMetric.py
134 lines (110 loc) · 4.28 KB
/
evaluationMetric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# coding=utf-8
import numpy as np
class EvaluationMetric:
    """Evaluation metrics for multi-label classification.

    Every method takes ``score`` and ``label`` as 2-D array-likes of the
    same shape, indexed as ``[sample, label]``.  An entry of ``label``
    equal to 1 marks a relevant (true) label; ``score`` holds the predicted
    score for that label (higher means more confident).
    """

    @staticmethod
    def getJaccardIndex(score, label, threshold=0.5):
        """Return the mean per-sample Jaccard index.

        For each sample the predicted label set (scores ``>= threshold``)
        is compared with the true label set: size of the intersection
        divided by size of the union.

        :param score: predicted score matrix
        :param label: true label matrix
        :param threshold: decision threshold, 0.5 by default
        :return: the Jaccard index averaged over samples
        """
        # A single comparison binarises the scores.  Masking in two
        # in-place steps is wrong for threshold <= 0: entries zeroed by the
        # first step would satisfy ``>= threshold`` in the second step.
        predicted = (np.asarray(score) >= threshold).astype(float)
        truth = np.asarray(label)

        intersectionSize = (predicted * truth).sum(axis=1)
        # Entries present in both sets add up to 2; clip them back to 1.
        unionSize = np.minimum(predicted + truth, 1).sum(axis=1)

        # Both sets empty: they are identical, so score the sample 1.0
        # instead of letting 0/0 turn the whole mean into NaN.
        emptyUnion = unionSize == 0
        safeUnion = np.where(emptyUnion, 1.0, unionSize)
        perSample = np.where(emptyUnion, 1.0, intersectionSize / safeUnion)
        return np.mean(perSample)

    @staticmethod
    def getAveragePrecision(score, label):
        """Return the label-ranking average precision.

        Labels of each sample are ranked by decreasing score.  For every
        relevant label, the fraction of relevant labels among those ranked
        at or above it is computed; these fractions are averaged over the
        relevant labels of the sample, then over samples.

        :param score: predicted score matrix
        :param label: true label matrix
        :return: the average precision over samples
        """
        score = np.asarray(score)
        label = np.asarray(label)

        ranking = np.argsort(-score, axis=1)  # label indices, best first
        rowIndex = np.arange(ranking.shape[0])[:, None]
        isRelevant = label[rowIndex, ranking] == 1

        # hitsSoFar[i, k]: relevant labels among the top k + 1 of sample i.
        hitsSoFar = np.cumsum(isRelevant, axis=1)
        rankPositions = np.arange(1, ranking.shape[1] + 1, dtype=float)
        precisionAtRank = hitsSoFar / rankPositions

        relevantCount = isRelevant.sum(axis=1)
        precisionSum = (precisionAtRank * isRelevant).sum(axis=1)

        # A sample without relevant labels has no ranking to judge; score
        # it 1.0 (the scikit-learn convention) rather than dividing by 0.
        perSample = np.where(
            relevantCount > 0,
            precisionSum / np.maximum(relevantCount, 1),
            1.0)
        return np.mean(perSample)

    @staticmethod
    def getCoverage(score, label):
        """Return the coverage.

        For each sample this is the 0-based position, in the ranking by
        decreasing score, of the lowest-ranked relevant label, i.e. how far
        down the ranking one must go to cover every relevant label.
        Samples without relevant labels contribute 0.

        :param score: predicted score matrix
        :param label: true label matrix
        :return: the coverage averaged over samples
        """
        score = np.asarray(score)
        label = np.asarray(label)
        sampleCount, labelCount = score.shape[0], score.shape[1]

        # Sort ascending so that the first relevant label met is the one
        # with the lowest score.
        ascending = np.argsort(score, axis=1)
        rowIndex = np.arange(sampleCount)[:, None]
        isRelevant = label[rowIndex, ascending] == 1

        hasRelevant = isRelevant.any(axis=1)
        firstRelevant = isRelevant.argmax(axis=1)
        # Convert the ascending position into a 0-based descending rank.
        depth = np.where(hasRelevant, labelCount - firstRelevant - 1, 0)
        return float(depth.sum()) / sampleCount

    @staticmethod
    def getOneError(score, label):
        """Return the one-error.

        This is the fraction of samples whose highest-scored label is not
        a relevant label.

        :param score: predicted score matrix
        :param label: true label matrix
        :return: the one-error
        """
        score = np.asarray(score)
        label = np.asarray(label)
        topLabel = np.argmax(score, axis=1)
        topIsRelevant = label[np.arange(len(score)), topLabel]
        return 1 - np.mean(topIsRelevant)

    @staticmethod
    def getHammingLoss(score, label, threshold=0.5):
        """Return the Hamming loss.

        Scores are binarised with ``>= threshold``; the loss is the
        fraction of (sample, label) entries where the binarised prediction
        differs from the true label.

        :param score: predicted score matrix
        :param label: true label matrix
        :param threshold: decision threshold, 0.5 by default
        :return: the Hamming loss
        """
        # Binarise with one comparison; see getJaccardIndex for why two
        # sequential in-place masks are wrong when threshold <= 0.
        predicted = (np.asarray(score) >= threshold).astype(float)
        mismatch = predicted != np.asarray(label)
        # Every row has the same length, so the overall mean equals the
        # mean of the per-sample error fractions.
        return np.mean(mismatch)

    @staticmethod
    def getRankingLoss(score, label):
        """Return the ranking loss.

        For each sample, every (relevant, irrelevant) label pair is
        examined and the fraction of pairs in which the relevant label has
        a strictly lower score than the irrelevant one is computed.  The
        result is the mean of these fractions over samples.  A sample whose
        labels are all relevant or all irrelevant has no pair to mis-order
        and contributes 0.

        :param score: predicted score matrix
        :param label: true label matrix
        :return: the ranking loss
        """
        score = np.asarray(score)
        label = np.asarray(label)

        total = 0.0
        for rowScore, rowLabel in zip(score, label):
            relevantScores = rowScore[rowLabel == 1]
            irrelevantScores = rowScore[rowLabel == 0]
            pairCount = relevantScores.size * irrelevantScores.size
            if pairCount == 0:
                # No pair exists; skip the row instead of dividing by zero.
                continue
            # Broadcast to compare every relevant score with every
            # irrelevant score in one step.
            misordered = np.count_nonzero(
                relevantScores[:, None] < irrelevantScores[None, :])
            total += misordered / float(pairCount)
        return total / label.shape[0]