-
Notifications
You must be signed in to change notification settings - Fork 564
/
Copy pathlogme.py
95 lines (81 loc) · 2.84 KB
/
logme.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""
@author: Yong Liu
@contact: [email protected]
"""
import numpy as np
from numba import njit
__all__ = ['log_maximum_evidence']
def log_maximum_evidence(features: np.ndarray, targets: np.ndarray, regression=False, return_weights=False):
    r"""
    Log Maximum Evidence in `LogME: Practical Assessment of Pre-trained Models
    for Transfer Learning (ICML 2021) <https://arxiv.org/pdf/2102.11005.pdf>`_.

    One evidence value is computed per target vector (one per class index in
    classification, one per target column in regression) and the values are
    averaged into a single score.

    Args:
        features (np.ndarray): feature matrix from pre-trained model.
        targets (np.ndarray): targets labels/values.
        regression (bool, optional): whether to apply in regression setting. (Default: False)
        return_weights (bool, optional): whether to return bayesian weight. (Default: False)

    Returns:
        ``score`` if ``return_weights`` is False, otherwise the tuple
        ``(score, weights)``.

    Shape:
        - features: (N, F) with feature dimension F.
        - targets: (N, ) with element in [0, :math:`C_t`) for classification, where
          :math:`C_t` denotes the number of target class (taken as ``targets.max() + 1``);
          (N, ) or (N, C) with C regression-labels for regression.
        - weights: (:math:`C_t`, F) for classification, (C, F) for regression.
        - score: scalar.
    """
    f = features.astype(np.float64)
    y = targets
    if regression:
        y = targets.astype(np.float64)
        if y.ndim == 1:
            # A single regression target given as (N, ) is handled as one
            # column; indexing y.shape[1] on a 1-D array would raise.
            y = y.reshape(-1, 1)

    fh = f
    f = f.transpose()
    D, N = f.shape
    # f @ fh is the (F, F) matrix shared by every target, so it is
    # decomposed once and reused for each call to each_evidence.
    v, s, vh = np.linalg.svd(f @ fh, full_matrices=True)

    # Target vectors are produced lazily so that only one one-hot column is
    # alive at a time in the classification case.
    if regression:
        target_vectors = (y[:, i] for i in range(y.shape[1]))
    else:
        num_classes = int(y.max() + 1)
        target_vectors = ((y == i).astype(np.float64) for i in range(num_classes))

    evidences = []
    weights = []
    for y_ in target_vectors:
        evidence, weight = each_evidence(y_, f, fh, v, s, vh, N, D)
        evidences.append(evidence)
        weights.append(weight)

    score = np.mean(evidences)
    if return_weights:
        return score, np.vstack(weights)
    return score
@njit
def each_evidence(y_, f, fh, v, s, vh, N, D):
    """
    Compute the maximum evidence for a single target vector.

    ``alpha`` and ``beta`` both start at 1.0 and are re-estimated in a loop of
    at most 11 rounds; the loop stops early once the ratio ``alpha / beta``
    changes by less than 1% relative to its previous value.

    Args:
        y_: target vector (one class indicator or one regression column).
        f: matrix applied to ``y_`` (the caller passes the transposed features).
        fh: matrix applied to the weight vector (the caller passes the features).
        v, s, vh: factors returned by ``np.linalg.svd(f @ fh)`` in the caller.
        N: normalizer of the evidence (the caller passes ``f.shape[1]``).
        D: multiplier of the ``log(alpha)`` term (the caller passes ``f.shape[0]``).

    Returns:
        Tuple ``(evidence / N, weight)`` where ``weight`` is the vector from
        the last executed round.

    Note:
        The squared weight norm and the squared residual are used as divisors
        without any guard against zero.
    """
    alpha = 1.0
    beta = 1.0
    lam = alpha / beta
    # Does not depend on alpha/beta, so it is computed once up front.
    projected = vh @ (f @ y_)

    # should converge after at most 10 steps
    # typically converge after two or three steps
    for _ in range(11):
        gamma = (s / (s + lam)).sum()
        weight = v @ (projected * beta / (alpha + beta * s))

        weight_sq_norm = (weight * weight).sum()
        alpha = gamma / weight_sq_norm

        residual = y_ - fh @ weight
        residual_sq_norm = (residual ** 2).sum()
        beta = (N - gamma) / residual_sq_norm

        next_lam = alpha / beta
        relative_change = np.abs(next_lam - lam) / lam
        if relative_change < 0.01:
            break
        lam = next_lam

    # Terms are accumulated in the same order as a single chained expression.
    half_n = N / 2.0
    evidence = D / 2.0 * np.log(alpha)
    evidence = evidence + half_n * np.log(beta)
    evidence = evidence - 0.5 * np.sum(np.log(alpha + beta * s))
    evidence = evidence - beta / 2.0 * residual_sq_norm
    evidence = evidence - alpha / 2.0 * weight_sq_norm
    evidence = evidence - half_n * np.log(2 * np.pi)
    return evidence / N, weight