-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsym_reg.py
104 lines (91 loc) · 3.94 KB
/
sym_reg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
import pickle
from pysr import PySRRegressor
import itertools
from tqdm import tqdm
from utils import get_scalings, get_naming_str, sublist_idx
def symbolic_regression(inp, x_vals, y_vals):
    '''
    Fit a symbolic regression model mapping scaling points to spectrum ratios.

    ARGUMENTS
    ---------
    inp: Info object containing input parameter specifications
    x_vals: list of points, where each point contains value of Acomp1, Acomp2, etc.
    y_vals: Clpq evaluated at scaled points / Clpq evaluated at all unscaled points

    RETURNS
    -------
    sympy expression of best fit model
    '''
    # All PySR settings gathered in one place; temporary equation files are
    # written under the run's output directory and removed when fitting ends.
    pysr_settings = dict(
        niterations=50,  # increase for better results
        ncycles_per_iteration=1000,
        progress=False,
        maxsize=12,
        binary_operators=["*", "+", "-", "/"],
        unary_operators=["exp", "square", "cube", "inv(x) = 1/x"],
        extra_sympy_mappings={"inv": lambda x: 1 / x},
        elementwise_loss="loss(prediction, target) = (prediction - target)^2",
        verbosity=0,
        temp_equation_file=True,
        tempdir=inp.output_dir,
        delete_tempfiles=True,
    )
    regressor = PySRRegressor(**pysr_settings)
    regressor.fit(x_vals, y_vals)
    return regressor.sympy()
def call_fit(A_vec, expr):
    '''
    Evaluate a best-fit expression at a specific parameter point.

    ARGUMENTS
    ---------
    A_vec: list of [Acomp1, Acomp2, etc.] independent variables
    expr: sympy expression of best fit involving parameters Acomp1, Acomp2, etc.
        which map to x0, x1, etc., respectively

    RETURNS
    -------
    numerical evaluation of expr at the point given by A_vec
    '''
    # Substitute each amplitude sequentially: Acomp_i -> symbol 'x{i}'.
    # enumerate avoids the range(len(...)) indexing anti-pattern.
    for i, amplitude in enumerate(A_vec):
        expr = expr.subs(f'x{i}', amplitude)
    return expr
def get_parameter_dependence(inp, Clpq, HILC=False):
    '''
    Fit the dependence of each ILC auto-/cross-spectrum on the component amplitudes.

    ARGUMENTS
    ---------
    inp: Info object containing input parameter specifications
    Clpq: (Nsims_for_fits, Nscalings, 2**Ncomps, Ncomps, Ncomps, Nbins) ndarray
        containing HILC/NILC map auto- and cross-spectra with different component scalings.
        dim1: idx0 if "scaled" means maps are scaled according to scaling factor 0 from input,
        idx1 if "scaled" means maps are scaled according to scaling factor 1 from input, etc. up to idx Nscalings
        dim2: indices correspond to different combinations of scaled and unscaled components
    HILC: Bool, set to True if computing parameter dependence for harmonic ILC, False if for needlet ILC

    RETURNS
    -------
    best_fits: (Ncomps, Ncomps, Nbins) list
        containing sympy expressions with best fits in terms of parameters
    '''
    print('\nRunning symbolic regression to get parameter dependence. This may take some time.', flush=True)
    Ncomps = len(inp.comps)
    # average over simulations to reduce noise before fitting
    Clpq_mean = np.mean(Clpq[:inp.Nsims_for_fits], axis=0)
    # all binary combinations of scaled (1) / unscaled (0) components
    comp_scalings = [list(i) for i in itertools.product([0, 1], repeat=Ncomps)]
    best_fits = np.zeros((Ncomps, Ncomps, inp.Nbins)).tolist()  # need nested lists to store sympy expressions
    scalings = get_scalings(inp)
    pbar = tqdm(total=Ncomps**2 * inp.Nbins)
    for p in range(Ncomps):
        for q in range(Ncomps):
            for b in range(inp.Nbins):  # renamed from 'bin' to avoid shadowing the builtin
                x_vals, y_vals = [], []
                for s in scalings:
                    # s[0] selects which scaling factor; s[1:] flags which components are scaled
                    scaling_factor = (inp.scaling_factors[s[0]])**2
                    x = np.ones(Ncomps)
                    x[np.array(s[1:]) == 1] = scaling_factor
                    x_vals.append(x)
                    # normalize by the fully-unscaled spectrum so fits give relative amplitude dependence
                    y_vals.append(Clpq_mean[s[0], sublist_idx(comp_scalings, s[1:]), p, q, b] / Clpq_mean[0, 0, p, q, b])
                best_fits[p][q][b] = symbolic_regression(inp, x_vals, y_vals)
                if inp.verbose:
                    print(f'estimated parameter dependence for p,q,bin={p},{q},{b}', flush=True)
                pbar.update(1)
    pbar.close()
    if inp.save_files:
        pipeline = 'HILC' if HILC else 'NILC'
        naming_str = get_naming_str(inp, pipeline)
        filename = f'{inp.output_dir}/data_vecs/best_fits_{naming_str}.p'
        # use a context manager so the file handle is closed even if dump raises
        with open(filename, 'wb') as f:
            pickle.dump(best_fits, f, protocol=4)
        print(f'saved {filename}', flush=True)  # fixed: f-string previously had no placeholder
    return best_fits