surrogate model #77

Draft · wants to merge 18 commits into base: main
Changes from 17 commits
2 changes: 1 addition & 1 deletion .github/workflows/push.yaml
@@ -34,7 +34,7 @@ jobs:
needs: [ lint_and_type_check ]
strategy:
matrix:
python-version: [ 3.6, 3.7, 3.8, 3.9 ]
python-version: [ 3.7, 3.8, 3.9 ]

steps:
- uses: actions/checkout@v2
2 changes: 2 additions & 0 deletions .gitignore
@@ -140,3 +140,5 @@ catalog-*.xml
*.ttl
*.sqlite3
*.sqlite3-journal
*.dat
*.owl
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -6,11 +6,11 @@ repos:
args: ['--maxkb=100']
- id: check-yaml

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.910
hooks:
- id: mypy
args: ['--install-types', '--non-interactive', '--ignore-missing-imports']
#- repo: https://github.com/pre-commit/mirrors-mypy
# rev: v0.910
# hooks:
# - id: mypy
# args: ['--install-types', '--non-interactive', '--ignore-missing-imports']

- repo: https://github.com/psf/black
rev: 22.3.0
339 changes: 339 additions & 0 deletions examples/example_surrogate_hartman6D.py
@@ -0,0 +1,339 @@
"""
Example of a Bayesian parameter estimation problem using surrogate
modeling with probeye.

The six-dimensional Hartmann test function f: [0, 1]^6 -> R serves as the
(expensive) physical model. Its last two input dimensions are treated as
space and time coordinates, while the first four are latent parameters to
be inferred. Measurements are generated by adding i.i.d. Gaussian noise to
samples of this function.

Notes:
* A specific version of `harlow` is required to run this example, which can
be found here:
https://github.com/TNO/harlow/tree/implement-data-container
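* The example additionally relies on `torch` and `botorch` (for the Hartmann
  test function) as well as `matplotlib` for plotting.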
"""

# =========================================================================
# Imports
# =========================================================================

# third party imports
import numpy as np

# local imports (problem definition)
from probeye.definition.inverse_problem import InverseProblem
from probeye.definition.forward_model import ForwardModelBase
from probeye.definition.sensor import Sensor
from probeye.definition.likelihood_model import GaussianLikelihoodModel
from probeye.inference.emcee.solver import EmceeSolver
from probeye.definition.distribution import Uniform
from probeye.definition.surrogate_model import HarlowModelFactory

# local imports (inference data post-processing)
from probeye.postprocessing.sampling_plots import create_pair_plot
from probeye.postprocessing.sampling_plots import create_posterior_plot

# Surrogate model imports
from harlow.sampling import LatinHypercube, FuzzyLolaVoronoi
from harlow.surrogating import (
Surrogate,
ModelListGaussianProcess,
VanillaGaussianProcess,
)
from harlow.utils.transforms import (
ExpandDims,
TensorTransform,
ChainTransform,
Identity,
)

import torch
from botorch.test_functions.synthetic import Hartmann
from matplotlib import pyplot as plt

# =========================================================================
# General settings
# =========================================================================

plot = True

# Emcee settings
n_steps = 1_000
n_init_steps = 200
n_walkers = 20

# Harlow adaptive sampler settings
n_init = 250
n_iter = 0
n_point_per_iter = 2

# Surrogate settings
N_train_iter = 50
show_progress = True

# =========================================================================
# Define parameters
# =========================================================================

# Ground truth
X_true = np.array([0.5, 0.5, 0.5, 0.5])

# Bounds for function defined on unit hypercube
X_low = 0.0
X_high = 1.0

# Ground truth and prior for measurement uncertainty std. dev.
std_true = 0.05
std_low = 0.0
std_high = 1.0

# =========================================================================
# Define physical model
# =========================================================================

# Number of sensors and number of points in the time series
Nx = 3
Nt = 5

# Sensor names and positions
sensor_names = ["S" + str(i + 1) for i in range(Nx)]
x_vec = np.linspace(0, 1, Nx)
t_vec = np.linspace(0, 1, Nt)
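# The input sensor carries the time coordinate; each output sensor corresponds to
# one spatial position and shares the noise std. dev. parameter "sigma"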

isensor = Sensor("t")
osensor_list = [
Sensor(sensor_names[i], x=float(x_vec[i]), std_model="sigma") for i in range(Nx)
]

# =========================================================================
# Define forward model
# =========================================================================

# Initialize model
expensive_model = Hartmann(noise_std=0.00001)


class SyntheticModel(ForwardModelBase):
def interface(self):
self.parameters = ["X" + str(i + 1) for i in range(4)]
self.input_sensors = isensor
self.output_sensors = osensor_list

def response(self, inp: dict) -> dict:

        # Arrange the input vector: tile the latent parameters over the (x, t) grid
params = np.tile([inp["X" + str(i + 1)] for i in range(4)], (Nx * Nt, 1))
xt = np.array(np.meshgrid(x_vec, t_vec)).T.reshape(-1, 2)
X = torch.tensor(np.hstack((params, xt)))
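        # X has shape (Nx * Nt, 6): four latent parameters plus (x, t) coordinates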

        # Evaluate the function and arrange the output on the (Nx, Nt) grid
f = np.array(expensive_model(X)).reshape(Nx, Nt)

# Store prediction as dict
response = dict()
for idx_x, os in enumerate(self.output_sensors):
response[os.name] = f[idx_x, :]
return response


# =========================================================================
# Define inference problem
# =========================================================================
problem = InverseProblem("Parameter estimation using surrogate model")

# Parameters of the Hartmann function
for i in range(4):
problem.add_parameter(
"X" + str(i + 1),
"model",
prior=Uniform(low=X_low, high=X_high),
info="Parameter of the 6D Hartmann function",
tex=r"$X_{{{}}}$".format(i + 1),
)

# Noise std. dev.
problem.add_parameter(
"sigma",
"likelihood",
prior=Uniform(low=std_low, high=std_high),
info="Std. dev. of zero-mean noise model",
tex=r"$\sigma$",
)

# Instantiate the forward model (added to the problem after data generation below)
forward_model = SyntheticModel("ExpensiveModel")

# =========================================================================
# Add test data to the inference problem
# =========================================================================
def generate_data():
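    """Generate noisy synthetic measurements at the true parameter values and
    register them as the 'TestSeriesFull' and 'TestSeriesSurrogate' experiments."""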
inp = {"X" + str(idx + 1): X_i for idx, X_i in enumerate(X_true)}
sensors = forward_model(inp)
for sname, svals in sensors.items():
sensors[sname] = list(
np.array(svals) + np.random.normal(loc=0.0, scale=std_true, size=Nt)
)

# To avoid errors when `Nt = 1`
if len(t_vec) == 1:
sensors[isensor.name] = float(t_vec[0])
else:
sensors[isensor.name] = t_vec

# Add experiments
problem.add_experiment("TestSeriesFull", sensor_data=sensors)
problem.add_experiment("TestSeriesSurrogate", sensor_data=sensors)


# Generate data and add expensive forward model
generate_data()
problem.add_forward_model(forward_model, experiments="TestSeriesFull")

# =========================================================================
# Create surrogate model
# =========================================================================
n_params = 4
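# One list of latent-parameter indices per surrogate output, i.e. one output for
# each of the len(sensor_names) * len(t_vec) sensor/time combinations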
list_params = [[i for i in range(n_params)]] * len(sensor_names) * len(t_vec)

surrogate_kwargs = {
"training_max_iter": N_train_iter,
"list_params": list_params,
"show_progress": True,
"silence_warnings": True,
"fast_pred_var": True,
"input_transform": Identity,
"output_transform": Identity,
}

# Build the surrogate via the model factory: FuzzyLolaVoronoi is the adaptive
# sampler, VanillaGaussianProcess the surrogate type
model_factory = HarlowModelFactory(
problem,
forward_model,
FuzzyLolaVoronoi,
VanillaGaussianProcess,
**surrogate_kwargs,
)

# Fit the surrogate: n_init initial points, no adaptive iterations (n_iter = 0)
model_factory.sample(
n_iter=n_iter,
n_initial_point=n_init,
n_new_point_per_iteration=n_point_per_iter,
)

# Get forward model instance
forward_surrogate_model = model_factory.get_harlow_model("FastModel")
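# "FastModel" is the name under which the surrogate forward model is registered; it
# is attached to the "TestSeriesSurrogate" experiment below, while the expensive
# model stays attached to "TestSeriesFull"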

# =========================================================================
# Add forward models
# =========================================================================

# Add surrogate model to forward models
problem.add_forward_model(forward_surrogate_model, experiments="TestSeriesSurrogate")

# add the likelihood models to the problem
for osensor in osensor_list:
problem.add_likelihood_model(
GaussianLikelihoodModel(
experiment_name="TestSeriesSurrogate",
model_error="additive",
)
)

# ====================================================================
# Plot surrogate vs. physical model prediction
# ====================================================================

# Sweep X1 over [X_low, X_high] while keeping the remaining parameters at their
# true values
arr_x = np.linspace(X_low, X_high, 100)
inp = {"X" + str(idx + 1): X_i for idx, X_i in enumerate(X_true)}

# Initialize zero arrays to store results
y_true = np.zeros((len(arr_x), len(sensor_names) * len(t_vec)))
y_pred = np.zeros((len(arr_x), len(sensor_names) * len(t_vec)))

# Evaluate the physical and surrogate models for each value of X1
for idx_xi, xi in enumerate(arr_x):
inp["X1"] = xi
res_true = forward_model.response(inp)
res_pred = forward_surrogate_model.response(inp)
sensor_out_true = []
sensor_out_pred = []
for idx_os, os in enumerate(osensor_list):
sensor_out_true.append(res_true[os.name])
sensor_out_pred.append(res_pred[os.name])
y_true[idx_xi, :] = np.ravel(sensor_out_true)
y_pred[idx_xi, :] = np.ravel(sensor_out_pred)

# Plot
nrows = 3
ncols = int(np.ceil(len(sensor_names) * len(t_vec) / 3))
f, axes = plt.subplots(nrows, ncols, sharex=True, figsize=(3 * ncols, 3 * nrows))
for j in range(len(sensor_names) * len(t_vec)):
ax_i = axes.ravel()[j]
grid_idx = np.unravel_index(j, (nrows, ncols))
ax_i.plot(arr_x, y_true[:, j], color="blue", label="True")
ax_i.plot(arr_x, y_pred[:, j], color="red", linestyle="dashed", label="Surrogate")
ax_i.set_title(
f"Sensor: {str(sensor_names[grid_idx[0]]) + '_' + str(t_vec[grid_idx[1]])}"
)

axes = np.atleast_2d(axes)
for ax_i in axes[-1, :]:
    ax_i.set_xlabel(r"$X_1$")
axes[0, 0].legend()
plt.show()


# =========================================================================
# Problem overview and true parameter values
# =========================================================================

# Print a summary of the inverse problem
problem.info()

true_values = {
"X1": X_true[0],
"X2": X_true[1],
"X3": X_true[2],
"X4": X_true[3],
"sigma": std_true,
}

# =========================================================================
# Initialize and run solver
# =========================================================================

solver = EmceeSolver(
problem,
show_progress=True,
)


inference_data = solver.run(
n_walkers=n_walkers, n_steps=n_steps, n_initial_steps=n_init_steps, vectorize=False
)

# =========================================================================
# Plotting
# =========================================================================
create_pair_plot(
inference_data,
solver.problem,
show=False,
true_values=true_values,
title="Joint posterior",
)

create_posterior_plot(
inference_data,
solver.problem,
show=False,
true_values=true_values,
title="Marginal posteriors",
)


if plot:
plt.show() # shows all plots at once due to 'show=False' above
else:
plt.close("all")
1 change: 1 addition & 0 deletions probeye/definition/__init__.py
@@ -2,6 +2,7 @@
from probeye.definition import inverse_problem
from probeye.definition import inference_problem
from probeye.definition import forward_model
from probeye.definition import surrogate_model
from probeye.definition import likelihood_model
from probeye.definition import distribution
from probeye.definition import parameter