diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 41f09c72c..39ccaab09 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/ambv/black - rev: 22.8.0 + rev: 22.10.0 hooks: - id: black - repo: https://github.com/pycqa/isort @@ -10,6 +10,7 @@ repos: args: - "--profile=black" - "--filter-files" + - "--project=autora" - repo: https://github.com/pycqa/flake8 rev: 5.0.4 hooks: @@ -19,7 +20,7 @@ repos: - "--extend-ignore=E203" - "--per-file-ignores=__init__.py:F401" - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v0.971" + rev: "v0.991" hooks: - id: mypy additional_dependencies: [types-requests] diff --git a/autora/experimentalist/filter.py b/autora/experimentalist/filter.py index ff2bc6873..58a9139cd 100644 --- a/autora/experimentalist/filter.py +++ b/autora/experimentalist/filter.py @@ -1,2 +1,128 @@ +from enum import Enum +from typing import Callable, Iterable, Tuple + +import numpy as np + + def weber_filter(values): return filter(lambda s: s[0] <= s[1], values) + + +def train_test_filter( + seed: int = 180, train_p: float = 0.5 +) -> Tuple[Callable[[Iterable], Iterable], Callable[[Iterable], Iterable]]: + """ + A pipeline filter which pseudorandomly assigns values from the input into "train" or "test" + groups. This is particularly useful when working with streams of data of potentially + unbounded length. + + This isn't a great method for small datasets, as it doesn't guarantee producing training + and test sets which are as close as possible to the specified desired proportions. + Consider using the scikit-learn `train_test_split` for cases where it's practical to + enumerate the full dataset in advance. + + Args: + seed: random number generator seeding value + train_p: proportion of data which go into the training set. A float between 0 and 1. + + Returns: + a tuple of callables `(train_filter, test_filter)` which split the input data + into two complementary streams. + + + Examples: + We can create complementary train and test filters using the function: + >>> train_filter, test_filter = train_test_filter(train_p=0.6, seed=180) + + The `train_filter` generates a sequence of ~60% of the input list – + in this case, 15 of 20 datapoints. + Note that the correct split would be 12 of 20 data points. + Again, for data with bounded length it is advisable + to use scikit-learn `train_test_split` instead. + >>> list(train_filter(range(20))) + [0, 2, 3, 4, 5, 6, 9, 10, 11, 12, 15, 16, 17, 18, 19] + + When we run the `test_filter`, it fills in the gaps, giving us the remaining 5 values: + >>> list(test_filter(range(20))) + [1, 7, 8, 13, 14] + + We can continue to generate new values for as long as we like using the same filter and the + continuation of the input range: + >>> list(train_filter(range(20, 40))) + [20, 22, 23, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39] + + ... and some more. + >>> list(train_filter(range(40, 50))) + [41, 42, 44, 45, 46, 49] + + As the number of samples grows, the fraction in the train and test sets + will approach `train_p` and `1 - train_p`. + + The test_filter fills in the gaps again. + >>> list(test_filter(range(20, 30))) + [21, 24, 25, 26] + + If you rerun the *same* test_filter on a fresh range, then the results will be different + to the first time around: + >>> list(test_filter(range(20))) + [5, 10, 13, 17, 18] + + ... 
but if you regenerate the test_filter, it'll reproduce the original sequence + >>> _, test_filter_regenerated = train_test_filter(train_p=0.6, seed=180) + >>> list(test_filter_regenerated(range(20))) + [1, 7, 8, 13, 14] + + It also works on tuple-valued lists: + >>> from itertools import product + >>> train_filter_tuple, test_filter_tuple = train_test_filter(train_p=0.3, seed=42) + >>> list(test_filter_tuple(product(["a", "b"], [1, 2, 3]))) + [('a', 1), ('a', 2), ('a', 3), ('b', 1), ('b', 3)] + + >>> list(train_filter_tuple(product(["a","b"], [1,2,3]))) + [('b', 2)] + + >>> from itertools import count, takewhile + >>> train_filter_unbounded, test_filter_unbounded = train_test_filter(train_p=0.5, seed=21) + + >>> list(takewhile(lambda s: s < 90, count(79))) + [79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] + + >>> train_pool = train_filter_unbounded(count(79)) + >>> list(takewhile(lambda s: s < 90, train_pool)) + [82, 85, 86, 89] + + >>> test_pool = test_filter_unbounded(count(79)) + >>> list(takewhile(lambda s: s < 90, test_pool)) + [79, 80, 81, 83, 84, 87, 88] + + >>> list(takewhile(lambda s: s < 110, test_pool)) + [91, 93, 94, 97, 100, 105, 106, 109] + + """ + + test_p = 1 - train_p + + _TrainTest = Enum("_TrainTest", ["train", "test"]) + + def train_test_stream(): + """Generates a pseudorandom stream of _TrainTest.train and _TrainTest.test.""" + rng = np.random.default_rng(seed) + while True: + yield rng.choice([_TrainTest.train, _TrainTest.test], p=(train_p, test_p)) + + def _factory(allow): + """Factory to make complementary generators which split their input + corresponding to the values of the pseudorandom train_test_stream.""" + _stream = train_test_stream() + + def _generator(values): + """Generator which yields items from the `values` depending on + whether the corresponding item from the `_stream` + matches the `allow` parameter.""" + for v, train_test in zip(values, _stream): + if train_test == allow: + yield v + + return _generator + + return _factory(_TrainTest.train), _factory(_TrainTest.test) diff --git a/autora/experimentalist/sampler/model_disagreement.py b/autora/experimentalist/sampler/model_disagreement.py new file mode 100644 index 000000000..20a9b805f --- /dev/null +++ b/autora/experimentalist/sampler/model_disagreement.py @@ -0,0 +1,66 @@ +import itertools +from typing import Iterable, List + +import numpy as np + + +def model_disagreement_sampler(X: np.array, models: List, num_samples: int = 1): + """ + A sampler that returns selected samples for independent variables + for which the models disagree the most in terms of their predictions. 
+ + Args: + X: pool of IV conditions to evaluate in terms of model disagreement + models: List of Scikit-learn (regression or classification) models to compare + num_samples: number of samples to select + + Returns: Sampled pool + """ + + if isinstance(X, Iterable): + X = np.array(list(X)) + + X_predict = np.array(X) + if len(X_predict.shape) == 1: + X_predict = X_predict.reshape(-1, 1) + + model_disagreement = list() + + # collect disagreements for each model pair + for model_a, model_b in itertools.combinations(models, 2): + + # determine the prediction method + if hasattr(model_a, "predict_proba") and hasattr(model_b, "predict_proba"): + model_a_predict = model_a.predict_proba + model_b_predict = model_b.predict_proba + elif hasattr(model_a, "predict") and hasattr(model_b, "predict"): + model_a_predict = model_a.predict + model_b_predict = model_b.predict + else: + raise AttributeError( + "Models must both have `predict_proba` or `predict` method." + ) + + # get predictions from both models + y_a = model_a_predict(X_predict) + y_b = model_b_predict(X_predict) + + assert y_a.shape == y_b.shape, "Models must have same output shape." + + # determine the disagreement between the two models in terms of mean-squared error + if len(y_a.shape) == 1: + disagreement = (y_a - y_b) ** 2 + else: + disagreement = np.mean((y_a - y_b) ** 2, axis=1) + + model_disagreement.append(disagreement) + + assert len(model_disagreement) >= 1, "No disagreements to compare." + + # sum up all model disagreements + summed_disagreement = np.sum(model_disagreement, axis=0) + + # sort the summed disagreements and select the top n + idx = (-summed_disagreement).argsort()[:num_samples] + + return X[idx] diff --git a/autora/experimentalist/sampler/poppernet.py b/autora/experimentalist/sampler/poppernet.py new file mode 100644 index 000000000..9508deaad --- /dev/null +++ b/autora/experimentalist/sampler/poppernet.py @@ -0,0 +1,340 @@ +from typing import Iterable, Optional, Tuple, cast + +import numpy as np +import torch +from torch import nn +from torch.autograd import Variable + +from autora.variable import ValueType, VariableCollection + + +def poppernet_pooler( + model, + x_train: np.ndarray, + y_train: np.ndarray, + metadata: VariableCollection, + num_samples: int = 100, + training_epochs: int = 1000, + optimization_epochs: int = 1000, + training_lr: float = 1e-3, + optimization_lr: float = 1e-3, + mse_scale: float = 1, + limit_offset: float = 10**-10, + limit_repulsion: float = 0, +): + """ + A pooler that generates samples for independent variables with the objective of maximizing the + (approximated) loss of the model. The samples are generated by first training a neural network + to approximate the loss of a model for all patterns in the training data. Once trained, the + network is then inverted to generate samples that maximize the approximated loss of the model. + + Note: If the pooler returns samples that are close to the boundaries of the variable space, + then it is advisable to increase the limit_repulsion parameter (e.g., to 0.000001). 
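For orientation, here is a minimal usage sketch of the `model_disagreement_sampler` added above (not part of the diff). It assumes two scikit-learn regressors with different inductive biases fitted on synthetic quadratic data; the names `linear_model`, `poly_model`, and `pool` are illustrative only, and the expected output is approximate.

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

from autora.experimentalist.sampler.model_disagreement import model_disagreement_sampler

# synthetic quadratic data on [-1, 1]
x = np.linspace(-1, 1, 100).reshape(-1, 1)
y = x.ravel() ** 2

# two models with different inductive biases
linear_model = LinearRegression().fit(x, y)
poly_model = make_pipeline(PolynomialFeatures(degree=3), LinearRegression()).fit(x, y)

# candidate conditions; the models should disagree most far outside the training range
pool = np.linspace(-2, 2, 101)
samples = model_disagreement_sampler(pool, [linear_model, poly_model], num_samples=2)
print(samples)  # expected to lie at or near the pool boundaries, e.g. [ 2. -2.]

(The argument documentation and body of `poppernet_pooler` continue below.)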
+ + Args: + model: Scikit-learn model, could be either a classification or regression model + x_train: data that the model was trained on + y_train: labels that the model was trained on + metadata: Meta-data about the dependent and independent variables + num_samples: number of samples to return + training_epochs: number of epochs to train the popper network for approximating the + error of the model + optimization_epochs: number of epochs to optimize the samples based on the trained + popper network + training_lr: learning rate for training the popper network + optimization_lr: learning rate for optimizing the samples + mse_scale: scale factor for the MSE loss + limit_offset: a small offset to prevent the samples from being too close to the value + boundaries + limit_repulsion: a repulsion factor to prevent the samples from being too close to the + allowed value boundaries + + Returns: Sampled pool + + """ + + # format input + + x_train = np.array(x_train) + if len(x_train.shape) == 1: + x_train = x_train.reshape(-1, 1) + + x = np.empty([num_samples, x_train.shape[1]]) + + y_train = np.array(y_train) + if len(y_train.shape) == 1: + y_train = y_train.reshape(-1, 1) + + if metadata.dependent_variables[0].type == ValueType.CLASS: + # find all unique values in y_train + num_classes = len(np.unique(y_train)) + y_train = class_to_onehot(y_train, n_classes=num_classes) + + x_train_tensor = torch.from_numpy(x_train).float() + + # create list of IV limits + ivs = metadata.independent_variables + iv_limit_list = list() + for iv in ivs: + if hasattr(iv, "value_range"): + value_range = cast(Tuple, iv.value_range) + lower_bound = value_range[0] + upper_bound = value_range[1] + iv_limit_list.append(([lower_bound, upper_bound])) + + # get dimensions of input and output + n_input = len(metadata.independent_variables) + n_output = len(metadata.dependent_variables) + + # get input pattern for popper net + popper_input = Variable(torch.from_numpy(x_train), requires_grad=False).float() + + # get target pattern for popper net + model_predict = getattr(model, "predict_proba", None) + if callable(model_predict) is False: + model_predict = getattr(model, "predict", None) + + if callable(model_predict) is False or model_predict is None: + raise Exception("Model must have `predict` or `predict_proba` method.") + + model_prediction = model_predict(x_train) + + criterion = nn.MSELoss() + model_loss = (model_prediction - y_train) ** 2 * mse_scale + model_loss = np.mean(model_loss, axis=1) + model_loss = torch.from_numpy(model_loss).float() + popper_target = Variable(model_loss, requires_grad=False) + + # create the network + popper_net = PopperNet(n_input, n_output) + + # reformat input in case it is 1D + if len(popper_input.shape) == 1: + popper_input = popper_input.flatten() + popper_input = popper_input.reshape(-1, 1) + + # define the optimizer + popper_optimizer = torch.optim.Adam(popper_net.parameters(), lr=training_lr) + + # train the network + losses = [] + for epoch in range(training_epochs): + popper_prediction = popper_net(popper_input) + loss = criterion(popper_prediction, popper_target.reshape(-1, 1)) + popper_optimizer.zero_grad() + loss.backward() + popper_optimizer.step() + losses.append(loss.item()) + + # now that the popper network is trained we can sample new data points + # to sample data points we need to provide the popper network with an initial condition + # we will sample those initial conditions 
proportional to the loss of the current model + + # feed average model losses through softmax + # model_loss_avg= torch.from_numpy(np.mean(model_loss.detach().numpy(), axis=1)).float() + softmax_func = torch.nn.Softmax(dim=0) + probabilities = softmax_func(model_loss) + # sample data point in proportion to model loss + transform_category = torch.distributions.categorical.Categorical(probabilities) + + popper_net.freeze_weights() + + for condition in range(num_samples): + + index = transform_category.sample() + input_sample = torch.flatten(x_train_tensor[index, :]) + popper_input = Variable(input_sample, requires_grad=True) + + # invert the popper network to determine optimal experiment conditions + for optimization_epoch in range(optimization_epochs): + # feedforward pass on popper network + popper_prediction = popper_net(popper_input) + # compute gradient that maximizes output of popper network + # (i.e. predicted loss of original model) + popper_loss_optim = -popper_prediction + popper_loss_optim.backward() + # compute new input + # with torch.no_grad(): + # delta = -optimization_lr * popper_input.grad + # popper_input += -optimization_lr * popper_input.grad + # print(delta) + # popper_input.grad.zero_() + + with torch.no_grad(): + + # first add repulsion from variable limits + for idx in range(len(input_sample)): + iv_value = input_sample[idx] + iv_limits = iv_limit_list[idx] + dist_to_min = np.abs(iv_value - np.min(iv_limits)) + dist_to_max = np.abs(iv_value - np.max(iv_limits)) + repulsion_from_min = limit_repulsion / (dist_to_min**2) + repulsion_from_max = limit_repulsion / (dist_to_max**2) + iv_value_repulsed = ( + iv_value + repulsion_from_min - repulsion_from_max + ) + popper_input[idx] = iv_value_repulsed + + # now add gradient for theory loss maximization + delta = -optimization_lr * popper_input.grad + popper_input += delta + popper_input.grad.zero_() + + # finally, clip input variable from its limits + for idx in range(len(input_sample)): + iv_raw_value = input_sample[idx] + iv_limits = iv_limit_list[idx] + iv_clipped_value = np.min( + [iv_raw_value, np.max(iv_limits) - limit_offset] + ) + iv_clipped_value = np.max( + [ + iv_clipped_value, + np.min(iv_limits) + limit_offset, + ] + ) + popper_input[idx] = iv_clipped_value + + # add condition to new experiment sequence + for idx in range(len(input_sample)): + iv_limits = iv_limit_list[idx] + + # first clip value + iv_clipped_value = np.min([iv_raw_value, np.max(iv_limits) - limit_offset]) + iv_clipped_value = np.max( + [iv_clipped_value, np.min(iv_limits) + limit_offset] + ) + # make sure to convert variable to original scale + iv_clipped_scaled_value = iv_clipped_value + + x[condition, idx] = iv_clipped_scaled_value + + return x + + +def plot_popper_diagnostics(losses, popper_input, popper_prediction, popper_target): + print("Finished training Popper Network...") + import matplotlib.pyplot as plt + + if popper_input.shape[1] > 1: + plot_input = popper_input[:, 0] + plt.scatter(plot_input, popper_target.detach().numpy(), label="target") + plt.scatter(plot_input, popper_prediction.detach().numpy(), label="prediction") + else: + plot_input = popper_input + plt.plot(plot_input, popper_target.detach().numpy(), label="target") + plt.plot(plot_input, popper_prediction.detach().numpy(), label="prediction") + plt.xlabel("x") + plt.ylabel("y") + plt.legend() + plt.show() + plt.plot(losses) + plt.xlabel("epoch") + plt.ylabel("loss") + plt.show() + + +def nearest_values_sampler( + samples, + allowed_values, +): + """ + A sampler which 
returns the nearest values between the input samples and the allowed values, + without replacement. + + Args: + samples: input conditions + allowed_values: allowed conditions to sample from + + Returns: + the nearest values from `allowed_values` to the `samples` + + """ + + if isinstance(allowed_values, Iterable): + allowed_values = np.array(list(allowed_values)) + + if len(allowed_values.shape) == 1: + allowed_values = allowed_values.reshape(-1, 1) + + num_samples = samples.shape[0] + + if allowed_values.shape[0] <= num_samples: + raise Exception("More samples requested than values available in `allowed_values`.") + + x_new = np.empty((num_samples, allowed_values.shape[1])) + + # get index of row in allowed_values that is closest to each sample + for row, sample in enumerate(samples): + dist = np.linalg.norm(allowed_values - sample, axis=1) + idx = np.argmin(dist) + x_new[row, :] = allowed_values[idx, :] + allowed_values = np.delete(allowed_values, idx, axis=0) + + return x_new + + +# define the network +class PopperNet(nn.Module): + def __init__(self, n_input: int, n_output: int): + # Perform initialization of the pytorch superclass + super(PopperNet, self).__init__() + + # Define network layer dimensions + D_in, H1, H2, H3, D_out = [n_input, 64, 64, 64, n_output] + + # Define layer types + self.linear1 = nn.Linear(D_in, H1) + self.linear2 = nn.Linear(H1, H2) + self.linear3 = nn.Linear(H2, H3) + self.linear4 = nn.Linear(H3, D_out) + + def forward(self, x: torch.Tensor): + """ + This method defines the network layering and activation functions + """ + x = self.linear1(x) # hidden layer + x = torch.tanh(x) # activation function + + x = self.linear2(x) # hidden layer + x = torch.tanh(x) # activation function + + x = self.linear3(x) # hidden layer + x = torch.tanh(x) # activation function + + x = self.linear4(x) # output layer + + return x + + def freeze_weights(self): + for param in self.parameters(): + param.requires_grad = False + + +def class_to_onehot(y: np.array, n_classes: Optional[int] = None): + """Converts a class vector (integers) to a binary class matrix. + + E.g. for use with categorical_crossentropy. + + # Arguments + y: class vector to be converted into a matrix + (integers from 0 to num_classes). + n_classes: total number of classes. + + # Returns + A binary matrix representation of the input. 
+ """ + y = np.array(y, dtype="int") + input_shape = y.shape + if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: + input_shape = tuple(input_shape[:-1]) + y = y.ravel() + if not n_classes: + n_classes = np.max(y) + 1 + n = y.shape[0] + categorical = np.zeros((n, n_classes)) + categorical[np.arange(n), y] = 1 + output_shape = input_shape + (n_classes,) + categorical = np.reshape(categorical, output_shape) + return categorical diff --git a/autora/theorist/darts/architect.py b/autora/theorist/darts/architect.py index 63bc09f01..c7d723e29 100755 --- a/autora/theorist/darts/architect.py +++ b/autora/theorist/darts/architect.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np import torch import torch.nn.functional as F @@ -150,8 +152,8 @@ def step( target_valid: torch.Tensor, network_optimizer: torch.optim.Optimizer, unrolled: bool, - input_train: torch.Tensor = None, - target_train: torch.Tensor = None, + input_train: Optional[torch.Tensor] = None, + target_train: Optional[torch.Tensor] = None, eta: float = 1, ): """ diff --git a/autora/theorist/darts/model_search.py b/autora/theorist/darts/model_search.py index 490675912..738c39dc2 100755 --- a/autora/theorist/darts/model_search.py +++ b/autora/theorist/darts/model_search.py @@ -1,7 +1,7 @@ import random import warnings from enum import Enum -from typing import Callable, List, Literal, Sequence, Tuple +from typing import Callable, List, Literal, Optional, Sequence, Tuple import numpy as np import torch @@ -11,11 +11,11 @@ from autora.theorist.darts.fan_out import Fan_Out from autora.theorist.darts.operations import ( - OPS, PRIMITIVES, Genotype, get_operation_label, isiterable, + operation_factory, ) @@ -58,7 +58,7 @@ def __init__(self, primitives: Sequence[str] = PRIMITIVES): # loop through all the 8 primitive operations for primitive in primitives: # OPS returns an nn module for a given primitive (defines as a string) - op = OPS[primitive] + op = operation_factory(primitive) # add the operation self._ops.append(op) @@ -400,7 +400,9 @@ def arch_parameters(self) -> List: return self._arch_parameters # fixes architecture - def fix_architecture(self, switch: bool, new_weights: torch.Tensor = None): + def fix_architecture( + self, switch: bool, new_weights: Optional[torch.Tensor] = None + ): """ Freezes or unfreezes the architecture weights. diff --git a/autora/theorist/darts/operations.py b/autora/theorist/darts/operations.py index 69c9e5fd0..05603b04b 100755 --- a/autora/theorist/darts/operations.py +++ b/autora/theorist/darts/operations.py @@ -538,85 +538,115 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # defines all the operations. 
affine is turned off for cuda (optimization prposes) -OPS = { - "none": Zero(1), - "add": nn.Sequential(Identity()), - "subtract": nn.Sequential(NegIdentity()), - "mult": nn.Sequential( - nn.Linear(1, 1, bias=False), - ), - "linear": nn.Sequential(nn.Linear(1, 1, bias=True)), - "relu": nn.Sequential( - nn.ReLU(inplace=False), - ), - "linear_relu": nn.Sequential( - nn.Linear(1, 1, bias=True), - nn.ReLU(inplace=False), - ), - "logistic": nn.Sequential( - nn.Sigmoid(), - ), - "linear_logistic": nn.Sequential( - nn.Linear(1, 1, bias=True), - nn.Sigmoid(), - ), - "exp": nn.Sequential( - Exponential(), - ), - "linear_exp": nn.Sequential( - nn.Linear(1, 1, bias=True), - Exponential(), - ), - "cos": nn.Sequential( - Cosine(), - ), - "linear_cos": nn.Sequential( - nn.Linear(1, 1, bias=True), - Cosine(), - ), - "sin": nn.Sequential( - Sine(), - ), - "linear_sin": nn.Sequential( - nn.Linear(1, 1, bias=True), - Sine(), - ), - "tanh": nn.Sequential( - Tangens_Hyperbolicus(), - ), - "linear_tanh": nn.Sequential( - nn.Linear(1, 1, bias=True), - Tangens_Hyperbolicus(), - ), - "reciprocal": nn.Sequential( - MultInverse(), - ), - "linear_reciprocal": nn.Sequential( - nn.Linear(1, 1, bias=False), - MultInverse(), - ), - "ln": nn.Sequential( - NatLogarithm(), - ), - "linear_ln": nn.Sequential( - nn.Linear(1, 1, bias=False), - NatLogarithm(), - ), - "softplus": nn.Sequential( - Softplus(), - ), - "linear_softplus": nn.Sequential( - nn.Linear(1, 1, bias=False), - Softplus(), - ), - "softminus": nn.Sequential( - Softminus(), - ), - "linear_softminus": nn.Sequential( - nn.Linear(1, 1, bias=False), - Softminus(), - ), -} + + +def operation_factory(name): + + if name == "none": + return Zero(1) + elif name == "add": + return nn.Sequential(Identity()) + elif name == "subtract": + return nn.Sequential(NegIdentity()) + elif name == "mult": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + ) + elif name == "linear": + return nn.Sequential(nn.Linear(1, 1, bias=True)) + elif name == "relu": + return nn.Sequential( + nn.ReLU(inplace=False), + ) + elif name == "linear_relu": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + nn.ReLU(inplace=False), + ) + elif name == "logistic": + return nn.Sequential( + nn.Sigmoid(), + ) + elif name == "linear_logistic": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + nn.Sigmoid(), + ) + elif name == "exp": + return nn.Sequential( + Exponential(), + ) + elif name == "linear_exp": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + Exponential(), + ) + elif name == "cos": + return nn.Sequential( + Cosine(), + ) + elif name == "linear_cos": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + Cosine(), + ) + elif name == "sin": + return nn.Sequential( + Sine(), + ) + elif name == "linear_sin": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + Sine(), + ) + elif name == "tanh": + return nn.Sequential( + Tangens_Hyperbolicus(), + ) + elif name == "linear_tanh": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + Tangens_Hyperbolicus(), + ) + elif name == "reciprocal": + return nn.Sequential( + MultInverse(), + ) + elif name == "linear_reciprocal": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + MultInverse(), + ) + elif name == "ln": + return nn.Sequential( + NatLogarithm(), + ) + elif name == "linear_ln": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + NatLogarithm(), + ) + elif name == "softplus": + return nn.Sequential( + Softplus(), + ) + elif name == "linear_softplus": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + 
Softplus(), + ) + elif name == "softminus": + return nn.Sequential( + Softminus(), + ) + elif name == "linear_softminus": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + Softminus(), + ) + else: + raise NotImplementedError(f"operation {name=} is not implemented") + # this is the list of primitives actually used, # and it should be a set of names contained in the OPS dictionary @@ -632,4 +662,4 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # make sure that every primitive is in the OPS dictionary for name in PRIMITIVES: - assert name in OPS + assert operation_factory(name) is not None diff --git a/autora/theorist/darts/plot_utils.py b/autora/theorist/darts/plot_utils.py index 6524393a6..d256bf412 100755 --- a/autora/theorist/darts/plot_utils.py +++ b/autora/theorist/darts/plot_utils.py @@ -1,5 +1,6 @@ import os import typing +from typing import Optional import imageio import matplotlib @@ -33,9 +34,9 @@ def generate_darts_summary_figures( y_limit: typing.List[float], best_model_name: str, figure_size: typing.Tuple[int, int], - y_reference: typing.List[float] = None, + y_reference: Optional[typing.List[float]] = None, y_reference_label: str = "", - arch_samp_filter: str = None, + arch_samp_filter: Optional[str] = None, ): """ Generates a summary figure for a given DARTS study. @@ -118,22 +119,22 @@ def plot_darts_summary( y_label: str = "", x1_label: str = "", x2_label: str = "", - y_sem_name: str = None, + y_sem_name: Optional[str] = None, metric: str = "min", - y_reference: typing.List[float] = None, + y_reference: Optional[typing.List[float]] = None, y_reference_label: str = "", - figure_dimensions: typing.Tuple[int, int] = None, + figure_dimensions: Optional[typing.Tuple[int, int]] = None, title: str = "", legend_loc: int = 0, legend_font_size: int = 8, axis_font_size: int = 10, title_font_size: int = 10, show_legend: bool = True, - y_limit: typing.List[float] = None, - x_limit: typing.List[float] = None, - theorist_filter: str = None, - arch_samp_filter: str = None, - best_model_name: str = None, + y_limit: Optional[typing.List[float]] = None, + x_limit: Optional[typing.List[float]] = None, + theorist_filter: Optional[str] = None, + arch_samp_filter: Optional[str] = None, + best_model_name: Optional[str] = None, save: bool = False, figure_name: str = "figure", ): @@ -996,18 +997,18 @@ def __init__( def update( self, - train_error: np.array = None, - valid_error: np.array = None, - weights: np.array = None, - BIC: np.array = None, - AIC: np.array = None, - model_graph: str = None, - range_input1: np.array = None, - range_input2: np.array = None, - range_target: np.array = None, - range_prediction: np.array = None, - target: np.array = None, - prediction: np.array = None, + train_error: Optional[np.array] = None, + valid_error: Optional[np.array] = None, + weights: Optional[np.array] = None, + BIC: Optional[np.array] = None, + AIC: Optional[np.array] = None, + model_graph: Optional[str] = None, + range_input1: Optional[np.array] = None, + range_input2: Optional[np.array] = None, + range_target: Optional[np.array] = None, + range_prediction: Optional[np.array] = None, + target: Optional[np.array] = None, + prediction: Optional[np.array] = None, ): """ Update the debug plot with new data. 
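Returning to the `operation_factory` refactor in `operations.py` above: the old module-level `OPS` dictionary held already-instantiated modules, so every lookup of a primitive such as "linear" returned the same `nn.Linear` object, whereas the factory builds a fresh module on each call. A minimal sketch of the observable difference (an illustrative check, not part of the diff):

from autora.theorist.darts.operations import operation_factory

# each call returns a new, independently parameterised module
op_a = operation_factory("linear")
op_b = operation_factory("linear")
assert op_a is not op_b
assert op_a[0].weight is not op_b[0].weight  # no shared nn.Linear parameters

# with the old module-level OPS dict, OPS["linear"] always returned the same
# nn.Sequential instance, so its weights would have been shared by every user

This is consistent with the updated `MixedOp` code in `model_search.py`, which now calls `operation_factory(primitive)` for every cell instead of indexing `OPS`.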
diff --git a/autora/theorist/darts/utils.py b/autora/theorist/darts/utils.py index 7fa8b467f..79d6affbc 100755 --- a/autora/theorist/darts/utils.py +++ b/autora/theorist/darts/utils.py @@ -2,7 +2,7 @@ import glob import os import shutil -from typing import Callable, List, Tuple +from typing import Callable, List, Optional, Tuple import numpy as np import torch @@ -14,11 +14,11 @@ def create_output_file_name( file_prefix: str, - log_version: int = None, - weight_decay: float = None, - k: int = None, - seed: int = None, - theorist: str = None, + log_version: Optional[int] = None, + weight_decay: Optional[float] = None, + k: Optional[int] = None, + seed: Optional[int] = None, + theorist: Optional[str] = None, ) -> str: """ Creates a file name for the output file of a theorist study. @@ -278,7 +278,7 @@ def count_parameters_in_MB(model: Network) -> int: ) -def save(model: torch.nn.Module, model_path: str, exp_folder: str = None): +def save(model: torch.nn.Module, model_path: str, exp_folder: Optional[str] = None): """ Saves a model to a file. @@ -303,9 +303,9 @@ def load(model: torch.nn.Module, model_path: str): def create_exp_dir( path: str, - scripts_to_save: List = None, + scripts_to_save: Optional[List] = None, parent_folder: str = "exps", - results_folder: str = None, + results_folder: Optional[str] = None, ): """ Creates an experiment directory and saves all necessary scripts and files. diff --git a/autora/theorist/darts/visualize.py b/autora/theorist/darts/visualize.py index ac6648211..bf3055332 100755 --- a/autora/theorist/darts/visualize.py +++ b/autora/theorist/darts/visualize.py @@ -1,5 +1,6 @@ import logging import typing +from typing import Optional from graphviz import Digraph @@ -12,12 +13,12 @@ def plot( genotype: Genotype, filename: str, file_format: str = "pdf", - view_file: bool = None, + view_file: Optional[bool] = None, full_label: bool = False, param_list: typing.Tuple = (), input_labels: typing.Tuple = (), - out_dim: int = None, - out_fnc: str = None, + out_dim: Optional[int] = None, + out_fnc: Optional[str] = None, ): """ Generates a graphviz plot for a DARTS model based on the genotype of the model. 
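The signature changes in `architect.py`, `model_search.py`, `plot_utils.py`, `utils.py`, and `visualize.py` all follow one pattern: parameters that default to `None` now spell out `Optional[...]` instead of relying on the bare annotation. This lines up with the mypy bump to v0.991 at the top of the diff, since recent mypy releases enable `no_implicit_optional` by default (stated here as the likely motivation; the diff itself gives no rationale). A toy illustration of the pattern, using a hypothetical function name:

from typing import Optional

# flagged by newer mypy under default settings:
#     def save_stub(model_path: str, exp_folder: str = None) -> None: ...

# accepted: the Optional is written out explicitly
def save_stub(model_path: str, exp_folder: Optional[str] = None) -> None:
    """Hypothetical stand-in illustrating the annotation change only."""
    target = exp_folder if exp_folder is not None else "."
    print(f"would save to {model_path} under {target}")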
@@ -58,8 +59,8 @@ def darts_model_plot( full_label: bool = False, param_list: typing.Sequence = (), input_labels: typing.Sequence = (), - out_dim: int = None, - out_fnc: str = None, + out_dim: Optional[int] = None, + out_fnc: Optional[str] = None, decimals_to_display: int = 2, ) -> Digraph: """ diff --git a/example/pipeline/Experimentalist Pipeline Examples.ipynb b/example/pipeline/Experimentalist Pipeline Examples.ipynb index 53ab8718f..1f28a8c32 100644 --- a/example/pipeline/Experimentalist Pipeline Examples.ipynb +++ b/example/pipeline/Experimentalist Pipeline Examples.ipynb @@ -274,4 +274,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/tests/test_model_disagreement_sampler.py b/tests/test_model_disagreement_sampler.py new file mode 100644 index 000000000..ed987681e --- /dev/null +++ b/tests/test_model_disagreement_sampler.py @@ -0,0 +1,131 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import PolynomialFeatures + +from autora.experimentalist.sampler.model_disagreement import model_disagreement_sampler + + +def get_classification_data(n: int = 100): + x1 = np.linspace(0, 1, n) + x2 = np.linspace(0, 1, n) + + # cross product of x1 and x2 + X = np.array([(x1[i], x2[j]) for i in range(len(x1)) for j in range(len(x2))]) + + # create a vector of 0s and 1s which is 0 whenever x1 < 0.5 and x2 < 0.5 and 1 otherwise + y_A = np.zeros(n * n) + y_B = np.zeros(n * n) + y_A[(X[:, 0] >= 0.5) | (X[:, 1] >= 0.5)] = 1 + y_B[(X[:, 0] >= 0.5)] = 1 + + return X, y_A, y_B + + +def get_polynomial_data(n: int = 100): + x = np.linspace(-1, 1, 100) + y = x**2 + return x, y + + +@pytest.fixture +def synthetic_lr_models(): + """ + Creates two logistic regression classifier for 2 classes based on synthetic data. + Each classifier is trained on a different data set and thus should yield different predictions. + """ + X, y_A, y_B = get_classification_data() + model_A = LogisticRegression() + model_B = LogisticRegression() + model_A.fit(X, y_A) + model_B.fit(X, y_B) + + models = [model_A, model_B] + return models + + +@pytest.fixture +def synthetic_linr_model(): + """ + Creates linear regression based on synthetic data. + """ + x, y = get_polynomial_data() + model = LinearRegression() + model.fit(x.reshape(-1, 1), y) + return model + + +@pytest.fixture +def synthetic_poly_model(): + """ + Creates polynomial regression based on synthetic data. 
+ """ + x, y = get_polynomial_data() + + # define the steps in the pipeline + steps = [ + ( + "poly", + PolynomialFeatures(degree=3), + ), # transform input data into polynomial features + ("lr", LinearRegression()), # fit a linear regression model + ] + # create the pipeline + model = Pipeline(steps) + model.fit(x.reshape(-1, 1), y) + return model + + +@pytest.fixture +def classification_data_to_test(n=10): + x1 = np.linspace(0, 1, n) + x2 = np.linspace(0, 1, n) + + # cross product of x1 and x2 + X = np.array([(x1[i], x2[j]) for i in range(len(x1)) for j in range(len(x2))]) + return X + + +@pytest.fixture +def regression_data_to_test(n=100): + data = np.linspace(-2, 2, n) + return data + + +def test_model_disagreement_classification( + synthetic_lr_models, classification_data_to_test +): + + num_requested_samples = 10 + + # Import model and data + X = classification_data_to_test + models = synthetic_lr_models + + # Run model disagreement sampler + samples = model_disagreement_sampler(X, models, num_requested_samples) + + assert samples.shape[0] == num_requested_samples + assert samples[0, 0] < 0.25 and samples[0, 1] > 0.75 + assert samples[1, 0] < 0.25 and samples[1, 1] > 0.75 + + +def test_model_disagreement_regression( + synthetic_linr_model, synthetic_poly_model, regression_data_to_test +): + + num_requested_samples = 2 + + # Import model and data + X = regression_data_to_test + model_A = synthetic_linr_model + model_B = synthetic_poly_model + models = [model_A, model_B] + + # Run model disagreement sampler + samples = model_disagreement_sampler(X, models, num_requested_samples) + + assert len(samples) == num_requested_samples + assert samples[0] == 2.0 or samples[0] == -2.0 + assert samples[1] == 2.0 or samples[1] == -2.0 diff --git a/tests/test_poppernet_sampler.py b/tests/test_poppernet_sampler.py new file mode 100644 index 000000000..7f5c0a5bd --- /dev/null +++ b/tests/test_poppernet_sampler.py @@ -0,0 +1,184 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from autora.experimentalist.pipeline import Pipeline +from autora.experimentalist.sampler.poppernet import ( + nearest_values_sampler, + poppernet_pooler, +) +from autora.variable import DV, IV, ValueType, VariableCollection + + +def get_xor_data(n: int = 3): + X = ([[1, 0]] * n) + ([[0, 1]] * n) + ([[0, 0]] * n) + ([[1, 1]]) + y = ([0] * n) + ([0] * n) + ([1] * n) + ([1]) + return X, y + + +def get_sin_data(n: int = 100): + x = np.linspace(0, 2 * np.pi, 100) + y = np.sin(x) + return x, y + + +@pytest.fixture +def synthetic_logr_model(): + """ + Creates logistic regression classifier for 3 classes based on synthetic data. + """ + X, y = get_xor_data() + model = LogisticRegression() + model.fit(X, y) + return model + + +@pytest.fixture +def synthetic_linr_model(): + """ + Creates linear regression based on synthetic data. 
+ """ + x, y = get_sin_data() + model = LinearRegression() + model.fit(x.reshape(-1, 1), y) + return model + + +@pytest.fixture +def classification_data_to_test(): + data = np.array( + [ + [1, 0], + [0, 1], + [0, 0], + [1, 1], + ] + ) + return data + + +@pytest.fixture +def regression_data_to_test(): + data = [-10, 0, 1.5, 3, 4.5, 6, 10] + return data + + +def test_poppernet_classification(synthetic_logr_model, classification_data_to_test): + + # Import model and data + X_train, Y_train = get_xor_data() + X = classification_data_to_test + model = synthetic_logr_model + + # Specify independent variables + iv1 = IV( + name="x", + value_range=(0, 5), + units="intensity", + variable_label="stimulus 1", + ) + + # specify dependent variables + dv1 = DV( + name="y", + value_range=(0, 1), + units="class", + variable_label="class", + type=ValueType.CLASS, + ) + + # Variable collection with ivs and dvs + metadata = VariableCollection( + independent_variables=[iv1, iv1], + dependent_variables=[dv1], + ) + + # Run popper net sampler + poppernet_pipeline = Pipeline( + [("pool", poppernet_pooler), ("sampler", nearest_values_sampler)], + params={ + "pool": dict( + model=model, + x_train=X_train, + y_train=Y_train, + metadata=metadata, + num_samples=2, + training_epochs=1000, + optimization_epochs=1000, + training_lr=1e-3, + optimization_lr=1e-3, + mse_scale=1, + limit_offset=10**-10, + limit_repulsion=0, + ), + "sampler": {"allowed_values": X}, + }, + ) + + samples = poppernet_pipeline.run() + + print(samples) + # Check that at least one of the resulting samples is the one that is + # underrepresented in the data used for model training + + assert (samples[0, :] == [1, 1]).all or (samples[1, :] == [1, 1]).all + + +def test_poppernet_regression(synthetic_linr_model, regression_data_to_test): + + # Import model and data + X_train, Y_train = get_sin_data() + X = regression_data_to_test + model = synthetic_linr_model + + # specify meta data + + # Specify independent variables + iv = IV( + name="x", + value_range=(0, 2 * np.pi), + units="intensity", + variable_label="stimulus", + ) + + # specify dependent variables + dv = DV( + name="y", + value_range=(-1, 1), + units="real", + variable_label="response", + type=ValueType.REAL, + ) + + # Variable collection with ivs and dvs + metadata = VariableCollection( + independent_variables=[iv], + dependent_variables=[dv], + ) + + poppernet_pipeline = Pipeline( + [("pool", poppernet_pooler), ("sampler", nearest_values_sampler)], + params={ + "pool": dict( + model=model, + x_train=X_train, + y_train=Y_train, + metadata=metadata, + num_samples=5, + training_epochs=1000, + optimization_epochs=1000, + training_lr=1e-3, + optimization_lr=1e-3, + mse_scale=1, + limit_offset=10**-10, + limit_repulsion=0, + ), + "sampler": {"allowed_values": X}, + }, + ) + + sample = poppernet_pipeline.run() + + # the first value should be close to one of the local maxima of the + # sine function + assert sample[0] == 1.5 or sample[0] == 4.5 diff --git a/tests/test_sklearn_darts.py b/tests/test_sklearn_darts.py index 86e912ded..03ac817c7 100644 --- a/tests/test_sklearn_darts.py +++ b/tests/test_sklearn_darts.py @@ -129,8 +129,10 @@ def test_primitive_selection(): DARTSRegressor(primitives=["add", "subtract", "none"], **kwargs).fit(X, y) DARTSRegressor(primitives=PRIMITIVES, **kwargs).fit(X, y) - with pytest.raises(KeyError): - KeyError, DARTSRegressor(primitives=["doesnt_exist"], **kwargs).fit(X, y) + with pytest.raises(NotImplementedError): + NotImplementedError, 
DARTSRegressor(primitives=["doesnt_exist"], **kwargs).fit( + X, y + ) def test_fit_with_fixed_architecture():
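The updated assertion reflects the new failure mode: `operation_factory` raises `NotImplementedError` for unknown primitive names, where the old `OPS[...]` lookup raised `KeyError`. A compact, standalone sketch of that contract (an illustrative test, not part of the diff), using only names introduced in `operations.py` above:

import pytest

from autora.theorist.darts.operations import PRIMITIVES, operation_factory


def test_operation_factory_contract():
    # every registered primitive resolves to a module
    for name in PRIMITIVES:
        assert operation_factory(name) is not None

    # unknown names raise NotImplementedError rather than KeyError
    with pytest.raises(NotImplementedError):
        operation_factory("doesnt_exist")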