diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 41f09c72c..39ccaab09 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/ambv/black - rev: 22.8.0 + rev: 22.10.0 hooks: - id: black - repo: https://github.com/pycqa/isort @@ -10,6 +10,7 @@ repos: args: - "--profile=black" - "--filter-files" + - "--project=autora" - repo: https://github.com/pycqa/flake8 rev: 5.0.4 hooks: @@ -19,7 +20,7 @@ repos: - "--extend-ignore=E203" - "--per-file-ignores=__init__.py:F401" - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v0.971" + rev: "v0.991" hooks: - id: mypy additional_dependencies: [types-requests] diff --git a/autora/experimentalist/filter.py b/autora/experimentalist/filter.py index ff2bc6873..58a9139cd 100644 --- a/autora/experimentalist/filter.py +++ b/autora/experimentalist/filter.py @@ -1,2 +1,128 @@ +from enum import Enum +from typing import Callable, Iterable, Tuple + +import numpy as np + + def weber_filter(values): return filter(lambda s: s[0] <= s[1], values) + + +def train_test_filter( + seed: int = 180, train_p: float = 0.5 +) -> Tuple[Callable[[Iterable], Iterable], Callable[[Iterable], Iterable]]: + """ + A pipeline filter which pseudorandomly assigns values from the input into "train" or "test" + groups. This is particularly useful when working with streams of data of potentially + unbounded length. + + This isn't a great method for small datasets, as it doesn't guarantee producing training + and test sets which are as close as possible to the specified desired proportions. + Consider using the scikit-learn `train_test_split` for cases where it's practical to + enumerate the full dataset in advance. + + Args: + seed: random number generator seeding value + train_p: proportion of data which go into the training set. A float between 0 and 1. + + Returns: + a tuple of callables `(train_filter, test_filter)` which split the input data + into two complementary streams. + + + Examples: + We can create complementary train and test filters using the function: + >>> train_filter, test_filter = train_test_filter(train_p=0.6, seed=180) + + The `train_filter` generates a sequence of ~60% of the input list – + in this case, 15 of 20 datapoints. + Note that the correct split would be 12 of 20 data points. + Again, for data with bounded length it is advisable + to use scikit-learn `train_test_split` instead. + >>> list(train_filter(range(20))) + [0, 2, 3, 4, 5, 6, 9, 10, 11, 12, 15, 16, 17, 18, 19] + + When we run the `test_filter`, it fills in the gaps, giving us the remaining 5 values: + >>> list(test_filter(range(20))) + [1, 7, 8, 13, 14] + + We can continue to generate new values for as long as we like using the same filter and the + continuation of the input range: + >>> list(train_filter(range(20, 40))) + [20, 22, 23, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39] + + ... and some more. + >>> list(train_filter(range(40, 50))) + [41, 42, 44, 45, 46, 49] + + As the number of samples grows, the fraction in the train and test sets + will approach `train_p` and `1 - train_p`. + + The test_filter fills in the gaps again. + >>> list(test_filter(range(20, 30))) + [21, 24, 25, 26] + + If you rerun the *same* test_filter on a fresh range, then the results will be different + to the first time around: + >>> list(test_filter(range(20))) + [5, 10, 13, 17, 18] + + ... 
but if you regenerate the test_filter, it'll reproduce the original sequence + >>> _, test_filter_regenerated = train_test_filter(train_p=0.6, seed=180) + >>> list(test_filter_regenerated(range(20))) + [1, 7, 8, 13, 14] + + It also works on tuple-valued lists: + >>> from itertools import product + >>> train_filter_tuple, test_filter_tuple = train_test_filter(train_p=0.3, seed=42) + >>> list(test_filter_tuple(product(["a", "b"], [1, 2, 3]))) + [('a', 1), ('a', 2), ('a', 3), ('b', 1), ('b', 3)] + + >>> list(train_filter_tuple(product(["a","b"], [1,2,3]))) + [('b', 2)] + + >>> from itertools import count, takewhile + >>> train_filter_unbounded, test_filter_unbounded = train_test_filter(train_p=0.5, seed=21) + + >>> list(takewhile(lambda s: s < 90, count(79))) + [79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] + + >>> train_pool = train_filter_unbounded(count(79)) + >>> list(takewhile(lambda s: s < 90, train_pool)) + [82, 85, 86, 89] + + >>> test_pool = test_filter_unbounded(count(79)) + >>> list(takewhile(lambda s: s < 90, test_pool)) + [79, 80, 81, 83, 84, 87, 88] + + >>> list(takewhile(lambda s: s < 110, test_pool)) + [91, 93, 94, 97, 100, 105, 106, 109] + + """ + + test_p = 1 - train_p + + _TrainTest = Enum("_TrainTest", ["train", "test"]) + + def train_test_stream(): + """Generates a pseudorandom stream of _TrainTest.train and _TrainTest.test.""" + rng = np.random.default_rng(seed) + while True: + yield rng.choice([_TrainTest.train, _TrainTest.test], p=(train_p, test_p)) + + def _factory(allow): + """Factory to make complementary generators which split their input + corresponding to the values of the pseudorandom train_test_stream.""" + _stream = train_test_stream() + + def _generator(values): + """Generator which yields items from the `values` depending on + whether the corresponding item from the `_stream` + matches the `allow` parameter.""" + for v, train_test in zip(values, _stream): + if train_test == allow: + yield v + + return _generator + + return _factory(_TrainTest.train), _factory(_TrainTest.test) diff --git a/autora/experimentalist/sampler/model_disagreement.py b/autora/experimentalist/sampler/model_disagreement.py new file mode 100644 index 000000000..20a9b805f --- /dev/null +++ b/autora/experimentalist/sampler/model_disagreement.py @@ -0,0 +1,66 @@ +import itertools +from typing import Iterable, List + +import numpy as np + + +def model_disagreement_sampler(X: np.array, models: List, num_samples: int = 1): + """ + A sampler that returns selected samples for independent variables + for which the models disagree the most in terms of their predictions. 
+ + Args: + X: pool of IV conditions to evaluate in terms of model disagreement + models: List of Scikit-learn (regression or classification) models to compare + num_samples: number of samples to select + + Returns: Sampled pool + """ + + if isinstance(X, Iterable): + X = np.array(list(X)) + + X_predict = np.array(X) + if len(X_predict.shape) == 1: + X_predict = X_predict.reshape(-1, 1) + + model_disagreement = list() + + # collect disagreements for each model pair + for model_a, model_b in itertools.combinations(models, 2): + + # determine the prediction method + if hasattr(model_a, "predict_proba") and hasattr(model_b, "predict_proba"): + model_a_predict = model_a.predict_proba + model_b_predict = model_b.predict_proba + elif hasattr(model_a, "predict") and hasattr(model_b, "predict"): + model_a_predict = model_a.predict + model_b_predict = model_b.predict + else: + raise AttributeError( + "Models must both have `predict_proba` or `predict` method." + ) + + # get predictions from both models + y_a = model_a_predict(X_predict) + y_b = model_b_predict(X_predict) + + assert y_a.shape == y_b.shape, "Models must have same output shape." + + # determine the disagreement between the two models in terms of mean-squared error + if len(y_a.shape) == 1: + disagreement = (y_a - y_b) ** 2 + else: + disagreement = np.mean((y_a - y_b) ** 2, axis=1) + + model_disagreement.append(disagreement) + + assert len(model_disagreement) >= 1, "No disagreements to compare." + + # sum up all model disagreements + summed_disagreement = np.sum(model_disagreement, axis=0) + + # sort the summed disagreements and select the top n + idx = (-summed_disagreement).argsort()[:num_samples] + + return X[idx] diff --git a/autora/experimentalist/sampler/poppernet.py b/autora/experimentalist/sampler/poppernet.py new file mode 100644 index 000000000..9508deaad --- /dev/null +++ b/autora/experimentalist/sampler/poppernet.py @@ -0,0 +1,340 @@ +from typing import Iterable, Optional, Tuple, cast + +import numpy as np +import torch +from torch import nn +from torch.autograd import Variable + +from autora.variable import ValueType, VariableCollection + + +def poppernet_pooler( + model, + x_train: np.ndarray, + y_train: np.ndarray, + metadata: VariableCollection, + num_samples: int = 100, + training_epochs: int = 1000, + optimization_epochs: int = 1000, + training_lr: float = 1e-3, + optimization_lr: float = 1e-3, + mse_scale: float = 1, + limit_offset: float = 10**-10, + limit_repulsion: float = 0, +): + """ + A pooler that generates samples for independent variables with the objective of maximizing the + (approximated) loss of the model. The samples are generated by first training a neural network + to approximate the loss of a model for all patterns in the training data. Once trained, the + network is then inverted to generate samples that maximize the approximated loss of the model. + + Note: If the pooler returns samples that are close to the boundaries of the variable space, + then it is advisable to increase the limit_repulsion parameter (e.g., to 0.000001). 
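For orientation, here is a minimal usage sketch of the `model_disagreement_sampler` added above (not part of the diff). It assumes two scikit-learn regressors with different inductive biases fitted on synthetic quadratic data; the names `linear_model`, `poly_model`, and `pool` are illustrative only, and the expected output is approximate.

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

from autora.experimentalist.sampler.model_disagreement import model_disagreement_sampler

# synthetic quadratic data on [-1, 1]
x = np.linspace(-1, 1, 100).reshape(-1, 1)
y = x.ravel() ** 2

# two models with different inductive biases
linear_model = LinearRegression().fit(x, y)
poly_model = make_pipeline(PolynomialFeatures(degree=3), LinearRegression()).fit(x, y)

# candidate conditions; the models should disagree most far outside the training range
pool = np.linspace(-2, 2, 101)
samples = model_disagreement_sampler(pool, [linear_model, poly_model], num_samples=2)
print(samples)  # expected to lie at or near the pool boundaries, e.g. [ 2. -2.]

(The argument documentation and body of `poppernet_pooler` continue below.)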
+ + Args: + model: Scikit-learn model, could be either a classification or regression model + x_train: data that the model was trained on + y_train: labels that the model was trained on + metadata: Meta-data about the dependent and independent variables + num_samples: number of samples to return + training_epochs: number of epochs to train the popper network for approximating the + error of the model + optimization_epochs: number of epochs to optimize the samples based on the trained + popper network + training_lr: learning rate for training the popper network + optimization_lr: learning rate for optimizing the samples + mse_scale: scale factor for the MSE loss + limit_offset: a small offset to prevent the samples from being too close to the value + boundaries + limit_repulsion: a repulsion factor to prevent the samples from being too close to the + allowed value boundaries + + Returns: Sampled pool + + """ + + # format input + + x_train = np.array(x_train) + if len(x_train.shape) == 1: + x_train = x_train.reshape(-1, 1) + + x = np.empty([num_samples, x_train.shape[1]]) + + y_train = np.array(y_train) + if len(y_train.shape) == 1: + y_train = y_train.reshape(-1, 1) + + if metadata.dependent_variables[0].type == ValueType.CLASS: + # find all unique values in y_train + num_classes = len(np.unique(y_train)) + y_train = class_to_onehot(y_train, n_classes=num_classes) + + x_train_tensor = torch.from_numpy(x_train).float() + + # create list of IV limits + ivs = metadata.independent_variables + iv_limit_list = list() + for iv in ivs: + if hasattr(iv, "value_range"): + value_range = cast(Tuple, iv.value_range) + lower_bound = value_range[0] + upper_bound = value_range[1] + iv_limit_list.append(([lower_bound, upper_bound])) + + # get dimensions of input and output + n_input = len(metadata.independent_variables) + n_output = len(metadata.dependent_variables) + + # get input pattern for popper net + popper_input = Variable(torch.from_numpy(x_train), requires_grad=False).float() + + # get target pattern for popper net + model_predict = getattr(model, "predict_proba", None) + if callable(model_predict) is False: + model_predict = getattr(model, "predict", None) + + if callable(model_predict) is False or model_predict is None: + raise Exception("Model must have `predict` or `predict_proba` method.") + + model_prediction = model_predict(x_train) + + criterion = nn.MSELoss() + model_loss = (model_prediction - y_train) ** 2 * mse_scale + model_loss = np.mean(model_loss, axis=1) + model_loss = torch.from_numpy(model_loss).float() + popper_target = Variable(model_loss, requires_grad=False) + + # create the network + popper_net = PopperNet(n_input, n_output) + + # reformat input in case it is 1D + if len(popper_input.shape) == 1: + popper_input = popper_input.flatten() + popper_input = popper_input.reshape(-1, 1) + + # define the optimizer + popper_optimizer = torch.optim.Adam(popper_net.parameters(), lr=training_lr) + + # train the network + losses = [] + for epoch in range(training_epochs): + popper_prediction = popper_net(popper_input) + loss = criterion(popper_prediction, popper_target.reshape(-1, 1)) + popper_optimizer.zero_grad() + loss.backward() + popper_optimizer.step() + losses.append(loss.item()) + + # now that the popper network is trained we can sample new data points + # to sample data points we need to provide the popper network with an initial condition + # we will sample those initial conditions 
proportional to the loss of the current model + + # feed average model losses through softmax + # model_loss_avg= torch.from_numpy(np.mean(model_loss.detach().numpy(), axis=1)).float() + softmax_func = torch.nn.Softmax(dim=0) + probabilities = softmax_func(model_loss) + # sample data point in proportion to model loss + transform_category = torch.distributions.categorical.Categorical(probabilities) + + popper_net.freeze_weights() + + for condition in range(num_samples): + + index = transform_category.sample() + input_sample = torch.flatten(x_train_tensor[index, :]) + popper_input = Variable(input_sample, requires_grad=True) + + # invert the popper network to determine optimal experiment conditions + for optimization_epoch in range(optimization_epochs): + # feedforward pass on popper network + popper_prediction = popper_net(popper_input) + # compute gradient that maximizes output of popper network + # (i.e. predicted loss of original model) + popper_loss_optim = -popper_prediction + popper_loss_optim.backward() + # compute new input + # with torch.no_grad(): + # delta = -optimization_lr * popper_input.grad + # popper_input += -optimization_lr * popper_input.grad + # print(delta) + # popper_input.grad.zero_() + + with torch.no_grad(): + + # first add repulsion from variable limits + for idx in range(len(input_sample)): + iv_value = input_sample[idx] + iv_limits = iv_limit_list[idx] + dist_to_min = np.abs(iv_value - np.min(iv_limits)) + dist_to_max = np.abs(iv_value - np.max(iv_limits)) + repulsion_from_min = limit_repulsion / (dist_to_min**2) + repulsion_from_max = limit_repulsion / (dist_to_max**2) + iv_value_repulsed = ( + iv_value + repulsion_from_min - repulsion_from_max + ) + popper_input[idx] = iv_value_repulsed + + # now add gradient for theory loss maximization + delta = -optimization_lr * popper_input.grad + popper_input += delta + popper_input.grad.zero_() + + # finally, clip input variable from its limits + for idx in range(len(input_sample)): + iv_raw_value = input_sample[idx] + iv_limits = iv_limit_list[idx] + iv_clipped_value = np.min( + [iv_raw_value, np.max(iv_limits) - limit_offset] + ) + iv_clipped_value = np.max( + [ + iv_clipped_value, + np.min(iv_limits) + limit_offset, + ] + ) + popper_input[idx] = iv_clipped_value + + # add condition to new experiment sequence + for idx in range(len(input_sample)): + iv_limits = iv_limit_list[idx] + + # first clip value + iv_clipped_value = np.min([iv_raw_value, np.max(iv_limits) - limit_offset]) + iv_clipped_value = np.max( + [iv_clipped_value, np.min(iv_limits) + limit_offset] + ) + # make sure to convert variable to original scale + iv_clipped_scaled_value = iv_clipped_value + + x[condition, idx] = iv_clipped_scaled_value + + return x + + +def plot_popper_diagnostics(losses, popper_input, popper_prediction, popper_target): + print("Finished training Popper Network...") + import matplotlib.pyplot as plt + + if popper_input.shape[1] > 1: + plot_input = popper_input[:, 0] + plt.scatter(plot_input, popper_target.detach().numpy(), label="target") + plt.scatter(plot_input, popper_prediction.detach().numpy(), label="prediction") + else: + plot_input = popper_input + plt.plot(plot_input, popper_target.detach().numpy(), label="target") + plt.plot(plot_input, popper_prediction.detach().numpy(), label="prediction") + plt.xlabel("x") + plt.ylabel("y") + plt.legend() + plt.show() + plt.plot(losses) + plt.xlabel("epoch") + plt.ylabel("loss") + plt.show() + + +def nearest_values_sampler( + samples, + allowed_values, +): + """ + A sampler which 
returns the nearest values between the input samples and the allowed values, + without replacement. + + Args: + samples: input conditions + allowed_values: allowed conditions to sample from + + Returns: + the nearest values from `allowed_values` to the `samples` + + """ + + if isinstance(allowed_values, Iterable): + allowed_values = np.array(list(allowed_values)) + + if len(allowed_values.shape) == 1: + allowed_values = allowed_values.reshape(-1, 1) + + num_samples = samples.shape[0] + + if allowed_values.shape[0] <= num_samples: + raise Exception("More samples requested than values available in `allowed_values`.") + + x_new = np.empty((num_samples, allowed_values.shape[1])) + + # get index of row in allowed_values that is closest to each sample + for row, sample in enumerate(samples): + dist = np.linalg.norm(allowed_values - sample, axis=1) + idx = np.argmin(dist) + x_new[row, :] = allowed_values[idx, :] + allowed_values = np.delete(allowed_values, idx, axis=0) + + return x_new + + +# define the network +class PopperNet(nn.Module): + def __init__(self, n_input: int, n_output: int): + # Perform initialization of the pytorch superclass + super(PopperNet, self).__init__() + + # Define network layer dimensions + D_in, H1, H2, H3, D_out = [n_input, 64, 64, 64, n_output] + + # Define layer types + self.linear1 = nn.Linear(D_in, H1) + self.linear2 = nn.Linear(H1, H2) + self.linear3 = nn.Linear(H2, H3) + self.linear4 = nn.Linear(H3, D_out) + + def forward(self, x: torch.Tensor): + """ + This method defines the network layering and activation functions + """ + x = self.linear1(x) # hidden layer + x = torch.tanh(x) # activation function + + x = self.linear2(x) # hidden layer + x = torch.tanh(x) # activation function + + x = self.linear3(x) # hidden layer + x = torch.tanh(x) # activation function + + x = self.linear4(x) # output layer + + return x + + def freeze_weights(self): + for param in self.parameters(): + param.requires_grad = False + + +def class_to_onehot(y: np.array, n_classes: Optional[int] = None): + """Converts a class vector (integers) to a binary class matrix. + + E.g. for use with categorical_crossentropy. + + # Arguments + y: class vector to be converted into a matrix + (integers from 0 to num_classes). + n_classes: total number of classes. + + # Returns + A binary matrix representation of the input. 
+ """ + y = np.array(y, dtype="int") + input_shape = y.shape + if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: + input_shape = tuple(input_shape[:-1]) + y = y.ravel() + if not n_classes: + n_classes = np.max(y) + 1 + n = y.shape[0] + categorical = np.zeros((n, n_classes)) + categorical[np.arange(n), y] = 1 + output_shape = input_shape + (n_classes,) + categorical = np.reshape(categorical, output_shape) + return categorical diff --git a/autora/theorist/darts/architect.py b/autora/theorist/darts/architect.py index 63bc09f01..c7d723e29 100755 --- a/autora/theorist/darts/architect.py +++ b/autora/theorist/darts/architect.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np import torch import torch.nn.functional as F @@ -150,8 +152,8 @@ def step( target_valid: torch.Tensor, network_optimizer: torch.optim.Optimizer, unrolled: bool, - input_train: torch.Tensor = None, - target_train: torch.Tensor = None, + input_train: Optional[torch.Tensor] = None, + target_train: Optional[torch.Tensor] = None, eta: float = 1, ): """ diff --git a/autora/theorist/darts/model_search.py b/autora/theorist/darts/model_search.py index 490675912..738c39dc2 100755 --- a/autora/theorist/darts/model_search.py +++ b/autora/theorist/darts/model_search.py @@ -1,7 +1,7 @@ import random import warnings from enum import Enum -from typing import Callable, List, Literal, Sequence, Tuple +from typing import Callable, List, Literal, Optional, Sequence, Tuple import numpy as np import torch @@ -11,11 +11,11 @@ from autora.theorist.darts.fan_out import Fan_Out from autora.theorist.darts.operations import ( - OPS, PRIMITIVES, Genotype, get_operation_label, isiterable, + operation_factory, ) @@ -58,7 +58,7 @@ def __init__(self, primitives: Sequence[str] = PRIMITIVES): # loop through all the 8 primitive operations for primitive in primitives: # OPS returns an nn module for a given primitive (defines as a string) - op = OPS[primitive] + op = operation_factory(primitive) # add the operation self._ops.append(op) @@ -400,7 +400,9 @@ def arch_parameters(self) -> List: return self._arch_parameters # fixes architecture - def fix_architecture(self, switch: bool, new_weights: torch.Tensor = None): + def fix_architecture( + self, switch: bool, new_weights: Optional[torch.Tensor] = None + ): """ Freezes or unfreezes the architecture weights. diff --git a/autora/theorist/darts/operations.py b/autora/theorist/darts/operations.py index 69c9e5fd0..05603b04b 100755 --- a/autora/theorist/darts/operations.py +++ b/autora/theorist/darts/operations.py @@ -538,85 +538,115 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # defines all the operations. 
affine is turned off for cuda (optimization prposes) -OPS = { - "none": Zero(1), - "add": nn.Sequential(Identity()), - "subtract": nn.Sequential(NegIdentity()), - "mult": nn.Sequential( - nn.Linear(1, 1, bias=False), - ), - "linear": nn.Sequential(nn.Linear(1, 1, bias=True)), - "relu": nn.Sequential( - nn.ReLU(inplace=False), - ), - "linear_relu": nn.Sequential( - nn.Linear(1, 1, bias=True), - nn.ReLU(inplace=False), - ), - "logistic": nn.Sequential( - nn.Sigmoid(), - ), - "linear_logistic": nn.Sequential( - nn.Linear(1, 1, bias=True), - nn.Sigmoid(), - ), - "exp": nn.Sequential( - Exponential(), - ), - "linear_exp": nn.Sequential( - nn.Linear(1, 1, bias=True), - Exponential(), - ), - "cos": nn.Sequential( - Cosine(), - ), - "linear_cos": nn.Sequential( - nn.Linear(1, 1, bias=True), - Cosine(), - ), - "sin": nn.Sequential( - Sine(), - ), - "linear_sin": nn.Sequential( - nn.Linear(1, 1, bias=True), - Sine(), - ), - "tanh": nn.Sequential( - Tangens_Hyperbolicus(), - ), - "linear_tanh": nn.Sequential( - nn.Linear(1, 1, bias=True), - Tangens_Hyperbolicus(), - ), - "reciprocal": nn.Sequential( - MultInverse(), - ), - "linear_reciprocal": nn.Sequential( - nn.Linear(1, 1, bias=False), - MultInverse(), - ), - "ln": nn.Sequential( - NatLogarithm(), - ), - "linear_ln": nn.Sequential( - nn.Linear(1, 1, bias=False), - NatLogarithm(), - ), - "softplus": nn.Sequential( - Softplus(), - ), - "linear_softplus": nn.Sequential( - nn.Linear(1, 1, bias=False), - Softplus(), - ), - "softminus": nn.Sequential( - Softminus(), - ), - "linear_softminus": nn.Sequential( - nn.Linear(1, 1, bias=False), - Softminus(), - ), -} + + +def operation_factory(name): + + if name == "none": + return Zero(1) + elif name == "add": + return nn.Sequential(Identity()) + elif name == "subtract": + return nn.Sequential(NegIdentity()) + elif name == "mult": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + ) + elif name == "linear": + return nn.Sequential(nn.Linear(1, 1, bias=True)) + elif name == "relu": + return nn.Sequential( + nn.ReLU(inplace=False), + ) + elif name == "linear_relu": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + nn.ReLU(inplace=False), + ) + elif name == "logistic": + return nn.Sequential( + nn.Sigmoid(), + ) + elif name == "linear_logistic": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + nn.Sigmoid(), + ) + elif name == "exp": + return nn.Sequential( + Exponential(), + ) + elif name == "linear_exp": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + Exponential(), + ) + elif name == "cos": + return nn.Sequential( + Cosine(), + ) + elif name == "linear_cos": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + Cosine(), + ) + elif name == "sin": + return nn.Sequential( + Sine(), + ) + elif name == "linear_sin": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + Sine(), + ) + elif name == "tanh": + return nn.Sequential( + Tangens_Hyperbolicus(), + ) + elif name == "linear_tanh": + return nn.Sequential( + nn.Linear(1, 1, bias=True), + Tangens_Hyperbolicus(), + ) + elif name == "reciprocal": + return nn.Sequential( + MultInverse(), + ) + elif name == "linear_reciprocal": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + MultInverse(), + ) + elif name == "ln": + return nn.Sequential( + NatLogarithm(), + ) + elif name == "linear_ln": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + NatLogarithm(), + ) + elif name == "softplus": + return nn.Sequential( + Softplus(), + ) + elif name == "linear_softplus": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + 
Softplus(), + ) + elif name == "softminus": + return nn.Sequential( + Softminus(), + ) + elif name == "linear_softminus": + return nn.Sequential( + nn.Linear(1, 1, bias=False), + Softminus(), + ) + else: + raise NotImplementedError(f"operation {name=} is not implemented") + # this is the list of primitives actually used, # and it should be a set of names contained in the OPS dictionary @@ -632,4 +662,4 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # make sure that every primitive is in the OPS dictionary for name in PRIMITIVES: - assert name in OPS + assert operation_factory(name) is not None diff --git a/autora/theorist/darts/plot_utils.py b/autora/theorist/darts/plot_utils.py index 6524393a6..d256bf412 100755 --- a/autora/theorist/darts/plot_utils.py +++ b/autora/theorist/darts/plot_utils.py @@ -1,5 +1,6 @@ import os import typing +from typing import Optional import imageio import matplotlib @@ -33,9 +34,9 @@ def generate_darts_summary_figures( y_limit: typing.List[float], best_model_name: str, figure_size: typing.Tuple[int, int], - y_reference: typing.List[float] = None, + y_reference: Optional[typing.List[float]] = None, y_reference_label: str = "", - arch_samp_filter: str = None, + arch_samp_filter: Optional[str] = None, ): """ Generates a summary figure for a given DARTS study. @@ -118,22 +119,22 @@ def plot_darts_summary( y_label: str = "", x1_label: str = "", x2_label: str = "", - y_sem_name: str = None, + y_sem_name: Optional[str] = None, metric: str = "min", - y_reference: typing.List[float] = None, + y_reference: Optional[typing.List[float]] = None, y_reference_label: str = "", - figure_dimensions: typing.Tuple[int, int] = None, + figure_dimensions: Optional[typing.Tuple[int, int]] = None, title: str = "", legend_loc: int = 0, legend_font_size: int = 8, axis_font_size: int = 10, title_font_size: int = 10, show_legend: bool = True, - y_limit: typing.List[float] = None, - x_limit: typing.List[float] = None, - theorist_filter: str = None, - arch_samp_filter: str = None, - best_model_name: str = None, + y_limit: Optional[typing.List[float]] = None, + x_limit: Optional[typing.List[float]] = None, + theorist_filter: Optional[str] = None, + arch_samp_filter: Optional[str] = None, + best_model_name: Optional[str] = None, save: bool = False, figure_name: str = "figure", ): @@ -996,18 +997,18 @@ def __init__( def update( self, - train_error: np.array = None, - valid_error: np.array = None, - weights: np.array = None, - BIC: np.array = None, - AIC: np.array = None, - model_graph: str = None, - range_input1: np.array = None, - range_input2: np.array = None, - range_target: np.array = None, - range_prediction: np.array = None, - target: np.array = None, - prediction: np.array = None, + train_error: Optional[np.array] = None, + valid_error: Optional[np.array] = None, + weights: Optional[np.array] = None, + BIC: Optional[np.array] = None, + AIC: Optional[np.array] = None, + model_graph: Optional[str] = None, + range_input1: Optional[np.array] = None, + range_input2: Optional[np.array] = None, + range_target: Optional[np.array] = None, + range_prediction: Optional[np.array] = None, + target: Optional[np.array] = None, + prediction: Optional[np.array] = None, ): """ Update the debug plot with new data. 
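Returning to the `operation_factory` refactor in `operations.py` above: the old module-level `OPS` dictionary held already-instantiated modules, so every lookup of a primitive such as "linear" returned the same `nn.Linear` object, whereas the factory builds a fresh module on each call. A minimal sketch of the observable difference (an illustrative check, not part of the diff):

from autora.theorist.darts.operations import operation_factory

# each call returns a new, independently parameterised module
op_a = operation_factory("linear")
op_b = operation_factory("linear")
assert op_a is not op_b
assert op_a[0].weight is not op_b[0].weight  # no shared nn.Linear parameters

# with the old module-level OPS dict, OPS["linear"] always returned the same
# nn.Sequential instance, so its weights would have been shared by every user

This is consistent with the updated `MixedOp` code in `model_search.py`, which now calls `operation_factory(primitive)` for every cell instead of indexing `OPS`.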
diff --git a/autora/theorist/darts/utils.py b/autora/theorist/darts/utils.py index 7fa8b467f..79d6affbc 100755 --- a/autora/theorist/darts/utils.py +++ b/autora/theorist/darts/utils.py @@ -2,7 +2,7 @@ import glob import os import shutil -from typing import Callable, List, Tuple +from typing import Callable, List, Optional, Tuple import numpy as np import torch @@ -14,11 +14,11 @@ def create_output_file_name( file_prefix: str, - log_version: int = None, - weight_decay: float = None, - k: int = None, - seed: int = None, - theorist: str = None, + log_version: Optional[int] = None, + weight_decay: Optional[float] = None, + k: Optional[int] = None, + seed: Optional[int] = None, + theorist: Optional[str] = None, ) -> str: """ Creates a file name for the output file of a theorist study. @@ -278,7 +278,7 @@ def count_parameters_in_MB(model: Network) -> int: ) -def save(model: torch.nn.Module, model_path: str, exp_folder: str = None): +def save(model: torch.nn.Module, model_path: str, exp_folder: Optional[str] = None): """ Saves a model to a file. @@ -303,9 +303,9 @@ def load(model: torch.nn.Module, model_path: str): def create_exp_dir( path: str, - scripts_to_save: List = None, + scripts_to_save: Optional[List] = None, parent_folder: str = "exps", - results_folder: str = None, + results_folder: Optional[str] = None, ): """ Creates an experiment directory and saves all necessary scripts and files. diff --git a/autora/theorist/darts/visualize.py b/autora/theorist/darts/visualize.py index ac6648211..bf3055332 100755 --- a/autora/theorist/darts/visualize.py +++ b/autora/theorist/darts/visualize.py @@ -1,5 +1,6 @@ import logging import typing +from typing import Optional from graphviz import Digraph @@ -12,12 +13,12 @@ def plot( genotype: Genotype, filename: str, file_format: str = "pdf", - view_file: bool = None, + view_file: Optional[bool] = None, full_label: bool = False, param_list: typing.Tuple = (), input_labels: typing.Tuple = (), - out_dim: int = None, - out_fnc: str = None, + out_dim: Optional[int] = None, + out_fnc: Optional[str] = None, ): """ Generates a graphviz plot for a DARTS model based on the genotype of the model. 
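The signature changes in `architect.py`, `model_search.py`, `plot_utils.py`, `utils.py`, and `visualize.py` all follow one pattern: parameters that default to `None` now spell out `Optional[...]` instead of relying on the bare annotation. This lines up with the mypy bump to v0.991 at the top of the diff, since recent mypy releases enable `no_implicit_optional` by default (stated here as the likely motivation; the diff itself gives no rationale). A toy illustration of the pattern, using a hypothetical function name:

from typing import Optional

# flagged by newer mypy under default settings:
#     def save_stub(model_path: str, exp_folder: str = None) -> None: ...

# accepted: the Optional is written out explicitly
def save_stub(model_path: str, exp_folder: Optional[str] = None) -> None:
    """Hypothetical stand-in illustrating the annotation change only."""
    target = exp_folder if exp_folder is not None else "."
    print(f"would save to {model_path} under {target}")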
@@ -58,8 +59,8 @@ def darts_model_plot( full_label: bool = False, param_list: typing.Sequence = (), input_labels: typing.Sequence = (), - out_dim: int = None, - out_fnc: str = None, + out_dim: Optional[int] = None, + out_fnc: Optional[str] = None, decimals_to_display: int = 2, ) -> Digraph: """ diff --git a/example/pipeline/Experimentalist Pipeline Examples.ipynb b/example/pipeline/Experimentalist Pipeline Examples.ipynb index 53ab8718f..1f28a8c32 100644 --- a/example/pipeline/Experimentalist Pipeline Examples.ipynb +++ b/example/pipeline/Experimentalist Pipeline Examples.ipynb @@ -274,4 +274,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/tests/test_model_disagreement_sampler.py b/tests/test_model_disagreement_sampler.py new file mode 100644 index 000000000..ed987681e --- /dev/null +++ b/tests/test_model_disagreement_sampler.py @@ -0,0 +1,131 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import PolynomialFeatures + +from autora.experimentalist.sampler.model_disagreement import model_disagreement_sampler + + +def get_classification_data(n: int = 100): + x1 = np.linspace(0, 1, n) + x2 = np.linspace(0, 1, n) + + # cross product of x1 and x2 + X = np.array([(x1[i], x2[j]) for i in range(len(x1)) for j in range(len(x2))]) + + # create a vector of 0s and 1s which is 0 whenever x1 < 0.5 and x2 < 0.5 and 1 otherwise + y_A = np.zeros(n * n) + y_B = np.zeros(n * n) + y_A[(X[:, 0] >= 0.5) | (X[:, 1] >= 0.5)] = 1 + y_B[(X[:, 0] >= 0.5)] = 1 + + return X, y_A, y_B + + +def get_polynomial_data(n: int = 100): + x = np.linspace(-1, 1, 100) + y = x**2 + return x, y + + +@pytest.fixture +def synthetic_lr_models(): + """ + Creates two logistic regression classifier for 2 classes based on synthetic data. + Each classifier is trained on a different data set and thus should yield different predictions. + """ + X, y_A, y_B = get_classification_data() + model_A = LogisticRegression() + model_B = LogisticRegression() + model_A.fit(X, y_A) + model_B.fit(X, y_B) + + models = [model_A, model_B] + return models + + +@pytest.fixture +def synthetic_linr_model(): + """ + Creates linear regression based on synthetic data. + """ + x, y = get_polynomial_data() + model = LinearRegression() + model.fit(x.reshape(-1, 1), y) + return model + + +@pytest.fixture +def synthetic_poly_model(): + """ + Creates polynomial regression based on synthetic data. 
+ """ + x, y = get_polynomial_data() + + # define the steps in the pipeline + steps = [ + ( + "poly", + PolynomialFeatures(degree=3), + ), # transform input data into polynomial features + ("lr", LinearRegression()), # fit a linear regression model + ] + # create the pipeline + model = Pipeline(steps) + model.fit(x.reshape(-1, 1), y) + return model + + +@pytest.fixture +def classification_data_to_test(n=10): + x1 = np.linspace(0, 1, n) + x2 = np.linspace(0, 1, n) + + # cross product of x1 and x2 + X = np.array([(x1[i], x2[j]) for i in range(len(x1)) for j in range(len(x2))]) + return X + + +@pytest.fixture +def regression_data_to_test(n=100): + data = np.linspace(-2, 2, n) + return data + + +def test_model_disagreement_classification( + synthetic_lr_models, classification_data_to_test +): + + num_requested_samples = 10 + + # Import model and data + X = classification_data_to_test + models = synthetic_lr_models + + # Run model disagreement sampler + samples = model_disagreement_sampler(X, models, num_requested_samples) + + assert samples.shape[0] == num_requested_samples + assert samples[0, 0] < 0.25 and samples[0, 1] > 0.75 + assert samples[1, 0] < 0.25 and samples[1, 1] > 0.75 + + +def test_model_disagreement_regression( + synthetic_linr_model, synthetic_poly_model, regression_data_to_test +): + + num_requested_samples = 2 + + # Import model and data + X = regression_data_to_test + model_A = synthetic_linr_model + model_B = synthetic_poly_model + models = [model_A, model_B] + + # Run model disagreement sampler + samples = model_disagreement_sampler(X, models, num_requested_samples) + + assert len(samples) == num_requested_samples + assert samples[0] == 2.0 or samples[0] == -2.0 + assert samples[1] == 2.0 or samples[1] == -2.0 diff --git a/tests/test_poppernet_sampler.py b/tests/test_poppernet_sampler.py new file mode 100644 index 000000000..7f5c0a5bd --- /dev/null +++ b/tests/test_poppernet_sampler.py @@ -0,0 +1,184 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from autora.experimentalist.pipeline import Pipeline +from autora.experimentalist.sampler.poppernet import ( + nearest_values_sampler, + poppernet_pooler, +) +from autora.variable import DV, IV, ValueType, VariableCollection + + +def get_xor_data(n: int = 3): + X = ([[1, 0]] * n) + ([[0, 1]] * n) + ([[0, 0]] * n) + ([[1, 1]]) + y = ([0] * n) + ([0] * n) + ([1] * n) + ([1]) + return X, y + + +def get_sin_data(n: int = 100): + x = np.linspace(0, 2 * np.pi, 100) + y = np.sin(x) + return x, y + + +@pytest.fixture +def synthetic_logr_model(): + """ + Creates logistic regression classifier for 3 classes based on synthetic data. + """ + X, y = get_xor_data() + model = LogisticRegression() + model.fit(X, y) + return model + + +@pytest.fixture +def synthetic_linr_model(): + """ + Creates linear regression based on synthetic data. 
+ """ + x, y = get_sin_data() + model = LinearRegression() + model.fit(x.reshape(-1, 1), y) + return model + + +@pytest.fixture +def classification_data_to_test(): + data = np.array( + [ + [1, 0], + [0, 1], + [0, 0], + [1, 1], + ] + ) + return data + + +@pytest.fixture +def regression_data_to_test(): + data = [-10, 0, 1.5, 3, 4.5, 6, 10] + return data + + +def test_poppernet_classification(synthetic_logr_model, classification_data_to_test): + + # Import model and data + X_train, Y_train = get_xor_data() + X = classification_data_to_test + model = synthetic_logr_model + + # Specify independent variables + iv1 = IV( + name="x", + value_range=(0, 5), + units="intensity", + variable_label="stimulus 1", + ) + + # specify dependent variables + dv1 = DV( + name="y", + value_range=(0, 1), + units="class", + variable_label="class", + type=ValueType.CLASS, + ) + + # Variable collection with ivs and dvs + metadata = VariableCollection( + independent_variables=[iv1, iv1], + dependent_variables=[dv1], + ) + + # Run popper net sampler + poppernet_pipeline = Pipeline( + [("pool", poppernet_pooler), ("sampler", nearest_values_sampler)], + params={ + "pool": dict( + model=model, + x_train=X_train, + y_train=Y_train, + metadata=metadata, + num_samples=2, + training_epochs=1000, + optimization_epochs=1000, + training_lr=1e-3, + optimization_lr=1e-3, + mse_scale=1, + limit_offset=10**-10, + limit_repulsion=0, + ), + "sampler": {"allowed_values": X}, + }, + ) + + samples = poppernet_pipeline.run() + + print(samples) + # Check that at least one of the resulting samples is the one that is + # underrepresented in the data used for model training + + assert (samples[0, :] == [1, 1]).all or (samples[1, :] == [1, 1]).all + + +def test_poppernet_regression(synthetic_linr_model, regression_data_to_test): + + # Import model and data + X_train, Y_train = get_sin_data() + X = regression_data_to_test + model = synthetic_linr_model + + # specify meta data + + # Specify independent variables + iv = IV( + name="x", + value_range=(0, 2 * np.pi), + units="intensity", + variable_label="stimulus", + ) + + # specify dependent variables + dv = DV( + name="y", + value_range=(-1, 1), + units="real", + variable_label="response", + type=ValueType.REAL, + ) + + # Variable collection with ivs and dvs + metadata = VariableCollection( + independent_variables=[iv], + dependent_variables=[dv], + ) + + poppernet_pipeline = Pipeline( + [("pool", poppernet_pooler), ("sampler", nearest_values_sampler)], + params={ + "pool": dict( + model=model, + x_train=X_train, + y_train=Y_train, + metadata=metadata, + num_samples=5, + training_epochs=1000, + optimization_epochs=1000, + training_lr=1e-3, + optimization_lr=1e-3, + mse_scale=1, + limit_offset=10**-10, + limit_repulsion=0, + ), + "sampler": {"allowed_values": X}, + }, + ) + + sample = poppernet_pipeline.run() + + # the first value should be close to one of the local maxima of the + # sine function + assert sample[0] == 1.5 or sample[0] == 4.5 diff --git a/tests/test_sklearn_darts.py b/tests/test_sklearn_darts.py index 86e912ded..03ac817c7 100644 --- a/tests/test_sklearn_darts.py +++ b/tests/test_sklearn_darts.py @@ -129,8 +129,10 @@ def test_primitive_selection(): DARTSRegressor(primitives=["add", "subtract", "none"], **kwargs).fit(X, y) DARTSRegressor(primitives=PRIMITIVES, **kwargs).fit(X, y) - with pytest.raises(KeyError): - KeyError, DARTSRegressor(primitives=["doesnt_exist"], **kwargs).fit(X, y) + with pytest.raises(NotImplementedError): + NotImplementedError, 
DARTSRegressor(primitives=["doesnt_exist"], **kwargs).fit( + X, y + ) def test_fit_with_fixed_architecture():
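The updated assertion reflects the new failure mode: `operation_factory` raises `NotImplementedError` for unknown primitive names, where the old `OPS[...]` lookup raised `KeyError`. A compact, standalone sketch of that contract (an illustrative test, not part of the diff), using only names introduced in `operations.py` above:

import pytest

from autora.theorist.darts.operations import PRIMITIVES, operation_factory


def test_operation_factory_contract():
    # every registered primitive resolves to a module
    for name in PRIMITIVES:
        assert operation_factory(name) is not None

    # unknown names raise NotImplementedError rather than KeyError
    with pytest.raises(NotImplementedError):
        operation_factory("doesnt_exist")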