kernels working on a given set of features #476

Merged Jan 16, 2025 · 45 commits
Changes from 39 commits
Commits
ac7de64
kernels working on a given set of features
e-dorigatti Dec 4, 2024
a806dc0
pre-commit
e-dorigatti Dec 4, 2024
c3f1899
test map singletaskgp with additive kernel
R-M-Lee Dec 4, 2024
86b6ad6
test active_dims of mapped kernels
R-M-Lee Dec 4, 2024
b4458fb
add features_to_idx_mapper to outlier detection tutorial
R-M-Lee Dec 4, 2024
d243ad0
correctly handling categorical mol features
e-dorigatti Dec 4, 2024
842797f
validating mol features transforms
e-dorigatti Dec 4, 2024
22c4382
verifying proper type
e-dorigatti Dec 6, 2024
17d8350
custom hamming kernel enabling single task gp on categorical features
e-dorigatti Dec 19, 2024
6ad1dfd
removed unnecessary parameter from data model
e-dorigatti Dec 19, 2024
4a2a547
testing equivalence of mixed gp and single gp with custom kernel
e-dorigatti Dec 19, 2024
3750827
(temporary) running on all py versions
e-dorigatti Dec 19, 2024
7162983
(temporary) debug github actions by printing
e-dorigatti Dec 19, 2024
01a01e1
more printing
e-dorigatti Dec 19, 2024
1cd2776
Revert "testing equivalence of mixed gp and single gp with custom ker…
e-dorigatti Dec 19, 2024
8400fdb
Revert "removed unnecessary parameter from data model"
e-dorigatti Dec 19, 2024
2e29852
Revert "custom hamming kernel enabling single task gp on categorical …
e-dorigatti Dec 19, 2024
7e455b7
Revert "Revert "custom hamming kernel enabling single task gp on cate…
e-dorigatti Dec 19, 2024
25f947b
Revert "Revert "testing equivalence of mixed gp and single gp with cu…
e-dorigatti Dec 19, 2024
2c145b6
removed test debug and restored to latest implemented features
e-dorigatti Dec 19, 2024
30dd123
pinning compatible version of formulaic
e-dorigatti Dec 19, 2024
065824f
Merge branch 'main' into 474-kernels-on-feature-subsets
e-dorigatti Dec 19, 2024
b53d3bb
pinning compatible version of formulaic
e-dorigatti Dec 19, 2024
8d47cbd
removed old code
e-dorigatti Dec 19, 2024
ce38428
lint
e-dorigatti Dec 19, 2024
16bdc1f
removed scratch file
e-dorigatti Dec 19, 2024
e306d16
removed old code again
e-dorigatti Dec 19, 2024
9d5dfc6
silencing pyright false positive
e-dorigatti Dec 19, 2024
62ba2c2
compatibility with py39
e-dorigatti Dec 19, 2024
d2c1f5d
pin compatible version of formulaic
e-dorigatti Dec 19, 2024
966bf8b
restored old code
e-dorigatti Dec 19, 2024
231f9f6
pinning sklearn
e-dorigatti Dec 19, 2024
6a7c9d7
pinning sklearn
e-dorigatti Dec 19, 2024
6576547
pinning scikit everywhere
e-dorigatti Dec 19, 2024
3e79e31
Merge branch '488-tests-failing-with-formulaic=11' into 474-kernels-o…
e-dorigatti Dec 19, 2024
e70cc16
not testing for prediction quality
e-dorigatti Dec 20, 2024
54b3c7f
matching lengthscale constraints in hamming kernel
e-dorigatti Dec 20, 2024
9b32536
removed equivalence test
e-dorigatti Dec 20, 2024
831a03e
testing hamming kernel
e-dorigatti Dec 20, 2024
561ac20
added test for mol features in single task gp
e-dorigatti Jan 13, 2025
1867e7b
categorical onehot kernel uses the right lengthscale for multiple fea…
e-dorigatti Jan 13, 2025
f30ed6d
removed redundant check
e-dorigatti Jan 13, 2025
7afcd7c
more descriptive name for base kernel
e-dorigatti Jan 13, 2025
d6e2957
updated docstring
e-dorigatti Jan 13, 2025
16d831c
improved tests and comments
e-dorigatti Jan 14, 2025
16 changes: 16 additions & 0 deletions bofire/data_models/domain/features.py
@@ -593,6 +593,7 @@ def _validate_transform_specs(
"""
# first check that the keys in the specs dict are also correct feature keys
# next check that all values are of type CategoricalEncodingEnum or MolFeatures
checked_keys = set()
for key, value in specs.items():
try:
feat = self.get_by_key(key)
@@ -622,6 +623,21 @@
raise ValueError(
f"Forbidden transform type for feature with key {key}",
)
checked_keys.add(key)

# now check that features that must be transformed do have a transformation defined
for key in self.get_keys():
if key in checked_keys:
continue

feat = self.get_by_key(key)
if isinstance(feat, MolecularInput):
trx = specs.get(key)
if trx is None or not isinstance(trx, MolFeatures):
raise ValueError(
"MolecularInput features must have a input processing of type MolFeatures defined"
)

return specs

def get_bounds(
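The net effect of this check, as a minimal sketch (assuming the usual bofire import paths; Fingerprints stands in for any MolFeatures transform):

from bofire.data_models.domain.api import Inputs
from bofire.data_models.features.api import ContinuousInput, MolecularInput
from bofire.data_models.molfeatures.api import Fingerprints

inputs = Inputs(features=[ContinuousInput(key="x", bounds=(0, 1)), MolecularInput(key="mol")])

inputs._validate_transform_specs({})  # now raises ValueError: no MolFeatures transform given for "mol"
inputs._validate_transform_specs({"mol": Fingerprints()})  # passes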
8 changes: 4 additions & 4 deletions bofire/data_models/kernels/aggregation.py
@@ -3,13 +3,13 @@

from bofire.data_models.kernels.categorical import HammingDistanceKernel
from bofire.data_models.kernels.continuous import LinearKernel, MaternKernel, RBFKernel
from bofire.data_models.kernels.kernel import Kernel
from bofire.data_models.kernels.kernel import AggregationKernel
from bofire.data_models.kernels.molecular import TanimotoKernel
from bofire.data_models.kernels.shape import WassersteinKernel
from bofire.data_models.priors.api import AnyGeneralPrior


class AdditiveKernel(Kernel):
class AdditiveKernel(AggregationKernel):
type: Literal["AdditiveKernel"] = "AdditiveKernel"
kernels: Sequence[
Union[
@@ -26,7 +26,7 @@ class AdditiveKernel(Kernel):
type: Literal["AdditiveKernel"] = "AdditiveKernel"


class MultiplicativeKernel(Kernel):
class MultiplicativeKernel(AggregationKernel):
type: Literal["MultiplicativeKernel"] = "MultiplicativeKernel"
kernels: Sequence[
Union[
@@ -42,7 +42,7 @@ class MultiplicativeKernel(Kernel):
]


class ScaleKernel(Kernel):
class ScaleKernel(AggregationKernel):
type: Literal["ScaleKernel"] = "ScaleKernel"
base_kernel: Union[
RBFKernel,
11 changes: 9 additions & 2 deletions bofire/data_models/kernels/api.py
@@ -17,12 +17,19 @@
PolynomialKernel,
RBFKernel,
)
from bofire.data_models.kernels.kernel import Kernel
from bofire.data_models.kernels.kernel import AggregationKernel, ConcreteKernel, Kernel
from bofire.data_models.kernels.molecular import MolecularKernel, TanimotoKernel
from bofire.data_models.kernels.shape import WassersteinKernel


AbstractKernel = Union[Kernel, CategoricalKernel, ContinuousKernel, MolecularKernel]
AbstractKernel = Union[
Kernel,
CategoricalKernel,
ContinuousKernel,
MolecularKernel,
ConcreteKernel,
AggregationKernel,
]

AnyContinuousKernel = Union[
MaternKernel,
4 changes: 2 additions & 2 deletions bofire/data_models/kernels/categorical.py
@@ -1,9 +1,9 @@
from typing import Literal

from bofire.data_models.kernels.kernel import Kernel
from bofire.data_models.kernels.kernel import ConcreteKernel


class CategoricalKernel(Kernel):
class CategoricalKernel(ConcreteKernel):
pass


9 changes: 5 additions & 4 deletions bofire/data_models/kernels/continuous.py
@@ -1,12 +1,12 @@
from typing import Literal, Optional
from typing import List, Literal, Optional

from pydantic import PositiveInt, field_validator

from bofire.data_models.kernels.kernel import Kernel
from bofire.data_models.kernels.kernel import ConcreteKernel
from bofire.data_models.priors.api import AnyGeneralPrior, AnyPrior


class ContinuousKernel(Kernel):
class ContinuousKernel(ConcreteKernel):
pass


@@ -40,6 +40,7 @@ class PolynomialKernel(ContinuousKernel):
power: int = 2


class InfiniteWidthBNNKernel(Kernel):
class InfiniteWidthBNNKernel(ContinuousKernel):
features: Optional[List[str]] = None
type: Literal["InfiniteWidthBNNKernel"] = "InfiniteWidthBNNKernel"
depth: PositiveInt = 3
10 changes: 10 additions & 0 deletions bofire/data_models/kernels/kernel.py
@@ -1,5 +1,15 @@
from typing import List, Optional

from bofire.data_models.base import BaseModel


class Kernel(BaseModel):
type: str


class AggregationKernel(Kernel):
pass


class ConcreteKernel(Kernel):
features: Optional[List[str]] = None
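With this split, aggregation kernels stay feature-agnostic while any concrete kernel can be pinned to named features. A short sketch of the intended data-model usage (class names as exported from bofire.data_models.kernels.api):

from bofire.data_models.kernels.api import RBFKernel, ScaleKernel

rbf = RBFKernel(features=["x1", "x2"])  # concrete kernels may name the features they act on
scaled = ScaleKernel(base_kernel=rbf)   # aggregation kernels only combine other kernels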
4 changes: 2 additions & 2 deletions bofire/data_models/kernels/molecular.py
@@ -1,9 +1,9 @@
from typing import Literal

from bofire.data_models.kernels.kernel import Kernel
from bofire.data_models.kernels.kernel import ConcreteKernel


class MolecularKernel(Kernel):
class MolecularKernel(ConcreteKernel):
pass


25 changes: 25 additions & 0 deletions bofire/kernels/categorical.py
@@ -0,0 +1,25 @@
import torch
from gpytorch.kernels.kernel import Kernel
from torch import Tensor


class HammingKernelWithOneHots(Kernel):
has_lengthscale = True

def forward(
self,
x1: Tensor,
x2: Tensor,
diag: bool = False,
last_dim_is_batch: bool = False,
) -> Tensor:
delta = (x1.unsqueeze(-2) - x2.unsqueeze(-3)) ** 2
dists = delta / self.lengthscale.unsqueeze(-2)
if last_dim_is_batch:
dists = dists.transpose(-3, -1)

dists = dists.sum(-1) / 2
res = torch.exp(-dists)
if diag:
res = torch.diagonal(res, dim1=-1, dim2=-2)
return res
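For one-hot rows, the squared differences between two points sum to exactly twice their Hamming distance (switching category flips two columns), which is why the summed distances are halved before exponentiation. A quick equivalence check, as a sketch assuming botorch is installed, with lengthscales fixed to 1 for illustration:

import torch
from botorch.models.kernels.categorical import CategoricalKernel

from bofire.kernels.categorical import HammingKernelWithOneHots

labels = torch.tensor([[0.0], [1.0], [2.0]])  # one categorical feature, label-encoded
one_hots = torch.eye(3)                       # the same three categories, one-hot encoded

k1, k2 = CategoricalKernel(), HammingKernelWithOneHots()
k1.lengthscale = k2.lengthscale = 1.0

# both kernels yield 1.0 on the diagonal and exp(-1) everywhere else
assert torch.allclose(k1.forward(labels, labels), k2.forward(one_hots, one_hots))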
69 changes: 62 additions & 7 deletions bofire/kernels/mapper.py
@@ -1,22 +1,37 @@
from typing import List
from typing import Callable, List, Optional

import gpytorch
import torch
from botorch.models.kernels.categorical import CategoricalKernel
from gpytorch.constraints import GreaterThan
from gpytorch.kernels import Kernel as GpytorchKernel

import bofire.data_models.kernels.api as data_models
import bofire.priors.api as priors
from bofire.kernels.categorical import HammingKernelWithOneHots
from bofire.kernels.fingerprint_kernels.tanimoto_kernel import TanimotoKernel
from bofire.kernels.shape import WassersteinKernel


def _compute_active_dims(
data_model: data_models.ConcreteKernel,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> List[int]:
if data_model.features:
assert features_to_idx_mapper is not None
active_dims = features_to_idx_mapper(data_model.features)
return active_dims


def map_RBFKernel(
data_model: data_models.RBFKernel,
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> gpytorch.kernels.RBFKernel:
active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper)
return gpytorch.kernels.RBFKernel(
batch_shape=batch_shape,
ard_num_dims=len(active_dims) if data_model.ard else None,
@@ -34,7 +49,9 @@ def map_MaternKernel(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> gpytorch.kernels.MaternKernel:
active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper)
return gpytorch.kernels.MaternKernel(
batch_shape=batch_shape,
ard_num_dims=len(active_dims) if data_model.ard else None,
@@ -53,6 +70,7 @@ def map_InfiniteWidthBNNKernel(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> "InfiniteWidthBNNKernel": # type: ignore # noqa: F821
try:
from botorch.models.kernels.infinite_width_bnn import ( # type: ignore
@@ -66,6 +84,7 @@
"requires python 3.10+.",
)

active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper)
return InfiniteWidthBNNKernel(
batch_shape=batch_shape,
active_dims=tuple(active_dims),
@@ -78,7 +97,9 @@ def map_LinearKernel(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> gpytorch.kernels.LinearKernel:
active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper)
return gpytorch.kernels.LinearKernel(
batch_shape=batch_shape,
active_dims=active_dims,
@@ -95,7 +116,9 @@ def map_PolynomialKernel(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> gpytorch.kernels.PolynomialKernel:
active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper)
return gpytorch.kernels.PolynomialKernel(
batch_shape=batch_shape,
active_dims=active_dims,
@@ -113,6 +136,7 @@ def map_AdditiveKernel(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> gpytorch.kernels.AdditiveKernel:
return gpytorch.kernels.AdditiveKernel(
*[ # type: ignore
@@ -121,6 +145,7 @@
batch_shape=batch_shape,
ard_num_dims=ard_num_dims,
active_dims=active_dims,
features_to_idx_mapper=features_to_idx_mapper,
)
for k in data_model.kernels
],
@@ -132,6 +157,7 @@ def map_MultiplicativeKernel(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> gpytorch.kernels.ProductKernel:
return gpytorch.kernels.ProductKernel(
*[ # type: ignore
@@ -140,6 +166,7 @@
batch_shape=batch_shape,
ard_num_dims=ard_num_dims,
active_dims=active_dims,
features_to_idx_mapper=features_to_idx_mapper,
)
for k in data_model.kernels
],
@@ -151,13 +178,15 @@ def map_ScaleKernel(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> gpytorch.kernels.ScaleKernel:
return gpytorch.kernels.ScaleKernel(
base_kernel=map(
data_model.base_kernel,
batch_shape=batch_shape,
ard_num_dims=ard_num_dims,
active_dims=active_dims,
features_to_idx_mapper=features_to_idx_mapper,
),
outputscale_prior=(
priors.map(data_model.outputscale_prior)
@@ -172,7 +201,9 @@ def map_TanimotoKernel(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> TanimotoKernel:
active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper)
return TanimotoKernel(
batch_shape=batch_shape,
ard_num_dims=len(active_dims) if data_model.ard else None,
@@ -185,19 +216,41 @@ def map_HammingDistanceKernel(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
) -> CategoricalKernel:
return CategoricalKernel(
batch_shape=batch_shape,
ard_num_dims=len(active_dims) if data_model.ard else None,
active_dims=active_dims, # type: ignore
)
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> GpytorchKernel:
active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper)

with_one_hots = data_model.features is not None
if with_one_hots and len(active_dims) == 1:
    raise RuntimeError(
        "only one feature for categorical kernel operating on one-hot features"
    )
# multiple active dims without one-hots are not necessarily an issue, since
# botorch's CategoricalKernel can work on multiple features at the same time

if with_one_hots:
return HammingKernelWithOneHots(
batch_shape=batch_shape,
ard_num_dims=len(active_dims) if data_model.ard else None,
active_dims=active_dims, # type: ignore
lengthscale_constraint=GreaterThan(1e-06),
)
else:
return CategoricalKernel(
batch_shape=batch_shape,
ard_num_dims=len(active_dims) if data_model.ard else None,
active_dims=active_dims, # type: ignore
)


def map_WassersteinKernel(
data_model: data_models.WassersteinKernel,
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> WassersteinKernel:
return WassersteinKernel(
squared=data_model.squared,
@@ -230,10 +283,12 @@ def map(
batch_shape: torch.Size,
ard_num_dims: int,
active_dims: List[int],
features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]],
) -> GpytorchKernel:
return KERNEL_MAP[data_model.__class__](
data_model,
batch_shape,
ard_num_dims,
active_dims,
features_to_idx_mapper,
)
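Putting the pieces together, a sketch of how the new argument flows through the mapper. The column layout and the features_to_idx_mapper helper below are hypothetical; in practice the surrogate supplies a mapper that reflects how its inputs were transformed:

import torch

from bofire.data_models.kernels.api import (
    AdditiveKernel,
    HammingDistanceKernel,
    RBFKernel,
)
from bofire.kernels.mapper import map as map_kernel

# hypothetical layout of the transformed training tensor:
# x1 -> column 0, x2 -> column 1, cat_1 (three categories, one-hot) -> columns 2-4
column_map = {"x1": [0], "x2": [1], "cat_1": [2, 3, 4]}

def features_to_idx_mapper(feats):
    return [i for f in feats for i in column_map[f]]

data_model = AdditiveKernel(
    kernels=[
        RBFKernel(features=["x1", "x2"]),
        HammingDistanceKernel(features=["cat_1"]),
    ]
)

gp_kernel = map_kernel(
    data_model,
    batch_shape=torch.Size(),
    ard_num_dims=5,
    active_dims=list(range(5)),
    features_to_idx_mapper=features_to_idx_mapper,
)
# the RBF term now acts on columns [0, 1]; the Hamming term sees the
# one-hot block [2, 3, 4] and is mapped to HammingKernelWithOneHots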