kernels working on a given set of features (#476)
* kernels working on a given set of features

* pre-commit

* test map singletaskgp with additive kernel

* test active_dims of mapped kernels

* add features_to_idx_mapper to outlier detection tutorial

* correctly handling categorical mol features

* validating mol features transforms

* verifying proper type

* custom hamming kernel enabling single task gp on categorical features

* removed unnecessary parameter from data model

* testing equivalence of mixed gp and single gp with custom kernel

* (temporary) running on all py versions

* (temporary) debug github actions by printing

* more printing

* Revert "testing equivalence of mixed gp and single gp with custom kernel"

This reverts commit 4a2a547.

* Revert "removed unnecessary parameter from data model"

This reverts commit 6ad1dfd.

* Revert "custom hamming kernel enabling single task gp on categorical features"

This reverts commit 17d8350.

* Revert "Revert "custom hamming kernel enabling single task gp on categorical features""

This reverts commit 2e29852.

* Revert "Revert "testing equivalence of mixed gp and single gp with custom kernel""

This reverts commit 1cd2776.

* removed test debug and restored to latest implemented features

* pinning compatible version of formulaic

* pinning compatible version of formulaic

* removed old code

* lint

* removed scratch file

* removed old code again

* silencing pyright false positive

* compatibility with py39

* pin compatible version of formulaic

* restored old code

* pinning sklearn

* pinning sklearn

* pinning scikit everywhere

* not testing for prediction quality

* matching lengthscale constraints in hamming kernel

* removed equivalence test

* testing hamming kernel

* added test for mol features in single task gp

* categorical onehot kernel uses the right lengthscale for multiple features

* removed redundant check

* more descriptive name for base kernel

* updated docstring

* improved tests and comments

---------

Co-authored-by: Robert Lee <84771576+R-M-Lee@users.noreply.github.com>
e-dorigatti and R-M-Lee authored Jan 16, 2025
1 parent ecfc313 commit 7912431
Showing 26 changed files with 667 additions and 29 deletions.
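At the data-model level, the headline change is that kernels can now be scoped to a named subset of input features rather than always acting on every column. A minimal sketch of the resulting usage, with made-up feature keys (`x1`, `x2`, `cat_1`):

```python
from bofire.data_models.kernels.api import (
    AdditiveKernel,
    HammingDistanceKernel,
    RBFKernel,
)

# An RBF kernel over two continuous inputs plus a Hamming-distance kernel
# over a categorical input, combined additively. The `features` field is
# the one introduced by this PR; the feature keys are hypothetical.
kernel = AdditiveKernel(
    kernels=[
        RBFKernel(features=["x1", "x2"]),
        HammingDistanceKernel(features=["cat_1"]),
    ]
)
```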
1 change: 1 addition & 0 deletions bofire/data_models/domain/features.py
@@ -622,6 +622,7 @@ def _validate_transform_specs(
                 raise ValueError(
                     f"Forbidden transform type for feature with key {key}",
                 )
+
         return specs

     def get_bounds(
8 changes: 4 additions & 4 deletions bofire/data_models/kernels/aggregation.py
@@ -3,13 +3,13 @@
 from bofire.data_models.kernels.categorical import HammingDistanceKernel
 from bofire.data_models.kernels.continuous import LinearKernel, MaternKernel, RBFKernel
-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import AggregationKernel
 from bofire.data_models.kernels.molecular import TanimotoKernel
 from bofire.data_models.kernels.shape import WassersteinKernel
 from bofire.data_models.priors.api import AnyGeneralPrior


-class AdditiveKernel(Kernel):
+class AdditiveKernel(AggregationKernel):
     type: Literal["AdditiveKernel"] = "AdditiveKernel"
     kernels: Sequence[
         Union[
@@ -26,7 +26,7 @@ class AdditiveKernel(Kernel):
     type: Literal["AdditiveKernel"] = "AdditiveKernel"


-class MultiplicativeKernel(Kernel):
+class MultiplicativeKernel(AggregationKernel):
     type: Literal["MultiplicativeKernel"] = "MultiplicativeKernel"
     kernels: Sequence[
         Union[
@@ -42,7 +42,7 @@ class MultiplicativeKernel(Kernel):
     ]


-class ScaleKernel(Kernel):
+class ScaleKernel(AggregationKernel):
     type: Literal["ScaleKernel"] = "ScaleKernel"
     base_kernel: Union[
         RBFKernel,
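Note that the aggregation kernels now extend the new `AggregationKernel` base rather than `FeatureSpecificKernel`: they carry no `features` list of their own and defer feature scoping to their child kernels. A small sketch under that assumption:

```python
from bofire.data_models.kernels.api import RBFKernel, ScaleKernel

# The ScaleKernel has no `features` of its own; only its base kernel
# is scoped to a (hypothetical) named input.
kernel = ScaleKernel(base_kernel=RBFKernel(features=["x1"]))
```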
15 changes: 13 additions & 2 deletions bofire/data_models/kernels/api.py
@@ -17,12 +17,23 @@
     PolynomialKernel,
     RBFKernel,
 )
-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import (
+    AggregationKernel,
+    FeatureSpecificKernel,
+    Kernel,
+)
 from bofire.data_models.kernels.molecular import MolecularKernel, TanimotoKernel
 from bofire.data_models.kernels.shape import WassersteinKernel


-AbstractKernel = Union[Kernel, CategoricalKernel, ContinuousKernel, MolecularKernel]
+AbstractKernel = Union[
+    Kernel,
+    CategoricalKernel,
+    ContinuousKernel,
+    MolecularKernel,
+    FeatureSpecificKernel,
+    AggregationKernel,
+]

 AnyContinuousKernel = Union[
     MaternKernel,
4 changes: 2 additions & 2 deletions bofire/data_models/kernels/categorical.py
@@ -1,9 +1,9 @@
 from typing import Literal

-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import FeatureSpecificKernel


-class CategoricalKernel(Kernel):
+class CategoricalKernel(FeatureSpecificKernel):
     pass


9 changes: 5 additions & 4 deletions bofire/data_models/kernels/continuous.py
@@ -1,12 +1,12 @@
-from typing import Literal, Optional
+from typing import List, Literal, Optional

 from pydantic import PositiveInt, field_validator

-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import FeatureSpecificKernel
 from bofire.data_models.priors.api import AnyGeneralPrior, AnyPrior


-class ContinuousKernel(Kernel):
+class ContinuousKernel(FeatureSpecificKernel):
     pass


@@ -40,6 +40,7 @@ class PolynomialKernel(ContinuousKernel):
     power: int = 2


-class InfiniteWidthBNNKernel(Kernel):
+class InfiniteWidthBNNKernel(ContinuousKernel):
+    features: Optional[List[str]] = None
     type: Literal["InfiniteWidthBNNKernel"] = "InfiniteWidthBNNKernel"
     depth: PositiveInt = 3
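Re-parenting `InfiniteWidthBNNKernel` onto `ContinuousKernel` gives it the same `features` field as the other continuous kernels. A one-line sketch (feature keys hypothetical):

```python
from bofire.data_models.kernels.api import InfiniteWidthBNNKernel

# a BNN kernel restricted to two named continuous inputs
kernel = InfiniteWidthBNNKernel(depth=3, features=["x1", "x2"])
```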
10 changes: 10 additions & 0 deletions bofire/data_models/kernels/kernel.py
@@ -1,5 +1,15 @@
+from typing import List, Optional
+
 from bofire.data_models.base import BaseModel


 class Kernel(BaseModel):
     type: str
+
+
+class AggregationKernel(Kernel):
+    pass
+
+
+class FeatureSpecificKernel(Kernel):
+    features: Optional[List[str]] = None
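`FeatureSpecificKernel.features` holds feature *keys*, so mapping a data model onto gpytorch kernels requires translating keys into tensor column indices, which is the role of the `features_to_idx_mapper` referenced in the commit message. A hypothetical illustration of that translation; the helper name and the column-span format are assumptions, not bofire's actual API:

```python
from typing import Dict, List


def make_features_to_idx_mapper(column_spans: Dict[str, List[int]]):
    """Build a mapper from feature keys to tensor column indices.

    Hypothetical sketch: the real mapper is derived from the domain and
    its input transforms (e.g. one-hot encoding widths).
    """

    def mapper(feature_keys: List[str]) -> List[int]:
        return [i for key in feature_keys for i in column_spans[key]]

    return mapper


# A one-hot encoded categorical "cat" spanning columns 2-4 maps to [2, 3, 4],
# which could then be passed as a kernel's active_dims.
mapper = make_features_to_idx_mapper({"x1": [0], "x2": [1], "cat": [2, 3, 4]})
assert mapper(["x1", "cat"]) == [0, 2, 3, 4]
```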
4 changes: 2 additions & 2 deletions bofire/data_models/kernels/molecular.py
@@ -1,9 +1,9 @@
 from typing import Literal

-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import FeatureSpecificKernel


-class MolecularKernel(Kernel):
+class MolecularKernel(FeatureSpecificKernel):
     pass


70 changes: 70 additions & 0 deletions bofire/kernels/categorical.py
@@ -0,0 +1,70 @@
+from typing import Dict
+
+import torch
+from botorch.models.transforms.input import OneHotToNumeric
+from gpytorch.kernels.kernel import Kernel
+from torch import Tensor
+
+
+class HammingKernelWithOneHots(Kernel):
+    r"""
+    A kernel for one-hot encoded categorical features. The inputs
+    may contain more than one categorical feature.
+
+    This kernel mimics the functionality of CategoricalKernel from
+    botorch, but assumes categorical features encoded as one-hot variables.
+    Computes `exp(-dist(x1, x2) / lengthscale)`, where
+    `dist(x1, x2)` is zero if `x1` and `x2` correspond to the
+    same category, and one otherwise. If the last dimension
+    is not a batch dimension, then the mean is considered.
+
+    Note: This kernel is NOT differentiable w.r.t. the inputs.
+    """
+
+    has_lengthscale = True
+
+    def __init__(self, categorical_features: Dict[int, int], *args, **kwargs):
+        """
+        Initialize.
+
+        Args:
+            categorical_features: A dictionary mapping the starting index of each
+                categorical feature to its cardinality. This assumes that categoricals
+                are one-hot encoded.
+            *args, **kwargs: Passed to gpytorch.kernels.kernel.Kernel.__init__
+        """
+        super().__init__(*args, **kwargs)
+
+        onehot_dim = sum(categorical_features.values())
+        self.trx = OneHotToNumeric(
+            onehot_dim, categorical_features=categorical_features
+        )
+
+    def forward(
+        self,
+        x1: Tensor,
+        x2: Tensor,
+        diag: bool = False,
+        last_dim_is_batch: bool = False,
+        **params,
+    ) -> Tensor:
+        x1 = self.trx(x1)
+        x2 = self.trx(x2)
+
+        delta = x1.unsqueeze(-2) != x2.unsqueeze(-3)
+        if self.ard_num_dims is not None:
+            # botorch forces ard_num_dims to match the total size of the
+            # one-hot encoded features, but here we only need one lengthscale
+            # per categorical feature
+            ls = self.lengthscale[..., : delta.shape[-1]]
+        else:
+            ls = self.lengthscale
+
+        dists = delta / ls.unsqueeze(-2)
+        if last_dim_is_batch:
+            dists = dists.transpose(-3, -1)
+        else:
+            dists = dists.mean(-1)
+        res = torch.exp(-dists)
+        if diag:
+            res = torch.diagonal(res, dim1=-1, dim2=-2)
+        return res
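A quick smoke test of the kernel above; the tensors and shapes are made up, and the kernel is put in eval mode so that botorch's `OneHotToNumeric` transform (which transforms on eval by default) actually fires:

```python
import torch

from bofire.kernels.categorical import HammingKernelWithOneHots

# Two one-hot encoded categoricals: 3 levels in columns 0-2 and 2 levels
# in columns 3-4, so the one-hot dimension is 5.
kernel = HammingKernelWithOneHots(categorical_features={0: 3, 3: 2}, ard_num_dims=5)
kernel.eval()

x1 = torch.tensor([[1.0, 0.0, 0.0, 0.0, 1.0]])  # levels (0, 1)
x2 = torch.tensor([[0.0, 1.0, 0.0, 0.0, 1.0]])  # levels (1, 1)

# The points differ only in the first categorical, so the result is
# exp(-(1/ls_0 + 0/ls_1) / 2) for per-feature lengthscales (ls_0, ls_1).
cov = kernel.forward(x1, x2)
print(cov.shape)  # torch.Size([1, 1])
```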
