kernels working on a given set of features (#476)
* kernels working on a given set of features

* pre-commit

* test map singletaskgp with additive kernel

* test active_dims of mapped kernels

* add features_to_idx_mapper to outlier detection tutorial

* correctly handling categorical mol features

* validating mol features transforms

* verifying proper type

* custom hamming kernel enabling single task gp on categorical features

* removed unnecessary parameter from data model

* testing equivalence of mixed gp and single gp with custom kernel

* (temporary) running on all py versions

* (temporary) debug github actions by printing

* more printing

* Revert "testing equivalence of mixed gp and single gp with custom kernel"

This reverts commit 4a2a547.

* Revert "removed unnecessary parameter from data model"

This reverts commit 6ad1dfd.

* Revert "custom hamming kernel enabling single task gp on categorical features"

This reverts commit 17d8350.

* Revert "Revert "custom hamming kernel enabling single task gp on categorical features""

This reverts commit 2e29852.

* Revert "Revert "testing equivalence of mixed gp and single gp with custom kernel""

This reverts commit 1cd2776.

* removed test debug and restored to latest implemented features

* pinning compatible version of formulaic

* pinning compatible version of formulaic

* removed old code

* lint

* removed scratch file

* removed old code again

* silencing pyright false positive

* compatibility with py39

* pin compatible version of formulaic

* restored old code

* pinning sklearn

* pinning sklearn

* pinning scikit everywhere

* not testing for prediction quality

* matching lengthscale constraints in hamming kernel

* removed equivalence test

* testing hamming kernel

* added test for mol features in single task gp

* categorical onehot kernel uses the right lengthscale for multiple features

* removed redundant check

* more descriptive name for base kernel

* updated docstring

* improved tests and comments

---------

Co-authored-by: Robert Lee <84771576+R-M-Lee@users.noreply.github.com>
e-dorigatti and R-M-Lee authored Jan 16, 2025
1 parent ecfc313 commit 7912431
Showing 26 changed files with 667 additions and 29 deletions.
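At the data-model level, the headline change is that kernels can now be scoped to a named subset of input features rather than always acting on every column. A minimal sketch of the resulting usage, with made-up feature keys (`x1`, `x2`, `cat_1`):

```python
from bofire.data_models.kernels.api import (
    AdditiveKernel,
    HammingDistanceKernel,
    RBFKernel,
)

# An RBF kernel over two continuous inputs plus a Hamming-distance kernel
# over a categorical input, combined additively. The `features` field is
# the one introduced by this PR; the feature keys are hypothetical.
kernel = AdditiveKernel(
    kernels=[
        RBFKernel(features=["x1", "x2"]),
        HammingDistanceKernel(features=["cat_1"]),
    ]
)
```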
1 change: 1 addition & 0 deletions bofire/data_models/domain/features.py
@@ -622,6 +622,7 @@ def _validate_transform_specs(
                 raise ValueError(
                     f"Forbidden transform type for feature with key {key}",
                 )
+
         return specs

     def get_bounds(
8 changes: 4 additions & 4 deletions bofire/data_models/kernels/aggregation.py
@@ -3,13 +3,13 @@
 from bofire.data_models.kernels.categorical import HammingDistanceKernel
 from bofire.data_models.kernels.continuous import LinearKernel, MaternKernel, RBFKernel
-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import AggregationKernel
 from bofire.data_models.kernels.molecular import TanimotoKernel
 from bofire.data_models.kernels.shape import WassersteinKernel
 from bofire.data_models.priors.api import AnyGeneralPrior


-class AdditiveKernel(Kernel):
+class AdditiveKernel(AggregationKernel):
     type: Literal["AdditiveKernel"] = "AdditiveKernel"
     kernels: Sequence[
         Union[
@@ -26,7 +26,7 @@ class AdditiveKernel(Kernel):
     type: Literal["AdditiveKernel"] = "AdditiveKernel"


-class MultiplicativeKernel(Kernel):
+class MultiplicativeKernel(AggregationKernel):
     type: Literal["MultiplicativeKernel"] = "MultiplicativeKernel"
     kernels: Sequence[
         Union[
@@ -42,7 +42,7 @@ class MultiplicativeKernel(Kernel):
     ]


-class ScaleKernel(Kernel):
+class ScaleKernel(AggregationKernel):
     type: Literal["ScaleKernel"] = "ScaleKernel"
     base_kernel: Union[
         RBFKernel,
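Note that the aggregation kernels now extend the new `AggregationKernel` base rather than `FeatureSpecificKernel`: they carry no `features` list of their own and defer feature scoping to their child kernels. A small sketch under that assumption:

```python
from bofire.data_models.kernels.api import RBFKernel, ScaleKernel

# The ScaleKernel has no `features` of its own; only its base kernel
# is scoped to a (hypothetical) named input.
kernel = ScaleKernel(base_kernel=RBFKernel(features=["x1"]))
```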
15 changes: 13 additions & 2 deletions bofire/data_models/kernels/api.py
@@ -17,12 +17,23 @@
     PolynomialKernel,
     RBFKernel,
 )
-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import (
+    AggregationKernel,
+    FeatureSpecificKernel,
+    Kernel,
+)
 from bofire.data_models.kernels.molecular import MolecularKernel, TanimotoKernel
 from bofire.data_models.kernels.shape import WassersteinKernel


-AbstractKernel = Union[Kernel, CategoricalKernel, ContinuousKernel, MolecularKernel]
+AbstractKernel = Union[
+    Kernel,
+    CategoricalKernel,
+    ContinuousKernel,
+    MolecularKernel,
+    FeatureSpecificKernel,
+    AggregationKernel,
+]

 AnyContinuousKernel = Union[
     MaternKernel,
4 changes: 2 additions & 2 deletions bofire/data_models/kernels/categorical.py
@@ -1,9 +1,9 @@
 from typing import Literal

-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import FeatureSpecificKernel


-class CategoricalKernel(Kernel):
+class CategoricalKernel(FeatureSpecificKernel):
     pass


9 changes: 5 additions & 4 deletions bofire/data_models/kernels/continuous.py
@@ -1,12 +1,12 @@
-from typing import Literal, Optional
+from typing import List, Literal, Optional

 from pydantic import PositiveInt, field_validator

-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import FeatureSpecificKernel
 from bofire.data_models.priors.api import AnyGeneralPrior, AnyPrior


-class ContinuousKernel(Kernel):
+class ContinuousKernel(FeatureSpecificKernel):
     pass


@@ -40,6 +40,7 @@ class PolynomialKernel(ContinuousKernel):
     power: int = 2


-class InfiniteWidthBNNKernel(Kernel):
+class InfiniteWidthBNNKernel(ContinuousKernel):
+    features: Optional[List[str]] = None
     type: Literal["InfiniteWidthBNNKernel"] = "InfiniteWidthBNNKernel"
     depth: PositiveInt = 3
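Re-parenting `InfiniteWidthBNNKernel` onto `ContinuousKernel` gives it the same `features` field as the other continuous kernels. A one-line sketch (feature keys hypothetical):

```python
from bofire.data_models.kernels.api import InfiniteWidthBNNKernel

# a BNN kernel restricted to two named continuous inputs
kernel = InfiniteWidthBNNKernel(depth=3, features=["x1", "x2"])
```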
10 changes: 10 additions & 0 deletions bofire/data_models/kernels/kernel.py
@@ -1,5 +1,15 @@
+from typing import List, Optional
+
 from bofire.data_models.base import BaseModel


 class Kernel(BaseModel):
     type: str
+
+
+class AggregationKernel(Kernel):
+    pass
+
+
+class FeatureSpecificKernel(Kernel):
+    features: Optional[List[str]] = None
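`FeatureSpecificKernel.features` holds feature *keys*, so mapping a data model onto gpytorch kernels requires translating keys into tensor column indices, which is the role of the `features_to_idx_mapper` referenced in the commit message. A hypothetical illustration of that translation; the helper name and the column-span format are assumptions, not bofire's actual API:

```python
from typing import Dict, List


def make_features_to_idx_mapper(column_spans: Dict[str, List[int]]):
    """Build a mapper from feature keys to tensor column indices.

    Hypothetical sketch: the real mapper is derived from the domain and
    its input transforms (e.g. one-hot encoding widths).
    """

    def mapper(feature_keys: List[str]) -> List[int]:
        return [i for key in feature_keys for i in column_spans[key]]

    return mapper


# A one-hot encoded categorical "cat" spanning columns 2-4 maps to [2, 3, 4],
# which could then be passed as a kernel's active_dims.
mapper = make_features_to_idx_mapper({"x1": [0], "x2": [1], "cat": [2, 3, 4]})
assert mapper(["x1", "cat"]) == [0, 2, 3, 4]
```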
4 changes: 2 additions & 2 deletions bofire/data_models/kernels/molecular.py
@@ -1,9 +1,9 @@
 from typing import Literal

-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import FeatureSpecificKernel


-class MolecularKernel(Kernel):
+class MolecularKernel(FeatureSpecificKernel):
     pass


70 changes: 70 additions & 0 deletions bofire/kernels/categorical.py
@@ -0,0 +1,70 @@
+from typing import Dict
+
+import torch
+from botorch.models.transforms.input import OneHotToNumeric
+from gpytorch.kernels.kernel import Kernel
+from torch import Tensor
+
+
+class HammingKernelWithOneHots(Kernel):
+    r"""
+    A kernel for one-hot encoded categorical features. The inputs
+    may contain more than one categorical feature.
+
+    This kernel mimics the functionality of CategoricalKernel from
+    botorch, but assumes categorical features encoded as one-hot variables.
+    Computes `exp(-dist(x1, x2) / lengthscale)`, where
+    `dist(x1, x2)` is zero if `x1` and `x2` correspond to the
+    same category, and one otherwise. If the last dimension
+    is not a batch dimension, then the mean is considered.
+
+    Note: This kernel is NOT differentiable w.r.t. the inputs.
+    """
+
+    has_lengthscale = True
+
+    def __init__(self, categorical_features: Dict[int, int], *args, **kwargs):
+        """
+        Initialize.
+
+        Args:
+            categorical_features: A dictionary mapping the starting index of each
+                categorical feature to its cardinality. This assumes that categoricals
+                are one-hot encoded.
+            *args, **kwargs: Passed to gpytorch.kernels.kernel.Kernel.__init__
+        """
+        super().__init__(*args, **kwargs)
+
+        onehot_dim = sum(categorical_features.values())
+        self.trx = OneHotToNumeric(
+            onehot_dim, categorical_features=categorical_features
+        )
+
+    def forward(
+        self,
+        x1: Tensor,
+        x2: Tensor,
+        diag: bool = False,
+        last_dim_is_batch: bool = False,
+        **params,
+    ) -> Tensor:
+        x1 = self.trx(x1)
+        x2 = self.trx(x2)
+
+        delta = x1.unsqueeze(-2) != x2.unsqueeze(-3)
+        if self.ard_num_dims is not None:
+            # botorch forces ard_num_dims to match the total size of the
+            # one-hot encoded features, but here we only need one lengthscale
+            # per categorical feature
+            ls = self.lengthscale[..., : delta.shape[-1]]
+        else:
+            ls = self.lengthscale
+
+        dists = delta / ls.unsqueeze(-2)
+        if last_dim_is_batch:
+            dists = dists.transpose(-3, -1)
+        else:
+            dists = dists.mean(-1)
+        res = torch.exp(-dists)
+        if diag:
+            res = torch.diagonal(res, dim1=-1, dim2=-2)
+        return res
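A quick smoke test of the kernel above; the tensors and shapes are made up, and the kernel is put in eval mode so that botorch's `OneHotToNumeric` transform (which transforms on eval by default) actually fires:

```python
import torch

from bofire.kernels.categorical import HammingKernelWithOneHots

# Two one-hot encoded categoricals: 3 levels in columns 0-2 and 2 levels
# in columns 3-4, so the one-hot dimension is 5.
kernel = HammingKernelWithOneHots(categorical_features={0: 3, 3: 2}, ard_num_dims=5)
kernel.eval()

x1 = torch.tensor([[1.0, 0.0, 0.0, 0.0, 1.0]])  # levels (0, 1)
x2 = torch.tensor([[0.0, 1.0, 0.0, 0.0, 1.0]])  # levels (1, 1)

# The points differ only in the first categorical, so the result is
# exp(-(1/ls_0 + 0/ls_1) / 2) for per-feature lengthscales (ls_0, ls_1).
cov = kernel.forward(x1, x2)
print(cov.shape)  # torch.Size([1, 1])
```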
