kernels working on a given set of features (#476)

* kernels working on a given set of features
* pre-commit
* test map singletaskgp with additive kernel
* test active_dims of mapped kernels
* add features_to_idx_mapper to outlier detection tutorial
* correctly handling categorical mol features
* validating mol features transforms
* verifying proper type
* custom hamming kernel enabling single task gp on categorical features
* removed unnecessary parameter from data model
* testing equivalence of mixed gp and single gp with custom kernel
* (temporary) running on all py versions
* (temporary) debug github actions by printing
* more printing
* Revert "testing equivalence of mixed gp and single gp with custom kernel". This reverts commit 4a2a547.
* Revert "removed unnecessary parameter from data model". This reverts commit 6ad1dfd.
* Revert "custom hamming kernel enabling single task gp on categorical features". This reverts commit 17d8350.
* Revert "Revert "custom hamming kernel enabling single task gp on categorical features"". This reverts commit 2e29852.
* Revert "Revert "testing equivalence of mixed gp and single gp with custom kernel"". This reverts commit 1cd2776.
* removed test debug and restored to latest implemented features
* pinning compatible version of formulaic
* pinning compatible version of formulaic
* removed old code
* lint
* removed scratch file
* removed old code again
* silencing pyright false positive
* compatibility with py39
* pin compatible version of formulaic
* restored old code
* pinning sklearn
* pinning sklearn
* pinning scikit everywhere
* not testing for prediction quality
* matching lengthscale constraints in hamming kernel
* removed equivalence test
* testing hamming kernel
* added test for mol features in single task gp
* categorical onehot kernel uses the right lengthscale for multiple features
* removed redundant check
* more descriptive name for base kernel
* updated docstring
* improved tests and comments

---------

Co-authored-by: Robert Lee <84771576+R-M-Lee@users.noreply.github.com>

1 parent ecfc313 · commit 7912431

Showing 26 changed files with 667 additions and 29 deletions.
@@ -1,5 +1,15 @@

from typing import List, Optional

from bofire.data_models.base import BaseModel


class Kernel(BaseModel):
    type: str


class AggregationKernel(Kernel):
    pass


class FeatureSpecificKernel(Kernel):
    features: Optional[List[str]] = None
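
The `features` field lets a kernel declare the subset of named input features it acts on; downstream, those names must be resolved into tensor column indices (GPyTorch's active_dims). Below is a minimal illustrative sketch of that resolution step. The `features_to_idx_mapper` name comes from the commit message, but `resolve_active_dims` and its signature are hypothetical, not BoFire's actual API:

    # Hypothetical sketch: resolve FeatureSpecificKernel.features to column indices.
    from typing import Callable, List, Optional


    def resolve_active_dims(
        features: Optional[List[str]],
        features_to_idx_mapper: Callable[[List[str]], List[int]],
        n_dims: int,
    ) -> List[int]:
        # If no subset is given, the kernel acts on every input column.
        if features is None:
            return list(range(n_dims))
        return features_to_idx_mapper(features)


    # Assumed mapping for illustration: "x1" -> column 0, "x2" -> column 1.
    mapper = lambda fs: [{"x1": 0, "x2": 1}[f] for f in fs]
    assert resolve_active_dims(["x1", "x2"], mapper, n_dims=4) == [0, 1]
    assert resolve_active_dims(None, mapper, n_dims=4) == [0, 1, 2, 3]

Keeping `features` optional preserves the old behavior: a kernel with `features=None` still sees every input dimension.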
@@ -0,0 +1,70 @@

from typing import Dict

import torch
from botorch.models.transforms.input import OneHotToNumeric
from gpytorch.kernels.kernel import Kernel
from torch import Tensor


class HammingKernelWithOneHots(Kernel):
    r"""
    A kernel for one-hot encoded categorical features. The inputs
    may contain more than one categorical feature.

    This kernel mimics the functionality of CategoricalKernel from
    botorch, but assumes the categorical features are one-hot encoded.
    Computes `exp(-dist(x1, x2) / lengthscale)`, where
    `dist(x1, x2)` is zero if `x1` and `x2` correspond to the
    same category, and one otherwise. If the last dimension is not
    a batch dimension, the distances are averaged across features.

    Note: This kernel is NOT differentiable w.r.t. the inputs.
    """

    has_lengthscale = True

    def __init__(self, categorical_features: Dict[int, int], *args, **kwargs):
        """
        Initialize.

        Args:
            categorical_features: A dictionary mapping the starting index of each
                categorical feature to its cardinality. This assumes that categoricals
                are one-hot encoded.
            *args, **kwargs: Passed to gpytorch.kernels.kernel.Kernel.__init__
        """
        super().__init__(*args, **kwargs)

        # Transform that collapses each one-hot block back into a single
        # integer-valued categorical column.
        onehot_dim = sum(categorical_features.values())
        self.trx = OneHotToNumeric(
            onehot_dim, categorical_features=categorical_features
        )

    def forward(
        self,
        x1: Tensor,
        x2: Tensor,
        diag: bool = False,
        last_dim_is_batch: bool = False,
        **params,
    ) -> Tensor:
        x1 = self.trx(x1)
        x2 = self.trx(x2)

        # delta[..., i, j, k] is True iff x1[i] and x2[j] disagree on feature k.
        delta = x1.unsqueeze(-2) != x2.unsqueeze(-3)
        if self.ard_num_dims is not None:
            # botorch forces ard_num_dims to match the total size of the
            # one-hot encoded features, but here we only need one
            # lengthscale per categorical feature.
            ls = self.lengthscale[..., : delta.shape[-1]]
        else:
            ls = self.lengthscale

        dists = delta / ls.unsqueeze(-2)
        if last_dim_is_batch:
            dists = dists.transpose(-3, -1)
        else:
            dists = dists.mean(-1)
        res = torch.exp(-dists)
        if diag:
            res = torch.diagonal(res, dim1=-1, dim2=-2)
        return res
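
A minimal usage sketch for the kernel above; the two-feature setup, the fixed lengthscale, and the expected value are illustrative assumptions, not part of the diff:

    import torch

    # Two one-hot encoded categoricals: 3 categories starting at column 0,
    # 2 categories starting at column 3, so 5 one-hot columns in total.
    kernel = HammingKernelWithOneHots(categorical_features={0: 3, 3: 2})
    kernel.lengthscale = 1.0  # fix the lengthscale so the result is easy to check

    # The two points agree on the first feature and disagree on the second.
    x1 = torch.tensor([[1.0, 0.0, 0.0, 1.0, 0.0]])
    x2 = torch.tensor([[1.0, 0.0, 0.0, 0.0, 1.0]])

    # Averaging the per-feature mismatches gives dist = (0 + 1) / 2 = 0.5,
    # so K(x1, x2) = exp(-0.5) ≈ 0.6065.
    with torch.no_grad():
        print(kernel(x1, x2).to_dense())  # expected: tensor([[0.6065]])

With a single shared lengthscale the two points disagree on one of two features, hence exp(-0.5); with ARD (one lengthscale per categorical, as the slicing in `forward` enables), each feature's mismatch would be weighted individually.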