diff --git a/bofire/data_models/domain/features.py b/bofire/data_models/domain/features.py index d4b21c4b8..ea2975846 100644 --- a/bofire/data_models/domain/features.py +++ b/bofire/data_models/domain/features.py @@ -622,6 +622,7 @@ def _validate_transform_specs( raise ValueError( f"Forbidden transform type for feature with key {key}", ) + return specs def get_bounds( diff --git a/bofire/data_models/kernels/aggregation.py b/bofire/data_models/kernels/aggregation.py index 58afe9a8d..bcc92525d 100644 --- a/bofire/data_models/kernels/aggregation.py +++ b/bofire/data_models/kernels/aggregation.py @@ -3,13 +3,13 @@ from bofire.data_models.kernels.categorical import HammingDistanceKernel from bofire.data_models.kernels.continuous import LinearKernel, MaternKernel, RBFKernel -from bofire.data_models.kernels.kernel import Kernel +from bofire.data_models.kernels.kernel import AggregationKernel from bofire.data_models.kernels.molecular import TanimotoKernel from bofire.data_models.kernels.shape import WassersteinKernel from bofire.data_models.priors.api import AnyGeneralPrior -class AdditiveKernel(Kernel): +class AdditiveKernel(AggregationKernel): type: Literal["AdditiveKernel"] = "AdditiveKernel" kernels: Sequence[ Union[ @@ -26,7 +26,7 @@ class AdditiveKernel(Kernel): type: Literal["AdditiveKernel"] = "AdditiveKernel" -class MultiplicativeKernel(Kernel): +class MultiplicativeKernel(AggregationKernel): type: Literal["MultiplicativeKernel"] = "MultiplicativeKernel" kernels: Sequence[ Union[ @@ -42,7 +42,7 @@ class MultiplicativeKernel(Kernel): ] -class ScaleKernel(Kernel): +class ScaleKernel(AggregationKernel): type: Literal["ScaleKernel"] = "ScaleKernel" base_kernel: Union[ RBFKernel, diff --git a/bofire/data_models/kernels/api.py b/bofire/data_models/kernels/api.py index 609f76f50..0fffbaa5f 100644 --- a/bofire/data_models/kernels/api.py +++ b/bofire/data_models/kernels/api.py @@ -17,12 +17,23 @@ PolynomialKernel, RBFKernel, ) -from bofire.data_models.kernels.kernel import Kernel +from bofire.data_models.kernels.kernel import ( + AggregationKernel, + FeatureSpecificKernel, + Kernel, +) from bofire.data_models.kernels.molecular import MolecularKernel, TanimotoKernel from bofire.data_models.kernels.shape import WassersteinKernel -AbstractKernel = Union[Kernel, CategoricalKernel, ContinuousKernel, MolecularKernel] +AbstractKernel = Union[ + Kernel, + CategoricalKernel, + ContinuousKernel, + MolecularKernel, + FeatureSpecificKernel, + AggregationKernel, +] AnyContinuousKernel = Union[ MaternKernel, diff --git a/bofire/data_models/kernels/categorical.py b/bofire/data_models/kernels/categorical.py index 1bd39313d..3081c7148 100644 --- a/bofire/data_models/kernels/categorical.py +++ b/bofire/data_models/kernels/categorical.py @@ -1,9 +1,9 @@ from typing import Literal -from bofire.data_models.kernels.kernel import Kernel +from bofire.data_models.kernels.kernel import FeatureSpecificKernel -class CategoricalKernel(Kernel): +class CategoricalKernel(FeatureSpecificKernel): pass diff --git a/bofire/data_models/kernels/continuous.py b/bofire/data_models/kernels/continuous.py index f081a239b..bf2983159 100644 --- a/bofire/data_models/kernels/continuous.py +++ b/bofire/data_models/kernels/continuous.py @@ -1,12 +1,12 @@ -from typing import Literal, Optional +from typing import List, Literal, Optional from pydantic import PositiveInt, field_validator -from bofire.data_models.kernels.kernel import Kernel +from bofire.data_models.kernels.kernel import FeatureSpecificKernel from bofire.data_models.priors.api import AnyGeneralPrior, AnyPrior -class ContinuousKernel(Kernel): +class ContinuousKernel(FeatureSpecificKernel): pass @@ -40,6 +40,7 @@ class PolynomialKernel(ContinuousKernel): power: int = 2 -class InfiniteWidthBNNKernel(Kernel): +class InfiniteWidthBNNKernel(ContinuousKernel): + features: Optional[List[str]] = None type: Literal["InfiniteWidthBNNKernel"] = "InfiniteWidthBNNKernel" depth: PositiveInt = 3 diff --git a/bofire/data_models/kernels/kernel.py b/bofire/data_models/kernels/kernel.py index 18918e562..5673baf18 100644 --- a/bofire/data_models/kernels/kernel.py +++ b/bofire/data_models/kernels/kernel.py @@ -1,5 +1,15 @@ +from typing import List, Optional + from bofire.data_models.base import BaseModel class Kernel(BaseModel): type: str + + +class AggregationKernel(Kernel): + pass + + +class FeatureSpecificKernel(Kernel): + features: Optional[List[str]] = None diff --git a/bofire/data_models/kernels/molecular.py b/bofire/data_models/kernels/molecular.py index 522986f2b..b4ead51d4 100644 --- a/bofire/data_models/kernels/molecular.py +++ b/bofire/data_models/kernels/molecular.py @@ -1,9 +1,9 @@ from typing import Literal -from bofire.data_models.kernels.kernel import Kernel +from bofire.data_models.kernels.kernel import FeatureSpecificKernel -class MolecularKernel(Kernel): +class MolecularKernel(FeatureSpecificKernel): pass diff --git a/bofire/kernels/categorical.py b/bofire/kernels/categorical.py new file mode 100644 index 000000000..95564d864 --- /dev/null +++ b/bofire/kernels/categorical.py @@ -0,0 +1,70 @@ +from typing import Dict + +import torch +from botorch.models.transforms.input import OneHotToNumeric +from gpytorch.kernels.kernel import Kernel +from torch import Tensor + + +class HammingKernelWithOneHots(Kernel): + r""" + A Kernel for one-hot enocded categorical features. The inputs + may contain more than one categorical feature. + + This kernel mimics the functionality of CategoricalKernel from + botorch, but assumes categorical features encoded as one-hot variables. + Computes `exp(-dist(x1, x2) / lengthscale)`, where + `dist(x1, x2)` is zero if `x1` and `x2` correspond to the + same category, and one otherwise. If the last dimension + is not a batch dimension, then the mean is considered. + + Note: This kernel is NOT differentiable w.r.t. the inputs. + """ + + has_lengthscale = True + + def __init__(self, categorical_features: Dict[int, int], *args, **kwargs): + """ + Initialize. + + Args: + categorical_features: A dictionary mapping the starting index of each + categorical feature to its cardinality. This assumes that categoricals + are one-hot encoded. + *args, **kwargs: Passed to gpytorch.kernels.kernel.Kernel.__init__ + """ + super().__init__(*args, **kwargs) + + onehot_dim = sum(categorical_features.values()) + self.trx = OneHotToNumeric( + onehot_dim, categorical_features=categorical_features + ) + + def forward( + self, + x1: Tensor, + x2: Tensor, + diag: bool = False, + last_dim_is_batch: bool = False, + **params, + ) -> Tensor: + x1 = self.trx(x1) + x2 = self.trx(x2) + + delta = x1.unsqueeze(-2) != x2.unsqueeze(-3) + if self.ard_num_dims is not None: + # botorch forces ard_num_dims to be the same as the total size of the of one-hot encoded features + # however here we just need one length scale per categorical feature + ls = self.lengthscale[..., : delta.shape[-1]] + else: + ls = self.lengthscale + + dists = delta / ls.unsqueeze(-2) + if last_dim_is_batch: + dists = dists.transpose(-3, -1) + else: + dists = dists.mean(-1) + res = torch.exp(-dists) + if diag: + res = torch.diagonal(res, dim1=-1, dim2=-2) + return res diff --git a/bofire/kernels/mapper.py b/bofire/kernels/mapper.py index fe1e62622..3cedc0812 100644 --- a/bofire/kernels/mapper.py +++ b/bofire/kernels/mapper.py @@ -1,22 +1,40 @@ -from typing import List +from typing import Callable, List, Optional import gpytorch import torch from botorch.models.kernels.categorical import CategoricalKernel +from gpytorch.constraints import GreaterThan from gpytorch.kernels import Kernel as GpytorchKernel import bofire.data_models.kernels.api as data_models import bofire.priors.api as priors +from bofire.kernels.categorical import HammingKernelWithOneHots from bofire.kernels.fingerprint_kernels.tanimoto_kernel import TanimotoKernel from bofire.kernels.shape import WassersteinKernel +def _compute_active_dims( + data_model: data_models.FeatureSpecificKernel, + active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], +) -> List[int]: + if data_model.features: + if features_to_idx_mapper is None: + raise RuntimeError( + "features_to_idx_mapper must be defined when using only a subset of features" + ) + active_dims = features_to_idx_mapper(data_model.features) + return active_dims + + def map_RBFKernel( data_model: data_models.RBFKernel, batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> gpytorch.kernels.RBFKernel: + active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper) return gpytorch.kernels.RBFKernel( batch_shape=batch_shape, ard_num_dims=len(active_dims) if data_model.ard else None, @@ -34,7 +52,9 @@ def map_MaternKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> gpytorch.kernels.MaternKernel: + active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper) return gpytorch.kernels.MaternKernel( batch_shape=batch_shape, ard_num_dims=len(active_dims) if data_model.ard else None, @@ -53,6 +73,7 @@ def map_InfiniteWidthBNNKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> "InfiniteWidthBNNKernel": # type: ignore # noqa: F821 try: from botorch.models.kernels.infinite_width_bnn import ( # type: ignore @@ -66,6 +87,7 @@ def map_InfiniteWidthBNNKernel( "requires python 3.10+.", ) + active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper) return InfiniteWidthBNNKernel( batch_shape=batch_shape, active_dims=tuple(active_dims), @@ -78,7 +100,9 @@ def map_LinearKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> gpytorch.kernels.LinearKernel: + active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper) return gpytorch.kernels.LinearKernel( batch_shape=batch_shape, active_dims=active_dims, @@ -95,7 +119,9 @@ def map_PolynomialKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> gpytorch.kernels.PolynomialKernel: + active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper) return gpytorch.kernels.PolynomialKernel( batch_shape=batch_shape, active_dims=active_dims, @@ -113,6 +139,7 @@ def map_AdditiveKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> gpytorch.kernels.AdditiveKernel: return gpytorch.kernels.AdditiveKernel( *[ # type: ignore @@ -121,6 +148,7 @@ def map_AdditiveKernel( batch_shape=batch_shape, ard_num_dims=ard_num_dims, active_dims=active_dims, + features_to_idx_mapper=features_to_idx_mapper, ) for k in data_model.kernels ], @@ -132,6 +160,7 @@ def map_MultiplicativeKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> gpytorch.kernels.ProductKernel: return gpytorch.kernels.ProductKernel( *[ # type: ignore @@ -140,6 +169,7 @@ def map_MultiplicativeKernel( batch_shape=batch_shape, ard_num_dims=ard_num_dims, active_dims=active_dims, + features_to_idx_mapper=features_to_idx_mapper, ) for k in data_model.kernels ], @@ -151,6 +181,7 @@ def map_ScaleKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> gpytorch.kernels.ScaleKernel: return gpytorch.kernels.ScaleKernel( base_kernel=map( @@ -158,6 +189,7 @@ def map_ScaleKernel( batch_shape=batch_shape, ard_num_dims=ard_num_dims, active_dims=active_dims, + features_to_idx_mapper=features_to_idx_mapper, ), outputscale_prior=( priors.map(data_model.outputscale_prior) @@ -172,7 +204,9 @@ def map_TanimotoKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> TanimotoKernel: + active_dims = _compute_active_dims(data_model, active_dims, features_to_idx_mapper) return TanimotoKernel( batch_shape=batch_shape, ard_num_dims=len(active_dims) if data_model.ard else None, @@ -185,12 +219,49 @@ def map_HammingDistanceKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], -) -> CategoricalKernel: - return CategoricalKernel( - batch_shape=batch_shape, - ard_num_dims=len(active_dims) if data_model.ard else None, - active_dims=active_dims, # type: ignore - ) + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], +) -> GpytorchKernel: + if data_model.features is not None: + if features_to_idx_mapper is None: + raise RuntimeError( + "features_to_idx_mapper must be defined when using only a subset of features" + ) + + active_dims = [] + categorical_features = {} + for k in data_model.features: + idx = features_to_idx_mapper([k]) + categorical_features[len(active_dims)] = len(idx) + + already_used = [i for i in idx if i in active_dims] + if already_used: + raise RuntimeError( + f"indices {already_used} are used in more than one categorical feature" + ) + + active_dims.extend(idx) + + if len(idx) == 1: + raise RuntimeError( + f"feature {k} is supposed to be one-hot encoded but is mapped to a single dimension" + ) + + return HammingKernelWithOneHots( + categorical_features=categorical_features, + # botorch will check that the lengthscale for ARD has the same number of elements as the one-hotted inputs, + # so we have to specify the ard_num_dims accordingly. The kernel will make sure to only use one length scale + # for each categorical feature. + ard_num_dims=len(active_dims) if data_model.ard else None, + batch_shape=batch_shape, + active_dims=active_dims, # type: ignore + lengthscale_constraint=GreaterThan(1e-06), + ) + else: + return CategoricalKernel( + batch_shape=batch_shape, + ard_num_dims=len(active_dims) if data_model.ard else None, + active_dims=active_dims, # type: ignore + ) def map_WassersteinKernel( @@ -198,6 +269,7 @@ def map_WassersteinKernel( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> WassersteinKernel: return WassersteinKernel( squared=data_model.squared, @@ -230,10 +302,12 @@ def map( batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int], + features_to_idx_mapper: Optional[Callable[[List[str]], List[int]]], ) -> GpytorchKernel: return KERNEL_MAP[data_model.__class__]( data_model, batch_shape, ard_num_dims, active_dims, + features_to_idx_mapper, ) diff --git a/bofire/strategies/predictives/botorch.py b/bofire/strategies/predictives/botorch.py index 71c48c67e..63d56bdb4 100644 --- a/bofire/strategies/predictives/botorch.py +++ b/bofire/strategies/predictives/botorch.py @@ -27,9 +27,11 @@ from bofire.data_models.features.api import ( CategoricalDescriptorInput, CategoricalInput, + CategoricalMolecularInput, DiscreteInput, Input, ) +from bofire.data_models.molfeatures.api import MolFeatures from bofire.data_models.strategies.api import BotorchStrategy as DataModel from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel from bofire.data_models.strategies.api import ( @@ -652,6 +654,17 @@ def get_categorical_combinations(self) -> List[Dict[int, float]]: for j, idx in enumerate(features2idx[feat]): fixed_features[idx] = feature.values[index][j] + elif isinstance(feature, CategoricalMolecularInput): + preproc = self.input_preprocessing_specs[feat] + if not isinstance(preproc, MolFeatures): + raise ValueError( + f"preprocessing for {feat} must be of type AnyMolFeatures" + ) + transformed = feature.to_descriptor_encoding( + preproc, pd.Series([val]) + ) + for j, idx in enumerate(features2idx[feat]): + fixed_features[idx] = transformed.values[0, j] elif isinstance(feature, CategoricalInput): # it has to be onehot in this case transformed = feature.to_onehot_encoding(pd.Series([val])) diff --git a/bofire/surrogates/mixed_single_task_gp.py b/bofire/surrogates/mixed_single_task_gp.py index 45e772637..79079f74f 100644 --- a/bofire/surrogates/mixed_single_task_gp.py +++ b/bofire/surrogates/mixed_single_task_gp.py @@ -92,7 +92,13 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): train_Y=tY, cat_dims=cat_dims, # cont_kernel_factory=self.continuous_kernel.to_gpytorch, - cont_kernel_factory=partial(kernels.map, data_model=self.continuous_kernel), + cont_kernel_factory=partial( + kernels.map, + data_model=self.continuous_kernel, + features_to_idx_mapper=lambda feats: self.inputs.get_feature_indices( + self.input_preprocessing_specs, feats + ), + ), outcome_transform=( Standardize(m=tY.shape[-1]) if self.output_scaler == ScalerEnum.STANDARDIZE diff --git a/bofire/surrogates/mixed_tanimoto_gp.py b/bofire/surrogates/mixed_tanimoto_gp.py index 8abe5c2e9..45dc4b519 100644 --- a/bofire/surrogates/mixed_tanimoto_gp.py +++ b/bofire/surrogates/mixed_tanimoto_gp.py @@ -317,9 +317,21 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): # type: ignore train_Y=tY, cat_dims=cat_dims, mol_dims=mol_dims, - cont_kernel_factory=partial(kernels.map, data_model=self.continuous_kernel), + cont_kernel_factory=partial( + kernels.map, + data_model=self.continuous_kernel, + features_to_idx_mapper=lambda feats: self.inputs.get_feature_indices( + self.input_preprocessing_specs, feats + ), + ), # cat_kernel_factory=partial(kernels.map, data_model=self.categorical_kernel), BoTorch forced to use CategoricalKernel - mol_kernel_factory=partial(kernels.map, data_model=self.molecular_kernel), + mol_kernel_factory=partial( + kernels.map, + data_model=self.molecular_kernel, + features_to_idx_mapper=lambda feats: self.inputs.get_feature_indices( + self.input_preprocessing_specs, feats + ), + ), outcome_transform=Standardize(m=tY.shape[-1]), input_transform=tf, ) diff --git a/bofire/surrogates/multi_task_gp.py b/bofire/surrogates/multi_task_gp.py index 26bc3dd0a..25f9e4e10 100644 --- a/bofire/surrogates/multi_task_gp.py +++ b/bofire/surrogates/multi_task_gp.py @@ -70,6 +70,9 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): # type: ignore range(tX.shape[1] - 1), ), # kernel is for input space so we subtract one for the fidelity index ard_num_dims=1, # this keyword is ignored + features_to_idx_mapper=lambda feats: self.inputs.get_feature_indices( + self.input_preprocessing_specs, feats + ), ), outcome_transform=( Standardize(m=tY.shape[-1]) diff --git a/bofire/surrogates/shape.py b/bofire/surrogates/shape.py index b708a85a0..d86c25d46 100644 --- a/bofire/surrogates/shape.py +++ b/bofire/surrogates/shape.py @@ -96,12 +96,18 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): active_dims=self.idx_continuous, ard_num_dims=1, batch_shape=torch.Size(), + features_to_idx_mapper=lambda feats: self.inputs.get_feature_indices( + self.input_preprocessing_specs, feats + ), ) * kernels.map( self.shape_kernel, active_dims=self.idx_shape, ard_num_dims=1, batch_shape=torch.Size(), + features_to_idx_mapper=lambda feats: self.inputs.get_feature_indices( + self.input_preprocessing_specs, feats + ), ), outputscale_prior=priors.map(self.outputscale_prior), ) @@ -112,6 +118,9 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): active_dims=self.idx_shape, ard_num_dims=1, batch_shape=torch.Size(), + features_to_idx_mapper=lambda feats: self.inputs.get_feature_indices( + self.input_preprocessing_specs, feats + ), ), outputscale_prior=priors.map(self.outputscale_prior), ) diff --git a/bofire/surrogates/single_task_gp.py b/bofire/surrogates/single_task_gp.py index 8764ebbdd..ee710586a 100644 --- a/bofire/surrogates/single_task_gp.py +++ b/bofire/surrogates/single_task_gp.py @@ -53,6 +53,9 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame): batch_shape=torch.Size(), active_dims=list(range(tX.shape[1])), ard_num_dims=1, # this keyword is ignored + features_to_idx_mapper=lambda feats: self.inputs.get_feature_indices( + self.input_preprocessing_specs, feats + ), ), outcome_transform=( Standardize(m=tY.shape[-1]) diff --git a/tests/bofire/data_models/specs/kernels.py b/tests/bofire/data_models/specs/kernels.py index 2056673af..c75e4ed21 100644 --- a/tests/bofire/data_models/specs/kernels.py +++ b/tests/bofire/data_models/specs/kernels.py @@ -10,6 +10,14 @@ kernels.HammingDistanceKernel, lambda: { "ard": True, + "features": None, + }, +) +specs.add_valid( + kernels.HammingDistanceKernel, + lambda: { + "ard": True, + "features": ["x_cat_1", "x_cat_2"], }, ) specs.add_valid( @@ -21,13 +29,17 @@ ) specs.add_valid( kernels.LinearKernel, - lambda: {"variance_prior": priors.valid(GammaPrior).obj().model_dump()}, + lambda: { + "variance_prior": priors.valid(GammaPrior).obj().model_dump(), + "features": None, + }, ) specs.add_valid( kernels.MaternKernel, lambda: { "ard": True, "nu": 2.5, + "features": None, "lengthscale_prior": priors.valid().obj().model_dump(), }, ) @@ -37,6 +49,7 @@ "ard": True, "nu": 5, "lengthscale_prior": priors.valid().obj(), + "features": None, }, error=ValueError, message="nu expected to be 0.5, 1.5, or 2.5", @@ -45,6 +58,7 @@ kernels.InfiniteWidthBNNKernel, lambda: { "depth": 3, + "features": None, }, ) @@ -53,6 +67,7 @@ lambda: { "ard": True, "lengthscale_prior": priors.valid().obj().model_dump(), + "features": None, }, ) specs.add_valid( @@ -84,5 +99,6 @@ kernels.TanimotoKernel, lambda: { "ard": True, + "features": None, }, ) diff --git a/tests/bofire/kernels/test_categorical.py b/tests/bofire/kernels/test_categorical.py new file mode 100644 index 000000000..20a81d9fe --- /dev/null +++ b/tests/bofire/kernels/test_categorical.py @@ -0,0 +1,80 @@ +import torch +from botorch.models.kernels.categorical import CategoricalKernel +from botorch.models.transforms.input import OneHotToNumeric + +from bofire.kernels.categorical import HammingKernelWithOneHots + + +def test_hamming_with_one_hot_one_feature(): + cat = {0: 3} + + k1 = CategoricalKernel() + k2 = HammingKernelWithOneHots(categorical_features=cat) + + xin_oh = torch.eye(3) + xin_cat = OneHotToNumeric(3, categorical_features=cat).transform(xin_oh) + + z1 = k1(xin_cat).to_dense() + z2 = k2(xin_oh).to_dense() + + assert z1.shape == z2.shape == (3, 3) + assert torch.allclose(z1, z2) + + +def test_hamming_with_one_hot_two_features(): + cat = {0: 2, 2: 4} + + k1 = CategoricalKernel() + k2 = HammingKernelWithOneHots(categorical_features=cat) + + xin_oh = torch.zeros(4, 6) + xin_oh[:2, :2] = xin_oh[2:, :2] = torch.eye(2) + xin_oh[:, 2:] = torch.eye(4) + + xin_cat = OneHotToNumeric(6, categorical_features=cat).transform(xin_oh) + + z1 = k1(xin_cat).to_dense() + z2 = k2(xin_oh).to_dense() + + assert z1.shape == z2.shape == (4, 4) + assert torch.allclose(z1, z2) + + +def test_hamming_with_one_hot_two_features_and_lengthscales(): + cat = {0: 2, 2: 4} + + k1 = CategoricalKernel(ard_num_dims=2) + k1.lengthscale = torch.tensor([1.5, 3.0]) + + # botorch will check that the lengthscale for ARD has the same number of elements as the one-hotted inputs, + # so we have to specify the ard_num_dims accordingly. The kernel will make sure to only use the right + # number of elements, corresponding to the number of categorical features. + k2 = HammingKernelWithOneHots(categorical_features=cat, ard_num_dims=6) + k2.lengthscale = torch.tensor([1.5, 3.0, 0.0, 0.0, 0.0, 0.0]) + + xin_oh = torch.zeros(4, 6) + xin_oh[:2, :2] = xin_oh[2:, :2] = torch.eye(2) + xin_oh[:, 2:] = torch.eye(4) + + xin_cat = OneHotToNumeric(6, categorical_features=cat).transform(xin_oh) + + z1 = k1(xin_cat).to_dense() + z2 = k2(xin_oh).to_dense() + + assert z1.shape == z2.shape == (4, 4) + assert torch.allclose(z1, z2) + + +def test_feature_order(): + x1_in = torch.zeros(4, 2) + x1_in[:2, :] = x1_in[2:, :] = torch.eye(2) + x2_in = torch.eye(4) + + k1 = HammingKernelWithOneHots(categorical_features={0: 2, 2: 4}) + k2 = HammingKernelWithOneHots(categorical_features={0: 4, 4: 2}) + + z1 = k1(torch.cat([x1_in, x2_in], dim=1)).to_dense() + z2 = k2(torch.cat([x2_in, x1_in], dim=1)).to_dense() + + assert z1.shape == z2.shape == (4, 4) + assert torch.allclose(z1, z2) diff --git a/tests/bofire/kernels/test_mapper.py b/tests/bofire/kernels/test_mapper.py index 79cbd9bfd..d99604554 100644 --- a/tests/bofire/kernels/test_mapper.py +++ b/tests/bofire/kernels/test_mapper.py @@ -9,6 +9,7 @@ import bofire.kernels.shape as shapeKernels from bofire.data_models.kernels.api import ( AdditiveKernel, + FeatureSpecificKernel, HammingDistanceKernel, InfiniteWidthBNNKernel, LinearKernel, @@ -21,6 +22,8 @@ WassersteinKernel, ) from bofire.data_models.priors.api import THREESIX_SCALE_PRIOR, GammaPrior +from bofire.kernels.categorical import HammingKernelWithOneHots +from bofire.kernels.mapper import _compute_active_dims from tests.bofire.data_models.specs.api import Spec @@ -54,6 +57,7 @@ def test_map(kernel_spec: Spec): batch_shape=torch.Size(), ard_num_dims=10, active_dims=list(range(5)), + features_to_idx_mapper=None, ) assert isinstance(gkernel, EQUIVALENTS[kernel.__class__]) @@ -66,6 +70,7 @@ def test_map_infinite_width_bnn_kernel(): batch_shape=torch.Size(), active_dims=list(range(5)), ard_num_dims=10, + features_to_idx_mapper=None, ) assert isinstance(gkernel, BNNKernel) @@ -79,6 +84,7 @@ def test_map_scale_kernel(): batch_shape=torch.Size(), ard_num_dims=10, active_dims=list(range(5)), + features_to_idx_mapper=None, ) assert hasattr(k, "outputscale_prior") assert isinstance(k.outputscale_prior, gpytorch.priors.GammaPrior) @@ -88,6 +94,7 @@ def test_map_scale_kernel(): batch_shape=torch.Size(), ard_num_dims=10, active_dims=list(range(5)), + features_to_idx_mapper=None, ) assert hasattr(k, "outputscale_prior") is False @@ -99,6 +106,7 @@ def test_map_polynomial_kernel(): batch_shape=torch.Size(), ard_num_dims=10, active_dims=list(range(5)), + features_to_idx_mapper=None, ) assert hasattr(k, "offset_prior") assert isinstance(k.offset_prior, gpytorch.priors.GammaPrior) @@ -108,6 +116,7 @@ def test_map_polynomial_kernel(): batch_shape=torch.Size(), ard_num_dims=10, active_dims=list(range(5)), + features_to_idx_mapper=None, ) assert hasattr(k, "offset_prior") is False @@ -163,6 +172,7 @@ def test_map_continuous_kernel(kernel, ard_num_dims, active_dims, expected_kerne batch_shape=torch.Size(), ard_num_dims=ard_num_dims, active_dims=active_dims, + features_to_idx_mapper=None, ) assert isinstance(k, expected_kernel) if isinstance(kernel, LinearKernel): @@ -206,6 +216,7 @@ def test_map_molecular_kernel(kernel, ard_num_dims, active_dims, expected_kernel batch_shape=torch.Size(), ard_num_dims=ard_num_dims, active_dims=active_dims, + features_to_idx_mapper=None, ) assert isinstance(k, expected_kernel) @@ -226,6 +237,7 @@ def test_map_wasserstein_kernel(): batch_shape=torch.Size(), ard_num_dims=10, active_dims=list(range(5)), + features_to_idx_mapper=None, ) assert isinstance(k, shapeKernels.WassersteinKernel) assert hasattr(k, "lengthscale_prior") @@ -237,6 +249,205 @@ def test_map_wasserstein_kernel(): batch_shape=torch.Size(), ard_num_dims=10, active_dims=list(range(5)), + features_to_idx_mapper=None, ) assert k.squared is True assert hasattr(k, "lengthscale_prior") is False + + +def test_map_HammingDistanceKernel_to_onehot_with_ard(): + fmap = { + "x_cat_1": [5, 6, 7, 8], + "x_cat_2": [2, 3], + } + + k_mapped = kernels.map( + HammingDistanceKernel( + ard=True, + features=["x_cat_1", "x_cat_2"], + ), + batch_shape=torch.Size(), + ard_num_dims=10, + active_dims=list(range(5)), + features_to_idx_mapper=lambda ks: [i for k in ks for i in fmap[k]], + ) + + assert isinstance(k_mapped, HammingKernelWithOneHots) + assert k_mapped.active_dims.tolist() == [5, 6, 7, 8, 2, 3] + assert k_mapped.ard_num_dims == 6 + assert k_mapped.lengthscale.shape == (1, 6) + assert k_mapped.trx.categorical_features == {0: 4, 4: 2} + + +def test_map_HammingDistanceKernel_to_onehot_without_ard(): + fmap = { + "x_cat_1": [5, 6, 7, 8], + "x_cat_2": [2, 3], + } + + k_mapped = kernels.map( + HammingDistanceKernel( + ard=False, + features=["x_cat_1", "x_cat_2"], + ), + batch_shape=torch.Size(), + ard_num_dims=10, + active_dims=list(range(5)), + features_to_idx_mapper=lambda ks: [i for k in ks for i in fmap[k]], + ) + + assert isinstance(k_mapped, HammingKernelWithOneHots) + assert k_mapped.active_dims.tolist() == [5, 6, 7, 8, 2, 3] + assert k_mapped.ard_num_dims is None + assert k_mapped.lengthscale.shape == (1, 1) + assert k_mapped.trx.categorical_features == {0: 4, 4: 2} + + +def test_map_HammingDistanceKernel_to_categorical_without_ard(): + k_mapped = kernels.map( + HammingDistanceKernel( + ard=False, + ), + batch_shape=torch.Size(), + ard_num_dims=10, + active_dims=list(range(5)), + features_to_idx_mapper=None, + ) + + assert isinstance(k_mapped, CategoricalKernel) + assert k_mapped.active_dims.tolist() == [0, 1, 2, 3, 4] + assert k_mapped.ard_num_dims is None + assert k_mapped.lengthscale.shape == (1, 1) + + +def test_map_HammingDistanceKernel_to_categorical_with_ard(): + k_mapped = kernels.map( + HammingDistanceKernel( + ard=True, + ), + batch_shape=torch.Size(), + ard_num_dims=10, + active_dims=list(range(5)), + features_to_idx_mapper=None, + ) + + assert isinstance(k_mapped, CategoricalKernel) + assert k_mapped.active_dims.tolist() == [0, 1, 2, 3, 4] + assert k_mapped.ard_num_dims == 5 + assert k_mapped.lengthscale.shape == (1, 5) + + +def test_map_HammingDistanceKernel_to_onehot_checks_dimension_overlap(): + fmap = { + "x_cat_1": [3, 4], + "x_cat_2": [2, 3], + } + + with pytest.raises( + RuntimeError, + match=r"indices \[3\] are used in more than one categorical feature", + ): + kernels.map( + HammingDistanceKernel( + ard=True, + features=["x_cat_1", "x_cat_2"], + ), + batch_shape=torch.Size(), + ard_num_dims=10, + active_dims=list(range(5)), + features_to_idx_mapper=lambda ks: [i for k in ks for i in fmap[k]], + ) + + +def test_map_HammingDistanceKernel_to_onehot_checks_onehot_encoding(): + fmap = { + "x_cat_1": [4], + "x_cat_2": [2, 3], + } + + with pytest.raises( + RuntimeError, + match="feature x_cat_1 is supposed to be one-hot encoded but is mapped to a single dimension", + ): + kernels.map( + HammingDistanceKernel( + ard=True, + features=["x_cat_1", "x_cat_2"], + ), + batch_shape=torch.Size(), + ard_num_dims=10, + active_dims=list(range(5)), + features_to_idx_mapper=lambda ks: [i for k in ks for i in fmap[k]], + ) + + +def test_map_multiple_kernels_on_feature_subsets(): + fmap = { + "x_1": [0], + "x_2": [1], + "x_cat_1": [2, 3], + "x_cat_2": [4, 5], + } + + k_mapped = kernels.map( + AdditiveKernel( + kernels=[ + HammingDistanceKernel( + ard=True, + features=["x_cat_1", "x_cat_2"], + ), + RBFKernel( + features=["x_1", "x_2"], + ), + ] + ), + batch_shape=torch.Size(), + ard_num_dims=10, + active_dims=list(range(5)), + features_to_idx_mapper=lambda ks: [i for k in ks for i in fmap[k]], + ) + + assert len(k_mapped.kernels) == 2 + + assert isinstance(k_mapped.kernels[0], HammingKernelWithOneHots) + assert k_mapped.kernels[0].active_dims.tolist() == [2, 3, 4, 5] + assert k_mapped.kernels[0].ard_num_dims == 4 + + from gpytorch.kernels import RBFKernel as GpytorchRBFKernel + + assert isinstance(k_mapped.kernels[1], GpytorchRBFKernel) + assert k_mapped.kernels[1].active_dims.tolist() == [0, 1] + assert k_mapped.kernels[1].ard_num_dims == 2 + + +def test_compute_active_dims_no_features_returns_active_dims(): + assert _compute_active_dims( + data_model=FeatureSpecificKernel( + type="test", + features=None, + ), + active_dims=[1, 2, 3], + features_to_idx_mapper=None, + ) == [1, 2, 3] + + +def test_compute_active_dims_features_override_active_dims(): + assert _compute_active_dims( + data_model=FeatureSpecificKernel(type="test", features=["x1", "x2"]), + active_dims=[1, 2, 3], + features_to_idx_mapper=lambda ks: [ + i for k in ks for i in {"x1": [4], "x2": [7]}[k] + ], + ) == [4, 7] + + +def test_compute_active_dims_fails_with_features_without_mapper(): + with pytest.raises( + RuntimeError, + match="features_to_idx_mapper must be defined when using only a subset of features", + ): + _compute_active_dims( + data_model=FeatureSpecificKernel(type="test", features=["x1", "x2"]), + active_dims=[1, 2, 3], + features_to_idx_mapper=None, + ) diff --git a/tests/bofire/strategies/doe/test_design.py b/tests/bofire/strategies/doe/test_design.py index ab956e809..2bd72422a 100644 --- a/tests/bofire/strategies/doe/test_design.py +++ b/tests/bofire/strategies/doe/test_design.py @@ -664,6 +664,7 @@ def test_fixed_experiments_checker(): def test_partially_fixed_experiments(): + pytest.importorskip("docutils") domain = Domain( inputs=[ ContinuousInput(key="x1", bounds=(0, 5)), diff --git a/tests/bofire/strategies/doe/test_objective.py b/tests/bofire/strategies/doe/test_objective.py index 96bd66723..5ab62731e 100644 --- a/tests/bofire/strategies/doe/test_objective.py +++ b/tests/bofire/strategies/doe/test_objective.py @@ -43,7 +43,6 @@ def test_Objective_model_jacobian_t(): B[:, 1:4] = np.eye(3) B[:, 4] = np.array([0, 0, 6]) B[:, 5] = np.array([2, 1, 0]) - assert np.allclose(B, model_jacobian_t(x)) # fully quadratic model diff --git a/tests/bofire/strategies/test_doe.py b/tests/bofire/strategies/test_doe.py index a99c5853b..ea50b27cd 100644 --- a/tests/bofire/strategies/test_doe.py +++ b/tests/bofire/strategies/test_doe.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd +import pytest import bofire.data_models.strategies.api as data_models from bofire.data_models.constraints.api import ( @@ -24,6 +25,9 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) warnings.filterwarnings("ignore", category=UserWarning, append=True) + +pytest.importorskip("cyipopt") + with warnings.catch_warnings(): warnings.simplefilter("ignore") diff --git a/tests/bofire/strategies/test_from_data_model.py b/tests/bofire/strategies/test_from_data_model.py index ff1873490..9b163a613 100644 --- a/tests/bofire/strategies/test_from_data_model.py +++ b/tests/bofire/strategies/test_from_data_model.py @@ -1,7 +1,10 @@ +import pytest + from bofire.strategies import api as strategies def test_strategy_can_be_loaded_from_data_model(strategy_spec): + pytest.importorskip("entmoot") data_model = strategy_spec.obj() strategy = strategies.map(data_model=data_model) assert strategy is not None diff --git a/tests/bofire/strategies/test_space_filling.py b/tests/bofire/strategies/test_space_filling.py index 46e3676de..e44bd0288 100644 --- a/tests/bofire/strategies/test_space_filling.py +++ b/tests/bofire/strategies/test_space_filling.py @@ -14,6 +14,8 @@ from bofire.data_models.features.api import ContinuousInput +pytest.importorskip("cyipopt") + inputs = [ContinuousInput(key=f"if{i}", bounds=(0, 1)) for i in range(1, 4)] c1 = LinearInequalityConstraint( features=["if1", "if2", "if3"], diff --git a/tests/bofire/surrogates/test_gps.py b/tests/bofire/surrogates/test_gps.py index f59e49f1a..443f51c51 100644 --- a/tests/bofire/surrogates/test_gps.py +++ b/tests/bofire/surrogates/test_gps.py @@ -15,7 +15,7 @@ from pydantic import ValidationError import bofire.surrogates.api as surrogates -from bofire.benchmarks.api import Himmelblau +from bofire.benchmarks.api import Hartmann, Himmelblau from bofire.data_models.domain.api import Inputs, Outputs from bofire.data_models.enum import CategoricalEncodingEnum, RegressionMetricsEnum from bofire.data_models.features.api import ( @@ -25,10 +25,12 @@ MolecularInput, ) from bofire.data_models.kernels.api import ( + AdditiveKernel, HammingDistanceKernel, MaternKernel, RBFKernel, ScaleKernel, + TanimotoKernel, ) from bofire.data_models.molfeatures.api import MordredDescriptors from bofire.data_models.priors.api import ( @@ -293,6 +295,107 @@ def test_SingleTaskGPHyperconfig(): ) +def test_SingleTaskGPModel_feature_subsets(): + """make an additive kernel using feature subsets for each kernel in the sum""" + benchmark = Hartmann() + bench_x = benchmark.domain.inputs.sample(12) + bench_expts = pd.concat([bench_x, benchmark.f(bench_x)], axis=1) + + input_names = benchmark.domain.inputs.get_keys() + inputs_kernel_1 = input_names[:2] + inputs_kernel_2 = input_names[2:] + + gp_data = SingleTaskGPSurrogate( + inputs=benchmark.domain.inputs, + outputs=benchmark.domain.outputs, + kernel=AdditiveKernel( + kernels=[ + RBFKernel( + ard=True, + lengthscale_prior=HVARFNER_LENGTHSCALE_PRIOR(), + features=inputs_kernel_1, + ), + RBFKernel( + ard=True, + lengthscale_prior=HVARFNER_LENGTHSCALE_PRIOR(), + features=inputs_kernel_2, + ), + ] + ), + ) + + gp_mapped = surrogates.map(gp_data) + assert hasattr(gp_mapped, "fit") + assert len(gp_mapped.kernel.kernels) == 2 + assert gp_mapped.kernel.kernels[0].features == ["x_0", "x_1"] + assert gp_mapped.kernel.kernels[1].features == ["x_2", "x_3", "x_4", "x_5"] + gp_mapped.fit(bench_expts) + pred = gp_mapped.predict(bench_expts) + assert pred.shape == (12, 2) + assert gp_mapped.model.covar_module.kernels[0].active_dims.tolist() == [0, 1] + assert gp_mapped.model.covar_module.kernels[1].active_dims.tolist() == [2, 3, 4, 5] + + +def test_SingleTaskGPModel_mixed_features(): + """test that we can use a single task gp with mixed features""" + inputs = Inputs( + features=[ + ContinuousInput(key="x_1", bounds=(-4, 4)), + ContinuousInput(key="x_2", bounds=(-4, 4)), + CategoricalInput(key="x_cat_1", categories=["mama", "papa"]), + CategoricalInput(key="x_cat_2", categories=["cat", "dog"]), + MolecularInput(key="x_mol"), + ], + ) + outputs = Outputs(features=[ContinuousOutput(key="y")]) + + experiment_values = [ + [2.56, -1.42, "papa", "dog", -3.98, 1, "CC(=O)Oc1ccccc1C(=O)O"], + [3.84, -2.73, "mama", "cat", -197.46, 1, "c1ccccc1"], + [3.57, 3.23, "papa", "cat", -74.55, 1, "[CH3][CH2][OH]"], + [-0.07, -1.55, "mama", "dog", -179.14, 1, "N[C@](C)(F)C(=O)O"], + ] + experiments = pd.DataFrame( + experiment_values, + columns=["x_1", "x_2", "x_cat_1", "x_cat_2", "y", "valid_y", "x_mol"], + ) + + gp_data = SingleTaskGPSurrogate( + inputs=inputs, + outputs=outputs, + kernel=AdditiveKernel( + kernels=[ + HammingDistanceKernel( + ard=True, + features=["x_cat_1", "x_cat_2"], + ), + RBFKernel( + ard=True, + lengthscale_prior=HVARFNER_LENGTHSCALE_PRIOR(), + features=["x_1", "x_2"], + ), + TanimotoKernel(features=["x_mol"]), + ] + ), + ) + + gp_mapped = surrogates.map(gp_data) + gp_mapped.fit(experiments) + pred = gp_mapped.predict(experiments) + assert pred.shape == (4, 2) + assert gp_mapped.model.covar_module.kernels[0].active_dims.tolist() == [ + 2050, + 2051, + 2052, + 2053, + ] + assert gp_mapped.model.covar_module.kernels[1].active_dims.tolist() == [0, 1] + assert gp_mapped.model.covar_module.kernels[2].active_dims.tolist() == list( + range(2, 2050) + ) + # assert (pred['y_pred'] - experiments['y']).abs().mean() < 0.4 + + def test_MixedSingleTaskGPHyperconfig(): inputs = Inputs( features=[ diff --git a/tests/bofire/surrogates/test_xgb.py b/tests/bofire/surrogates/test_xgb.py index f01b1fd5f..6d56516a4 100644 --- a/tests/bofire/surrogates/test_xgb.py +++ b/tests/bofire/surrogates/test_xgb.py @@ -15,6 +15,9 @@ from bofire.data_models.surrogates.api import XGBoostSurrogate +pytest.importorskip("xgboost") + + XGB_AVAILABLE = importlib.util.find_spec("xgboost") is not None diff --git a/tutorials/benchmarks/007-Benchmark_outlier_detection.ipynb b/tutorials/benchmarks/007-Benchmark_outlier_detection.ipynb index 1034aa19d..8dd5e587c 100644 --- a/tutorials/benchmarks/007-Benchmark_outlier_detection.ipynb +++ b/tutorials/benchmarks/007-Benchmark_outlier_detection.ipynb @@ -208,6 +208,9 @@ " batch_shape=torch.Size(),\n", " active_dims=list(range(tX.shape[1])),\n", " ard_num_dims=1, # this keyword is ignored\n", + " features_to_idx_mapper=lambda feats: self.inputs.get_feature_indices(\n", + " self.input_preprocessing_specs, feats\n", + " ),\n", " ),\n", " # outcome_transform=Standardize(m=tY.shape[-1]),\n", " input_transform=scaler,\n", @@ -767,7 +770,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -781,7 +784,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.10.12" }, "papermill": { "default_parameters": {},