-
Notifications
You must be signed in to change notification settings - Fork 548
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enable CPU execution of IncrementalPCA #6254
base: branch-25.02
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright (c) 2020-2023, NVIDIA CORPORATION. | ||
# Copyright (c) 2020-2025, NVIDIA CORPORATION. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
|
@@ -13,23 +13,25 @@ | |
# limitations under the License. | ||
# | ||
|
||
from cuml.internals.global_settings import GlobalSettings | ||
from cuml.internals.safe_imports import gpu_only_import | ||
|
||
cp = gpu_only_import("cupy") | ||
|
||
|
||
def _create_rs_generator(random_state): | ||
""" | ||
This is a utility function that returns an instance of CuPy RandomState | ||
This is a utility function that returns an instance of CuPy/numpy | ||
RandomState depending on the current globally-selected device type | ||
Parameters | ||
---------- | ||
random_state : None, int, or CuPy RandomState | ||
The random_state from which the CuPy random state is generated | ||
random_state : None, int, or RandomState | ||
The random_state from which the random state is generated | ||
""" | ||
|
||
if isinstance(random_state, (type(None), int)): | ||
return cp.random.RandomState(seed=random_state) | ||
elif isinstance(random_state, cp.random.RandomState): | ||
return GlobalSettings().xpy.random.RandomState(seed=random_state) | ||
elif isinstance(random_state, GlobalSettings().xpy.random.RandomState): | ||
return random_state | ||
else: | ||
raise ValueError("random_state type must be int or CuPy RandomState") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't understand the global settings thing well enough (and this is an internal function), but I think it can happen that |
||
raise ValueError("random_state type must be int or RandomState") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# | ||
# Copyright (c) 2020-2024, NVIDIA CORPORATION. | ||
# Copyright (c) 2020-2025, NVIDIA CORPORATION. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
|
@@ -20,6 +20,11 @@ | |
from cuml.internals.input_utils import input_to_cupy_array | ||
from cuml.common import input_to_cuml_array | ||
from cuml import Base | ||
from cuml.internals.api_decorators import ( | ||
device_interop_preparation, | ||
enable_device_interop, | ||
) | ||
from cuml.internals.global_settings import GlobalSettings | ||
from cuml.internals.safe_imports import cpu_only_import | ||
import numbers | ||
|
||
|
@@ -195,6 +200,9 @@ class IncrementalPCA(PCA): | |
0.0037122774558343763 | ||
""" | ||
|
||
_cpu_estimator_import_path = "sklearn.decomposition.IncrementalPCA" | ||
|
||
@device_interop_preparation | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For my education, why do we need this decorator? All it seems to do is process the constructor arguments to remove parameters that aren't arguments to the constructor. But why do we need this? What is the use-case where we want to allow someone to call the constructor with invalid keyword arguments and not error? |
||
def __init__( | ||
self, | ||
*, | ||
|
@@ -218,6 +226,7 @@ def __init__( | |
self.batch_size = batch_size | ||
self._sparse_model = True | ||
|
||
@enable_device_interop | ||
def fit(self, X, y=None, convert_dtype=True) -> "IncrementalPCA": | ||
""" | ||
Fit the model with X, using minibatches of size batch_size. | ||
|
@@ -255,10 +264,10 @@ def fit(self, X, y=None, convert_dtype=True) -> "IncrementalPCA": | |
check_dtype=[cp.float32, cp.float64], | ||
) | ||
|
||
n_samples, n_features = X.shape | ||
n_samples, self.n_features_in_ = X.shape | ||
|
||
if self.batch_size is None: | ||
self.batch_size_ = 5 * n_features | ||
self.batch_size_ = 5 * self.n_features_in_ | ||
else: | ||
self.batch_size_ = self.batch_size | ||
|
||
|
@@ -305,25 +314,30 @@ def partial_fit(self, X, y=None, check_input=True) -> "IncrementalPCA": | |
|
||
self._set_output_type(X) | ||
|
||
X, n_samples, n_features, self.dtype = input_to_cupy_array( | ||
( | ||
X, | ||
n_samples, | ||
self.n_features_in_, | ||
self.dtype, | ||
) = input_to_cupy_array( | ||
X, order="K", check_dtype=[cp.float32, cp.float64] | ||
) | ||
else: | ||
n_samples, n_features = X.shape | ||
n_samples, self.n_features_in_ = X.shape | ||
|
||
if not hasattr(self, "components_"): | ||
self.components_ = None | ||
|
||
if self.n_components is None: | ||
if self.components_ is None: | ||
self.n_components_ = min(n_samples, n_features) | ||
self.n_components_ = min(n_samples, self.n_features_in_) | ||
else: | ||
self.n_components_ = self.components_.shape[0] | ||
elif not 1 <= self.n_components <= n_features: | ||
elif not 1 <= self.n_components <= self.n_features_in_: | ||
raise ValueError( | ||
"n_components=%r invalid for n_features=%d, need " | ||
"more rows than columns for IncrementalPCA " | ||
"processing" % (self.n_components, n_features) | ||
"processing" % (self.n_components, self.n_features_in_) | ||
) | ||
elif not self.n_components <= n_samples: | ||
raise ValueError( | ||
|
@@ -394,7 +408,7 @@ def partial_fit(self, X, y=None, check_input=True) -> "IncrementalPCA": | |
self.explained_variance_ratio_ = explained_variance_ratio[ | ||
: self.n_components_ | ||
] | ||
if self.n_components_ < n_features: | ||
if self.n_components_ < self.n_features_in_: | ||
self.noise_variance_ = explained_variance[ | ||
self.n_components_ : | ||
].mean() | ||
|
@@ -403,6 +417,7 @@ def partial_fit(self, X, y=None, check_input=True) -> "IncrementalPCA": | |
|
||
return self | ||
|
||
@enable_device_interop | ||
def transform(self, X, convert_dtype=False) -> CumlArray: | ||
""" | ||
Apply dimensionality reduction to X. | ||
|
@@ -678,16 +693,17 @@ def _svd_flip(u, v, u_based_decision=True): | |
u_adjusted, v_adjusted : arrays with the same dimensions as the input. | ||
|
||
""" | ||
xpy = GlobalSettings().xpy | ||
if u_based_decision: | ||
# columns of u, rows of v | ||
max_abs_cols = cp.argmax(cp.abs(u), axis=0) | ||
signs = cp.sign(u[max_abs_cols, list(range(u.shape[1]))]) | ||
max_abs_cols = xpy.argmax(xpy.abs(u), axis=0) | ||
signs = xpy.sign(u[max_abs_cols, list(range(u.shape[1]))]) | ||
u *= signs | ||
v *= signs[:, cp.newaxis] | ||
v *= signs[:, xpy.newaxis] | ||
else: | ||
# rows of v, columns of u | ||
max_abs_rows = cp.argmax(cp.abs(v), axis=1) | ||
signs = cp.sign(v[list(range(v.shape[0])), max_abs_rows]) | ||
max_abs_rows = xpy.argmax(xpy.abs(v), axis=1) | ||
signs = xpy.sign(v[list(range(v.shape[0])), max_abs_rows]) | ||
u *= signs | ||
v *= signs[:, cp.newaxis] | ||
v *= signs[:, xpy.newaxis] | ||
return u, v |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.