rapidsai · wphicks · Jan 23, 2025 · Jan 23, 2025 · Jan 28, 2025 · betatim
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 import cuml.internals
 
 from collections.abc import Iterable
+from cuml.internals.global_settings import GlobalSettings
 from cuml.internals.safe_imports import gpu_only_import
 from cuml.datasets.utils import _create_rs_generator
 from cuml.internals.safe_imports import (
@@ -44,8 +45,7 @@ def _get_centers(rs, centers, center_box, n_samples, n_features, dtype):
                 center_box[0],
                 center_box[1],
                 size=(n_centers, n_features),
-                dtype=dtype,
-            )
+            ).astype(dtype)
 
         else:
             if n_features != centers.shape[1]:
@@ -63,8 +63,7 @@ def _get_centers(rs, centers, center_box, n_samples, n_features, dtype):
                 center_box[0],
                 center_box[1],
                 size=(n_centers, n_features),
-                dtype=dtype,
-            )
+            ).astype(dtype)
         try:
             assert len(centers) == n_centers
         except TypeError:
@@ -173,7 +172,8 @@ def make_blobs(
     # Set the default output type to "cupy". This will be ignored if the user
     # has set `cuml.global_settings.output_type`. Only necessary for array
     # generation methods that do not take an array as input
-    cuml.internals.set_api_output_type("cupy")
+    cuml.internals.set_api_output_type("array")
+    xpy = GlobalSettings().xpy
 
     generator = _create_rs_generator(random_state=random_state)
 
@@ -191,7 +191,7 @@ def make_blobs(
         )
 
     if isinstance(cluster_std, numbers.Real):
-        cluster_std = cp.full(len(centers), cluster_std)
+        cluster_std = xpy.full(len(centers), cluster_std)
 
     if isinstance(n_samples, Iterable):
         n_samples_per_center = n_samples
@@ -201,9 +201,9 @@ def make_blobs(
         for i in range(n_samples % n_centers):
             n_samples_per_center[i] += 1
 
-    X = cp.zeros(n_samples * n_features, dtype=dtype)
+    X = xpy.zeros(n_samples * n_features, dtype=dtype)
     X = X.reshape((n_samples, n_features), order=order)
-    y = cp.zeros(n_samples, dtype=dtype)
+    y = xpy.zeros(n_samples, dtype=dtype)
 
     if shuffle:
         proba_samples_per_center = np.array(n_samples_per_center) / np.sum(
@@ -213,21 +213,20 @@ def make_blobs(
             n_centers, n_samples, replace=True, p=proba_samples_per_center
         )
         for i, (n, std) in enumerate(zip(n_samples_per_center, cluster_std)):
-            center_indices = cp.where(shuffled_sample_indices == i)
+            center_indices = xpy.where(shuffled_sample_indices == i)
 
             y[center_indices[0]] = i
 
             X_k = generator.normal(
                 scale=std,
                 size=(len(center_indices[0]), n_features),
-                dtype=dtype,
-            )
+            ).astype(dtype)
 
             # NOTE: Adding the loc explicitly as cupy has a bug
             # when calling generator.normal with an array for loc.
             # cupy.random.normal, however, works with the same
             # arguments
-            cp.add(X_k, centers[i], out=X_k)
+            xpy.add(X_k, centers[i], out=X_k)
             X[center_indices[0], :] = X_k
     else:
         stop = 0
@@ -236,11 +235,11 @@ def make_blobs(
 
             y[start:stop] = i
 
-            X_k = generator.normal(
-                scale=std, size=(n, n_features), dtype=dtype
+            X_k = generator.normal(scale=std, size=(n, n_features)).astype(
+                dtype
             )
 
-            cp.add(X_k, centers[i], out=X_k)
+            xpy.add(X_k, centers[i], out=X_k)
             X[start:stop, :] = X_k
 
     if return_centers:

@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,23 +13,25 @@
 # limitations under the License.
 #
 
+from cuml.internals.global_settings import GlobalSettings
 from cuml.internals.safe_imports import gpu_only_import
 
 cp = gpu_only_import("cupy")
 
 
 def _create_rs_generator(random_state):
     """
-    This is a utility function that returns an instance of CuPy RandomState
+    This is a utility function that returns an instance of CuPy/numpy
+    RandomState depending on the current globally-selected device type
     Parameters
-    RandomState depending on the current globally-selected device type
+    RandomState depending on the current globally-selected device type
+
-    RandomState depending on the current globally-selected device type
+    RandomState depending on the current globally-selected device type
+
     ----------
-    random_state : None, int, or CuPy RandomState
-        The random_state from which the CuPy random state is generated
+    random_state : None, int, or RandomState
+        The random_state from which the random state is generated
     """
 
     if isinstance(random_state, (type(None), int)):
-        return cp.random.RandomState(seed=random_state)
-    elif isinstance(random_state, cp.random.RandomState):
+        return GlobalSettings().xpy.random.RandomState(seed=random_state)
+    elif isinstance(random_state, GlobalSettings().xpy.random.RandomState):
         return random_state
     else:
-        raise ValueError("random_state type must be int or CuPy RandomState")
+        raise ValueError("random_state type must be int or RandomState")
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,6 +20,11 @@
 from cuml.internals.input_utils import input_to_cupy_array
 from cuml.common import input_to_cuml_array
 from cuml import Base
+from cuml.internals.api_decorators import (
+    device_interop_preparation,
+    enable_device_interop,
+)
+from cuml.internals.global_settings import GlobalSettings
 from cuml.internals.safe_imports import cpu_only_import
 import numbers
 
@@ -195,6 +200,9 @@ class IncrementalPCA(PCA):
         0.0037122774558343763
     """
 
+    _cpu_estimator_import_path = "sklearn.decomposition.IncrementalPCA"
+
+    @device_interop_preparation
     def __init__(
         self,
         *,
@@ -218,6 +226,7 @@ def __init__(
         self.batch_size = batch_size
         self._sparse_model = True
 
+    @enable_device_interop
     def fit(self, X, y=None, convert_dtype=True) -> "IncrementalPCA":
         """
         Fit the model with X, using minibatches of size batch_size.
@@ -255,10 +264,10 @@ def fit(self, X, y=None, convert_dtype=True) -> "IncrementalPCA":
                 check_dtype=[cp.float32, cp.float64],
             )
 
-        n_samples, n_features = X.shape
+        n_samples, self.n_features_in_ = X.shape
 
         if self.batch_size is None:
-            self.batch_size_ = 5 * n_features
+            self.batch_size_ = 5 * self.n_features_in_
         else:
             self.batch_size_ = self.batch_size
 
@@ -305,25 +314,30 @@ def partial_fit(self, X, y=None, check_input=True) -> "IncrementalPCA":
 
             self._set_output_type(X)
 
-            X, n_samples, n_features, self.dtype = input_to_cupy_array(
+            (
+                X,
+                n_samples,
+                self.n_features_in_,
+                self.dtype,
+            ) = input_to_cupy_array(
                 X, order="K", check_dtype=[cp.float32, cp.float64]
             )
         else:
-            n_samples, n_features = X.shape
+            n_samples, self.n_features_in_ = X.shape
 
         if not hasattr(self, "components_"):
             self.components_ = None
 
         if self.n_components is None:
             if self.components_ is None:
-                self.n_components_ = min(n_samples, n_features)
+                self.n_components_ = min(n_samples, self.n_features_in_)
             else:
                 self.n_components_ = self.components_.shape[0]
-        elif not 1 <= self.n_components <= n_features:
+        elif not 1 <= self.n_components <= self.n_features_in_:
             raise ValueError(
                 "n_components=%r invalid for n_features=%d, need "
                 "more rows than columns for IncrementalPCA "
-                "processing" % (self.n_components, n_features)
+                "processing" % (self.n_components, self.n_features_in_)
             )
         elif not self.n_components <= n_samples:
             raise ValueError(
@@ -394,7 +408,7 @@ def partial_fit(self, X, y=None, check_input=True) -> "IncrementalPCA":
         self.explained_variance_ratio_ = explained_variance_ratio[
             : self.n_components_
         ]
-        if self.n_components_ < n_features:
+        if self.n_components_ < self.n_features_in_:
             self.noise_variance_ = explained_variance[
                 self.n_components_ :
             ].mean()
@@ -403,6 +417,7 @@ def partial_fit(self, X, y=None, check_input=True) -> "IncrementalPCA":
 
         return self
 
+    @enable_device_interop
     def transform(self, X, convert_dtype=False) -> CumlArray:
         """
         Apply dimensionality reduction to X.
@@ -678,16 +693,17 @@ def _svd_flip(u, v, u_based_decision=True):
     u_adjusted, v_adjusted : arrays with the same dimensions as the input.
 
     """
+    xpy = GlobalSettings().xpy
     if u_based_decision:
         # columns of u, rows of v
-        max_abs_cols = cp.argmax(cp.abs(u), axis=0)
-        signs = cp.sign(u[max_abs_cols, list(range(u.shape[1]))])
+        max_abs_cols = xpy.argmax(xpy.abs(u), axis=0)
+        signs = xpy.sign(u[max_abs_cols, list(range(u.shape[1]))])
         u *= signs
-        v *= signs[:, cp.newaxis]
+        v *= signs[:, xpy.newaxis]
     else:
         # rows of v, columns of u
-        max_abs_rows = cp.argmax(cp.abs(v), axis=1)
-        signs = cp.sign(v[list(range(v.shape[0])), max_abs_rows])
+        max_abs_rows = xpy.argmax(xpy.abs(v), axis=1)
+        signs = xpy.sign(v[list(range(v.shape[0])), max_abs_rows])
         u *= signs
-        v *= signs[:, cp.newaxis]
+        v *= signs[:, xpy.newaxis]
     return u, v
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -134,3 +134,7 @@ def output_type(self, value):
     @property
     def xpy(self):
         return self.memory_type.xpy
+
+    @property
+    def xsparse(self):
+        return self.memory_type.xsparse