From 3bb1b51604e3f21916e9164a0ec6873098fac173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Wed, 29 Nov 2023 16:39:27 -0600 Subject: [PATCH] allocate empty arrays (#9) --- coreforecast/grouped_array.py | 22 +++++++++---------- src/coreforecast.cpp | 40 +++++++++++++++++++++++++++++------ tests/test_lag_transforms.py | 2 +- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/coreforecast/grouped_array.py b/coreforecast/grouped_array.py index f6a1f95..713c176 100644 --- a/coreforecast/grouped_array.py +++ b/coreforecast/grouped_array.py @@ -68,7 +68,7 @@ def _pyfloat_to_c(self, x: float) -> Union[ctypes.c_float, ctypes.c_double]: return out def scaler_fit(self, scaler_type: str) -> np.ndarray: - stats = np.full_like(self.data, np.nan, shape=(len(self), 2)) + stats = np.empty_like(self.data, shape=(len(self), 2)) _LIB[f"{self.prefix}_{scaler_type}ScalerStats"]( self._handle, _data_as_void_ptr(stats), @@ -76,7 +76,7 @@ def scaler_fit(self, scaler_type: str) -> np.ndarray: return stats def scaler_transform(self, stats: np.ndarray) -> np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_ScalerTransform"]( self._handle, _data_as_void_ptr(stats), @@ -103,7 +103,7 @@ def take_from_groups(self, k: int) -> np.ndarray: return out def lag_transform(self, lag: int) -> np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_LagTransform"]( self._handle, ctypes.c_int(lag), @@ -114,7 +114,7 @@ def lag_transform(self, lag: int) -> np.ndarray: def rolling_transform( self, stat_name: str, lag: int, window_size: int, min_samples: int ) -> np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_Rolling{stat_name}Transform"]( self._handle, ctypes.c_int(lag), @@ -127,7 +127,7 @@ def rolling_transform( def rolling_quantile_transform( self, lag: int, p: float, window_size: int, min_samples: int ) -> 
np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_RollingQuantileTransform"]( self._handle, ctypes.c_int(lag), @@ -173,7 +173,7 @@ def seasonal_rolling_transform( window_size: int, min_samples: int, ) -> np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_SeasonalRolling{stat_name}Transform"]( self._handle, ctypes.c_int(lag), @@ -211,7 +211,7 @@ def seasonal_rolling_quantile_transform( window_size: int, min_samples: int, ) -> np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_SeasonalRollingQuantileTransform"]( self._handle, ctypes.c_int(lag), @@ -249,7 +249,7 @@ def expanding_transform_with_aggs( lag: int, aggs: np.ndarray, ) -> np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_Expanding{stat_name}Transform"]( self._handle, ctypes.c_int(lag), @@ -263,7 +263,7 @@ def expanding_transform( stat_name: str, lag: int, ) -> np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_Expanding{stat_name}Transform"]( self._handle, ctypes.c_int(lag), @@ -272,7 +272,7 @@ def expanding_transform( return out def expanding_quantile_transform(self, lag: int, p: float) -> np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_ExpandingQuantileTransform"]( self._handle, ctypes.c_int(lag), @@ -297,7 +297,7 @@ def exponentially_weighted_transform( lag: int, alpha: float, ) -> np.ndarray: - out = np.full_like(self.data, np.nan) + out = np.empty_like(self.data) _LIB[f"{self.prefix}_ExponentiallyWeighted{stat_name}Transform"]( self._handle, ctypes.c_int(lag), diff --git a/src/coreforecast.cpp b/src/coreforecast.cpp index 8556834..c255274 100644 --- a/src/coreforecast.cpp +++ b/src/coreforecast.cpp @@ -22,6 +22,16 @@ template <typename T> inline indptr_t FirstNotNaN(const T *data, 
indptr_t n) { return i; } +template <typename T> +inline indptr_t FirstNotNaN(const T *data, indptr_t n, T *out) { + indptr_t i = 0; + while (i < n && std::isnan(data[i])) { + out[i] = std::numeric_limits<T>::quiet_NaN(); + ++i; + } + return i; +} + template <typename T> inline void TakeFromGroups(const T *data, int n, T *out, int k) { if (k > n) { @@ -118,8 +128,11 @@ inline void RollingMeanTransform(const T *data, int n, T *out, int window_size, int upper_limit = std::min(window_size, n); for (int i = 0; i < upper_limit; ++i) { accum += data[i]; - if (i + 1 >= min_samples) + if (i + 1 < min_samples) { + out[i] = std::numeric_limits<T>::quiet_NaN(); + } else { out[i] = accum / (i + 1); + } } for (int i = window_size; i < n; ++i) { @@ -140,8 +153,11 @@ inline void RollingStdTransformWithStats(const T *data, int n, T *out, T *agg, prev_avg = curr_avg; curr_avg = prev_avg + (data[i] - prev_avg) / (i + 1); m2 += (data[i] - prev_avg) * (data[i] - curr_avg); - if (i + 1 >= min_samples) + if (i + 1 < min_samples) { + out[i] = std::numeric_limits<T>::quiet_NaN(); + } else { out[i] = sqrt(m2 / i); + } } for (int i = window_size; i < n; ++i) { T delta = data[i] - data[i - window_size]; @@ -176,7 +192,9 @@ inline void RollingCompTransform(Func Comp, const T *data, int n, T *out, if (Comp(data[i], pivot)) { pivot = data[i]; } - if (i + 1 >= min_samples) { + if (i + 1 < min_samples) { + out[i] = std::numeric_limits<T>::quiet_NaN(); + } else { out[i] = pivot; } } @@ -490,6 +508,13 @@ inline void ExponentiallyWeightedMeanTransform(const T *data, int n, T *out, } } +template <typename T> inline void SkipLags(T *out, int n, int lag) { + int replacements = std::min(lag, n); + for (int i = 0; i < replacements; ++i) { + out[i] = std::numeric_limits<T>::quiet_NaN(); + } +} + template <typename T> class GroupedArray { private: const T *data_; @@ -515,7 +540,8 @@ template <typename T> class GroupedArray { indptr_t start_idx = FirstNotNaN(data_ + start, n); if (start_idx + lag >= n) continue; - f(data_ + start + start_idx, n - start_idx - lag, out + n_out * i, + 
start += start_idx; + f(data_ + start, n - start_idx - lag, out + n_out * i, std::forward<Args>(args)...); } } @@ -544,7 +570,8 @@ template <typename T> class GroupedArray { indptr_t start = indptr_[i]; indptr_t end = indptr_[i + 1]; indptr_t n = end - start; - indptr_t start_idx = FirstNotNaN(data_ + start, n); + indptr_t start_idx = FirstNotNaN(data_ + start, n, out + start); + SkipLags(out + start + start_idx, n - start_idx, lag); if (start_idx + lag >= n) { continue; } @@ -562,7 +589,8 @@ template <typename T> class GroupedArray { indptr_t start = indptr_[i]; indptr_t end = indptr_[i + 1]; indptr_t n = end - start; - indptr_t start_idx = FirstNotNaN(data_ + start, n); + indptr_t start_idx = FirstNotNaN(data_ + start, n, out + start); + SkipLags(out + start + start_idx, n - start_idx, lag); if (start_idx + lag >= n) { continue; } diff --git a/tests/test_lag_transforms.py b/tests/test_lag_transforms.py index dbfb8b5..03585f1 100644 --- a/tests/test_lag_transforms.py +++ b/tests/test_lag_transforms.py @@ -102,7 +102,7 @@ def test_correctness(data, comb, dtype): if "rolling_std" in comb: rtol = 1e-2 elif "expanding_std" in comb: - rtol = 5e-5 + rtol = 1e-4 data = data.astype(dtype, copy=True) ga = GroupedArray(data, indptr) wf, cf, args = combs_map[comb]