diff --git a/src/delayedarray/DelayedArray.py b/src/delayedarray/DelayedArray.py index d52a583..9a90d60 100644 --- a/src/delayedarray/DelayedArray.py +++ b/src/delayedarray/DelayedArray.py @@ -749,7 +749,7 @@ def __and__(self, other) -> "DelayedArray": Returns: A ``DelayedArray`` containing the delayed AND operation. """ - return _wrap_isometric_with_args(self, other, operation="logical_or", right=True) + return _wrap_isometric_with_args(self, other, operation="logical_and", right=True) def __rand__(self, other) -> "DelayedArray": """Element-wise AND with the right-hand-side of a ``DelayedArray``. @@ -907,7 +907,7 @@ def var(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optiona masked=is_masked(self), ) - def any(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = None, buffer_size: int = 1e8) -> numpy.ndarray: + def any(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = numpy.bool_, buffer_size: int = 1e8) -> numpy.ndarray: """Test whether any array element along a given axis evaluates to True. Compute this test across the ``DelayedArray``, possibly over a @@ -916,14 +916,14 @@ def any(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optiona Args: axis: - A single integer specifying the axis over which to calculate - the mean. Alternatively, a tuple (multiple axes) or None (no - axes), see :py:func:`~numpy.mean` for details. + A single integer specifying the axis over which to test + for any. Alternatively, a tuple (multiple axes) or None (no + axes), see :py:func:`~numpy.any` for details. dtype: NumPy type for the output array. If None, this is automatically chosen based on the type of the ``DelayedArray``, see - :py:func:`~numpy.mean` for details. + :py:func:`~numpy.any` for details. buffer_size: Buffer size in bytes to use for block processing. 
Larger values @@ -934,7 +934,7 @@ def any(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optiona be a NumPy scalar instead. """ if hasattr(self._seed, "any"): - return self._seed.any(axis=axis, dtype=dtype) + return self._seed.any(axis=axis) else: return array_any( self, @@ -944,23 +944,23 @@ def any(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optiona masked=is_masked(self), ) - def all(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = None, buffer_size: int = 1e8) -> numpy.ndarray: + def all(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = numpy.bool_, buffer_size: int = 1e8) -> numpy.ndarray: """Test whether all array elements along a given axis evaluate to True. Compute this test across the ``DelayedArray``, possibly over a - given axis or set of axes. If the seed has a ``any()`` method, that + given axis or set of axes. If the seed has a ``all()`` method, that method is called directly with the supplied arguments. Args: axis: - A single integer specifying the axis over which to calculate - the mean. Alternatively, a tuple (multiple axes) or None (no - axes), see :py:func:`~numpy.mean` for details. + A single integer specifying the axis over which to test + for all. Alternatively, a tuple (multiple axes) or None (no + axes), see :py:func:`~numpy.all` for details. dtype: NumPy type for the output array. If None, this is automatically chosen based on the type of the ``DelayedArray``, see - :py:func:`~numpy.mean` for details. + :py:func:`~numpy.all` for details. buffer_size: Buffer size in bytes to use for block processing. Larger values @@ -971,7 +971,7 @@ def all(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optiona be a NumPy scalar instead. 
""" if hasattr(self._seed, "all"): - return self._seed.all(axis=axis, dtype=dtype) + return self._seed.all(axis=axis) else: return array_all( self, diff --git a/src/delayedarray/SparseNdarray.py b/src/delayedarray/SparseNdarray.py index 7ca2a59..eca08eb 100644 --- a/src/delayedarray/SparseNdarray.py +++ b/src/delayedarray/SparseNdarray.py @@ -14,7 +14,7 @@ _concatenate_unmasked_ndarrays, _concatenate_maybe_masked_ndarrays ) -from ._statistics import array_mean, array_var, array_sum, _create_offset_multipliers +from ._statistics import array_mean, array_var, array_sum, _create_offset_multipliers, array_all, array_any __author__ = "ltla" __copyright__ = "ltla" @@ -665,6 +665,71 @@ def __abs__(self): A ``SparseNdarray`` containing the delayed absolute value operation. """ return _transform_sparse_array_from_SparseNdarray(self, lambda l, i, v : (i, abs(v)), self._dtype) + + def __or__(self, other) -> Union["SparseNdarray", numpy.ndarray]: + """Element-wise OR with something. + + Args: + other: + A numeric scalar; + or a NumPy array with dimensions as described in + :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; + or a ``DelayedArray`` of the same dimensions as :py:attr:`~shape`. + + Returns: + Array containing the result of the check. + This may or may not be sparse depending on ``other``. + """ + return _operate_with_args_on_SparseNdarray(self, other, operation="logical_or", right=True) + + def __ror__(self, other) -> Union["SparseNdarray", numpy.ndarray]: + """Element-wise OR with the right-hand-side of a ``DelayedArray``. + + Args: + other: + A numeric scalar; + or a NumPy array with dimensions as described in + :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; + or a ``DelayedArray`` of the same dimensions as :py:attr:`~shape`. + + Returns: + Array containing the result of the check. + This may or may not be sparse depending on ``other``. 
+ """ + return _operate_with_args_on_SparseNdarray(self, other, operation="logical_or", right=False) + + def __and__(self, other) -> Union["SparseNdarray", numpy.ndarray]: + """Element-wise AND with something. + + Args: + other: + A numeric scalar; + or a NumPy array with dimensions as described in + :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; + or a ``DelayedArray`` of the same dimensions as :py:attr:`~shape`. + + Returns: + Array containing the result of the check. + This may or may not be sparse depending on ``other``. + """ + return _operate_with_args_on_SparseNdarray(self, other, operation="logical_and", right=True) + + def __rand__(self, other) -> Union["SparseNdarray", numpy.ndarray]: + """Element-wise AND with the right-hand-side of a ``DelayedArray``. + + Args: + other: + A numeric scalar; + or a NumPy array with dimensions as described in + :py:class:`~delayedarray.UnaryIsometricOpWithArgs.UnaryIsometricOpWithArgs`; + or a ``DelayedArray`` of the same dimensions as :py:attr:`~shape`. + + Returns: + Array containing the result of the check. + This may or may not be sparse depending on ``other``. + """ + return _operate_with_args_on_SparseNdarray(self, other, operation="logical_and", right=False) + # Subsetting. 
def __getitem__(self, subset: Tuple[Union[slice, Sequence], ...]) -> Union["SparseNdarray", numpy.ndarray]: @@ -760,6 +825,21 @@ def __array_function__(self, func, types, args, kwargs) -> "SparseNdarray": if func == numpy.round: return _transform_sparse_array_from_SparseNdarray(self, lambda l, i, v : (i, func(v, **kwargs)), self._dtype) + if func == numpy.mean: + return self.mean(**kwargs) + + if func == numpy.sum: + return self.sum(**kwargs) + + if func == numpy.var: + return self.var(**kwargs) + + if func == numpy.any: + return self.any(**kwargs) + + if func == numpy.all: + return self.all(**kwargs) + raise NotImplementedError(f"'{func.__name__}' is not implemented!") @@ -872,6 +952,64 @@ def var(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optiona masked=self._is_masked, ) + def any(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = numpy.bool_) -> numpy.ndarray: + """Test whether any array element along a given axis evaluates to True. + + Compute this test across the ``SparseNdarray``, possibly over a + given axis or set of axes. If the seed has a ``any()`` method, that + method is called directly with the supplied arguments. + + Args: + axis: + A single integer specifying the axis over which to test + for any. Alternatively, a tuple (multiple axes) or None + (no axes), see :py:func:`~numpy.any` for details. + + dtype: + NumPy type for the output array. If None, this is automatically + chosen based on the type of the ``SparseNdarray``, see + :py:func:`~numpy.any` for details. + + Returns: + A NumPy array containing the test results. If ``axis = None``, + this will be a NumPy scalar instead. 
+ """ + return array_any( + self, + axis=axis, + dtype=dtype, + reduce_over_x=_reduce_SparseNdarray, + masked=self._is_masked, + ) + + def all(self, axis: Optional[Union[int, Tuple[int, ...]]] = None, dtype: Optional[numpy.dtype] = numpy.bool_) -> numpy.ndarray: + """Test whether all array elements along a given axis evaluate to True. + + Compute this test across the ``SparseNdarray``, possibly over a + given axis or set of axes. If the seed has a ``all()`` method, that + method is called directly with the supplied arguments. + Args: + axis: + A single integer specifying the axis over which to test + for all. Alternatively, a tuple (multiple axes) or None + (no axes), see :py:func:`~numpy.all` for details. + + dtype: + NumPy type for the output array. If None, this is automatically + chosen based on the type of the ``SparseNdarray``, see + :py:func:`~numpy.all` for details. + + Returns: + A NumPy array containing the test results. If ``axis = None``, + this will be a NumPy scalar instead. + """ + return array_all( + self, + axis=axis, + dtype=dtype, + reduce_over_x=_reduce_SparseNdarray, + masked=self._is_masked, + ) # Other stuff def __copy__(self) -> "SparseNdarray": diff --git a/src/delayedarray/_statistics.py b/src/delayedarray/_statistics.py index b0ce40a..fa3ffda 100644 --- a/src/delayedarray/_statistics.py +++ b/src/delayedarray/_statistics.py @@ -52,14 +52,17 @@ def _choose_output_type(dtype: numpy.dtype, preserve_integer: bool) -> numpy.dty return dtype -def _allocate_output_array(shape: Tuple[int, ...], axes: List[int], dtype: numpy.dtype) -> numpy.ndarray: +def _allocate_output_array(shape: Tuple[int, ...], axes: List[int], dtype: numpy.dtype, default_func: Callable = numpy.zeros) -> numpy.ndarray: + if default_func is None: + default_func = numpy.zeros + if len(axes) == 0: # Returning a length-1 array to allow for continued use of offsets. 
- return numpy.zeros(1, dtype=dtype) + return default_func(1, dtype=dtype) else: # Use Fortran order so that the offsets make sense. shape = [shape[i] for i in axes] - return numpy.zeros((*shape,), dtype=dtype, order="F") + return default_func((*shape,), dtype=dtype, order="F") def _create_offset_multipliers(shape: Tuple[int, ...], axes: List[int]) -> List[int]: @@ -195,15 +198,18 @@ def array_any(x, axis: Optional[Union[int, Tuple[int, ...]]], dtype: Optional[nu mask_buffer = masked.ravel(order="F") def op(offset, value): if value is not numpy.ma.masked: - buffer[offset] = numpy.any([buffer[offset], value]) + if value and not buffer[offset]: + buffer[offset] = True else: mask_buffer[offset] += 1 reduce_over_x(x, axes, op) size = _expected_sample_size(x.shape, axes) - output = numpy.ma.MaskedArray(output, mask=(masked == size)) + denom = size - masked + output = numpy.ma.MaskedArray(output, mask=(denom == 0)) else: def op(offset, value): - buffer[offset] = numpy.any([buffer[offset], value]) + if value and not buffer[offset]: + buffer[offset] = True reduce_over_x(x, axes, op) if len(axes) == 0: @@ -216,24 +222,26 @@ def array_all(x, axis: Optional[Union[int, Tuple[int, ...]]], dtype: Optional[nu axes = _find_useful_axes(len(x.shape), axis) if dtype is None: dtype = _choose_output_type(x.dtype, preserve_integer = True) - output = _allocate_output_array(x.shape, axes, dtype) + output = _allocate_output_array(x.shape, axes, dtype, default_func=numpy.ones) buffer = output.ravel(order="F") - buffer += 1 # since all has to be true, we start with a value other than 0's if masked: masked = numpy.zeros(output.shape, dtype=numpy.uint, order="F") mask_buffer = masked.ravel(order="F") def op(offset, value): if value is not numpy.ma.masked: - buffer[offset] = numpy.all([buffer[offset], value]) + if not value and buffer[offset]: + buffer[offset] = False else: mask_buffer[offset] += 1 reduce_over_x(x, axes, op) size = _expected_sample_size(x.shape, axes) - output = 
numpy.ma.MaskedArray(output, mask=(masked == size)) + denom = size - masked + output = numpy.ma.MaskedArray(output, mask=(denom == 0)) else: def op(offset, value): - buffer[offset] = numpy.all([buffer[offset], value]) + if not value and buffer[offset]: + buffer[offset] = False reduce_over_x(x, axes, op) if len(axes) == 0: diff --git a/tests/test_DelayedArray.py b/tests/test_DelayedArray.py index 6d9c066..add39ea 100644 --- a/tests/test_DelayedArray.py +++ b/tests/test_DelayedArray.py @@ -247,3 +247,118 @@ def test_SparseNdarray_var_sparse(mask_rate, buffer_size): with pytest.warns(RuntimeWarning): y = delayedarray.wrap(delayedarray.SparseNdarray((0,), None, dtype=numpy.int32, index_dtype=numpy.int32)) * 50 assert numpy.isnan(y.var()) + +@pytest.mark.parametrize("mask_rate", [0, 0.5]) +@pytest.mark.parametrize("buffer_size", [100, 500, 2000]) +def test_SparseNdarray_any_dense(mask_rate, buffer_size): + raw = simulate_ndarray((30, 40, 15), mask_rate = mask_rate) + assert_identical_ndarrays(raw.any(), delayedarray.wrap(raw).any()) + assert_identical_ndarrays(raw.any(axis=0), delayedarray.wrap(raw).any(axis=0)) + + y = delayedarray.wrap(raw) - 12 + ref = raw - 12 + assert numpy.isclose(ref.any(), y.any(buffer_size=buffer_size)) + assert_close_ndarrays(ref.any(axis=1), y.any(axis=1, buffer_size=buffer_size)) + assert_close_ndarrays(ref.any(axis=-1), y.any(axis=-1, buffer_size=buffer_size)) + assert_close_ndarrays(ref.any(axis=(0, 2)), y.any(axis=(0, 2), buffer_size=buffer_size)) + + # Trying with a single dimension. + test_shape = (100,) + raw = simulate_ndarray((100,), mask_rate=mask_rate) + y = delayedarray.wrap(raw) + 29 + ref = raw + 29 + assert numpy.isclose(ref.any(), y.any(buffer_size=buffer_size)) + + # Full masking is respected. + y = delayedarray.wrap(numpy.ma.MaskedArray([1], mask=True)) + 20 + assert y.any() is numpy.ma.masked + + # Zero-length array is respected. 
+ y = delayedarray.wrap(numpy.ndarray((10, 0))) * 50 + assert y.any() == False + + +@pytest.mark.parametrize("mask_rate", [0, 0.5]) +@pytest.mark.parametrize("buffer_size", [100, 500, 2000]) +def test_SparseNdarray_any_sparse(mask_rate, buffer_size): + raw = simulate_SparseNdarray((20, 30, 25), mask_rate = mask_rate) + ref = raw * 19 + y = delayedarray.wrap(raw) * 19 + + assert numpy.isclose(ref.any(), y.any(buffer_size=buffer_size)) + assert_close_ndarrays(ref.any(axis=1), y.any(axis=1, buffer_size=buffer_size)) + assert_close_ndarrays(ref.any(axis=-1), y.any(axis=-1, buffer_size=buffer_size)) + assert_close_ndarrays(ref.any(axis=(0, 2)), y.any(axis=(0, 2), buffer_size=buffer_size)) + + # Trying with a single dimension. + test_shape = (100,) + raw = simulate_SparseNdarray((100,), mask_rate=mask_rate) + y = delayedarray.wrap(raw) * 12 + ref = raw * 12 + assert numpy.isclose(ref.any(), y.any(buffer_size=buffer_size)) + + # Full masking is respected. + ref = delayedarray.SparseNdarray((1,), (numpy.zeros(1, dtype=numpy.int_), numpy.ma.MaskedArray([1], mask=True))) + y = delayedarray.wrap(ref) / 5 + assert y.any() is numpy.ma.masked + + # Zero-length array is respected. 
+ y = delayedarray.wrap(delayedarray.SparseNdarray((0,), None, dtype=numpy.int32, index_dtype=numpy.int32)) * 50 + assert y.any() == False + +@pytest.mark.parametrize("mask_rate", [0, 0.5]) +@pytest.mark.parametrize("buffer_size", [100, 500, 2000]) +def test_SparseNdarray_all_dense(mask_rate, buffer_size): + raw = simulate_ndarray((30, 40, 15), mask_rate = mask_rate) + assert_identical_ndarrays(raw.all(), delayedarray.wrap(raw).all()) + assert_identical_ndarrays(raw.all(axis=0), delayedarray.wrap(raw).all(axis=0)) + + y = delayedarray.wrap(raw) - 12 + ref = raw - 12 + assert numpy.isclose(ref.all(), y.all(buffer_size=buffer_size)) + assert_close_ndarrays(ref.all(axis=1), y.all(axis=1, buffer_size=buffer_size)) + assert_close_ndarrays(ref.all(axis=-1), y.all(axis=-1, buffer_size=buffer_size)) + assert_close_ndarrays(ref.all(axis=(0, 2)), y.all(axis=(0, 2), buffer_size=buffer_size)) + + # Trying with a single dimension. + test_shape = (100,) + raw = simulate_ndarray((100,), mask_rate=mask_rate) + y = delayedarray.wrap(raw) + 29 + ref = raw + 29 + assert numpy.isclose(ref.all(), y.all(buffer_size=buffer_size)) + + # Full masking is respected. + y = delayedarray.wrap(numpy.ma.MaskedArray([1], mask=True)) + 20 + assert y.all() is numpy.ma.masked + + # Zero-length array is respected. 
+ y = delayedarray.wrap(numpy.ndarray((10, 0))) * 50 + assert y.all() + +@pytest.mark.parametrize("mask_rate", [0, 0.5]) +@pytest.mark.parametrize("buffer_size", [100, 500, 2000]) +def test_SparseNdarray_all_sparse(mask_rate, buffer_size): + raw = simulate_SparseNdarray((20, 30, 25), mask_rate = mask_rate) + ref = raw * 19 + y = delayedarray.wrap(raw) * 19 + + assert numpy.isclose(ref.all(), y.all(buffer_size=buffer_size)) + assert_close_ndarrays(ref.all(axis=1), y.all(axis=1, buffer_size=buffer_size)) + assert_close_ndarrays(ref.all(axis=-1), y.all(axis=-1, buffer_size=buffer_size)) + assert_close_ndarrays(ref.all(axis=(0, 2)), y.all(axis=(0, 2), buffer_size=buffer_size)) + + # Trying with a single dimension. + test_shape = (100,) + raw = simulate_SparseNdarray((100,), mask_rate=mask_rate) + y = delayedarray.wrap(raw) * 12 + ref = raw * 12 + assert numpy.isclose(ref.any(), y.all(buffer_size=buffer_size)) + + # Full masking is respected. + ref = delayedarray.SparseNdarray((1,), (numpy.zeros(1, dtype=numpy.int_), numpy.ma.MaskedArray([1], mask=True))) + y = delayedarray.wrap(ref) / 5 + assert y.all() is numpy.ma.masked + + # Zero-length array is respected. + y = delayedarray.wrap(delayedarray.SparseNdarray((0,), None, dtype=numpy.int32, index_dtype=numpy.int32)) * 50 + assert y.all() \ No newline at end of file