fix mean for datetime-like using the respective time resolution unit (#9977)

* fix mean for datetime-like by using the respective dtype time resolution unit, adapting tests

* fix mypy

* add PR to existing entry for non-nanosecond datetimes

* Update xarray/core/duck_array_ops.py

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>

* cast to "int64" in calculation of datetime-like mean

* Apply suggestions from code review

* Apply suggestions from code review

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>

---------

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
3 people authored Jan 29, 2025
1 parent e432479 commit e28f171
Showing 3 changed files with 51 additions and 38 deletions.
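A minimal sketch of the user-visible behaviour this commit targets (assuming an xarray build with non-nanosecond datetime support; the names and values below are illustrative, not taken from the diff):

import numpy as np
import xarray as xr

# Mean of second-resolution datetimes: after this fix the result should stay in
# the array's own resolution instead of being forced through nanoseconds.
da = xr.DataArray(
    np.array(["2010-01-01", "2010-01-03"], dtype="datetime64[s]"),
    dims="time",
)
print(da.mean())  # expected midpoint: 2010-01-02, reported at second resolution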
doc/whats-new.rst (2 changes: 1 addition & 1 deletion)

@@ -50,7 +50,7 @@ eventually be deprecated.

 New Features
 ~~~~~~~~~~~~
-- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`).
+- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`, :pull:`9977`).
   By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_ and `Spencer Clark <https://github.com/spencerkclark>`_.
 - Enable the ``compute=False`` option in :py:meth:`DataTree.to_zarr`. (:pull:`9958`).
   By `Sam Levang <https://github.com/slevang>`_.
xarray/core/duck_array_ops.py (23 changes: 10 additions & 13 deletions)

@@ -662,16 +662,10 @@ def _to_pytimedelta(array, unit="us"):


 def np_timedelta64_to_float(array, datetime_unit):
-    """Convert numpy.timedelta64 to float.
-
-    Notes
-    -----
-    The array is first converted to microseconds, which is less likely to
-    cause overflow errors.
-    """
-    array = array.astype("timedelta64[ns]").astype(np.float64)
-    conversion_factor = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit)
-    return conversion_factor * array
+    """Convert numpy.timedelta64 to float, possibly at a loss of resolution."""
+    unit, _ = np.datetime_data(array.dtype)
+    conversion_factor = np.timedelta64(1, unit) / np.timedelta64(1, datetime_unit)
+    return conversion_factor * array.astype(np.float64)


 def pd_timedelta_to_float(value, datetime_unit):
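A usage sketch of the rewritten helper above (np_timedelta64_to_float is a private helper in xarray.core.duck_array_ops; the input values are illustrative): the conversion factor now comes from the array's own unit rather than an intermediate nanosecond cast.

import numpy as np
from xarray.core.duck_array_ops import np_timedelta64_to_float

# Microsecond-resolution timedeltas converted to float seconds.
td = np.array([1_500_000, 3_000_000], dtype="timedelta64[us]")
print(np_timedelta64_to_float(td, datetime_unit="s"))  # -> [1.5 3. ]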
@@ -715,12 +709,15 @@ def mean(array, axis=None, skipna=None, **kwargs):
     if dtypes.is_datetime_like(array.dtype):
         offset = _datetime_nanmin(array)

-        # xarray always uses np.datetime64[ns] for np.datetime64 data
-        dtype = "timedelta64[ns]"
+        # From version 2025.01.2 xarray uses np.datetime64[unit], where unit
+        # is one of "s", "ms", "us", "ns".
+        # To not have to worry about the resolution, we just convert the output
+        # to "timedelta64" (without unit) and let the dtype of offset take precedence.
+        # This is fully backwards compatible with datetime64[ns].
         return (
             _mean(
                 datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs
-            ).astype(dtype)
+            ).astype("timedelta64")
             + offset
         )
     elif _contains_cftime_datetimes(array):
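A NumPy-only sketch of why the unit-less "timedelta64" cast in mean() works (the concrete numbers are illustrative, not traced through the real code path): a generic timedelta64 adopts the resolution of the datetime64 offset it is added to, so the offset's dtype takes precedence.

import numpy as np

offset = np.datetime64("2010-01-01", "s")                # plays the role of _datetime_nanmin(array)
mean_delta = np.float64(86400.0).astype("timedelta64")   # numeric mean cast without a unit
print(mean_delta + offset)                               # -> 2010-01-02, still at "s" resolution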
xarray/tests/test_duck_array_ops.py (64 changes: 40 additions & 24 deletions)

@@ -9,6 +9,7 @@
 from numpy import array, nan

 from xarray import DataArray, Dataset, cftime_range, concat
+from xarray.coding.times import _NS_PER_TIME_DELTA
 from xarray.core import dtypes, duck_array_ops
 from xarray.core.duck_array_ops import (
     array_notnull_equiv,
@@ -28,6 +29,7 @@
     where,
 )
 from xarray.core.extension_array import PandasExtensionArray
+from xarray.core.types import NPDatetimeUnitOptions, PDDatetimeUnitOptions
 from xarray.namedarray.pycompat import array_type
 from xarray.testing import assert_allclose, assert_equal, assert_identical
 from xarray.tests import (
@@ -411,10 +413,11 @@ def assert_dask_array(da, dask):
 @arm_xfail
 @pytest.mark.filterwarnings("ignore:All-NaN .* encountered:RuntimeWarning")
 @pytest.mark.parametrize("dask", [False, True] if has_dask else [False])
-def test_datetime_mean(dask: bool) -> None:
+def test_datetime_mean(dask: bool, time_unit: PDDatetimeUnitOptions) -> None:
     # Note: only testing numpy, as dask is broken upstream
+    dtype = f"M8[{time_unit}]"
     da = DataArray(
-        np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8[ns]"),
+        np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype=dtype),
         dims=["time"],
     )
     if dask:
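The updated tests take a time_unit argument; a hypothetical sketch of how such a pytest fixture could be defined (the real fixture lives in xarray's test configuration, not in this diff):

import pytest

@pytest.fixture(params=["s", "ms", "us", "ns"])
def time_unit(request):
    # hypothetical parametrized fixture covering the pandas-supported resolutions
    return request.param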
@@ -846,11 +849,11 @@ def test_multiple_dims(dtype, dask, skipna, func):


 @pytest.mark.parametrize("dask", [True, False])
-def test_datetime_to_numeric_datetime64(dask):
+def test_datetime_to_numeric_datetime64(dask, time_unit: PDDatetimeUnitOptions):
     if dask and not has_dask:
         pytest.skip("requires dask")

-    times = pd.date_range("2000", periods=5, freq="7D").values
+    times = pd.date_range("2000", periods=5, freq="7D").as_unit(time_unit).values
     if dask:
         import dask.array

@@ -874,8 +877,8 @@ def test_datetime_to_numeric_datetime64(dask, time_unit: PDDatetimeUnitOptions):
     result = duck_array_ops.datetime_to_numeric(
         times, datetime_unit="h", dtype=dtype
     )
-    expected = 24 * np.arange(0, 35, 7).astype(dtype)
-    np.testing.assert_array_equal(result, expected)
+    expected2 = 24 * np.arange(0, 35, 7).astype(dtype)
+    np.testing.assert_array_equal(result, expected2)


 @requires_cftime
@@ -923,15 +926,18 @@ def test_datetime_to_numeric_cftime(dask):


 @requires_cftime
-def test_datetime_to_numeric_potential_overflow():
+def test_datetime_to_numeric_potential_overflow(time_unit: PDDatetimeUnitOptions):
     import cftime

-    times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]")
+    if time_unit == "ns":
+        pytest.skip("out-of-bounds datetime64 overflow")
+    dtype = f"M8[{time_unit}]"
+    times = pd.date_range("2000", periods=5, freq="7D").values.astype(dtype)
     cftimes = cftime_range(
         "2000", periods=5, freq="7D", calendar="proleptic_gregorian"
     ).values

-    offset = np.datetime64("0001-01-01")
+    offset = np.datetime64("0001-01-01", time_unit)
     cfoffset = cftime.DatetimeProlepticGregorian(1, 1, 1)

     result = duck_array_ops.datetime_to_numeric(
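A short note on why the test above skips time_unit == "ns": datetime64[ns] can only represent roughly the years 1678 to 2262, so the year-1 offset is out of range at nanosecond resolution (a sketch; the exact failure mode depends on the NumPy/pandas code path):

import numpy as np

print(np.datetime64("0001-01-01", "us"))                                    # representable at "us"
print(np.datetime64("2000-01-01", "s") - np.datetime64("0001-01-01", "s"))  # large, but fine at "s"
# at "ns" resolution the same offset would fall outside the representable range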
@@ -957,35 +963,45 @@ def test_py_timedelta_to_float():
     assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6


-@pytest.mark.parametrize(
-    "td, expected",
-    ([np.timedelta64(1, "D"), 86400 * 1e9], [np.timedelta64(1, "ns"), 1.0]),
-)
-def test_np_timedelta64_to_float(td, expected):
-    out = np_timedelta64_to_float(td, datetime_unit="ns")
+@pytest.mark.parametrize("np_dt_unit", ["D", "h", "m", "s", "ms", "us", "ns"])
+def test_np_timedelta64_to_float(
+    np_dt_unit: NPDatetimeUnitOptions, time_unit: PDDatetimeUnitOptions
+):
+    # tests any combination of source np.timedelta64 (NPDatetimeUnitOptions) with
+    # np_timedelta_to_float with dedicated target unit (PDDatetimeUnitOptions)
+    td = np.timedelta64(1, np_dt_unit)
+    expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit]
+
+    out = np_timedelta64_to_float(td, datetime_unit=time_unit)
     np.testing.assert_allclose(out, expected)
     assert isinstance(out, float)

-    out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit="ns")
+    out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit=time_unit)
     np.testing.assert_allclose(out, expected)


-@pytest.mark.parametrize(
-    "td, expected", ([pd.Timedelta(1, "D"), 86400 * 1e9], [pd.Timedelta(1, "ns"), 1.0])
-)
-def test_pd_timedelta_to_float(td, expected):
-    out = pd_timedelta_to_float(td, datetime_unit="ns")
+@pytest.mark.parametrize("np_dt_unit", ["D", "h", "m", "s", "ms", "us", "ns"])
+def test_pd_timedelta_to_float(
+    np_dt_unit: NPDatetimeUnitOptions, time_unit: PDDatetimeUnitOptions
+):
+    # tests any combination of source pd.Timedelta (NPDatetimeUnitOptions) with
+    # np_timedelta_to_float with dedicated target unit (PDDatetimeUnitOptions)
+    td = pd.Timedelta(1, np_dt_unit)
+    expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit]
+
+    out = pd_timedelta_to_float(td, datetime_unit=time_unit)
     np.testing.assert_allclose(out, expected)
     assert isinstance(out, float)


 @pytest.mark.parametrize(
     "td", [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day"]
 )
-def test_timedelta_to_numeric(td):
+def test_timedelta_to_numeric(td, time_unit: PDDatetimeUnitOptions):
     # Scalar input
-    out = timedelta_to_numeric(td, "ns")
-    np.testing.assert_allclose(out, 86400 * 1e9)
+    out = timedelta_to_numeric(td, time_unit)
+    expected = _NS_PER_TIME_DELTA["D"] / _NS_PER_TIME_DELTA[time_unit]
+    np.testing.assert_allclose(out, expected)
     assert isinstance(out, float)

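A worked example of the expected-value formula used in the updated tests (a sketch that mirrors, rather than imports, the private _NS_PER_TIME_DELTA mapping): converting one day into a target unit of seconds.

import numpy as np

ns_per = {"D": 86_400 * 10**9, "s": 10**9}   # nanoseconds per unit, assumed values
expected = ns_per["D"] / ns_per["s"]         # -> 86400.0
assert expected == np.timedelta64(1, "D") / np.timedelta64(1, "s")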
