Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix mean for datetime-like using the respective time resolution unit #9977

Merged
merged 9 commits into from
Jan 29, 2025
2 changes: 1 addition & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ eventually be deprecated.

New Features
~~~~~~~~~~~~
- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`).
- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`, :pull:`9977`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_ and `Spencer Clark <https://github.com/spencerkclark>`_.
- Improve the error message raised when no key is matching the available variables in a dataset. (:pull:`9943`)
By `Jimmy Westling <https://github.com/illviljan>`_.
Expand Down
23 changes: 10 additions & 13 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@
xp = get_array_namespace(data)
if xp == np:
# numpy currently doesn't have an astype:
return data.astype(dtype, **kwargs)

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

invalid value encountered in cast

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

invalid value encountered in cast

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.12

invalid value encountered in cast

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.12

invalid value encountered in cast

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

invalid value encountered in cast

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

invalid value encountered in cast

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

invalid value encountered in cast

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

invalid value encountered in cast

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.12

invalid value encountered in cast

Check warning on line 234 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.12

invalid value encountered in cast
return xp.astype(data, dtype, **kwargs)
return data.astype(dtype, **kwargs)

Expand Down Expand Up @@ -662,16 +662,10 @@


def np_timedelta64_to_float(array, datetime_unit):
"""Convert numpy.timedelta64 to float.

Notes
-----
The array is first converted to microseconds, which is less likely to
cause overflow errors.
"""
array = array.astype("timedelta64[ns]").astype(np.float64)
conversion_factor = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit)
return conversion_factor * array
"""Convert numpy.timedelta64 to float, possibly at a loss of resolution."""
unit, _ = np.datetime_data(array.dtype)
conversion_factor = np.timedelta64(1, unit) / np.timedelta64(1, datetime_unit)
return conversion_factor * array.astype(np.float64)


def pd_timedelta_to_float(value, datetime_unit):
Expand Down Expand Up @@ -715,12 +709,15 @@
if dtypes.is_datetime_like(array.dtype):
offset = _datetime_nanmin(array)
kmuehlbauer marked this conversation as resolved.
Show resolved Hide resolved

# xarray always uses np.datetime64[ns] for np.datetime64 data
dtype = "timedelta64[ns]"
# From version 2025.01.2 xarray uses np.datetime64[unit], where unit
# is one of "s", "ms", "us", "ns".
# To not have to worry about the resolution, we just convert the output
# to "timedelta64" (without unit) and let the dtype of offset take precedence.
# This is fully backwards compatible with datetime64[ns].
return (
_mean(
datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs
).astype(dtype)
).astype("timedelta64")
+ offset
)
elif _contains_cftime_datetimes(array):
Expand Down
64 changes: 40 additions & 24 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from numpy import array, nan

from xarray import DataArray, Dataset, cftime_range, concat
from xarray.coding.times import _NS_PER_TIME_DELTA
from xarray.core import dtypes, duck_array_ops
from xarray.core.duck_array_ops import (
array_notnull_equiv,
Expand All @@ -28,6 +29,7 @@
where,
)
from xarray.core.extension_array import PandasExtensionArray
from xarray.core.types import NPDatetimeUnitOptions, PDDatetimeUnitOptions
from xarray.namedarray.pycompat import array_type
from xarray.testing import assert_allclose, assert_equal, assert_identical
from xarray.tests import (
Expand Down Expand Up @@ -411,10 +413,11 @@ def assert_dask_array(da, dask):
@arm_xfail
@pytest.mark.filterwarnings("ignore:All-NaN .* encountered:RuntimeWarning")
@pytest.mark.parametrize("dask", [False, True] if has_dask else [False])
def test_datetime_mean(dask: bool) -> None:
def test_datetime_mean(dask: bool, time_unit: PDDatetimeUnitOptions) -> None:
# Note: only testing numpy, as dask is broken upstream
dtype = f"M8[{time_unit}]"
da = DataArray(
np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8[ns]"),
np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype=dtype),
dims=["time"],
)
if dask:
Expand Down Expand Up @@ -846,11 +849,11 @@ def test_multiple_dims(dtype, dask, skipna, func):


@pytest.mark.parametrize("dask", [True, False])
def test_datetime_to_numeric_datetime64(dask):
def test_datetime_to_numeric_datetime64(dask, time_unit: PDDatetimeUnitOptions):
if dask and not has_dask:
pytest.skip("requires dask")

times = pd.date_range("2000", periods=5, freq="7D").values
times = pd.date_range("2000", periods=5, freq="7D").as_unit(time_unit).values
if dask:
import dask.array

Expand All @@ -874,8 +877,8 @@ def test_datetime_to_numeric_datetime64(dask):
result = duck_array_ops.datetime_to_numeric(
times, datetime_unit="h", dtype=dtype
)
expected = 24 * np.arange(0, 35, 7).astype(dtype)
np.testing.assert_array_equal(result, expected)
expected2 = 24 * np.arange(0, 35, 7).astype(dtype)
np.testing.assert_array_equal(result, expected2)


@requires_cftime
Expand Down Expand Up @@ -923,15 +926,18 @@ def test_datetime_to_numeric_cftime(dask):


@requires_cftime
def test_datetime_to_numeric_potential_overflow():
def test_datetime_to_numeric_potential_overflow(time_unit: PDDatetimeUnitOptions):
import cftime

times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]")
if time_unit == "ns":
pytest.skip("out-of-bounds datetime64 overflow")
dtype = f"M8[{time_unit}]"
times = pd.date_range("2000", periods=5, freq="7D").values.astype(dtype)
cftimes = cftime_range(
"2000", periods=5, freq="7D", calendar="proleptic_gregorian"
).values

offset = np.datetime64("0001-01-01")
offset = np.datetime64("0001-01-01", time_unit)
cfoffset = cftime.DatetimeProlepticGregorian(1, 1, 1)

result = duck_array_ops.datetime_to_numeric(
Expand All @@ -957,35 +963,45 @@ def test_py_timedelta_to_float():
assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6


@pytest.mark.parametrize(
"td, expected",
([np.timedelta64(1, "D"), 86400 * 1e9], [np.timedelta64(1, "ns"), 1.0]),
)
def test_np_timedelta64_to_float(td, expected):
out = np_timedelta64_to_float(td, datetime_unit="ns")
@pytest.mark.parametrize("np_dt_unit", ["D", "h", "m", "s", "ms", "us", "ns"])
def test_np_timedelta64_to_float(
np_dt_unit: NPDatetimeUnitOptions, time_unit: PDDatetimeUnitOptions
):
# tests any combination of source np.timedelta64 (NPDatetimeUnitOptions) with
# np_timedelta64_to_float with dedicated target unit (PDDatetimeUnitOptions)
td = np.timedelta64(1, np_dt_unit)
expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit]

out = np_timedelta64_to_float(td, datetime_unit=time_unit)
np.testing.assert_allclose(out, expected)
assert isinstance(out, float)

out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit="ns")
out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit=time_unit)
np.testing.assert_allclose(out, expected)


@pytest.mark.parametrize(
"td, expected", ([pd.Timedelta(1, "D"), 86400 * 1e9], [pd.Timedelta(1, "ns"), 1.0])
)
def test_pd_timedelta_to_float(td, expected):
out = pd_timedelta_to_float(td, datetime_unit="ns")
@pytest.mark.parametrize("np_dt_unit", ["D", "h", "m", "s", "ms", "us", "ns"])
def test_pd_timedelta_to_float(
np_dt_unit: NPDatetimeUnitOptions, time_unit: PDDatetimeUnitOptions
):
# tests any combination of source pd.Timedelta (NPDatetimeUnitOptions) with
# pd_timedelta_to_float with dedicated target unit (PDDatetimeUnitOptions)
td = pd.Timedelta(1, np_dt_unit)
expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit]

out = pd_timedelta_to_float(td, datetime_unit=time_unit)
np.testing.assert_allclose(out, expected)
assert isinstance(out, float)


@pytest.mark.parametrize(
"td", [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day"]
)
def test_timedelta_to_numeric(td):
def test_timedelta_to_numeric(td, time_unit: PDDatetimeUnitOptions):
# Scalar input
out = timedelta_to_numeric(td, "ns")
np.testing.assert_allclose(out, 86400 * 1e9)
out = timedelta_to_numeric(td, time_unit)
expected = _NS_PER_TIME_DELTA["D"] / _NS_PER_TIME_DELTA[time_unit]
np.testing.assert_allclose(out, expected)
assert isinstance(out, float)


Expand Down
Loading