Merge branch 'main' into backend-indexing
* main: (79 commits)
  fix mean for datetime-like using the respective time resolution unit (#9977)
  Add `time_unit` argument to `CFTimeIndex.to_datetimeindex` (#9965)
  remove gate and add a test (#9958)
  Remove repetitive that (replace it with the) (#9994)
  add shxarray to the xarray ecosystem list (#9995)
  Add `shards` to `valid_encodings` to enable sharded Zarr writing (#9948)
  Use flox for grouped first, last (#9986)
  Bump the actions group with 2 updates (#9989)
  Fix some typing (#9988)
  Remove unnecessary a article (#9980)
  Fix test_doc_example on big-endian systems (#9949)
  fix weighted polyfit for arrays with more than 2 dimensions (#9974)
  Use zarr-fixture to prevent thread leakage errors (#9967)
  remove dask-expr from CI runs, fix related tests (#9971)
  Update time coding tests to assert exact equality (#9961)
  cast type to PDDatetimeUnitOptions (#9963)
  Suggest the correct name when no key matches in the dataset (#9943)
  fix upstream dev issues (#9953)
  Relax nanosecond datetime restriction in CF time decoding (#9618)
  Remove outdated quantile test. (#9945)
  ...
dcherian committed Jan 30, 2025
2 parents fb24e9c + e28f171 commit 1ffe5e9
Showing 109 changed files with 5,158 additions and 2,131 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci-additional.yaml
@@ -123,7 +123,7 @@ jobs:
python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
- name: Upload mypy coverage to Codecov
-uses: codecov/codecov-action@v5.0.2
+uses: codecov/codecov-action@v5.3.1
with:
file: mypy_report/cobertura.xml
flags: mypy
@@ -174,7 +174,7 @@ jobs:
python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
- name: Upload mypy coverage to Codecov
-uses: codecov/codecov-action@v5.0.2
+uses: codecov/codecov-action@v5.3.1
with:
file: mypy_report/cobertura.xml
flags: mypy-min
@@ -230,7 +230,7 @@ jobs:
python -m pyright xarray/
- name: Upload pyright coverage to Codecov
-uses: codecov/codecov-action@v5.0.2
+uses: codecov/codecov-action@v5.3.1
with:
file: pyright_report/cobertura.xml
flags: pyright
@@ -286,7 +286,7 @@ jobs:
python -m pyright xarray/
- name: Upload pyright coverage to Codecov
-uses: codecov/codecov-action@v5.0.2
+uses: codecov/codecov-action@v5.3.1
with:
file: pyright_report/cobertura.xml
flags: pyright39
4 changes: 3 additions & 1 deletion .github/workflows/ci.yaml
@@ -159,7 +159,9 @@ jobs:
path: pytest.xml

- name: Upload code coverage to Codecov
-uses: codecov/codecov-action@v5.0.2
+uses: codecov/codecov-action@v5.3.1
+env:
+  CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
file: ./coverage.xml
flags: unittests
4 changes: 2 additions & 2 deletions .github/workflows/pypi-release.yaml
@@ -88,7 +88,7 @@ jobs:
path: dist
- name: Publish package to TestPyPI
if: github.event_name == 'push'
-uses: pypa/gh-action-pypi-publish@v1.12.2
+uses: pypa/gh-action-pypi-publish@v1.12.4
with:
repository_url: https://test.pypi.org/legacy/
verbose: true
@@ -110,6 +110,6 @@ jobs:
name: releases
path: dist
- name: Publish package to PyPI
-uses: pypa/gh-action-pypi-publish@v1.12.2
+uses: pypa/gh-action-pypi-publish@v1.12.4
with:
verbose: true
2 changes: 1 addition & 1 deletion .github/workflows/upstream-dev-ci.yaml
@@ -140,7 +140,7 @@ jobs:
run: |
python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
- name: Upload mypy coverage to Codecov
-uses: codecov/codecov-action@v5.0.2
+uses: codecov/codecov-action@v5.3.1
with:
file: mypy_report/cobertura.xml
flags: mypy
16 changes: 13 additions & 3 deletions .pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
- id: text-unicode-replacement-char
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
-rev: v0.7.2
+rev: v0.8.6
hooks:
- id: ruff-format
- id: ruff
@@ -37,12 +37,12 @@ repos:
exclude: "generate_aggregations.py"
additional_dependencies: ["black==24.8.0"]
- repo: https://github.com/rbubley/mirrors-prettier
-rev: v3.3.3
+rev: v3.4.2
hooks:
- id: prettier
args: [--cache-location=.prettier_cache/cache]
- repo: https://github.com/pre-commit/mirrors-mypy
-rev: v1.13.0
+rev: v1.14.1
hooks:
- id: mypy
# Copied from setup.cfg
@@ -63,3 +63,13 @@ repos:
rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d
hooks:
- id: validate-cff
+  - repo: https://github.com/ComPWA/taplo-pre-commit
+    rev: v0.9.3
+    hooks:
+      - id: taplo-format
+        args: ["--option", "array_auto_collapse=false"]
+  - repo: https://github.com/abravalheri/validate-pyproject
+    rev: v0.23
+    hooks:
+      - id: validate-pyproject
+        additional_dependencies: ["validate-pyproject-schema-store[all]"]
7 changes: 4 additions & 3 deletions .readthedocs.yaml
@@ -1,5 +1,9 @@
version: 2

+sphinx:
+  configuration: doc/conf.py
+  fail_on_warning: true

build:
os: ubuntu-lts-latest
tools:
@@ -14,7 +18,4 @@ build:
conda:
environment: ci/requirements/doc.yml

-sphinx:
-  fail_on_warning: true
-
formats: []
1 change: 1 addition & 0 deletions DATATREE_MIGRATION_GUIDE.md
@@ -45,6 +45,7 @@ A number of other API changes have been made, which should only require minor mo
- The `DataTree.parent` property is now read-only. To assign ancestral relationships directly you must instead use the `.children` property on the parent node, which remains settable.
- Similarly the `parent` kwarg has been removed from the `DataTree.__init__` constructor.
- DataTree objects passed to the `children` kwarg in `DataTree.__init__` are now shallow-copied.
+- `DataTree.map_over_subtree` has been renamed to `DataTree.map_over_datasets`, and changed to no longer work like a decorator. Instead you use it to apply the function and arguments directly, more like how `xarray.apply_ufunc` works.
- `DataTree.as_array` has been replaced by `DataTree.to_dataarray`.
- A number of methods which were not well tested have been (temporarily) disabled. In general we have tried to only keep things that are known to work, with the plan to increase API surface incrementally after release.

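For reference, the renamed method is now called with the function (and any extra arguments) directly, rather than used as a decorator. A minimal sketch of the new calling convention, using hypothetical example data:

```python
import xarray as xr

# A small two-node tree with hypothetical contents
tree = xr.DataTree.from_dict(
    {
        "/coarse": xr.Dataset({"a": ("x", [1, 2, 3])}),
        "/fine": xr.Dataset({"a": ("x", [10, 20, 30])}),
    }
)

# Old style: a function decorated with @map_over_subtree, then called on the tree.
# New style: pass the function directly, more like xarray.apply_ufunc.
doubled = tree.map_over_datasets(lambda ds: ds * 2)
```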
8 changes: 4 additions & 4 deletions asv_bench/benchmarks/__init__.py
@@ -30,13 +30,13 @@ def requires_sparse():


def randn(shape, frac_nan=None, chunks=None, seed=0):
-rng = np.random.RandomState(seed)
+rng = np.random.default_rng(seed)
if chunks is None:
x = rng.standard_normal(shape)
else:
import dask.array as da

-rng = da.random.RandomState(seed)
+rng = da.random.default_rng(seed)
x = rng.standard_normal(shape, chunks=chunks)

if frac_nan is not None:
@@ -47,8 +47,8 @@ def randn(shape, frac_nan=None, chunks=None, seed=0):


def randint(low, high=None, size=None, frac_minus=None, seed=0):
-rng = np.random.RandomState(seed)
-x = rng.randint(low, high, size)
+rng = np.random.default_rng(seed)
+x = rng.integers(low, high, size)
if frac_minus is not None:
inds = rng.choice(range(x.size), int(x.size * frac_minus))
x.flat[inds] = -1
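The benchmark helpers above migrate from NumPy's legacy `RandomState` to the `Generator` API. For reference, a minimal sketch of the equivalent calls:

```python
import numpy as np

rng = np.random.default_rng(0)      # replaces np.random.RandomState(0)
x = rng.standard_normal((3, 4))     # replaces rng.randn(3, 4)
ints = rng.integers(0, 10, size=5)  # replaces rng.randint(0, 10, size=5)
```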
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/dataset_io.py
@@ -305,7 +305,7 @@ def make_ds(self, nfiles=10):
ds.attrs = {"history": "created for xarray benchmarking"}

self.ds_list.append(ds)
-self.filenames_list.append("test_netcdf_%i.nc" % i)
+self.filenames_list.append(f"test_netcdf_{i}.nc")


class IOWriteMultipleNetCDF3(IOMultipleNetCDF):
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/reindexing.py
@@ -11,7 +11,7 @@

class Reindex:
def setup(self):
-data = np.random.RandomState(0).randn(ntime, nx, ny)
+data = np.random.default_rng(0).random((ntime, nx, ny))
self.ds = xr.Dataset(
{"temperature": (("time", "x", "y"), data)},
coords={"time": np.arange(ntime), "x": np.arange(nx), "y": np.arange(ny)},
5 changes: 4 additions & 1 deletion asv_bench/benchmarks/rolling.py
@@ -3,7 +3,7 @@

import xarray as xr

-from . import parameterized, randn, requires_dask
+from . import _skip_slow, parameterized, randn, requires_dask

nx = 3000
long_nx = 30000
@@ -80,6 +80,9 @@ def time_rolling_construct(self, center, stride, use_bottleneck):
class RollingDask(Rolling):
def setup(self, *args, **kwargs):
requires_dask()
+# TODO: Lazily skipped in CI as it is very demanding and slow.
+# Improve times and remove errors.
+_skip_slow()
super().setup(**kwargs)
self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50})
self.da_long = self.da_long.chunk({"x": 10000})
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/unstacking.py
@@ -8,7 +8,7 @@

class Unstacking:
def setup(self):
-data = np.random.RandomState(0).randn(250, 500)
+data = np.random.default_rng(0).random((250, 500))
self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...])
self.da_missing = self.da_full[:-1]
self.df_missing = self.da_missing.to_pandas()
1 change: 0 additions & 1 deletion ci/requirements/all-but-numba.yml
@@ -12,7 +12,6 @@ dependencies:
- cartopy
- cftime
- dask-core
-- dask-expr # dask raises a deprecation warning without this, breaking doctests
- distributed
- flox
- fsspec
3 changes: 2 additions & 1 deletion ci/requirements/doc.yml
@@ -10,7 +10,6 @@ dependencies:
- cfgrib
- kerchunk
- dask-core>=2022.1
-- dask-expr
- hypothesis>=6.75.8
- h5netcdf>=0.13
- ipykernel
@@ -20,6 +19,7 @@ dependencies:
- jupyter_client
- matplotlib-base
- nbsphinx
+- ncdata
- netcdf4>=1.5
- numba
- numpy>=2
@@ -30,6 +30,7 @@ dependencies:
- pre-commit
- pyarrow
- pyproj
+- rich # for Zarr tree()
- scipy!=1.10.0
- seaborn
- setuptools
3 changes: 2 additions & 1 deletion ci/requirements/environment-3.13.yml
@@ -10,7 +10,6 @@ dependencies:
- cartopy
- cftime
- dask-core
-- dask-expr
- distributed
- flox
- fsspec
@@ -47,3 +46,5 @@ dependencies:
- toolz
- typing_extensions
- zarr
+- pip:
+  - jax # no way to get cpu-only jaxlib from conda if gpu is present
2 changes: 1 addition & 1 deletion ci/requirements/environment-windows-3.13.yml
@@ -8,7 +8,6 @@ dependencies:
- cartopy
- cftime
- dask-core
-- dask-expr
- distributed
- flox
- fsspec
@@ -29,6 +28,7 @@ dependencies:
# - pint>=0.22
- pip
- pre-commit
+- pyarrow # importing dask.dataframe raises an ImportError without this
- pydap
- pytest
- pytest-cov
2 changes: 1 addition & 1 deletion ci/requirements/environment-windows.yml
@@ -8,7 +8,6 @@ dependencies:
- cartopy
- cftime
- dask-core
-- dask-expr
- distributed
- flox
- fsspec
@@ -29,6 +28,7 @@ dependencies:
# - pint>=0.22
- pip
- pre-commit
+- pyarrow # importing dask.dataframe raises an ImportError without this
- pydap
- pytest
- pytest-cov
3 changes: 2 additions & 1 deletion ci/requirements/environment.yml
@@ -10,7 +10,6 @@ dependencies:
- cartopy
- cftime
- dask-core
-- dask-expr # dask raises a deprecation warning without this, breaking doctests
- distributed
- flox
- fsspec
@@ -49,3 +48,5 @@ dependencies:
- toolz
- typing_extensions
- zarr
+- pip:
+  - jax # no way to get cpu-only jaxlib from conda if gpu is present
16 changes: 16 additions & 0 deletions doc/api.rst
@@ -626,12 +626,14 @@ Attributes relating to the recursive tree-like structure of a ``DataTree``.
DataTree.depth
DataTree.width
DataTree.subtree
+DataTree.subtree_with_keys
DataTree.descendants
DataTree.siblings
DataTree.lineage
DataTree.parents
DataTree.ancestors
DataTree.groups
+DataTree.xindexes

Data Contents
-------------
@@ -645,6 +647,7 @@ This interface echoes that of ``xarray.Dataset``.
DataTree.dims
DataTree.sizes
DataTree.data_vars
+DataTree.ds
DataTree.coords
DataTree.attrs
DataTree.encoding
@@ -1093,6 +1096,17 @@ DataTree methods
.. Missing:
.. ``open_mfdatatree``
+Encoding/Decoding
+=================
+
+Coder objects
+-------------
+
+.. autosummary::
+   :toctree: generated/
+
+   coders.CFDatetimeCoder
+
Coordinates objects
===================

@@ -1210,6 +1224,7 @@ Dataset
DatasetGroupBy.var
DatasetGroupBy.dims
DatasetGroupBy.groups
+DatasetGroupBy.shuffle_to_chunks

DataArray
---------
@@ -1241,6 +1256,7 @@ DataArray
DataArrayGroupBy.var
DataArrayGroupBy.dims
DataArrayGroupBy.groups
+DataArrayGroupBy.shuffle_to_chunks

Grouper Objects
---------------
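The newly documented `coders.CFDatetimeCoder` can be passed via `decode_times` to control the resolution used when decoding CF times. A minimal sketch, assuming a file `data.nc` with a CF-encoded time coordinate (the filename and `time_unit` value are illustrative):

```python
import xarray as xr
from xarray.coders import CFDatetimeCoder

# Decode the time coordinate to second resolution instead of the
# historical nanosecond default (cf. the relaxed restriction in #9618)
ds = xr.open_dataset(
    "data.nc",
    decode_times=CFDatetimeCoder(time_unit="s"),
)
```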
1 change: 1 addition & 0 deletions doc/ecosystem.rst
@@ -38,6 +38,7 @@ Geosciences
- `salem <https://salem.readthedocs.io>`_: Adds geolocalised subsetting, masking, and plotting operations to xarray's data structures via accessors.
- `SatPy <https://satpy.readthedocs.io/>`_ : Library for reading and manipulating meteorological remote sensing data and writing it to various image and data file formats.
- `SARXarray <https://tudelftgeodesy.github.io/sarxarray/>`_: xarray extension for reading and processing large Synthetic Aperture Radar (SAR) data stacks.
+- `shxarray <https://shxarray.wobbly.earth/>`_: Convert, filter, and map geodesy related spherical harmonic representations of gravity and terrestrial water storage through an xarray extension.
- `Spyfit <https://spyfit.readthedocs.io/en/master/>`_: FTIR spectroscopy of the atmosphere
- `windspharm <https://ajdawson.github.io/windspharm/index.html>`_: Spherical
harmonic wind analysis in Python.
6 changes: 3 additions & 3 deletions doc/getting-started-guide/faq.rst
@@ -173,9 +173,9 @@ integration with Cartopy_.

We think the design decisions we have made for xarray (namely, basing it on
pandas) make it a faster and more flexible data analysis tool. That said, Iris
-has some great domain specific functionality, and xarray includes
-methods for converting back and forth between xarray and Iris. See
-:py:meth:`~xarray.DataArray.to_iris` for more details.
+has some great domain specific functionality, and there are dedicated methods for
+converting back and forth between xarray and Iris. See
+:ref:`Reading and Writing Iris data <io.iris>` for more details.

What other projects leverage xarray?
------------------------------------
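The conversion methods referred to here are `DataArray.to_iris` and `DataArray.from_iris`. A minimal sketch of a round trip, assuming the `iris` package is installed:

```python
import xarray as xr

da = xr.DataArray(
    [[1.0, 2.0], [3.0, 4.0]],
    dims=("latitude", "longitude"),
    name="temperature",
)

cube = da.to_iris()                       # xarray DataArray -> Iris Cube
roundtrip = xr.DataArray.from_iris(cube)  # Iris Cube -> xarray DataArray
```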
1 change: 1 addition & 0 deletions doc/internals/index.rst
@@ -26,3 +26,4 @@ The pages in this section are intended for:
how-to-add-new-backend
how-to-create-custom-index
zarr-encoding-spec
+time-coding