Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add codespell pre-commit-hook #9781

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,9 @@ repos:
rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d
hooks:
- id: validate-cff
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
- id: codespell
additional_dependencies:
- tomli
8 changes: 4 additions & 4 deletions properties/test_pandas_roundtrip.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ def test_roundtrip_dataset(dataset) -> None:


@given(numeric_series, st.text())
def test_roundtrip_pandas_series(ser, ix_name) -> None:
def test_roundtrip_pandas_series(ser, ix_name) -> None: # codespell:ignore ser
# Need to name the index, otherwise Xarray calls it 'dim_0'.
ser.index.name = ix_name
arr = xr.DataArray(ser)
ser.index.name = ix_name # codespell:ignore ser
arr = xr.DataArray(ser) # codespell:ignore ser
roundtripped = arr.to_pandas()
pd.testing.assert_series_equal(ser, roundtripped)
pd.testing.assert_series_equal(ser, roundtripped) # codespell:ignore ser
xr.testing.assert_identical(arr, roundtripped.to_xarray())


Expand Down
58 changes: 31 additions & 27 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ dev = [
"sphinx_autosummary_accessors",
"xarray[complete]",
]
io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
etc = ["sparse"]
io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
parallel = ["dask[complete]"]
viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"]

Expand Down Expand Up @@ -234,40 +234,39 @@ extend-exclude = [
]

[tool.ruff.lint]
extend-safe-fixes = [
"TID252", # absolute imports
]
extend-select = [
"F", # Pyflakes
"E", # pycodestyle errors
"W", # pycodestyle warnings
"I", # isort
"UP", # pyupgrade
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"PIE", # flake8-pie
"TID", # flake8-tidy-imports (absolute imports)
"PGH", # pygrep-hooks
"F", # Pyflakes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't black usually do 2 whitespaces for comments?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, good question.
I shouldn't have even formatted this, but pre-commit did that ;)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

which hook was that? As far as I can tell, we don't have anything that would format TOML (yet, we can totally change that).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, might have been format on save in vscode.
I will have to check that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we don't have anything that would format TOML (yet, we can totally change that).

Stumbled across this and thought I'd make #9840 :)

"E", # pycodestyle errors
"W", # pycodestyle warnings
"I", # isort
"UP", # pyupgrade
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"PIE", # flake8-pie
"TID", # flake8-tidy-imports (absolute imports)
"PGH", # pygrep-hooks
"PERF", # Perflint
"RUF",
]
extend-safe-fixes = [
"TID252", # absolute imports
]
ignore = [
"E402", # module level import not at top of file
"E501", # line too long - let the formatter worry about that
"E731", # do not assign a lambda expression, use a def
"UP007", # use X | Y for type annotations
"UP027", # deprecated
"C40", # unnecessary generator, comprehension, or literal
"PIE790", # unnecessary pass statement
"E402", # module level import not at top of file
"E501", # line too long - let the formatter worry about that
"E731", # do not assign a lambda expression, use a def
"UP007", # use X | Y for type annotations
"UP027", # deprecated
"C40", # unnecessary generator, comprehension, or literal
"PIE790", # unnecessary pass statement
"PERF203", # try-except within a loop incurs performance overhead
"RUF001", # string contains ambiguous unicode character
"RUF002", # docstring contains ambiguous acute accent unicode character
"RUF003", # comment contains ambiguous no-break space unicode character
"RUF005", # consider upacking operator instead of concatenation
"RUF012", # mutable class attributes
"RUF001", # string contains ambiguous unicode character
"RUF002", # docstring contains ambiguous acute accent unicode character
"RUF003", # comment contains ambiguous no-break space unicode character
"RUF005", # consider unpacking operator instead of concatenation
"RUF012", # mutable class attributes
]


[tool.ruff.lint.per-file-ignores]
# don't enforce absolute imports
"asv_bench/**" = ["TID252"]
Expand Down Expand Up @@ -349,3 +348,8 @@ test = "pytest"
ignore = [
"PP308", # This option creates a large amount of log lines.
]

[tool.codespell]
enable-color = true
ignore-words-list = "nd,coo,COO,nin,Marge,Commun,Claus,Soler,Tung,Celles,slowy"
quiet-level = 3
40 changes: 20 additions & 20 deletions xarray/core/accessor_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -2560,7 +2560,7 @@ def split(

Returns
-------
splitted : same type as values or object array
split : same type as values or object array

Examples
--------
Expand All @@ -2576,29 +2576,29 @@ def split(

Split once and put the results in a new dimension

>>> values.str.split(dim="splitted", maxsplit=1)
<xarray.DataArray (X: 2, Y: 3, splitted: 2)> Size: 864B
>>> values.str.split(dim="split", maxsplit=1)
<xarray.DataArray (X: 2, Y: 3, split: 2)> Size: 864B
array([[['abc', 'def'],
['spam', 'eggs\tswallow'],
['red_blue', '']],
<BLANKLINE>
[['test0', 'test1\ntest2\n\ntest3'],
['', ''],
['abra', 'ka\nda\tbra']]], dtype='<U18')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

Split as many times as needed and put the results in a new dimension

>>> values.str.split(dim="splitted")
<xarray.DataArray (X: 2, Y: 3, splitted: 4)> Size: 768B
>>> values.str.split(dim="split")
<xarray.DataArray (X: 2, Y: 3, split: 4)> Size: 768B
array([[['abc', 'def', '', ''],
['spam', 'eggs', 'swallow', ''],
['red_blue', '', '', '']],
<BLANKLINE>
[['test0', 'test1', 'test2', 'test3'],
['', '', '', ''],
['abra', 'ka', 'da', 'bra']]], dtype='<U8')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

Split once and put the results in lists

Expand All @@ -2622,16 +2622,16 @@ def split(

Split only on spaces

>>> values.str.split(dim="splitted", sep=" ")
<xarray.DataArray (X: 2, Y: 3, splitted: 3)> Size: 2kB
>>> values.str.split(dim="split", sep=" ")
<xarray.DataArray (X: 2, Y: 3, split: 3)> Size: 2kB
array([[['abc', 'def', ''],
['spam\t\teggs\tswallow', '', ''],
['red_blue', '', '']],
<BLANKLINE>
[['test0\ntest1\ntest2\n\ntest3', '', ''],
['', '', ''],
['abra', '', 'ka\nda\tbra']]], dtype='<U24')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

See Also
--------
Expand Down Expand Up @@ -2678,7 +2678,7 @@ def rsplit(

Returns
-------
rsplitted : same type as values or object array
rsplit : same type as values or object array

Examples
--------
Expand All @@ -2694,29 +2694,29 @@ def rsplit(

Split once and put the results in a new dimension

>>> values.str.rsplit(dim="splitted", maxsplit=1)
<xarray.DataArray (X: 2, Y: 3, splitted: 2)> Size: 816B
>>> values.str.rsplit(dim="split", maxsplit=1)
<xarray.DataArray (X: 2, Y: 3, split: 2)> Size: 816B
array([[['abc', 'def'],
['spam\t\teggs', 'swallow'],
['', 'red_blue']],
<BLANKLINE>
[['test0\ntest1\ntest2', 'test3'],
['', ''],
['abra ka\nda', 'bra']]], dtype='<U17')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

Split as many times as needed and put the results in a new dimension

>>> values.str.rsplit(dim="splitted")
<xarray.DataArray (X: 2, Y: 3, splitted: 4)> Size: 768B
>>> values.str.rsplit(dim="split")
<xarray.DataArray (X: 2, Y: 3, split: 4)> Size: 768B
array([[['', '', 'abc', 'def'],
['', 'spam', 'eggs', 'swallow'],
['', '', '', 'red_blue']],
<BLANKLINE>
[['test0', 'test1', 'test2', 'test3'],
['', '', '', ''],
['abra', 'ka', 'da', 'bra']]], dtype='<U8')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

Split once and put the results in lists

Expand All @@ -2740,16 +2740,16 @@ def rsplit(

Split only on spaces

>>> values.str.rsplit(dim="splitted", sep=" ")
<xarray.DataArray (X: 2, Y: 3, splitted: 3)> Size: 2kB
>>> values.str.rsplit(dim="split", sep=" ")
<xarray.DataArray (X: 2, Y: 3, split: 3)> Size: 2kB
array([[['', 'abc', 'def'],
['', '', 'spam\t\teggs\tswallow'],
['', '', 'red_blue']],
<BLANKLINE>
[['', '', 'test0\ntest1\ntest2\n\ntest3'],
['', '', ''],
['abra', '', 'ka\nda\tbra']]], dtype='<U24')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

See Also
--------
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_accessor_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def test_contains(dtype) -> None:
assert_equal(result, expected)

# case sensitive without regex
result = values.str.contains("fO", regex=False, case=True)
result = values.str.contains("fO", regex=False, case=True) # codespell:ignore
expected = xr.DataArray([False, False, True, False])
assert result.dtype == expected.dtype
assert_equal(result, expected)
Expand Down
8 changes: 4 additions & 4 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1250,12 +1250,12 @@ def test_roundtrip_endian(self) -> None:
pass

def test_invalid_dataarray_names_raise(self) -> None:
te = (TypeError, "string or None")
ve = (ValueError, "string must be length 1 or")
terr = (TypeError, "string or None")
verr = (ValueError, "string must be length 1 or")
data = np.random.random((2, 2))
da = xr.DataArray(data)
for name, (error, msg) in zip(
[0, (4, 5), True, ""], [te, te, te, ve], strict=True
[0, (4, 5), True, ""], [terr, terr, terr, verr], strict=True
):
ds = Dataset({name: da})
with pytest.raises(error) as excinfo:
Expand Down Expand Up @@ -3314,7 +3314,7 @@ def test_append(self) -> None:

with self.create_zarr_target() as store:
if has_zarr_v3:
# TOOD: verify these
# TODO: verify these
expected = {
"set": 17,
"get": 12,
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1548,7 +1548,7 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non
if function == cftime_range and not has_cftime:
pytest.skip("requires cftime")

with pytest.raises(ValueError, match="nclusive"):
with pytest.raises(ValueError, match="nclusi"):
function("2000", periods=3, inclusive="foo")


Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1102,8 +1102,8 @@ def test_cftimeindex_repr_formatting_width(periods, display_width):
len_intro_str = len("CFTimeIndex(")
with xr.set_options(display_width=display_width):
repr_str = index.__repr__()
splitted = repr_str.split("\n")
for i, s in enumerate(splitted):
split = repr_str.split("\n")
for i, s in enumerate(split):
# check that lines not longer than OPTIONS['display_width']
assert len(s) <= display_width, f"{len(s)} {s} {display_width}"
if i > 0:
Expand Down
6 changes: 3 additions & 3 deletions xarray/tests/test_coarsen.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ class TestCoarsenConstruct:
def test_coarsen_construct(self, dask: bool) -> None:
ds = Dataset(
{
"vart": ("time", np.arange(48), {"a": "b"}),
"vart": ("time", np.arange(48), {"a": "b"}), # codespell:ignore vart
"varx": ("x", np.arange(10), {"a": "b"}),
"vartx": (("x", "time"), np.arange(480).reshape(10, 48), {"a": "b"}),
"vary": ("y", np.arange(12)),
Expand All @@ -275,9 +275,9 @@ def test_coarsen_construct(self, dask: bool) -> None:
ds = ds.chunk({"x": 4, "time": 10})

expected = xr.Dataset(attrs={"foo": "bar"})
expected["vart"] = (
expected["vart"] = ( # codespell:ignore vart
("year", "month"),
duck_array_ops.reshape(ds.vart.data, (-1, 12)),
duck_array_ops.reshape(ds.vart.data, (-1, 12)), # codespell:ignore vart
{"a": "b"},
)
expected["varx"] = (
Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4930,8 +4930,8 @@ def test_from_dataframe_categorical_index_string_categories(self) -> None:
categories=pd.Index(["foo", "bar", "baz"], dtype="string"),
)
)
ser = pd.Series(1, index=cat)
ds = ser.to_xarray()
series = pd.Series(1, index=cat)
ds = series.to_xarray()
assert ds.coords.dtypes["index"] == np.dtype("O")

@requires_sparse
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ def test_getitem_error(self):

v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5))
ind = Variable(["x"], [0, 1])
with pytest.raises(IndexError, match=r"Dimensions of indexers mis"):
with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
v[:, ind]

@pytest.mark.parametrize(
Expand Down
Loading