Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add codespell pre-commit-hook #9781

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,9 @@ repos:
rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d
hooks:
- id: validate-cff
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
- id: codespell
additional_dependencies:
- tomli
8 changes: 4 additions & 4 deletions properties/test_pandas_roundtrip.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ def test_roundtrip_dataset(dataset) -> None:


@given(numeric_series, st.text())
def test_roundtrip_pandas_series(ser, ix_name) -> None:
def test_roundtrip_pandas_series(ser, ix_name) -> None: # codespell:ignore ser
# Need to name the index, otherwise Xarray calls it 'dim_0'.
ser.index.name = ix_name
arr = xr.DataArray(ser)
ser.index.name = ix_name # codespell:ignore ser
arr = xr.DataArray(ser) # codespell:ignore ser
roundtripped = arr.to_pandas()
pd.testing.assert_series_equal(ser, roundtripped)
pd.testing.assert_series_equal(ser, roundtripped) # codespell:ignore ser
xr.testing.assert_identical(arr, roundtripped.to_xarray())


Expand Down
58 changes: 31 additions & 27 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ dev = [
"sphinx_autosummary_accessors",
"xarray[complete]",
]
io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
etc = ["sparse"]
io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
parallel = ["dask[complete]"]
viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"]

Expand Down Expand Up @@ -234,40 +234,39 @@ extend-exclude = [
]

[tool.ruff.lint]
extend-safe-fixes = [
"TID252", # absolute imports
]
extend-select = [
"F", # Pyflakes
"E", # pycodestyle errors
"W", # pycodestyle warnings
"I", # isort
"UP", # pyupgrade
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"PIE", # flake8-pie
"TID", # flake8-tidy-imports (absolute imports)
"PGH", # pygrep-hooks
"F", # Pyflakes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't black usually do 2 whitespaces for comments?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, good question.
I shouldn't have even formatted this, but pre-commit did that ;)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

which hook was that? As far as I can tell, we don't have anything that would format TOML (yet, we can totally change that).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, might have been format on save in vscode.
I will have to check that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we don't have anything that would format TOML (yet, we can totally change that).

Stumbled across this and thought I'd make #9840 :)

"E", # pycodestyle errors
"W", # pycodestyle warnings
"I", # isort
"UP", # pyupgrade
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"PIE", # flake8-pie
"TID", # flake8-tidy-imports (absolute imports)
"PGH", # pygrep-hooks
"PERF", # Perflint
"RUF",
]
extend-safe-fixes = [
"TID252", # absolute imports
]
ignore = [
"E402", # module level import not at top of file
"E501", # line too long - let the formatter worry about that
"E731", # do not assign a lambda expression, use a def
"UP007", # use X | Y for type annotations
"UP027", # deprecated
"C40", # unnecessary generator, comprehension, or literal
"PIE790", # unnecessary pass statement
"E402", # module level import not at top of file
"E501", # line too long - let the formatter worry about that
"E731", # do not assign a lambda expression, use a def
"UP007", # use X | Y for type annotations
"UP027", # deprecated
"C40", # unnecessary generator, comprehension, or literal
"PIE790", # unnecessary pass statement
"PERF203", # try-except within a loop incurs performance overhead
"RUF001", # string contains ambiguous unicode character
"RUF002", # docstring contains ambiguous acute accent unicode character
"RUF003", # comment contains ambiguous no-break space unicode character
"RUF005", # consider upacking operator instead of concatenation
"RUF012", # mutable class attributes
"RUF001", # string contains ambiguous unicode character
"RUF002", # docstring contains ambiguous acute accent unicode character
"RUF003", # comment contains ambiguous no-break space unicode character
"RUF005", # consider unpacking operator instead of concatenation
"RUF012", # mutable class attributes
]


[tool.ruff.lint.per-file-ignores]
# don't enforce absolute imports
"asv_bench/**" = ["TID252"]
Expand Down Expand Up @@ -349,3 +348,8 @@ test = "pytest"
ignore = [
"PP308", # This option creates a large amount of log lines.
]

[tool.codespell]
enable-color = true
ignore-words-list = "nd,coo,COO,nin,Marge,Commun,Claus,Soler,Tung,Celles,slowy"
quiet-level = 3
40 changes: 20 additions & 20 deletions xarray/core/accessor_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -2560,7 +2560,7 @@ def split(

Returns
-------
splitted : same type as values or object array
split : same type as values or object array

Examples
--------
Expand All @@ -2576,29 +2576,29 @@ def split(

Split once and put the results in a new dimension

>>> values.str.split(dim="splitted", maxsplit=1)
<xarray.DataArray (X: 2, Y: 3, splitted: 2)> Size: 864B
>>> values.str.split(dim="split", maxsplit=1)
<xarray.DataArray (X: 2, Y: 3, split: 2)> Size: 864B
array([[['abc', 'def'],
['spam', 'eggs\tswallow'],
['red_blue', '']],
<BLANKLINE>
[['test0', 'test1\ntest2\n\ntest3'],
['', ''],
['abra', 'ka\nda\tbra']]], dtype='<U18')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

Split as many times as needed and put the results in a new dimension

>>> values.str.split(dim="splitted")
<xarray.DataArray (X: 2, Y: 3, splitted: 4)> Size: 768B
>>> values.str.split(dim="split")
<xarray.DataArray (X: 2, Y: 3, split: 4)> Size: 768B
array([[['abc', 'def', '', ''],
['spam', 'eggs', 'swallow', ''],
['red_blue', '', '', '']],
<BLANKLINE>
[['test0', 'test1', 'test2', 'test3'],
['', '', '', ''],
['abra', 'ka', 'da', 'bra']]], dtype='<U8')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

Split once and put the results in lists

Expand All @@ -2622,16 +2622,16 @@ def split(

Split only on spaces

>>> values.str.split(dim="splitted", sep=" ")
<xarray.DataArray (X: 2, Y: 3, splitted: 3)> Size: 2kB
>>> values.str.split(dim="split", sep=" ")
<xarray.DataArray (X: 2, Y: 3, split: 3)> Size: 2kB
array([[['abc', 'def', ''],
['spam\t\teggs\tswallow', '', ''],
['red_blue', '', '']],
<BLANKLINE>
[['test0\ntest1\ntest2\n\ntest3', '', ''],
['', '', ''],
['abra', '', 'ka\nda\tbra']]], dtype='<U24')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

See Also
--------
Expand Down Expand Up @@ -2678,7 +2678,7 @@ def rsplit(

Returns
-------
rsplitted : same type as values or object array
rsplit : same type as values or object array

Examples
--------
Expand All @@ -2694,29 +2694,29 @@ def rsplit(

Split once and put the results in a new dimension

>>> values.str.rsplit(dim="splitted", maxsplit=1)
<xarray.DataArray (X: 2, Y: 3, splitted: 2)> Size: 816B
>>> values.str.rsplit(dim="split", maxsplit=1)
<xarray.DataArray (X: 2, Y: 3, split: 2)> Size: 816B
array([[['abc', 'def'],
['spam\t\teggs', 'swallow'],
['', 'red_blue']],
<BLANKLINE>
[['test0\ntest1\ntest2', 'test3'],
['', ''],
['abra ka\nda', 'bra']]], dtype='<U17')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

Split as many times as needed and put the results in a new dimension

>>> values.str.rsplit(dim="splitted")
<xarray.DataArray (X: 2, Y: 3, splitted: 4)> Size: 768B
>>> values.str.rsplit(dim="split")
<xarray.DataArray (X: 2, Y: 3, split: 4)> Size: 768B
array([[['', '', 'abc', 'def'],
['', 'spam', 'eggs', 'swallow'],
['', '', '', 'red_blue']],
<BLANKLINE>
[['test0', 'test1', 'test2', 'test3'],
['', '', '', ''],
['abra', 'ka', 'da', 'bra']]], dtype='<U8')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

Split once and put the results in lists

Expand All @@ -2740,16 +2740,16 @@ def rsplit(

Split only on spaces

>>> values.str.rsplit(dim="splitted", sep=" ")
<xarray.DataArray (X: 2, Y: 3, splitted: 3)> Size: 2kB
>>> values.str.rsplit(dim="split", sep=" ")
<xarray.DataArray (X: 2, Y: 3, split: 3)> Size: 2kB
array([[['', 'abc', 'def'],
['', '', 'spam\t\teggs\tswallow'],
['', '', 'red_blue']],
<BLANKLINE>
[['', '', 'test0\ntest1\ntest2\n\ntest3'],
['', '', ''],
['abra', '', 'ka\nda\tbra']]], dtype='<U24')
Dimensions without coordinates: X, Y, splitted
Dimensions without coordinates: X, Y, split

See Also
--------
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_accessor_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def test_contains(dtype) -> None:
assert_equal(result, expected)

# case sensitive without regex
result = values.str.contains("fO", regex=False, case=True)
result = values.str.contains("fO", regex=False, case=True) # codespell:ignore
expected = xr.DataArray([False, False, True, False])
assert result.dtype == expected.dtype
assert_equal(result, expected)
Expand Down
8 changes: 4 additions & 4 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1250,12 +1250,12 @@ def test_roundtrip_endian(self) -> None:
pass

def test_invalid_dataarray_names_raise(self) -> None:
te = (TypeError, "string or None")
ve = (ValueError, "string must be length 1 or")
terr = (TypeError, "string or None")
verr = (ValueError, "string must be length 1 or")
data = np.random.random((2, 2))
da = xr.DataArray(data)
for name, (error, msg) in zip(
[0, (4, 5), True, ""], [te, te, te, ve], strict=True
[0, (4, 5), True, ""], [terr, terr, terr, verr], strict=True
):
ds = Dataset({name: da})
with pytest.raises(error) as excinfo:
Expand Down Expand Up @@ -3314,7 +3314,7 @@ def test_append(self) -> None:

with self.create_zarr_target() as store:
if has_zarr_v3:
# TOOD: verify these
# TODO: verify these
expected = {
"set": 17,
"get": 12,
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1548,7 +1548,7 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non
if function == cftime_range and not has_cftime:
pytest.skip("requires cftime")

with pytest.raises(ValueError, match="nclusive"):
with pytest.raises(ValueError, match="nclusi"):
function("2000", periods=3, inclusive="foo")


Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1102,8 +1102,8 @@ def test_cftimeindex_repr_formatting_width(periods, display_width):
len_intro_str = len("CFTimeIndex(")
with xr.set_options(display_width=display_width):
repr_str = index.__repr__()
splitted = repr_str.split("\n")
for i, s in enumerate(splitted):
split = repr_str.split("\n")
for i, s in enumerate(split):
# check that lines not longer than OPTIONS['display_width']
assert len(s) <= display_width, f"{len(s)} {s} {display_width}"
if i > 0:
Expand Down
6 changes: 3 additions & 3 deletions xarray/tests/test_coarsen.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ class TestCoarsenConstruct:
def test_coarsen_construct(self, dask: bool) -> None:
ds = Dataset(
{
"vart": ("time", np.arange(48), {"a": "b"}),
"vart": ("time", np.arange(48), {"a": "b"}), # codespell:ignore vart
"varx": ("x", np.arange(10), {"a": "b"}),
"vartx": (("x", "time"), np.arange(480).reshape(10, 48), {"a": "b"}),
"vary": ("y", np.arange(12)),
Expand All @@ -275,9 +275,9 @@ def test_coarsen_construct(self, dask: bool) -> None:
ds = ds.chunk({"x": 4, "time": 10})

expected = xr.Dataset(attrs={"foo": "bar"})
expected["vart"] = (
expected["vart"] = ( # codespell:ignore vart
("year", "month"),
duck_array_ops.reshape(ds.vart.data, (-1, 12)),
duck_array_ops.reshape(ds.vart.data, (-1, 12)), # codespell:ignore vart
{"a": "b"},
)
expected["varx"] = (
Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4930,8 +4930,8 @@ def test_from_dataframe_categorical_index_string_categories(self) -> None:
categories=pd.Index(["foo", "bar", "baz"], dtype="string"),
)
)
ser = pd.Series(1, index=cat)
ds = ser.to_xarray()
series = pd.Series(1, index=cat)
ds = series.to_xarray()
assert ds.coords.dtypes["index"] == np.dtype("O")

@requires_sparse
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ def test_getitem_error(self):

v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5))
ind = Variable(["x"], [0, 1])
with pytest.raises(IndexError, match=r"Dimensions of indexers mis"):
with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
v[:, ind]

@pytest.mark.parametrize(
Expand Down
Loading