Correctly interpolate seasons in Grouper (#2019)
### Pull Request Checklist:
- [x] This PR addresses an already opened issue (for bug fixes /
features)
    - This PR fixes only part of #2014 
- [x] Tests for the changes have been added (for bug fixes / features)
- [x] CHANGELOG.rst has been updated (with summary of main changes)
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added

### What kind of change does this PR introduce?

This PR adds a line to correctly interpolate seasonal values. It also changes the `test_timeseries` helper, which now accepts a `calendar` argument instead of `cftime`. Omitting it (or passing `None`) is equivalent to the previous `cftime=False`, while `calendar='standard'` is equivalent to the previous `cftime=True`. This allows testing different calendar implementations, e.g. 360_day calendars.
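
A minimal sketch of the motivating use case, based on the helper signature and test usage in this diff (exact defaults may differ):

```python
import numpy as np
from xclim.testing.helpers import test_timeseries

# Daily series on a 360_day calendar; passing `calendar` implies a cftime index,
# while omitting it (or passing None) keeps the previous pandas-based behaviour.
tas = test_timeseries(
    np.zeros(360), variable="tas", start="2000-01-01", freq="D", calendar="360_day"
)
```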
coxipi authored Feb 5, 2025
2 parents 92eb57c + a3a98ef commit 2c9fe06
Showing 12 changed files with 128 additions and 86 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.rst
@@ -4,7 +4,7 @@ Changelog

v0.55.0 (unreleased)
--------------------
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`).
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`), Sascha Hofmann (:user:`saschahofmann`).

New indicators
^^^^^^^^^^^^^^
@@ -16,13 +16,18 @@ New features and enhancements
* New function ``ensemble.partition.general_partition`` (:pull:`2035`)
* Added a new ``xclim.indices.generic.bivariate_count_occurrences`` function to count instances where operations are performed and validated for two variables. (:pull:`2030`).
* `xclim` now tracks energy usage and carbon emissions ("last run", "average", and "total") during CI workflows using the `eco-ci-energy-estimation` GitHub Action. (:pull:`2046`).
* ``xclim.testing.helpers.test_timeseries`` now accepts a `calendar` argument that is forwarded to ``xr.cftime_range``. (:pull:`2019`).

Internal changes
^^^^^^^^^^^^^^^^
* `sphinx-codeautolink` and `pygments` have been temporarily pinned due to breaking API changes. (:pull:`2030`).
* Adjusted the ``TestOfficialYaml`` test to use a dynamic method for finding the installed location of `xclim`. (:pull:`2028`).
* Adjusted two tests for better handling when running in Windows environments. (:pull:`2057`).

Bug fixes
^^^^^^^^^
* Fixed a bug in ``xclim.sdba.Grouper.get_index`` that didn't correctly interpolate seasonal values (:issue:`2014`, :pull:`2019`).

v0.54.0 (2024-12-16)
--------------------
Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Éric Dupuis (:user:`coxipi`), Sascha Hofmann (:user:`saschahofmann`).
2 changes: 1 addition & 1 deletion docs/installation.rst
@@ -160,4 +160,4 @@ To create a conda environment including `xclim`'s dependencies and several optio
conda env create -n my_xclim_env python=3.10 --file=environment.yml
conda activate my_xclim_env
(my_xclim_env) python -m pip install -e --no-deps .
(my_xclim_env) python -m pip install --no-deps -e .
70 changes: 35 additions & 35 deletions docs/notebooks/sdba-advanced.ipynb

Large diffs are not rendered by default.

48 changes: 34 additions & 14 deletions src/xclim/sdba/base.py
@@ -179,7 +179,9 @@ def prop_name(self):
"""Create a significant name for the grouping."""
return "year" if self.prop == "group" else self.prop

def get_coordinate(self, ds: xr.Dataset | None = None) -> xr.DataArray:
def get_coordinate(
self, ds: xr.Dataset | xr.DataArray | None = None
) -> xr.DataArray:
"""Return the coordinate as in the output of group.apply.
Currently, only implemented for groupings with prop == `month` or `dayofyear`.
@@ -293,21 +295,39 @@ def get_index(
return da[self.dim].rename("group")

ind = da.indexes[self.dim]
if self.prop == "week":
i = da[self.dim].copy(data=ind.isocalendar().week).astype(int)
elif self.prop == "season":
i = da[self.dim].copy(data=ind.month % 12 // 3)
else:
i = getattr(ind, self.prop)

if not np.issubdtype(i.dtype, np.integer):
raise ValueError(
f"Index {self.name} is not of type int (rather {i.dtype}), "
f"but {self.__class__.__name__} requires integer indexes."
)
if interp and self.dim == "time":
if self.prop == "month":
i = ind.month - 0.5 + ind.day / ind.days_in_month

if interp and self.dim == "time" and self.prop == "month":
i = ind.month - 0.5 + ind.day / ind.days_in_month
elif self.prop == "season":
calendar = ind.calendar if hasattr(ind, "calendar") else "standard"
length_year = (
360
if calendar == "360_day"
else 365 + (0 if calendar == "noleap" else ind.is_leap_year)
)
# This assumes that all seasons have the same length. The factor 1/6 comes from the fact that
# the first season is shifted by one month but the middle of the season is shifted in the other
# direction by half a month, so -(1/12 - 1/24)*4 = -1/6.
i = ind.dayofyear / length_year * 4 - 1 / 6
else:
raise ValueError(
f"Interpolation is not supported for {self.dim}.{self.prop}."
)
else:
if self.prop == "week":
i = da[self.dim].copy(data=ind.isocalendar().week).astype(int)
elif self.prop == "season":
i = da[self.dim].copy(data=ind.month % 12 // 3)
else:
i = getattr(ind, self.prop)

if not np.issubdtype(i.dtype, np.integer):
raise ValueError(
f"Index {self.name} is not of type int (rather {i.dtype}), "
f"but {self.__class__.__name__} requires integer indexes."
)

xi = xr.DataArray(
i,
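
As a quick sanity check on the fractional season index above (a sketch; `season_index` is a local helper, not an xclim function), day-of-year 91 in the 366-day year 2000 maps to roughly 0.83, which matches the `time.season` interpolated value added to `tests/test_sdba/test_base.py`:

```python
# Fractional season index as computed in Grouper.get_index when interp=True:
# 0 = DJF, 1 = MAM, 2 = JJA, 3 = SON; the -1/6 shift centres each season on its integer index.
def season_index(dayofyear: int, length_year: int) -> float:
    return dayofyear / length_year * 4 - 1 / 6

print(season_index(91, 366))   # ~0.82787 (March 31st, 2000), the new season test value
print(season_index(105, 365))  # mid-April lands close to 1 (MAM)
```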
2 changes: 1 addition & 1 deletion src/xclim/sdba/nbutils.py
@@ -393,7 +393,7 @@ def _extrapolate_on_quantiles(
Arguments are the same as _interp_on_quantiles_2D.
"""
bnds = _first_and_last_nonnull(oldx)
xp = np.arange(bnds.shape[0])
xp = oldg[:, 0]
toolow = newx < np.interp(newg, xp, bnds[:, 0])
toohigh = newx > np.interp(newg, xp, bnds[:, 1])
if method == "constant":
8 changes: 5 additions & 3 deletions src/xclim/sdba/utils.py
@@ -219,6 +219,8 @@ def broadcast(
sel.update({group.prop: group.get_index(x, interp=interp != "nearest")})

if sel:
if group.prop == "season":
grouped = grouped.assign_coords(season=map_season_to_int(grouped.season))
# Extract the correct mean factor for each time step.
if interp == "nearest": # Interpolate both the time group and the quantile.
grouped = grouped.sel(sel, method="nearest")
@@ -473,11 +475,11 @@ def interp_on_quantiles(
output_dtypes=[yq.dtype],
)
return out

prop_coords = group.get_coordinate(newx)
if prop not in xq.dims:
xq = xq.expand_dims({prop: group.get_coordinate()})
xq = xq.expand_dims({prop: prop_coords})
if prop not in yq.dims:
yq = yq.expand_dims({prop: group.get_coordinate()})
yq = yq.expand_dims({prop: prop_coords})

# Adding the cyclic bounds fails for string coordinates like seasons
# That's why we map the seasons to integers
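
The `broadcast` fix above relies on `map_season_to_int`. A hypothetical stand-in, illustrating the mapping this diff appears to assume (season labels to the same integers that `month % 12 // 3` produces):

```python
import numpy as np
import xarray as xr

_SEASON_TO_INT = {"DJF": 0, "MAM": 1, "JJA": 2, "SON": 3}


def map_season_to_int_sketch(season: xr.DataArray) -> xr.DataArray:
    # Selection and cyclic-bounds handling need a numeric coordinate, so replace
    # the string season labels with their integer counterparts.
    return season.copy(data=np.array([_SEASON_TO_INT[s] for s in season.values]))
```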
9 changes: 7 additions & 2 deletions src/xclim/testing/helpers.py
@@ -190,6 +190,7 @@ def test_timeseries(
freq: str = "D",
as_dataset: bool = False,
cftime: bool = False,
calendar: str | None = None,
) -> xr.DataArray | xr.Dataset:
"""
Create a generic timeseries object based on pre-defined dictionaries of existing variables.
Expand All @@ -210,14 +211,18 @@ def test_timeseries(
Whether to return a Dataset or a DataArray. Default is False.
cftime : bool
Whether to use cftime or not. Default is False.
calendar : str or None
The calendar to use. If a calendar is provided, a cftime index is used.
Returns
-------
xr.DataArray or xr.Dataset
A DataArray or Dataset with time, lon and lat dimensions.
"""
if cftime:
coords = xr.cftime_range(start, periods=len(values), freq=freq)
if calendar or cftime:
coords = xr.cftime_range(
start, periods=len(values), freq=freq, calendar=calendar or "standard"
)
else:
coords = pd.date_range(start, periods=len(values), freq=freq)

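
A small sketch of the equivalence described in the PR text, using the updated helper (behaviour inferred from the diff above):

```python
import numpy as np
from xclim.testing.helpers import test_timeseries

values = np.arange(10.0)
# Previous spelling, still supported:
a = test_timeseries(values, variable="tas", start="2000-01-01", cftime=True)
# New equivalent spelling; other calendar names (e.g. "noleap") also imply cftime:
b = test_timeseries(values, variable="tas", start="2000-01-01", calendar="standard")
```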
32 changes: 16 additions & 16 deletions tests/test_bootstrapping.py
@@ -23,30 +23,30 @@ class Test_bootstrap:
@pytest.mark.slow
@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize(
"var,p,index,freq, cftime",
"var,p,index,freq, calendar",
(
["tas", 98, tg90p, "MS", False],
["tasmin", 98, tn90p, "YS-JUL", False],
["tasmax", 98, tx90p, "QS-APR", False],
["tasmax", 98, tx90p, "QS-APR", True],
["tasmin", 2, tn10p, "MS", False],
["tasmax", 2, tx10p, "YS", False],
["tasmax", 2, tx10p, "YS", True],
["tas", 2, tg10p, "MS", False],
["tasmax", 98, warm_spell_duration_index, "MS", False],
["tasmin", 2, cold_spell_duration_index, "MS", False],
["pr", 99, days_over_precip_thresh, "MS", False],
["pr", 98, fraction_over_precip_thresh, "MS", False],
["pr", 98, fraction_over_precip_thresh, "MS", True],
["tas", 98, tg90p, "MS", None],
["tasmin", 98, tn90p, "YS-JUL", None],
["tasmax", 98, tx90p, "QS-APR", None],
["tasmax", 98, tx90p, "QS-APR", "standard"],
["tasmin", 2, tn10p, "MS", None],
["tasmax", 2, tx10p, "YS", None],
["tasmax", 2, tx10p, "YS", "standard"],
["tas", 2, tg10p, "MS", None],
["tasmax", 98, warm_spell_duration_index, "MS", None],
["tasmin", 2, cold_spell_duration_index, "MS", None],
["pr", 99, days_over_precip_thresh, "MS", None],
["pr", 98, fraction_over_precip_thresh, "MS", None],
["pr", 98, fraction_over_precip_thresh, "MS", "standard"],
),
)
def test_bootstrap(self, var, p, index, freq, cftime, use_dask, random):
def test_bootstrap(self, var, p, index, freq, calendar, use_dask, random):
# -- GIVEN
arr = self.ar1(
alpha=0.8, n=int(4 * 365.25), random=random, positive_values=(var == "pr")
)
climate_var = _test_timeseries(
arr, start="2000-01-01", variable=var, cftime=cftime
arr, start="2000-01-01", variable=var, calendar=calendar
)
if use_dask:
climate_var = climate_var.chunk(dict(time=50))
8 changes: 4 additions & 4 deletions tests/test_calendar.py
@@ -482,13 +482,13 @@ def test_convert_doy():
)


@pytest.mark.parametrize("cftime", [True, False])
@pytest.mark.parametrize("calendar", ["standard", None])
@pytest.mark.parametrize(
"w,s,m,f,ss",
[(30, 10, None, "YS", 0), (3, 1, None, "QS-DEC", 60), (6, None, None, "MS", 0)],
)
def test_stack_periods(tas_series, cftime, w, s, m, f, ss):
da = tas_series(np.arange(365 * 50), start="2000-01-01", cftime=cftime)
def test_stack_periods(tas_series, calendar, w, s, m, f, ss):
da = tas_series(np.arange(365 * 50), start="2000-01-01", calendar=calendar)

da_stck = stack_periods(
da, window=w, stride=s, min_length=m, freq=f, align_days=False
@@ -502,7 +502,7 @@ def test_stack_periods(tas_series, cftime, w, s, m, f, ss):


def test_stack_periods_special(tas_series):
da = tas_series(np.arange(365 * 48 + 12), cftime=True, start="2004-01-01")
da = tas_series(np.arange(365 * 48 + 12), calendar="standard", start="2004-01-01")

with pytest.raises(ValueError, match="unaligned day-of-year"):
stack_periods(da)
8 changes: 4 additions & 4 deletions tests/test_helpers.py
@@ -179,10 +179,10 @@ def test_resample_map_passthrough(tas_series):
assert not uses_dask(out)


@pytest.mark.parametrize("cftime", [False, True])
def test_make_hourly_temperature(tasmax_series, tasmin_series, cftime):
tasmax = tasmax_series(np.array([20]), units="degC", cftime=cftime)
tasmin = tasmin_series(np.array([0]), units="degC", cftime=cftime).expand_dims(
@pytest.mark.parametrize("calendar", [None, "standard"])
def test_make_hourly_temperature(tasmax_series, tasmin_series, calendar):
tasmax = tasmax_series(np.array([20]), units="degC", calendar=calendar)
tasmin = tasmin_series(np.array([0]), units="degC", calendar=calendar).expand_dims(
lat=[0]
)

2 changes: 1 addition & 1 deletion tests/test_sdba/test_adjustment.py
@@ -842,7 +842,7 @@ def test_real_data(self, open_dataset):
ref, hist, group=Grouper("time.dayofyear", window=31), nquantiles=quantiles
)

scen = EQM.adjust(hist, interp="linear", extrapolation="constant")
scen = EQM.adjust(hist, extrapolation="constant")

EX = ExtremeValues.train(ref, hist, cluster_thresh="1 mm/day", q_thresh=0.97)
new_scen = EX.adjust(scen, hist, frac=0.000000001)
18 changes: 14 additions & 4 deletions tests/test_sdba/test_base.py
@@ -50,11 +50,21 @@ def test_grouper_group(tas_series, group, window, nvals):


@pytest.mark.parametrize(
"group,interp,val90",
[("time", False, True), ("time.month", False, 3), ("time.month", True, 3.5)],
"group,interp,val90,calendar",
[
("time", False, True, None),
("time.month", False, 3, None),
("time.month", True, 3.5, None),
("time.season", False, 1, None),
("time.season", True, 0.8278688524590164, None),
("time.month", True, 3.533333333333333, "360_day"),
("time.month", True, 3.533333333333333, "noleap"),
("time.season", True, 0.8444444444444444, "360_day"),
("time.season", True, 0.8305936073059361, "noleap"),
],
)
def test_grouper_get_index(tas_series, group, interp, val90):
tas = tas_series(np.ones(366), start="2000-01-01")
def test_grouper_get_index(tas_series, group, interp, val90, calendar):
tas = tas_series(np.ones(366), start="2000-01-01", calendar=calendar)
grouper = Grouper(group)
indx = grouper.get_index(tas, interp=interp)
# 90 is March 31st
