Correctly interpolate seasons in Grouper (#2019)
### Pull Request Checklist:
- [x] This PR addresses an already opened issue (for bug fixes /
features)
    - This PR fixes only part of #2014 
- [x] Tests for the changes have been added (for bug fixes / features)
- [x] CHANGELOG.rst has been updated (with summary of main changes)
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added

### What kind of change does this PR introduce?

This PR adds a line to correctly interpolate seasonal values. It also changes the `test_timeseries` helper, which now accepts a `calendar` argument instead of `cftime`. Omitting it (or passing `None`) is equivalent to the previous `cftime=False`, while `calendar='standard'` is equivalent to the previous `cftime=True`. This allows testing different calendar implementations, e.g. 360_day calendars.
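
A minimal sketch of the motivating use case, based on the helper signature and test usage in this diff (exact defaults may differ):

```python
import numpy as np
from xclim.testing.helpers import test_timeseries

# Daily series on a 360_day calendar; passing `calendar` implies a cftime index,
# while omitting it (or passing None) keeps the previous pandas-based behaviour.
tas = test_timeseries(
    np.zeros(360), variable="tas", start="2000-01-01", freq="D", calendar="360_day"
)
```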
coxipi authored Feb 5, 2025
2 parents 92eb57c + a3a98ef commit 2c9fe06
Showing 12 changed files with 128 additions and 86 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.rst
@@ -4,7 +4,7 @@ Changelog

v0.55.0 (unreleased)
--------------------
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`).
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`), Sascha Hofmann (:user:`saschahofmann`).

New indicators
^^^^^^^^^^^^^^
@@ -16,13 +16,18 @@ New features and enhancements
* New function ``ensemble.partition.general_partition`` (:pull:`2035`)
* Added a new ``xclim.indices.generic.bivariate_count_occurrences`` function to count instances where operations are performed and validated for two variables. (:pull:`2030`).
* `xclim` now tracks energy usage and carbon emissions ("last run", "average", and "total") during CI workflows using the `eco-ci-energy-estimation` GitHub Action. (:pull:`2046`).
* ``xclim.testing.helpers.test_timeseries`` now accepts a `calendar` argument that is forwarded to ``xr.cftime_range``. (:pull:`2019`).

Internal changes
^^^^^^^^^^^^^^^^
* `sphinx-codeautolink` and `pygments` have been temporarily pinned due to breaking API changes. (:pull:`2030`).
* Adjusted the ``TestOfficialYaml`` test to use a dynamic method for finding the installed location of `xclim`. (:pull:`2028`).
* Adjusted two tests for better handling when running in Windows environments. (:pull:`2057`).

Bug fixes
^^^^^^^^^
* Fixed a bug in ``xclim.sdba.Grouper.get_index`` that didn't correctly interpolate seasonal values (:issue:`2014`, :pull:`2019`).

v0.54.0 (2024-12-16)
--------------------
Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Éric Dupuis (:user:`coxipi`), Sascha Hofmann (:user:`saschahofmann`).
2 changes: 1 addition & 1 deletion docs/installation.rst
@@ -160,4 +160,4 @@ To create a conda environment including `xclim`'s dependencies and several optio
conda env create -n my_xclim_env python=3.10 --file=environment.yml
conda activate my_xclim_env
(my_xclim_env) python -m pip install -e --no-deps .
(my_xclim_env) python -m pip install --no-deps -e .
70 changes: 35 additions & 35 deletions docs/notebooks/sdba-advanced.ipynb

Large diffs are not rendered by default.

48 changes: 34 additions & 14 deletions src/xclim/sdba/base.py
@@ -179,7 +179,9 @@ def prop_name(self):
"""Create a significant name for the grouping."""
return "year" if self.prop == "group" else self.prop

def get_coordinate(self, ds: xr.Dataset | None = None) -> xr.DataArray:
def get_coordinate(
self, ds: xr.Dataset | xr.DataArray | None = None
) -> xr.DataArray:
"""Return the coordinate as in the output of group.apply.
Currently, only implemented for groupings with prop == `month` or `dayofyear`.
@@ -293,21 +295,39 @@ def get_index(
return da[self.dim].rename("group")

ind = da.indexes[self.dim]
if self.prop == "week":
i = da[self.dim].copy(data=ind.isocalendar().week).astype(int)
elif self.prop == "season":
i = da[self.dim].copy(data=ind.month % 12 // 3)
else:
i = getattr(ind, self.prop)

if not np.issubdtype(i.dtype, np.integer):
raise ValueError(
f"Index {self.name} is not of type int (rather {i.dtype}), "
f"but {self.__class__.__name__} requires integer indexes."
)
if interp and self.dim == "time":
if self.prop == "month":
i = ind.month - 0.5 + ind.day / ind.days_in_month

if interp and self.dim == "time" and self.prop == "month":
i = ind.month - 0.5 + ind.day / ind.days_in_month
elif self.prop == "season":
calendar = ind.calendar if hasattr(ind, "calendar") else "standard"
length_year = (
360
if calendar == "360_day"
else 365 + (0 if calendar == "noleap" else ind.is_leap_year)
)
# This assumes that all seasons have the same length. The factor 1/6 comes from the fact that
# the first season is shifted by one month but the middle of the season is shifted in the other
# direction by half a month, so -(1/12 - 1/24)*4 = -1/6.
i = ind.dayofyear / length_year * 4 - 1 / 6
else:
raise ValueError(
f"Interpolation is not supported for {self.dim}.{self.prop}."
)
else:
if self.prop == "week":
i = da[self.dim].copy(data=ind.isocalendar().week).astype(int)
elif self.prop == "season":
i = da[self.dim].copy(data=ind.month % 12 // 3)
else:
i = getattr(ind, self.prop)

if not np.issubdtype(i.dtype, np.integer):
raise ValueError(
f"Index {self.name} is not of type int (rather {i.dtype}), "
f"but {self.__class__.__name__} requires integer indexes."
)

xi = xr.DataArray(
i,
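
As a quick sanity check on the fractional season index above (a sketch; `season_index` is a local helper, not an xclim function), day-of-year 91 in the 366-day year 2000 maps to roughly 0.83, which matches the `time.season` interpolated value added to `tests/test_sdba/test_base.py`:

```python
# Fractional season index as computed in Grouper.get_index when interp=True:
# 0 = DJF, 1 = MAM, 2 = JJA, 3 = SON; the -1/6 shift centres each season on its integer index.
def season_index(dayofyear: int, length_year: int) -> float:
    return dayofyear / length_year * 4 - 1 / 6

print(season_index(91, 366))   # ~0.82787 (March 31st, 2000), the new season test value
print(season_index(105, 365))  # mid-April lands close to 1 (MAM)
```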
2 changes: 1 addition & 1 deletion src/xclim/sdba/nbutils.py
@@ -393,7 +393,7 @@ def _extrapolate_on_quantiles(
Arguments are the same as _interp_on_quantiles_2D.
"""
bnds = _first_and_last_nonnull(oldx)
xp = np.arange(bnds.shape[0])
xp = oldg[:, 0]
toolow = newx < np.interp(newg, xp, bnds[:, 0])
toohigh = newx > np.interp(newg, xp, bnds[:, 1])
if method == "constant":
8 changes: 5 additions & 3 deletions src/xclim/sdba/utils.py
@@ -219,6 +219,8 @@ def broadcast(
sel.update({group.prop: group.get_index(x, interp=interp != "nearest")})

if sel:
if group.prop == "season":
grouped = grouped.assign_coords(season=map_season_to_int(grouped.season))
# Extract the correct mean factor for each time step.
if interp == "nearest": # Interpolate both the time group and the quantile.
grouped = grouped.sel(sel, method="nearest")
@@ -473,11 +475,11 @@ def interp_on_quantiles(
output_dtypes=[yq.dtype],
)
return out

prop_coords = group.get_coordinate(newx)
if prop not in xq.dims:
xq = xq.expand_dims({prop: group.get_coordinate()})
xq = xq.expand_dims({prop: prop_coords})
if prop not in yq.dims:
yq = yq.expand_dims({prop: group.get_coordinate()})
yq = yq.expand_dims({prop: prop_coords})

# Adding the cyclic bounds fails for string coordinates like seasons
# That's why we map the seasons to integers
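
The `broadcast` fix above relies on `map_season_to_int`. A hypothetical stand-in, illustrating the mapping this diff appears to assume (season labels to the same integers that `month % 12 // 3` produces):

```python
import numpy as np
import xarray as xr

_SEASON_TO_INT = {"DJF": 0, "MAM": 1, "JJA": 2, "SON": 3}


def map_season_to_int_sketch(season: xr.DataArray) -> xr.DataArray:
    # Selection and cyclic-bounds handling need a numeric coordinate, so replace
    # the string season labels with their integer counterparts.
    return season.copy(data=np.array([_SEASON_TO_INT[s] for s in season.values]))
```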
9 changes: 7 additions & 2 deletions src/xclim/testing/helpers.py
@@ -190,6 +190,7 @@ def test_timeseries(
freq: str = "D",
as_dataset: bool = False,
cftime: bool = False,
calendar: str | None = None,
) -> xr.DataArray | xr.Dataset:
"""
Create a generic timeseries object based on pre-defined dictionaries of existing variables.
Expand All @@ -210,14 +211,18 @@ def test_timeseries(
Whether to return a Dataset or a DataArray. Default is False.
cftime : bool
Whether to use cftime or not. Default is False.
calendar : str or None
The calendar to use. If a calendar is provided, a cftime index is used.
Returns
-------
xr.DataArray or xr.Dataset
A DataArray or Dataset with time, lon and lat dimensions.
"""
if cftime:
coords = xr.cftime_range(start, periods=len(values), freq=freq)
if calendar or cftime:
coords = xr.cftime_range(
start, periods=len(values), freq=freq, calendar=calendar or "standard"
)
else:
coords = pd.date_range(start, periods=len(values), freq=freq)

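
A small sketch of the equivalence described in the PR text, using the updated helper (behaviour inferred from the diff above):

```python
import numpy as np
from xclim.testing.helpers import test_timeseries

values = np.arange(10.0)
# Previous spelling, still supported:
a = test_timeseries(values, variable="tas", start="2000-01-01", cftime=True)
# New equivalent spelling; other calendar names (e.g. "noleap") also imply cftime:
b = test_timeseries(values, variable="tas", start="2000-01-01", calendar="standard")
```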
32 changes: 16 additions & 16 deletions tests/test_bootstrapping.py
@@ -23,30 +23,30 @@ class Test_bootstrap:
@pytest.mark.slow
@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize(
"var,p,index,freq, cftime",
"var,p,index,freq, calendar",
(
["tas", 98, tg90p, "MS", False],
["tasmin", 98, tn90p, "YS-JUL", False],
["tasmax", 98, tx90p, "QS-APR", False],
["tasmax", 98, tx90p, "QS-APR", True],
["tasmin", 2, tn10p, "MS", False],
["tasmax", 2, tx10p, "YS", False],
["tasmax", 2, tx10p, "YS", True],
["tas", 2, tg10p, "MS", False],
["tasmax", 98, warm_spell_duration_index, "MS", False],
["tasmin", 2, cold_spell_duration_index, "MS", False],
["pr", 99, days_over_precip_thresh, "MS", False],
["pr", 98, fraction_over_precip_thresh, "MS", False],
["pr", 98, fraction_over_precip_thresh, "MS", True],
["tas", 98, tg90p, "MS", None],
["tasmin", 98, tn90p, "YS-JUL", None],
["tasmax", 98, tx90p, "QS-APR", None],
["tasmax", 98, tx90p, "QS-APR", "standard"],
["tasmin", 2, tn10p, "MS", None],
["tasmax", 2, tx10p, "YS", None],
["tasmax", 2, tx10p, "YS", "standard"],
["tas", 2, tg10p, "MS", None],
["tasmax", 98, warm_spell_duration_index, "MS", None],
["tasmin", 2, cold_spell_duration_index, "MS", None],
["pr", 99, days_over_precip_thresh, "MS", None],
["pr", 98, fraction_over_precip_thresh, "MS", None],
["pr", 98, fraction_over_precip_thresh, "MS", "standard"],
),
)
def test_bootstrap(self, var, p, index, freq, cftime, use_dask, random):
def test_bootstrap(self, var, p, index, freq, calendar, use_dask, random):
# -- GIVEN
arr = self.ar1(
alpha=0.8, n=int(4 * 365.25), random=random, positive_values=(var == "pr")
)
climate_var = _test_timeseries(
arr, start="2000-01-01", variable=var, cftime=cftime
arr, start="2000-01-01", variable=var, calendar=calendar
)
if use_dask:
climate_var = climate_var.chunk(dict(time=50))
8 changes: 4 additions & 4 deletions tests/test_calendar.py
@@ -482,13 +482,13 @@ def test_convert_doy():
)


@pytest.mark.parametrize("cftime", [True, False])
@pytest.mark.parametrize("calendar", ["standard", None])
@pytest.mark.parametrize(
"w,s,m,f,ss",
[(30, 10, None, "YS", 0), (3, 1, None, "QS-DEC", 60), (6, None, None, "MS", 0)],
)
def test_stack_periods(tas_series, cftime, w, s, m, f, ss):
da = tas_series(np.arange(365 * 50), start="2000-01-01", cftime=cftime)
def test_stack_periods(tas_series, calendar, w, s, m, f, ss):
da = tas_series(np.arange(365 * 50), start="2000-01-01", calendar=calendar)

da_stck = stack_periods(
da, window=w, stride=s, min_length=m, freq=f, align_days=False
@@ -502,7 +502,7 @@ def test_stack_periods(tas_series, cftime, w, s, m, f, ss):


def test_stack_periods_special(tas_series):
da = tas_series(np.arange(365 * 48 + 12), cftime=True, start="2004-01-01")
da = tas_series(np.arange(365 * 48 + 12), calendar="standard", start="2004-01-01")

with pytest.raises(ValueError, match="unaligned day-of-year"):
stack_periods(da)
8 changes: 4 additions & 4 deletions tests/test_helpers.py
@@ -179,10 +179,10 @@ def test_resample_map_passthrough(tas_series):
assert not uses_dask(out)


@pytest.mark.parametrize("cftime", [False, True])
def test_make_hourly_temperature(tasmax_series, tasmin_series, cftime):
tasmax = tasmax_series(np.array([20]), units="degC", cftime=cftime)
tasmin = tasmin_series(np.array([0]), units="degC", cftime=cftime).expand_dims(
@pytest.mark.parametrize("calendar", [None, "standard"])
def test_make_hourly_temperature(tasmax_series, tasmin_series, calendar):
tasmax = tasmax_series(np.array([20]), units="degC", calendar=calendar)
tasmin = tasmin_series(np.array([0]), units="degC", calendar=calendar).expand_dims(
lat=[0]
)

2 changes: 1 addition & 1 deletion tests/test_sdba/test_adjustment.py
@@ -842,7 +842,7 @@ def test_real_data(self, open_dataset):
ref, hist, group=Grouper("time.dayofyear", window=31), nquantiles=quantiles
)

scen = EQM.adjust(hist, interp="linear", extrapolation="constant")
scen = EQM.adjust(hist, extrapolation="constant")

EX = ExtremeValues.train(ref, hist, cluster_thresh="1 mm/day", q_thresh=0.97)
new_scen = EX.adjust(scen, hist, frac=0.000000001)
18 changes: 14 additions & 4 deletions tests/test_sdba/test_base.py
@@ -50,11 +50,21 @@ def test_grouper_group(tas_series, group, window, nvals):


@pytest.mark.parametrize(
"group,interp,val90",
[("time", False, True), ("time.month", False, 3), ("time.month", True, 3.5)],
"group,interp,val90,calendar",
[
("time", False, True, None),
("time.month", False, 3, None),
("time.month", True, 3.5, None),
("time.season", False, 1, None),
("time.season", True, 0.8278688524590164, None),
("time.month", True, 3.533333333333333, "360_day"),
("time.month", True, 3.533333333333333, "noleap"),
("time.season", True, 0.8444444444444444, "360_day"),
("time.season", True, 0.8305936073059361, "noleap"),
],
)
def test_grouper_get_index(tas_series, group, interp, val90):
tas = tas_series(np.ones(366), start="2000-01-01")
def test_grouper_get_index(tas_series, group, interp, val90, calendar):
tas = tas_series(np.ones(366), start="2000-01-01", calendar=calendar)
grouper = Grouper(group)
indx = grouper.get_index(tas, interp=interp)
# 90 is March 31st
