Add stormtyper scripts and notebook #37

Draft: wants to merge 27 commits into base: main

Changes from all commits (27 commits)
b854a3e
Add stormtyper scripts and notebook
sray014 Jan 23, 2024
2c6a433
Add TS info, clean, Add papermill functionality
sray014 Jan 24, 2024
eec4281
Add wigits, create script to organize results
sray014 Jan 26, 2024
552114e
Add processes for final output, clean
sray014 Jan 26, 2024
164b089
Minor cleaning
sray014 Jan 30, 2024
cd45a34
Update precip plots
sray014 Jan 30, 2024
c47b8f6
Precip plot updates
sray014 Jan 31, 2024
7541437
Update run_notebooks to read dates from json
sray014 Jan 31, 2024
5882966
organize
sray014 Jan 31, 2024
d755504
Fix error with storm_type_analysis
sray014 Feb 8, 2024
fb604f7
Clean, add zarr script
sray014 Apr 8, 2024
7adce2c
Create C:\Users\sjanke\Code\stormcloud\stormcloud\stormtyper\notebooks
sray014 Apr 8, 2024
d53ed16
Delete stormcloud/stormtyper/C:\Users\sjanke\Code\stormcloud\stormclo…
sray014 Apr 8, 2024
e0cf131
Add files via upload
sray014 Apr 8, 2024
ab6c9f6
Add dask for parallelizing notebook creation
Nov 20, 2024
f95941d
Update requirements
Nov 20, 2024
a1be92d
Remove wrf library, add ivt function
Nov 20, 2024
e8729ee
Add constants.py
Nov 20, 2024
3f3c8a0
Add script to create zarrs for typing notebook and final stats
Nov 20, 2024
d45cf8d
Remove old file
Nov 20, 2024
e4d021c
Update script for new zarr inputs
Nov 20, 2024
aa6abe0
Update for new vars and storm types. Add multiday functionality
Nov 20, 2024
acdc31b
Fix issue with notebook conversion
Nov 20, 2024
1df47fd
Update for new zarr format, add ivt plotting function
Nov 20, 2024
1d93327
Update for new formats
Nov 20, 2024
66e32b5
Add script to calculate stats from var zarr files
Dec 31, 2024
b76b9f7
Update gitignores
Dec 31, 2024
3 changes: 2 additions & 1 deletion .gitignore
@@ -133,4 +133,5 @@ dmypy.json

# DSS data and GRID files
*.dss
*.grid
*.grid
*.zarr
7 changes: 7 additions & 0 deletions stormcloud/stormtyper/.gitignore
@@ -0,0 +1,7 @@
__pycache__/
.env

notebooks/

*.json
*.zarr
124 changes: 124 additions & 0 deletions stormcloud/stormtyper/calc_zarr_stats.py
@@ -0,0 +1,124 @@
import xarray as xr
import zarr
import numpy as np
import geopandas as gpd
import rioxarray  # noqa: F401 -- registers the .rio accessor used in clip_ds
import json
from typing import List, Dict, Any


def clip_ds(ds_to_clip: xr.Dataset, sst_region: gpd.GeoDataFrame) -> xr.Dataset:
    """
    Clips the dataset to the region defined by a GeoDataFrame.
    """
    ds_new_coords = ds_to_clip.assign_coords(
        lat=(("south_north", "west_east"), ds_to_clip["XLAT"].data),
        lon=(("south_north", "west_east"), ds_to_clip["XLONG"].data),
        west_east=ds_to_clip["XLONG"][0, :].data,
        south_north=ds_to_clip["XLAT"][:, 0].data,
    )

    # Set spatial dimensions
    spatial_ds = ds_new_coords.rio.set_spatial_dims(
        x_dim="west_east", y_dim="south_north", inplace=True
    )

    spatial_ds = spatial_ds.rio.write_crs("EPSG:4326", inplace=True)
    sst_region = sst_region.to_crs(spatial_ds.rio.crs)
    clipped_ds = spatial_ds.rio.clip(sst_region.geometry.values, sst_region.crs)
    return clipped_ds
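

# A hedged usage sketch (not part of this PR's diff): how clip_ds might be called
# on one of the Zarr stores listed at the bottom of this file. The store and
# group names here are hypothetical.
def _demo_clip_ds() -> xr.Dataset:
    ds = xr.open_zarr("Duwamish_PWAT.zarr", group="1996-02-06")  # assumed names
    region = gpd.read_file("duwamish-transpo-area-v01.geojson")
    return clip_ds(ds, region)  # cells outside the watershed polygon become NaN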


def get_stats(dataset: xr.Dataset) -> Dict[str, float]:
    """
    Computes summary statistics for the first data variable in a dataset,
    returning a dictionary keyed by statistic name.
    """
    var_name = list(dataset.data_vars)[0]
    # Load once: each .values access on a lazy (dask-backed) array would recompute
    values = dataset[var_name].astype("float32").values

    v_max = float(np.nanmax(values))
    v_min = float(np.nanmin(values))
    v_mean = float(np.nanmean(values))

    stats = {
        "max": round(v_max, 4),
        "min": round(v_min, 4),
        "mean": round(v_mean, 4),
        "sum": round(float(np.nansum(values)), 4),
        "std": round(float(np.nanstd(values)), 4),
        "range": round(v_max - v_min, 4),
        "max_mean_diff": round(v_max - v_mean, 4),
        "max_mean_ratio": round(v_max / v_mean, 4),
    }

    return stats
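

# A minimal, runnable sketch of get_stats on a synthetic one-variable dataset
# (variable name illustrative); the NaN cell shows why the nan-aware reductions
# above matter.
def _demo_get_stats() -> Dict[str, float]:
    demo = xr.Dataset({"PWAT": (("y", "x"), np.array([[1.0, 2.0], [np.nan, 4.0]]))})
    return get_stats(demo)
    # -> {'max': 4.0, 'min': 1.0, 'mean': 2.3333, 'sum': 7.0, 'std': 1.2472,
    #     'range': 3.0, 'max_mean_diff': 1.6667, 'max_mean_ratio': 1.7143}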


def main(zarr_paths: List[str], sst_region_path: str) -> List[Dict[str, Any]]:
    """
    Processes Zarr datasets and computes daily statistics for the entire domain
    and a clipped region.
    """
    full_stats = []
    sst_region = gpd.read_file(sst_region_path)

    for zarr_path in zarr_paths:
        # e.g. "Duwamish_PREC_ACC_NC.zarr" -> "PREC_ACC_NC"
        var_name = zarr_path.split("_", 1)[1].rsplit(".", 1)[0]
        zarr_store = zarr.open(zarr_path)

        for group_name in zarr_store.keys():
            ds = xr.open_zarr(zarr_path, group=group_name)
            # drop the final day, assumed incomplete
            unique_days = np.unique(ds["time"].values.astype("datetime64[D]"))[:-1]

            for day in unique_days:
                start_of_day = np.datetime64(day, "ns")
                end_of_day = start_of_day + np.timedelta64(1, "D")
                day_str = str(day)

                daily_ds = ds.sel(
                    time=slice(start_of_day, end_of_day - np.timedelta64(1, "ns"))
                )
                clipped_ds = clip_ds(daily_ds, sst_region)

                daily_domain_stats = get_stats(daily_ds)
                daily_clipped_stats = get_stats(clipped_ds)

                daily_stats_entry = {
                    day_str: {
                        var_name: {
                            "domain": daily_domain_stats,
                            "clipped": daily_clipped_stats,
                        }
                    }
                }

                # merge with any record already created for this day by another variable
                existing_entry = next(
                    (entry for entry in full_stats if day_str in entry), None
                )
                if existing_entry:
                    existing_entry[day_str].update(daily_stats_entry[day_str])
                else:
                    full_stats.append(daily_stats_entry)

    return full_stats


if __name__ == "__main__":
    zarr_paths = [
        "Duwamish_SRH03.zarr",
        "Duwamish_PSFC.zarr",
        "Duwamish_IVT.zarr",
        "Duwamish_PREC_ACC_NC.zarr",
        "Duwamish_PWAT.zarr",
        "Duwamish_SBCAPE.zarr",
        "Duwamish_Z_50000Pa.zarr",
    ]
    sst_region_path = "duwamish-transpo-area-v01.geojson"
    output_file = "example_stats.json"

    full_stats = main(zarr_paths, sst_region_path)

    with open(output_file, "w") as file:
        json.dump(full_stats, file, indent=4)
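
# For orientation, full_stats serializes to a list of per-day records shaped like
# the sketch below (statistic values elided; keys follow get_stats above):
#
# [
#     {
#         "<YYYY-MM-DD>": {
#             "<var_name>": {
#                 "domain": {"max": ..., "min": ..., "mean": ..., "sum": ...,
#                            "std": ..., "range": ..., "max_mean_diff": ...,
#                            "max_mean_ratio": ...},
#                 "clipped": {...}
#             }
#         }
#     }
# ]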
17 changes: 17 additions & 0 deletions stormcloud/stormtyper/constants.py
@@ -0,0 +1,17 @@
M_TO_IN_FACTOR = 39.370  # inches per meter: multiply meters by this
MM_TO_IN_FACTOR = 25.4  # millimeters per inch: divide millimeters by this

# OPeNDAP hyperslab index strings ([start:stride:stop]) used to subset requests
SOUTH_NORTH_DIM = "[550:1:1014]"
WEST_EAST_DIM = "[0:1:400]"
TIME_DIM = "[0:1:0]"

SOUTH_NORTH_DIM_3D = "[550:1:1014]"
WEST_EAST_DIM_3D = "[0:1:450]"

WATERSHED_FILE_LOCATION = (
    "s3://tempest/watersheds/duwamish/duwamish-transpo-area-v01.geojson"
)

URL_ROOT = "https://thredds.rda.ucar.edu/thredds/dodsC/files/g/ds559.0"
TRINITY_IBTRACS_JSON = ""
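

# A hedged sketch (not part of the PR) of how the hyperslab strings above might
# be combined into an OPeNDAP constraint expression; the file path and variable
# name are hypothetical, and the index order assumes (Time, south_north, west_east).
def _demo_dods_url() -> str:
    wrf_file = "wy1996/199602/wrf2d_d01_1996-02-06_00:00:00"  # illustrative path
    return f"{URL_ROOT}/{wrf_file}?PREC_ACC_NC{TIME_DIM}{SOUTH_NORTH_DIM}{WEST_EAST_DIM}"
    # -> ".../dodsC/files/g/ds559.0/...?PREC_ACC_NC[0:1:0][550:1:1014][0:1:400]"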
168 changes: 168 additions & 0 deletions stormcloud/stormtyper/conus404_utils.py
@@ -0,0 +1,168 @@
import logging

import xarray as xr
import numpy as np
from scipy.interpolate import interp1d


# Copied over from wrf-python to avoid dependency
def destagger(var, stagger_dim: int = 1, meta=False):
    """Return the variable on the unstaggered grid.

    This function destaggers the variable by taking the average of the
    values located on either side of the grid box.

    Args:

        var (:class:`xarray.DataArray` or :class:`numpy.ndarray`): A variable
            on a staggered grid.

        stagger_dim (:obj:`int`): The dimension index to destagger.
            Negative values can be used to choose dimensions referenced
            from the right hand side (-1 is the rightmost dimension).

        meta (:obj:`bool`, optional): Retained for signature compatibility
            with wrf-python; this copy ignores it and always returns the
            same type as *var*.

    Returns:

        :class:`xarray.DataArray` or :class:`numpy.ndarray`:
            The destaggered variable, matching the type of the input.

    """
    logging.debug("Destaggering along dimension %s", stagger_dim)
    var_shape = var.shape
    num_dims = var.ndim
    stagger_dim_size = var_shape[stagger_dim]

    full_slice = slice(None)
    slice1 = slice(0, stagger_dim_size - 1, 1)
    slice2 = slice(1, stagger_dim_size, 1)

    # default to full slices
    dim_ranges_1 = [full_slice] * num_dims
    dim_ranges_2 = [full_slice] * num_dims

    # for the stagger dim, insert the appropriate slice range
    dim_ranges_1[stagger_dim] = slice1
    dim_ranges_2[stagger_dim] = slice2

    # average each pair of neighboring values along the staggered dimension
    result = 0.5 * (var[tuple(dim_ranges_1)] + var[tuple(dim_ranges_2)])

    return result
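

# A runnable sketch: a length-4 staggered axis destaggers to length 3 by
# averaging neighbors, e.g. winds on cell edges -> winds at cell centers.
# Values are illustrative.
def _demo_destagger() -> np.ndarray:
    staggered = np.array([[0.0, 2.0, 4.0, 6.0]])  # shape (1, 4), stagger_dim=1
    return destagger(staggered)  # [[1.0, 3.0, 5.0]] -- shape (1, 3)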


def interp_pressure_level(
    pressure: xr.DataArray, height: xr.DataArray, target_pressure: float
) -> float:
    """
    Interpolate height at a given pressure level using linear interpolation.

    Args:
        pressure (xr.DataArray): Array of pressure values.
        height (xr.DataArray): Array of height values corresponding to the pressure values.
        target_pressure (float): The pressure level at which height is to be interpolated.

    Returns:
        float: The interpolated height at target pressure. Targets outside the
        input pressure range yield NaN (bounds_error=False with the default fill).
    """
    interp_func = interp1d(pressure, height, bounds_error=False)
    return interp_func(target_pressure)
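

# A runnable sketch: interpolate the height of the 600 hPa surface from a short
# synthetic sounding (round illustrative numbers; pressure decreases with height).
def _demo_interp_pressure_level() -> float:
    p = np.array([1000.0, 850.0, 700.0, 500.0, 300.0])  # hPa
    z = np.array([100.0, 1500.0, 3000.0, 5500.0, 9000.0])  # m
    return interp_pressure_level(p, z, 600.0)  # 4250.0, halfway between 700 and 500 hPa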


def calc_geopot_at_press_lvl(
    pressure_da: xr.DataArray, z_unstag_da: xr.DataArray, pressure_level: float
) -> xr.DataArray:
    """
    Calculate array of geopotential height at a given pressure level. Utilizes
    xarray's apply_ufunc to apply a custom interpolation function across the
    entire pressure and height dataarrays.

    Args:
        pressure_da (xr.DataArray): DataArray of pressure values.
        z_unstag_da (xr.DataArray): DataArray of unstaggered geopotential heights.
        pressure_level (float): The target pressure level for calculating geopotential height.

    Returns:
        xr.DataArray: The calculated geopotential height at the specified pressure level.
    """
    calc_z_at_press_level = xr.apply_ufunc(
        interp_pressure_level,
        pressure_da,
        z_unstag_da,
        input_core_dims=[["bottom_top"], ["bottom_top"]],
        kwargs={"target_pressure": pressure_level},
        vectorize=True,  # loop interp_pressure_level over each column
        output_dtypes=[float],
    )
    return calc_z_at_press_level
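

# A runnable sketch on a tiny synthetic grid (2x2 columns, 3 levels): recover the
# height of the 50000 Pa surface at every column. Values are illustrative.
def _demo_calc_geopot() -> xr.DataArray:
    p3d = xr.DataArray(
        np.broadcast_to(np.array([90000.0, 70000.0, 50000.0]), (2, 2, 3)),
        dims=("south_north", "west_east", "bottom_top"),
    )
    z3d = xr.DataArray(
        np.broadcast_to(np.array([1000.0, 3000.0, 5500.0]), (2, 2, 3)),
        dims=("south_north", "west_east", "bottom_top"),
    )
    return calc_geopot_at_press_lvl(p3d, z3d, 50000.0)  # 5500.0 everywhere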


def calculate_slp(
    surface_pressure,
    altitude,
    sea_level_temp=288.15,
    lapse_rate=0.0065,
    gravity=9.80665,
    gas_constant=287,
):
    """
    Calculate sea level pressure from surface pressure for arrays of data.
    Surface pressure must be in units hPa and altitude must be in meters.

    Args:
    - surface_pressure: array of surface pressure (in hPa)
    - altitude: array of altitude at which the pressure is measured (in meters)
    - sea_level_temp: standard temperature at sea level in kelvin (default 288.15 K)
    - lapse_rate: standard temperature lapse rate (default 0.0065 K/m)
    - gravity: acceleration due to gravity (default 9.80665 m/s^2)
    - gas_constant: specific gas constant for dry air (default 287 J/kg/K)

    Returns:
    - Array of sea level pressure (in hPa)
    """
    # Barometric formula: slp = p_sfc * (T0 / (T0 - L * h)) ** (g / (L * R))
    temp_at_altitude = sea_level_temp - lapse_rate * altitude
    slp = surface_pressure * (sea_level_temp / temp_at_altitude) ** (
        gravity / (lapse_rate * gas_constant)
    )
    return slp
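

# Worked example: a 1000 hPa reading at 100 m elevation reduces to sea level as
# 1000 * (288.15 / 287.5) ** (9.80665 / (0.0065 * 287)) ~= 1011.94 hPa.
def _demo_calculate_slp() -> float:
    return calculate_slp(1000.0, 100.0)  # ~1011.94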


def calculate_ivt(
    u_wind: xr.DataArray,
    v_wind: xr.DataArray,
    pressure: xr.DataArray,
    mixing_ratio: xr.DataArray,
    vertical_dim: str = "bottom_top",
) -> xr.Dataset:
    """
    Calculate Integrated Vapor Transport (IVT) from wind components, pressure,
    and mixing ratio: IVT = (1/g) * |column integral of q * V dp|, where q is
    specific humidity and V the horizontal wind.
    """
    specific_humidity = mixing_ratio / (1 + mixing_ratio)

    # Slice q, u, v to match dp, which loses one level to the vertical difference
    q_slice = specific_humidity.isel({vertical_dim: slice(None, -1)})
    u_slice = u_wind.isel({vertical_dim: slice(None, -1)})
    v_slice = v_wind.isel({vertical_dim: slice(None, -1)})

    dp = pressure.diff(dim=vertical_dim)

    # dp is negative where pressure decreases with height; the minus sign flips it back
    ivt_x = -(1 / 9.81) * (q_slice * u_slice * dp).sum(dim=vertical_dim)
    ivt_y = -(1 / 9.81) * (q_slice * v_slice * dp).sum(dim=vertical_dim)
    ivt = np.sqrt(ivt_x**2 + ivt_y**2)

    ivt_ds = xr.Dataset(
        {
            "IVT": (["Time", "south_north", "west_east"], ivt.data),
        },
        coords={
            "Time": ivt["Time"],
            "XLAT": ivt["XLAT"],
            "XLONG": ivt["XLONG"],
        },
    )
    return ivt_ds
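

# A runnable sketch on a single synthetic column (1 time step, 1x1 grid, 3
# levels); values are illustrative, and the coordinate names mirror the
# WRF-style inputs this module expects.
def _demo_calculate_ivt() -> xr.Dataset:
    dims = ("Time", "bottom_top", "south_north", "west_east")
    shape = (1, 3, 1, 1)
    coords = {
        "Time": [np.datetime64("1996-02-06T00:00")],
        "XLAT": (("south_north", "west_east"), [[47.5]]),
        "XLONG": (("south_north", "west_east"), [[-122.3]]),
    }
    u = xr.DataArray(np.full(shape, 20.0), dims=dims, coords=coords)  # m/s
    v = xr.DataArray(np.full(shape, 10.0), dims=dims, coords=coords)  # m/s
    p = xr.DataArray(
        np.array([90000.0, 70000.0, 50000.0]).reshape(shape), dims=dims, coords=coords
    )  # Pa
    w = xr.DataArray(np.full(shape, 0.008), dims=dims, coords=coords)  # kg/kg
    return calculate_ivt(u, v, p, w)  # one positive IVT value, ~724 kg m^-1 s^-1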