Add stormtyper scripts and notebook #37

Draft: wants to merge 27 commits into base: main

Changes from all commits (27 commits)
b854a3e
Add stormtyper scripts and notebook
sray014 Jan 23, 2024
2c6a433
Add TS info, clean, Add papermill functionality
sray014 Jan 24, 2024
eec4281
Add wigits, create script to organize results
sray014 Jan 26, 2024
552114e
Add processes for final output, clean
sray014 Jan 26, 2024
164b089
Minor cleaning
sray014 Jan 30, 2024
cd45a34
Update precip plots
sray014 Jan 30, 2024
c47b8f6
Precip plot updates
sray014 Jan 31, 2024
7541437
Update run_notebooks to read dates from json
sray014 Jan 31, 2024
5882966
organize
sray014 Jan 31, 2024
d755504
Fix error with storm_type_analysis
sray014 Feb 8, 2024
fb604f7
Clean, add zarr script
sray014 Apr 8, 2024
7adce2c
Create C:\Users\sjanke\Code\stormcloud\stormcloud\stormtyper\notebooks
sray014 Apr 8, 2024
d53ed16
Delete stormcloud/stormtyper/C:\Users\sjanke\Code\stormcloud\stormclo…
sray014 Apr 8, 2024
e0cf131
Add files via upload
sray014 Apr 8, 2024
ab6c9f6
Add dask for parallelizing notebook creation
Nov 20, 2024
f95941d
Update requirements
Nov 20, 2024
a1be92d
Remove wrf library, add ivt function
Nov 20, 2024
e8729ee
Add constants.py
Nov 20, 2024
3f3c8a0
Add script to create zarrs for typing notebook and final stats
Nov 20, 2024
d45cf8d
Remove old file
Nov 20, 2024
e4d021c
Update script for new zarr inputs
Nov 20, 2024
aa6abe0
Update for new vars and storm types. Add multiday functionality
Nov 20, 2024
acdc31b
Fix issue with notebook conversion
Nov 20, 2024
1df47fd
Update for new zarr format, add ivt plotting function
Nov 20, 2024
1d93327
Update for new formats
Nov 20, 2024
66e32b5
Add script to calculate stats from var zarr files
Dec 31, 2024
b76b9f7
Update gitignores
Dec 31, 2024
3 changes: 2 additions & 1 deletion .gitignore
@@ -133,4 +133,5 @@ dmypy.json

# DSS data and GRID files
*.dss
*.grid
*.grid
*.zarr
7 changes: 7 additions & 0 deletions stormcloud/stormtyper/.gitignore
@@ -0,0 +1,7 @@
__pycache__/
.env

notebooks/

*.json
*.zarr
124 changes: 124 additions & 0 deletions stormcloud/stormtyper/calc_zarr_stats.py
@@ -0,0 +1,124 @@
import xarray as xr
import zarr
import numpy as np
import geopandas as gpd
import rioxarray  # noqa: F401 -- registers the .rio accessor used in clip_ds
import json
from typing import List, Dict, Any


def clip_ds(ds_to_clip: xr.Dataset, sst_region: gpd.GeoDataFrame) -> xr.Dataset:
    """
    Clips the dataset to the region defined by a GeoDataFrame.
    """
    ds_new_coords = ds_to_clip.assign_coords(
        lat=(("south_north", "west_east"), ds_to_clip["XLAT"].data),
        lon=(("south_north", "west_east"), ds_to_clip["XLONG"].data),
        west_east=ds_to_clip["XLONG"][0, :].data,
        south_north=ds_to_clip["XLAT"][:, 0].data,
    )

    # Set spatial dimensions
    spatial_ds = ds_new_coords.rio.set_spatial_dims(
        x_dim="west_east", y_dim="south_north", inplace=True
    )

    spatial_ds = spatial_ds.rio.write_crs("EPSG:4326", inplace=True)
    sst_region = sst_region.to_crs(spatial_ds.rio.crs)
    clipped_ds = spatial_ds.rio.clip(sst_region.geometry.values, sst_region.crs)
    return clipped_ds
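

# A hedged usage sketch (not part of this PR's diff): how clip_ds might be called
# on one of the Zarr stores listed at the bottom of this file. The store and
# group names here are hypothetical.
def _demo_clip_ds() -> xr.Dataset:
    ds = xr.open_zarr("Duwamish_PWAT.zarr", group="1996-02-06")  # assumed names
    region = gpd.read_file("duwamish-transpo-area-v01.geojson")
    return clip_ds(ds, region)  # cells outside the watershed polygon become NaN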


def get_stats(dataset: xr.Dataset) -> Dict[str, float]:
    """
    Computes summary statistics for the first data variable in a dataset,
    returning a dictionary keyed by statistic name.
    """
    var_name = list(dataset.data_vars)[0]
    # Load once: each .values access on a lazy (dask-backed) array would recompute
    values = dataset[var_name].astype("float32").values

    v_max = float(np.nanmax(values))
    v_min = float(np.nanmin(values))
    v_mean = float(np.nanmean(values))

    stats = {
        "max": round(v_max, 4),
        "min": round(v_min, 4),
        "mean": round(v_mean, 4),
        "sum": round(float(np.nansum(values)), 4),
        "std": round(float(np.nanstd(values)), 4),
        "range": round(v_max - v_min, 4),
        "max_mean_diff": round(v_max - v_mean, 4),
        "max_mean_ratio": round(v_max / v_mean, 4),
    }

    return stats
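

# A minimal, runnable sketch of get_stats on a synthetic one-variable dataset
# (variable name illustrative); the NaN cell shows why the nan-aware reductions
# above matter.
def _demo_get_stats() -> Dict[str, float]:
    demo = xr.Dataset({"PWAT": (("y", "x"), np.array([[1.0, 2.0], [np.nan, 4.0]]))})
    return get_stats(demo)
    # -> {'max': 4.0, 'min': 1.0, 'mean': 2.3333, 'sum': 7.0, 'std': 1.2472,
    #     'range': 3.0, 'max_mean_diff': 1.6667, 'max_mean_ratio': 1.7143}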


def main(zarr_paths: List[str], sst_region_path: str) -> List[Dict[str, Any]]:
    """
    Processes Zarr datasets and computes daily statistics for the entire domain
    and a clipped region.
    """
    full_stats = []
    sst_region = gpd.read_file(sst_region_path)

    for zarr_path in zarr_paths:
        # e.g. "Duwamish_PREC_ACC_NC.zarr" -> "PREC_ACC_NC"
        var_name = zarr_path.split("_", 1)[1].rsplit(".", 1)[0]
        zarr_store = zarr.open(zarr_path)

        for group_name in zarr_store.keys():
            ds = xr.open_zarr(zarr_path, group=group_name)
            # drop the final day, assumed incomplete
            unique_days = np.unique(ds["time"].values.astype("datetime64[D]"))[:-1]

            for day in unique_days:
                start_of_day = np.datetime64(day, "ns")
                end_of_day = start_of_day + np.timedelta64(1, "D")
                day_str = str(day)

                daily_ds = ds.sel(
                    time=slice(start_of_day, end_of_day - np.timedelta64(1, "ns"))
                )
                clipped_ds = clip_ds(daily_ds, sst_region)

                daily_domain_stats = get_stats(daily_ds)
                daily_clipped_stats = get_stats(clipped_ds)

                daily_stats_entry = {
                    day_str: {
                        var_name: {
                            "domain": daily_domain_stats,
                            "clipped": daily_clipped_stats,
                        }
                    }
                }

                # merge with any record already created for this day by another variable
                existing_entry = next(
                    (entry for entry in full_stats if day_str in entry), None
                )
                if existing_entry:
                    existing_entry[day_str].update(daily_stats_entry[day_str])
                else:
                    full_stats.append(daily_stats_entry)

    return full_stats


if __name__ == "__main__":
    zarr_paths = [
        "Duwamish_SRH03.zarr",
        "Duwamish_PSFC.zarr",
        "Duwamish_IVT.zarr",
        "Duwamish_PREC_ACC_NC.zarr",
        "Duwamish_PWAT.zarr",
        "Duwamish_SBCAPE.zarr",
        "Duwamish_Z_50000Pa.zarr",
    ]
    sst_region_path = "duwamish-transpo-area-v01.geojson"
    output_file = "example_stats.json"

    full_stats = main(zarr_paths, sst_region_path)

    with open(output_file, "w") as file:
        json.dump(full_stats, file, indent=4)
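
# For orientation, full_stats serializes to a list of per-day records shaped like
# the sketch below (statistic values elided; keys follow get_stats above):
#
# [
#     {
#         "<YYYY-MM-DD>": {
#             "<var_name>": {
#                 "domain": {"max": ..., "min": ..., "mean": ..., "sum": ...,
#                            "std": ..., "range": ..., "max_mean_diff": ...,
#                            "max_mean_ratio": ...},
#                 "clipped": {...}
#             }
#         }
#     }
# ]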
17 changes: 17 additions & 0 deletions stormcloud/stormtyper/constants.py
@@ -0,0 +1,17 @@
M_TO_IN_FACTOR = 39.370  # inches per meter: multiply meters by this
MM_TO_IN_FACTOR = 25.4  # millimeters per inch: divide millimeters by this

# OPeNDAP hyperslab index strings ([start:stride:stop]) used to subset requests
SOUTH_NORTH_DIM = "[550:1:1014]"
WEST_EAST_DIM = "[0:1:400]"
TIME_DIM = "[0:1:0]"

SOUTH_NORTH_DIM_3D = "[550:1:1014]"
WEST_EAST_DIM_3D = "[0:1:450]"

WATERSHED_FILE_LOCATION = (
    "s3://tempest/watersheds/duwamish/duwamish-transpo-area-v01.geojson"
)

URL_ROOT = "https://thredds.rda.ucar.edu/thredds/dodsC/files/g/ds559.0"
TRINITY_IBTRACS_JSON = ""
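

# A hedged sketch (not part of the PR) of how the hyperslab strings above might
# be combined into an OPeNDAP constraint expression; the file path and variable
# name are hypothetical, and the index order assumes (Time, south_north, west_east).
def _demo_dods_url() -> str:
    wrf_file = "wy1996/199602/wrf2d_d01_1996-02-06_00:00:00"  # illustrative path
    return f"{URL_ROOT}/{wrf_file}?PREC_ACC_NC{TIME_DIM}{SOUTH_NORTH_DIM}{WEST_EAST_DIM}"
    # -> ".../dodsC/files/g/ds559.0/...?PREC_ACC_NC[0:1:0][550:1:1014][0:1:400]"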
168 changes: 168 additions & 0 deletions stormcloud/stormtyper/conus404_utils.py
@@ -0,0 +1,168 @@
import logging

import xarray as xr
import numpy as np
from scipy.interpolate import interp1d


# Copied over from wrf-python to avoid dependency
def destagger(var, stagger_dim: int = 1, meta=False):
    """Return the variable on the unstaggered grid.

    This function destaggers the variable by taking the average of the
    values located on either side of the grid box.

    Args:

        var (:class:`xarray.DataArray` or :class:`numpy.ndarray`): A variable
            on a staggered grid.

        stagger_dim (:obj:`int`): The dimension index to destagger.
            Negative values can be used to choose dimensions referenced
            from the right hand side (-1 is the rightmost dimension).

        meta (:obj:`bool`, optional): Retained for signature compatibility
            with wrf-python; this copy ignores it and always returns the
            same type as *var*.

    Returns:

        :class:`xarray.DataArray` or :class:`numpy.ndarray`:
            The destaggered variable, matching the type of the input.

    """
    logging.debug("Destaggering along dimension %s", stagger_dim)
    var_shape = var.shape
    num_dims = var.ndim
    stagger_dim_size = var_shape[stagger_dim]

    full_slice = slice(None)
    slice1 = slice(0, stagger_dim_size - 1, 1)
    slice2 = slice(1, stagger_dim_size, 1)

    # default to full slices
    dim_ranges_1 = [full_slice] * num_dims
    dim_ranges_2 = [full_slice] * num_dims

    # for the stagger dim, insert the appropriate slice range
    dim_ranges_1[stagger_dim] = slice1
    dim_ranges_2[stagger_dim] = slice2

    # average each pair of neighboring values along the staggered dimension
    result = 0.5 * (var[tuple(dim_ranges_1)] + var[tuple(dim_ranges_2)])

    return result
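

# A runnable sketch: a length-4 staggered axis destaggers to length 3 by
# averaging neighbors, e.g. winds on cell edges -> winds at cell centers.
# Values are illustrative.
def _demo_destagger() -> np.ndarray:
    staggered = np.array([[0.0, 2.0, 4.0, 6.0]])  # shape (1, 4), stagger_dim=1
    return destagger(staggered)  # [[1.0, 3.0, 5.0]] -- shape (1, 3)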


def interp_pressure_level(
    pressure: xr.DataArray, height: xr.DataArray, target_pressure: float
) -> float:
    """
    Interpolate height at a given pressure level using linear interpolation.

    Args:
        pressure (xr.DataArray): Array of pressure values.
        height (xr.DataArray): Array of height values corresponding to the pressure values.
        target_pressure (float): The pressure level at which height is to be interpolated.

    Returns:
        float: The interpolated height at target pressure. Targets outside the
        input pressure range yield NaN (bounds_error=False with the default fill).
    """
    interp_func = interp1d(pressure, height, bounds_error=False)
    return interp_func(target_pressure)
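

# A runnable sketch: interpolate the height of the 600 hPa surface from a short
# synthetic sounding (round illustrative numbers; pressure decreases with height).
def _demo_interp_pressure_level() -> float:
    p = np.array([1000.0, 850.0, 700.0, 500.0, 300.0])  # hPa
    z = np.array([100.0, 1500.0, 3000.0, 5500.0, 9000.0])  # m
    return interp_pressure_level(p, z, 600.0)  # 4250.0, halfway between 700 and 500 hPa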


def calc_geopot_at_press_lvl(
    pressure_da: xr.DataArray, z_unstag_da: xr.DataArray, pressure_level: float
) -> xr.DataArray:
    """
    Calculate array of geopotential height at a given pressure level. Utilizes
    xarray's apply_ufunc to apply a custom interpolation function across the
    entire pressure and height dataarrays.

    Args:
        pressure_da (xr.DataArray): DataArray of pressure values.
        z_unstag_da (xr.DataArray): DataArray of unstaggered geopotential heights.
        pressure_level (float): The target pressure level for calculating geopotential height.

    Returns:
        xr.DataArray: The calculated geopotential height at the specified pressure level.
    """
    calc_z_at_press_level = xr.apply_ufunc(
        interp_pressure_level,
        pressure_da,
        z_unstag_da,
        input_core_dims=[["bottom_top"], ["bottom_top"]],
        kwargs={"target_pressure": pressure_level},
        vectorize=True,  # loop interp_pressure_level over each column
        output_dtypes=[float],
    )
    return calc_z_at_press_level
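

# A runnable sketch on a tiny synthetic grid (2x2 columns, 3 levels): recover the
# height of the 50000 Pa surface at every column. Values are illustrative.
def _demo_calc_geopot() -> xr.DataArray:
    p3d = xr.DataArray(
        np.broadcast_to(np.array([90000.0, 70000.0, 50000.0]), (2, 2, 3)),
        dims=("south_north", "west_east", "bottom_top"),
    )
    z3d = xr.DataArray(
        np.broadcast_to(np.array([1000.0, 3000.0, 5500.0]), (2, 2, 3)),
        dims=("south_north", "west_east", "bottom_top"),
    )
    return calc_geopot_at_press_lvl(p3d, z3d, 50000.0)  # 5500.0 everywhere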


def calculate_slp(
    surface_pressure,
    altitude,
    sea_level_temp=288.15,
    lapse_rate=0.0065,
    gravity=9.80665,
    gas_constant=287,
):
    """
    Calculate sea level pressure from surface pressure for arrays of data.
    Surface pressure must be in units hPa and altitude must be in meters.

    Args:
    - surface_pressure: array of surface pressure (in hPa)
    - altitude: array of altitude at which the pressure is measured (in meters)
    - sea_level_temp: standard temperature at sea level in kelvin (default 288.15 K)
    - lapse_rate: standard temperature lapse rate (default 0.0065 K/m)
    - gravity: acceleration due to gravity (default 9.80665 m/s^2)
    - gas_constant: specific gas constant for dry air (default 287 J/kg/K)

    Returns:
    - Array of sea level pressure (in hPa)
    """
    # Barometric formula: slp = p_sfc * (T0 / (T0 - L * h)) ** (g / (L * R))
    temp_at_altitude = sea_level_temp - lapse_rate * altitude
    slp = surface_pressure * (sea_level_temp / temp_at_altitude) ** (
        gravity / (lapse_rate * gas_constant)
    )
    return slp
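

# Worked example: a 1000 hPa reading at 100 m elevation reduces to sea level as
# 1000 * (288.15 / 287.5) ** (9.80665 / (0.0065 * 287)) ~= 1011.94 hPa.
def _demo_calculate_slp() -> float:
    return calculate_slp(1000.0, 100.0)  # ~1011.94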


def calculate_ivt(
    u_wind: xr.DataArray,
    v_wind: xr.DataArray,
    pressure: xr.DataArray,
    mixing_ratio: xr.DataArray,
    vertical_dim: str = "bottom_top",
) -> xr.Dataset:
    """
    Calculate Integrated Vapor Transport (IVT) from wind components, pressure,
    and mixing ratio: IVT = (1/g) * |column integral of q * V dp|, where q is
    specific humidity and V the horizontal wind.
    """
    specific_humidity = mixing_ratio / (1 + mixing_ratio)

    # Slice q, u, v to match dp, which loses one level to the vertical difference
    q_slice = specific_humidity.isel({vertical_dim: slice(None, -1)})
    u_slice = u_wind.isel({vertical_dim: slice(None, -1)})
    v_slice = v_wind.isel({vertical_dim: slice(None, -1)})

    dp = pressure.diff(dim=vertical_dim)

    # dp is negative where pressure decreases with height; the minus sign flips it back
    ivt_x = -(1 / 9.81) * (q_slice * u_slice * dp).sum(dim=vertical_dim)
    ivt_y = -(1 / 9.81) * (q_slice * v_slice * dp).sum(dim=vertical_dim)
    ivt = np.sqrt(ivt_x**2 + ivt_y**2)

    ivt_ds = xr.Dataset(
        {
            "IVT": (["Time", "south_north", "west_east"], ivt.data),
        },
        coords={
            "Time": ivt["Time"],
            "XLAT": ivt["XLAT"],
            "XLONG": ivt["XLONG"],
        },
    )
    return ivt_ds
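

# A runnable sketch on a single synthetic column (1 time step, 1x1 grid, 3
# levels); values are illustrative, and the coordinate names mirror the
# WRF-style inputs this module expects.
def _demo_calculate_ivt() -> xr.Dataset:
    dims = ("Time", "bottom_top", "south_north", "west_east")
    shape = (1, 3, 1, 1)
    coords = {
        "Time": [np.datetime64("1996-02-06T00:00")],
        "XLAT": (("south_north", "west_east"), [[47.5]]),
        "XLONG": (("south_north", "west_east"), [[-122.3]]),
    }
    u = xr.DataArray(np.full(shape, 20.0), dims=dims, coords=coords)  # m/s
    v = xr.DataArray(np.full(shape, 10.0), dims=dims, coords=coords)  # m/s
    p = xr.DataArray(
        np.array([90000.0, 70000.0, 50000.0]).reshape(shape), dims=dims, coords=coords
    )  # Pa
    w = xr.DataArray(np.full(shape, 0.008), dims=dims, coords=coords)  # kg/kg
    return calculate_ivt(u, v, p, w)  # one positive IVT value, ~724 kg m^-1 s^-1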