From 30d31a2f739bdb01cdd9c186587e64e130d21728 Mon Sep 17 00:00:00 2001 From: irfan-farmer Date: Mon, 27 Jan 2025 14:21:53 +0100 Subject: [PATCH] Add Dockerfile with necessary packagesand example notebooks (#232) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: irfan-farmer <“khan@dkrz-de”> --- .github/workflows/d4science-images.yml | 1 + single-user-d4science-dkrzwps/Dockerfile | 24 + .../notebooks/demo-rooki-subset-by-time.ipynb | 948 ++++++ ...rooki-subset-by-time.ipynb:Zone.Identifier | 4 + .../temperature_threshold_analysis_new.ipynb | 2659 +++++++++++++++++ ...reshold_analysis_new.ipynb:Zone.Identifier | 4 + 6 files changed, 3640 insertions(+) create mode 100644 single-user-d4science-dkrzwps/Dockerfile create mode 100644 single-user-d4science-dkrzwps/notebooks/demo-rooki-subset-by-time.ipynb create mode 100644 single-user-d4science-dkrzwps/notebooks/demo-rooki-subset-by-time.ipynb:Zone.Identifier create mode 100644 single-user-d4science-dkrzwps/notebooks/temperature_threshold_analysis_new.ipynb create mode 100644 single-user-d4science-dkrzwps/notebooks/temperature_threshold_analysis_new.ipynb:Zone.Identifier diff --git a/.github/workflows/d4science-images.yml b/.github/workflows/d4science-images.yml index 0230acf..6be3f68 100644 --- a/.github/workflows/d4science-images.yml +++ b/.github/workflows/d4science-images.yml @@ -28,6 +28,7 @@ jobs: "single-user-sobigdata-itineris-evs/Dockerfile" "single-user-sobigdata-itinerisclima/Dockerfile" "single-user-d4science-jericocore/Dockerfile" + "single-user-d4science-dkrzwps/Dockerfile" base-image: diff --git a/single-user-d4science-dkrzwps/Dockerfile b/single-user-d4science-dkrzwps/Dockerfile new file mode 100644 index 0000000..b49f45f --- /dev/null +++ b/single-user-d4science-dkrzwps/Dockerfile @@ -0,0 +1,24 @@ +FROM quay.io/jupyter/minimal-notebook:2024-04-19 + +# Install necessary Python packages using pip +RUN pip install --no-cache-dir \ + numpy \ + pandas \ + 
xarray \ + intake \ + intake-esm \ + folium \ + hvplot \ + requests \ + ipywidgets \ + aiohttp \ + pyproj \ + pyparsing \ + zarr \ + fsspec \ + geopy \ + rooki + +# Copy sample notebooks to the working directory +COPY notebooks/ /home/jovyan/work/ + diff --git a/single-user-d4science-dkrzwps/notebooks/demo-rooki-subset-by-time.ipynb b/single-user-d4science-dkrzwps/notebooks/demo-rooki-subset-by-time.ipynb new file mode 100644 index 0000000..a74845e --- /dev/null +++ b/single-user-d4science-dkrzwps/notebooks/demo-rooki-subset-by-time.ipynb @@ -0,0 +1,948 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run subset by time operation\n", + "\n", + "**Rooki** calls climate data operations on the **rook** processing service." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['ROOK_URL'] = 'http://rook.dkrz.de/wps'\n", + "\n", + "from rooki import rooki" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**parameters of subset operation**" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[0;31mSignature:\u001b[0m\n", + "\u001b[0mrooki\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msubset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mcollection\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mpre_checked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mapply_fixes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0moriginal_files\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m 
\u001b[0mtime\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mtime_components\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0marea\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mDocstring:\u001b[0m\n", + "Run subsetting on climate model data. Calls daops operators.\n", + "\n", + "Parameters\n", + "----------\n", + "collection : string\n", + " A dataset identifier or list of comma separated identifiersExample: c3s-cmip5.output1.ICHEC.EC-EARTH.historical.day.atmos.day.r1i1p1.tas.latest\n", + "time : string\n", + " The time interval (start/end) to subset over separated by '/' or a list of time points separated by ','. The format is according to the ISO-8601 standard. Example: 1860-01-01/1900-12-30 or 1860-01-01, 1870-01-01, 1880-01-01\n", + "time_components : string\n", + " Optional time components to describe parts of the time period (e.g. year, month and day). 
Example: month:01,02,03 or year:1970,1980|month:01,02,03\n", + "area : string\n", + " The area to subset over as 4 comma separated values.Example: 0.,49.,10.,65\n", + "level : string\n", + " The level range to subset over separated by a / or a list of level values separated by ','.Example: 1000/2000 or 1000, 2000, 3000\n", + "pre_checked : boolean\n", + " Use checked data only.\n", + "apply_fixes : boolean\n", + " Apply fixes to datasets.\n", + "original_files : boolean\n", + " Return original files only.\n", + "\n", + "Returns\n", + "-------\n", + "output : ComplexData:mimetype:`application/metalink+xml; version=4.0`\n", + " Metalink v4 document with references to NetCDF files.\n", + "prov : ComplexData:mimetype:`application/json`\n", + " Provenance document using W3C standard.\n", + "prov_plot : ComplexData:mimetype:`image/png`\n", + " Provenance document as diagram.\n", + "\u001b[0;31mFile:\u001b[0m ~/\n", + "\u001b[0;31mType:\u001b[0m method" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rooki.subset?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**data inventory**\n", + "\n", + "https://github.com/cp4cds/c3s_34g_manifests/tree/master/inventories\n", + "\n", + "using:\n", + "https://github.com/cp4cds/c3s_34g_manifests/blob/master/inventories/c3s-cmip6/c3s-cmip6_v20210126.yml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**run subset**" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resp = rooki.subset(\n", + " collection='c3s-cmip6.ScenarioMIP.INM.INM-CM5-0.ssp245.r1i1p1f1.day.tas.gr1.v20190619',\n", + " time='2016-01-01/2016-12-30',\n", + ")\n", + "resp.ok" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**show metalink output**" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'http://rook7.cloud.dkrz.de:80/outputs/rook/9acf3060-d80e-11ef-8c79-fa163eb671ca/input.meta4'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resp.url" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " 2025-01-21T16:44:35Z\n", + " PyWPS/4.5.2\n", + "\n", + " \n", + " NetCDF file\n", + " 18951905\n", + " http://rook7.cloud.dkrz.de:80/outputs/rook/9d9dec78-d80e-11ef-87be-fa163eb671ca/tas_day_INM-CM5-0_ssp245_r1i1p1f1_gr1_20160101-20161230.nc\n", + " \n", + " \n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(resp.xml)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Size in MBytes**" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"18.07394504547119" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resp.size_in_mb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**URLs in metalink document ...**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['http://rook7.cloud.dkrz.de:80/outputs/rook/9d9dec78-d80e-11ef-87be-fa163eb671ca/tas_day_INM-CM5-0_ssp245_r1i1p1f1_gr1_20160101-20161230.nc']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resp.download_urls()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**download files ...**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading to /tmp/metalink_r3b75l6o/tas_day_INM-CM5-0_ssp245_r1i1p1f1_gr1_20160101-20161230.nc.\n" + ] + }, + { + "data": { + "text/plain": [ + "['/tmp/metalink_r3b75l6o/tas_day_INM-CM5-0_ssp245_r1i1p1f1_gr1_20160101-20161230.nc']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resp.download()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**... and open with xarray**" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Already downloaded tas_day_INM-CM5-0_ssp245_r1i1p1f1_gr1_20160101-20161230.nc.\n" + ] + } + ], + "source": [ + "dsets = resp.datasets()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 31MB\n",
+       "Dimensions:    (time: 364, bnds: 2, lat: 120, lon: 180)\n",
+       "Coordinates:\n",
+       "  * time       (time) object 3kB 2016-01-01 12:00:00 ... 2016-12-30 12:00:00\n",
+       "  * lat        (lat) float64 960B -89.25 -87.75 -86.25 ... 86.25 87.75 89.25\n",
+       "  * lon        (lon) float64 1kB 0.0 2.0 4.0 6.0 8.0 ... 352.0 354.0 356.0 358.0\n",
+       "    height     float64 8B ...\n",
+       "Dimensions without coordinates: bnds\n",
+       "Data variables:\n",
+       "    time_bnds  (time, bnds) object 6kB ...\n",
+       "    lat_bnds   (lat, bnds) float64 2kB ...\n",
+       "    lon_bnds   (lon, bnds) float64 3kB ...\n",
+       "    tas        (time, lat, lon) float32 31MB ...\n",
+       "Attributes: (12/47)\n",
+       "    Conventions:            CF-1.7 CMIP-6.2\n",
+       "    activity_id:            ScenarioMIP\n",
+       "    branch_method:          standard\n",
+       "    branch_time_in_child:   60225.0\n",
+       "    branch_time_in_parent:  60225.0\n",
+       "    contact:                Evgeny Volodin (volodinev@gmail.com)\n",
+       "    ...                     ...\n",
+       "    title:                  INM-CM5-0 output prepared for CMIP6\n",
+       "    tracking_id:            hdl:21.14100/89cab4a2-c867-4bb5-a215-622f969cd844\n",
+       "    variable_id:            tas\n",
+       "    variant_label:          r1i1p1f1\n",
+       "    license:                CMIP6 model data produced by Lawrence Livermore P...\n",
+       "    cmor_version:           3.4.0
" + ], + "text/plain": [ + " Size: 31MB\n", + "Dimensions: (time: 364, bnds: 2, lat: 120, lon: 180)\n", + "Coordinates:\n", + " * time (time) object 3kB 2016-01-01 12:00:00 ... 2016-12-30 12:00:00\n", + " * lat (lat) float64 960B -89.25 -87.75 -86.25 ... 86.25 87.75 89.25\n", + " * lon (lon) float64 1kB 0.0 2.0 4.0 6.0 8.0 ... 352.0 354.0 356.0 358.0\n", + " height float64 8B ...\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " time_bnds (time, bnds) object 6kB ...\n", + " lat_bnds (lat, bnds) float64 2kB ...\n", + " lon_bnds (lon, bnds) float64 3kB ...\n", + " tas (time, lat, lon) float32 31MB ...\n", + "Attributes: (12/47)\n", + " Conventions: CF-1.7 CMIP-6.2\n", + " activity_id: ScenarioMIP\n", + " branch_method: standard\n", + " branch_time_in_child: 60225.0\n", + " branch_time_in_parent: 60225.0\n", + " contact: Evgeny Volodin (volodinev@gmail.com)\n", + " ... ...\n", + " title: INM-CM5-0 output prepared for CMIP6\n", + " tracking_id: hdl:21.14100/89cab4a2-c867-4bb5-a215-622f969cd844\n", + " variable_id: tas\n", + " variant_label: r1i1p1f1\n", + " license: CMIP6 model data produced by Lawrence Livermore P...\n", + " cmor_version: 3.4.0" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = dsets[0]\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Conventions': 'CF-1.7 CMIP-6.2',\n", + " 'activity_id': 'ScenarioMIP',\n", + " 'branch_method': 'standard',\n", + " 'branch_time_in_child': np.float64(60225.0),\n", + " 'branch_time_in_parent': np.float64(60225.0),\n", + " 'contact': 'Evgeny Volodin (volodinev@gmail.com)',\n", + " 'creation_date': '2019-06-18T15:27:34Z',\n", + " 'data_specs_version': '01.00.29',\n", + " 'experiment': 'update of RCP4.5 based on SSP2',\n", + " 'experiment_id': 'ssp245',\n", + " 'external_variables': 'areacella',\n", + " 'forcing_index': np.int32(1),\n", + 
" 'frequency': 'day',\n", + " 'further_info_url': 'https://furtherinfo.es-doc.org/CMIP6.INM.INM-CM5-0.ssp245.none.r1i1p1f1',\n", + " 'grid': 'gs2x1.5',\n", + " 'grid_label': 'gr1',\n", + " 'history': '2019-06-18T15:27:33Z ;rewrote data to be consistent with ScenarioMIP for variable clt found in table day.',\n", + " 'initialization_index': np.int32(1),\n", + " 'institution': 'Institute for Numerical Mathematics, Russian Academy of Science, Moscow 119991, Russia',\n", + " 'institution_id': 'INM',\n", + " 'mip_era': 'CMIP6',\n", + " 'nominal_resolution': '100 km',\n", + " 'parent_activity_id': 'CMIP',\n", + " 'parent_experiment_id': 'historical',\n", + " 'parent_mip_era': 'CMIP6',\n", + " 'parent_source_id': 'INM-CM5-0',\n", + " 'parent_time_units': 'days since 1850-01-01',\n", + " 'parent_variant_label': 'r1i1p1f1',\n", + " 'physics_index': np.int32(1),\n", + " 'product': 'model-output',\n", + " 'realization_index': np.int32(1),\n", + " 'realm': 'atmos',\n", + " 'references': 'RJNAMM 2018, 367-374. DOI: 10.1515/rnam-2018-0032',\n", + " 'run_variant': 'standard',\n", + " 'source': 'INM-CM5-0 (2016): \\naerosol: INM-AER1\\natmos: INM-AM5-0 (2x1.5; 180 x 120 longitude/latitude; 73 levels; top level sigma = 0.0002)\\natmosChem: none\\nland: INM-LND1\\nlandIce: none\\nocean: INM-OM5 (North Pole shifted to 60N, 90E. 
0.5x0.25; 720 x 720 longitude/latitude; 40 levels; vertical sigma coordinate)\\nocnBgchem: none\\nseaIce: INM-ICE1',\n", + " 'source_id': 'INM-CM5-0',\n", + " 'source_type': 'AOGCM AER',\n", + " 'sub_experiment': 'none',\n", + " 'sub_experiment_id': 'none',\n", + " 'table_id': 'day',\n", + " 'table_info': 'Creation Date:(20 February 2019) MD5:951084b632bd52c3f6224e495b1cb65e',\n", + " 'title': 'INM-CM5-0 output prepared for CMIP6',\n", + " 'tracking_id': 'hdl:21.14100/89cab4a2-c867-4bb5-a215-622f969cd844',\n", + " 'variable_id': 'tas',\n", + " 'variant_label': 'r1i1p1f1',\n", + " 'license': 'CMIP6 model data produced by Lawrence Livermore PCMDI is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. 
All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.',\n", + " 'cmor_version': '3.4.0'}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.attrs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**provenance**" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'http://rook7.cloud.dkrz.de:80/outputs/rook/9acf3060-d80e-11ef-8c79-fa163eb671ca/provenance.png'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prov_plot_url = resp.provenance_image()\n", + "prov_plot_url" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import Image\n", + "Image(prov_plot_url)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "WPS Environment", + "language": "python", + "name": "wps_env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/single-user-d4science-dkrzwps/notebooks/demo-rooki-subset-by-time.ipynb:Zone.Identifier b/single-user-d4science-dkrzwps/notebooks/demo-rooki-subset-by-time.ipynb:Zone.Identifier new file mode 100644 index 0000000..5d9065f --- /dev/null +++ b/single-user-d4science-dkrzwps/notebooks/demo-rooki-subset-by-time.ipynb:Zone.Identifier @@ -0,0 +1,4 @@ +[ZoneTransfer] +ZoneId=3 
+ReferrerUrl=https://jupyterhub.d4science.org/user/khan/lab/tree/demo-rooki-subset-by-time.ipynb +HostUrl=https://jupyterhub.d4science.org/user/khan/files/demo-rooki-subset-by-time.ipynb?_xsrf=2%7Cfe84d425%7Cbf04ba7801193c7745b06b35eb348b1e%7C1736772124 diff --git a/single-user-d4science-dkrzwps/notebooks/temperature_threshold_analysis_new.ipynb b/single-user-d4science-dkrzwps/notebooks/temperature_threshold_analysis_new.ipynb new file mode 100644 index 0000000..8be633c --- /dev/null +++ b/single-user-d4science-dkrzwps/notebooks/temperature_threshold_analysis_new.ipynb @@ -0,0 +1,2659 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#
Analyze and Visualize Data with Intake-ESM, XArray and hvPlot
\n", + "##
A Climate Data Use Case
\n", + "\n", + "We will show here how to count the annual summer days for a particular geolocation of your choice using the results of a climate model, in particular, we can choose one of the historical or one of the shared socioeconomic pathway (ssp) experiments of the Coupled Model Intercomparison Project [CMIP6](https://pcmdi.llnl.gov/CMIP6/).\n", + "\n", + "This Jupyter notebook is meant to run in the Jupyterhub server of the German Climate Computing Center [DKRZ](https://www.dkrz.de/) which is an [ESGF](https://esgf.llnl.gov/) repository that hosts 4 petabytes of CMIP6 data. Please, choose the Python 3 unstable kernel on the Kernel tab above, it contains all the common geoscience packages. See more information on how to run Jupyter notebooks at DKRZ [here](https://www.dkrz.de/up/systems/mistral/programming/jupyter-notebook). Find there how to run this Jupyter notebook in the DKRZ server out of the Jupyterhub, which will entail that you create the environment accounting for the required package dependencies. Running this Jupyter notebook on your premises, which is also known as [client-side](https://en.wikipedia.org/wiki/Client-side) computing, will also require that you install the necessary packages on your own but it will fail anyway because you will not have direct access to the data pool. Direct access to the data pool is one of the main benefits of the [server-side](https://en.wikipedia.org/wiki/Server-side) data-near computing we demonstrate in this use case. \n", + "\n", + "Thanks to the data and computer scientists Marco Kulüke, Fabian Wachsmann, Regina Kwee-Hinzmann, Caroline Arnold, Felix Stiehler, Maria Moreno, and Stephan Kindermann at DKRZ for their contribution to this notebook." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this use case you will learn the following:\n", + "- How to access a dataset from the DKRZ CMIP6 model data archive\n", + "- How to count the annual number of summer days for a particular geolocation using this model dataset\n", + "- How to visualize the results\n", + "\n", + "\n", + "You will use:\n", + "- [Intake](https://github.com/intake/intake) for finding the data in the catalog of the DKRZ archive \n", + "- [Xarray](http://xarray.pydata.org/en/stable/) for loading and processing the data \n", + "- [Pandas](https://pandas.pydata.org/) for structuring the data \n", + "- [hvPlot](https://hvplot.holoviz.org/index.html) for visualizing the data in the Jupyter notebook and save the plots on your local computer " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 0. Load Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " const force = true;\n", + " const py_version = '3.6.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n", + " const reloading = false;\n", + " const Bokeh = root.Bokeh;\n", + "\n", + " // Set a timeout for this load but only if we are not already initializing\n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || (force || !root._bokeh_is_initializing)) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks;\n", + " }\n", + " console.debug(\"Bokeh: all 
callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + " if (js_modules == null) js_modules = [];\n", + " if (js_exports == null) js_exports = {};\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + "\n", + " if (root._bokeh_is_loading > 0) {\n", + " // Don't load bokeh if it is still initializing\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " } else if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n", + " // There is nothing to load\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + " window._bokeh_on_load = on_load\n", + "\n", + " function on_error(e) {\n", + " const src_el = e.srcElement\n", + " console.error(\"failed to load \" + (src_el.href || src_el.src));\n", + " }\n", + "\n", + " const skip = [];\n", + " if (window.requirejs) {\n", + " window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n", + " root._bokeh_is_loading = css_urls.length + 0;\n", + " } else {\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n", + " }\n", + "\n", + " const existing_stylesheets = []\n", + " const links = document.getElementsByTagName('link')\n", + " for (let i = 0; i < links.length; i++) {\n", + " const link = links[i]\n", + " if (link.href != null) {\n", + " existing_stylesheets.push(link.href)\n", + " }\n", + " }\n", + " for (let i = 0; i < css_urls.length; i++) {\n", + " const url = css_urls[i];\n", + " const escaped = encodeURI(url)\n", + " if 
(existing_stylesheets.indexOf(escaped) !== -1) {\n", + " on_load()\n", + " continue;\n", + " }\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " } var existing_scripts = []\n", + " const scripts = document.getElementsByTagName('script')\n", + " for (let i = 0; i < scripts.length; i++) {\n", + " var script = scripts[i]\n", + " if (script.src != null) {\n", + " existing_scripts.push(script.src)\n", + " }\n", + " }\n", + " for (let i = 0; i < js_urls.length; i++) {\n", + " const url = js_urls[i];\n", + " const escaped = encodeURI(url)\n", + " if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n", + " if (!window.requirejs) {\n", + " on_load();\n", + " }\n", + " continue;\n", + " }\n", + " const element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " for (let i = 0; i < js_modules.length; i++) {\n", + " const url = js_modules[i];\n", + " const escaped = encodeURI(url)\n", + " if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n", + " if (!window.requirejs) {\n", + " on_load();\n", + " }\n", + " continue;\n", + " }\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " element.type = \"module\";\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " 
document.head.appendChild(element);\n", + " }\n", + " for (const name in js_exports) {\n", + " const url = js_exports[name];\n", + " const escaped = encodeURI(url)\n", + " if (skip.indexOf(escaped) >= 0 || root[name] != null) {\n", + " if (!window.requirejs) {\n", + " on_load();\n", + " }\n", + " continue;\n", + " }\n", + " var element = document.createElement('script');\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.type = \"module\";\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " element.textContent = `\n", + " import ${name} from \"${url}\"\n", + " window.${name} = ${name}\n", + " window._bokeh_on_load()\n", + " `\n", + " document.head.appendChild(element);\n", + " }\n", + " if (!js_urls.length && !js_modules.length) {\n", + " on_load()\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " const js_urls = [\"https://cdn.holoviz.org/panel/1.5.5/dist/bundled/reactiveesm/es-module-shims@^1.10.0/dist/es-module-shims.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.6.2.min.js\", \"https://cdn.holoviz.org/panel/1.5.5/dist/panel.min.js\"];\n", + " const js_modules = [];\n", + " const js_exports = {};\n", + " const css_urls = [];\n", + " const inline_js = [ function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + "function(Bokeh) {} // ensure no trailing comma for IE\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " if ((root.Bokeh !== undefined) || (force === true)) {\n", + " for (let i = 0; i < inline_js.length; i++) {\n", + " try {\n", + " 
inline_js[i].call(root, root.Bokeh);\n", + " } catch(e) {\n", + " if (!reloading) {\n", + " throw e;\n", + " }\n", + " }\n", + " }\n", + " // Cache old bokeh versions\n", + " if (Bokeh != undefined && !reloading) {\n", + " var NewBokeh = root.Bokeh;\n", + " if (Bokeh.versions === undefined) {\n", + " Bokeh.versions = new Map();\n", + " }\n", + " if (NewBokeh.version !== Bokeh.version) {\n", + " Bokeh.versions.set(NewBokeh.version, NewBokeh)\n", + " }\n", + " root.Bokeh = Bokeh;\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " }\n", + " root._bokeh_is_initializing = false\n", + " }\n", + "\n", + " function load_or_wait() {\n", + " // Implement a backoff loop that tries to ensure we do not load multiple\n", + " // versions of Bokeh and its dependencies at the same time.\n", + " // In recent versions we use the root._bokeh_is_initializing flag\n", + " // to determine whether there is an ongoing attempt to initialize\n", + " // bokeh, however for backward compatibility we also try to ensure\n", + " // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n", + " // before older versions are fully initialized.\n", + " if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n", + " // If the timeout and bokeh was not successfully loaded we reset\n", + " // everything and try loading again\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_is_initializing = false;\n", + " root._bokeh_onload_callbacks = undefined;\n", + " root._bokeh_is_loading = 0\n", + " console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n", + " load_or_wait();\n", + " } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && 
root._bokeh_onload_callbacks !== undefined)) {\n", + " setTimeout(load_or_wait, 100);\n", + " } else {\n", + " root._bokeh_is_initializing = true\n", + " root._bokeh_onload_callbacks = []\n", + " const bokeh_loaded = root.Bokeh != null && (root.Bokeh.version === py_version || (root.Bokeh.versions !== undefined && root.Bokeh.versions.has(py_version)));\n", + " if (!reloading && !bokeh_loaded) {\n", + " if (root.Bokeh) {\n", + " root.Bokeh = undefined;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " }\n", + " load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + " }\n", + " // Give older versions of the autoload script a head-start to ensure\n", + " // they initialize before we start loading newer version.\n", + " setTimeout(load_or_wait, 100)\n", + "}(window));" + ], + "application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n const py_version = '3.6.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n const reloading = false;\n const Bokeh = root.Bokeh;\n\n // Set a timeout for this load but only if we are not already initializing\n if (typeof (root._bokeh_timeout) === \"undefined\" || (force || !root._bokeh_is_initializing)) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports 
= {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n // Don't load bokeh if it is still initializing\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n } else if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n // There is nothing to load\n run_callbacks();\n return null;\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error(e) {\n const src_el = e.srcElement\n console.error(\"failed to load \" + (src_el.href || src_el.src));\n }\n\n const skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n root._bokeh_is_loading = css_urls.length + 0;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n const existing_stylesheets = []\n const links = document.getElementsByTagName('link')\n for (let i = 0; i < links.length; i++) {\n const link = links[i]\n if (link.href != null) {\n existing_stylesheets.push(link.href)\n }\n }\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const escaped = encodeURI(url)\n if (existing_stylesheets.indexOf(escaped) !== -1) {\n on_load()\n continue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } var existing_scripts = []\n const scripts = document.getElementsByTagName('script')\n for (let i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n 
existing_scripts.push(script.src)\n }\n }\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (let i = 0; i < js_modules.length; i++) {\n const url = js_modules[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n const url = js_exports[name];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) >= 0 || root[name] != null) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = 
[\"https://cdn.holoviz.org/panel/1.5.5/dist/bundled/reactiveesm/es-module-shims@^1.10.0/dist/es-module-shims.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.6.2.min.js\", \"https://cdn.holoviz.org/panel/1.5.5/dist/panel.min.js\"];\n const js_modules = [];\n const js_exports = {};\n const css_urls = [];\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (let i = 0; i < inline_js.length; i++) {\n try {\n inline_js[i].call(root, root.Bokeh);\n } catch(e) {\n if (!reloading) {\n throw e;\n }\n }\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n var NewBokeh = root.Bokeh;\n if (Bokeh.versions === undefined) {\n Bokeh.versions = new Map();\n }\n if (NewBokeh.version !== Bokeh.version) {\n Bokeh.versions.set(NewBokeh.version, NewBokeh)\n }\n root.Bokeh = Bokeh;\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && 
Date.now() > root._bokeh_timeout) {\n // If the timeout and bokeh was not successfully loaded we reset\n // everything and try loading again\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n root._bokeh_is_loading = 0\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n const bokeh_loaded = root.Bokeh != null && (root.Bokeh.version === py_version || (root.Bokeh.versions !== undefined && root.Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n if (root.Bokeh) {\n root.Bokeh = undefined;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "if ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n", + " window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n", + "}\n", + "\n", + "\n", + " function JupyterCommManager() {\n", + " }\n", + "\n", + " JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n", + " if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", + " var comm_manager = window.comm_manager || 
Jupyter.notebook.kernel.comm_manager;\n", + " comm_manager.register_target(comm_id, function(comm) {\n", + " comm.on_msg(msg_handler);\n", + " });\n", + " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", + " window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n", + " comm.onMsg = msg_handler;\n", + " });\n", + " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", + " google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n", + " var messages = comm.messages[Symbol.asyncIterator]();\n", + " function processIteratorResult(result) {\n", + " var message = result.value;\n", + " console.log(message)\n", + " var content = {data: message.data, comm_id};\n", + " var buffers = []\n", + " for (var buffer of message.buffers || []) {\n", + " buffers.push(new DataView(buffer))\n", + " }\n", + " var metadata = message.metadata || {};\n", + " var msg = {content, buffers, metadata}\n", + " msg_handler(msg);\n", + " return messages.next().then(processIteratorResult);\n", + " }\n", + " return messages.next().then(processIteratorResult);\n", + " })\n", + " }\n", + " }\n", + "\n", + " JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n", + " if (comm_id in window.PyViz.comms) {\n", + " return window.PyViz.comms[comm_id];\n", + " } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", + " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", + " var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n", + " if (msg_handler) {\n", + " comm.on_msg(msg_handler);\n", + " }\n", + " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", + " var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n", + " comm.open();\n", + " if (msg_handler) {\n", + " comm.onMsg = msg_handler;\n", + " }\n", + " } else if (typeof google != 
'undefined' && google.colab.kernel != null) {\n", + " var comm_promise = google.colab.kernel.comms.open(comm_id)\n", + " comm_promise.then((comm) => {\n", + " window.PyViz.comms[comm_id] = comm;\n", + " if (msg_handler) {\n", + " var messages = comm.messages[Symbol.asyncIterator]();\n", + " function processIteratorResult(result) {\n", + " var message = result.value;\n", + " var content = {data: message.data};\n", + " var metadata = message.metadata || {comm_id};\n", + " var msg = {content, metadata}\n", + " msg_handler(msg);\n", + " return messages.next().then(processIteratorResult);\n", + " }\n", + " return messages.next().then(processIteratorResult);\n", + " }\n", + " }) \n", + " var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n", + " return comm_promise.then((comm) => {\n", + " comm.send(data, metadata, buffers, disposeOnDone);\n", + " });\n", + " };\n", + " var comm = {\n", + " send: sendClosure\n", + " };\n", + " }\n", + " window.PyViz.comms[comm_id] = comm;\n", + " return comm;\n", + " }\n", + " window.PyViz.comm_manager = new JupyterCommManager();\n", + " \n", + "\n", + "\n", + "var JS_MIME_TYPE = 'application/javascript';\n", + "var HTML_MIME_TYPE = 'text/html';\n", + "var EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\n", + "var CLASS_NAME = 'output';\n", + "\n", + "/**\n", + " * Render data to the DOM node\n", + " */\n", + "function render(props, node) {\n", + " var div = document.createElement(\"div\");\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(div);\n", + " node.appendChild(script);\n", + "}\n", + "\n", + "/**\n", + " * Handle when a new output is added\n", + " */\n", + "function handle_add_output(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + " if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + " var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " var toinsert 
= output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + " if (id !== undefined) {\n", + " var nchildren = toinsert.length;\n", + " var html_node = toinsert[nchildren-1].children[0];\n", + " html_node.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var scripts = [];\n", + " var nodelist = html_node.querySelectorAll(\"script\");\n", + " for (var i in nodelist) {\n", + " if (nodelist.hasOwnProperty(i)) {\n", + " scripts.push(nodelist[i])\n", + " }\n", + " }\n", + "\n", + " scripts.forEach( function (oldScript) {\n", + " var newScript = document.createElement(\"script\");\n", + " var attrs = [];\n", + " var nodemap = oldScript.attributes;\n", + " for (var j in nodemap) {\n", + " if (nodemap.hasOwnProperty(j)) {\n", + " attrs.push(nodemap[j])\n", + " }\n", + " }\n", + " attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n", + " newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n", + " oldScript.parentNode.replaceChild(newScript, oldScript);\n", + " });\n", + " if (JS_MIME_TYPE in output.data) {\n", + " toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n", + " }\n", + " output_area._hv_plot_id = id;\n", + " if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n", + " window.PyViz.plot_index[id] = Bokeh.index[id];\n", + " } else {\n", + " window.PyViz.plot_index[id] = null;\n", + " }\n", + " } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + "}\n", + "\n", + 
"/**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + "function handle_clear_output(event, handle) {\n", + " var id = handle.cell.output_area._hv_plot_id;\n", + " var server_id = handle.cell.output_area._bokeh_server_id;\n", + " if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n", + " var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n", + " if (server_id !== null) {\n", + " comm.send({event_type: 'server_delete', 'id': server_id});\n", + " return;\n", + " } else if (comm !== null) {\n", + " comm.send({event_type: 'delete', 'id': id});\n", + " }\n", + " delete PyViz.plot_index[id];\n", + " if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n", + " var doc = window.Bokeh.index[id].model.document\n", + " doc.clear();\n", + " const i = window.Bokeh.documents.indexOf(doc);\n", + " if (i > -1) {\n", + " window.Bokeh.documents.splice(i, 1);\n", + " }\n", + " }\n", + "}\n", + "\n", + "/**\n", + " * Handle kernel restart event\n", + " */\n", + "function handle_kernel_cleanup(event, handle) {\n", + " delete PyViz.comms[\"hv-extension-comm\"];\n", + " window.PyViz.plot_index = {}\n", + "}\n", + "\n", + "/**\n", + " * Handle update_display_data messages\n", + " */\n", + "function handle_update_output(event, handle) {\n", + " handle_clear_output(event, {cell: {output_area: handle.output_area}})\n", + " handle_add_output(event, handle)\n", + "}\n", + "\n", + "function register_renderer(events, OutputArea) {\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[0]);\n", + " 
element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " events.on('output_added.OutputArea', handle_add_output);\n", + " events.on('output_updated.OutputArea', handle_update_output);\n", + " events.on('clear_output.CodeCell', handle_clear_output);\n", + " events.on('delete.Cell', handle_clear_output);\n", + " events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n", + "\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " safe: true,\n", + " index: 0\n", + " });\n", + "}\n", + "\n", + "if (window.Jupyter !== undefined) {\n", + " try {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " } catch(err) {\n", + " }\n", + "}\n" + ], + "application/vnd.holoviews_load.v0+json": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n 
console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n 
window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if 
(output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n 
EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ] + }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "57a87336-72e6-4688-ac0f-92aa2a4c608e" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np # fundamental package for scientific computing\n", + "import pandas as pd # data analysis and manipulation tool\n", + "import xarray as xr # handling labelled multi-dimensional arrays\n", + "import intake # to find data in a catalog, this notebook explains how it works\n", + "from ipywidgets import widgets # to use widgets in the Jupyer Notebook\n", + "from geopy.geocoders import Nominatim # Python client for several popular geocoding web services\n", + "import folium # visualization tool for maps\n", + "import hvplot.pandas # visualization tool for interactive plots\n", + "import fsspec # unify various projects and classes to work with remote filesystems andfile-system-like abstractions using a standard pythonic interface\n", + "import zarr\n", + "import intake_esm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Which dataset do we need? 
-> Place, and Year\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feel free to change the default values.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1bb6a02052174481a52f47aab8388b1d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Text(value='Hamburg', description='Enter place:')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7f1b3ef990384d61a0f7aedf6b25ee82", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Dropdown(description='Select year: ', options=(2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 202…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Produce the widgets where we can select the geolocation and year we are interested in \n", + "\n", + "print(\"Feel free to change the default values.\")\n", + "\n", + "place_box = widgets.Text(description=\"Enter place:\", value=\"Hamburg\")\n", + "display(place_box)\n", + "\n", + "year_box = widgets.Dropdown(options=range(2015, 2101), description=\"Select year: \", disabled=False,)\n", + "display(year_box)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.1 Find Coordinates of chosen Place\n", + "If ambiguous, the most likely coordinates will be chosen, e.g. 
\"Hamburg\" results in \"Hamburg, 20095, Deutschland\", (53.55 North, 10.00 East)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hamburg, Deutschland\n", + "(53.550341, 10.000654)\n" + ] + } + ], + "source": [ + "# We use the module Nominatim, which gives us the geographical coordinates of the place we selected above\n", + "\n", + "geolocator = Nominatim(user_agent=\"any_agent\")\n", + "location = geolocator.geocode(place_box.value)\n", + "\n", + "print(location.address)\n", + "print((location.latitude, location.longitude))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "### 1.2 Show Place on a Map" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# We use the folium package to plot our selected geolocation in a map\n", + "\n", + "m = folium.Map(location=[location.latitude, location.longitude])\n", + "tooltip = location.latitude, location.longitude\n", + "folium.Marker([location.latitude, location.longitude], tooltip=tooltip).add_to(m)\n", + "display(m)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have defined the place and time. Now, we can search for the climate model dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Intake Catalog\n", + "Similar to the shopping catalog at your favorite online bookstore, the intake catalog contains information (e.g. model, variables, and time range) about each dataset (the title, author, and number of pages of the book, for instance) that you can access before loading the data. It means that thanks to the catalog, you can find where is the book just by using some keywords and you do not need to hold it in your hand to know the number of pages, for instance.\n", + "\n", + "### 2.1 Load the Intake Catalog\n", + "We load the catalog descriptor with the intake package. The catalog is updated daily. The catalog descriptor is created by the DKRZ developers that manage the catalog, you do not need to care so much about it, knowing where it is and loading it is enough:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.12/site-packages/intake_esm/cat.py:249: DtypeWarning: Columns (21,22,23) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(\n" + ] + } + ], + "source": [ + "# Open the catalog with the intake package and name it \"col\" as short for \"collection\"\n", + "col=intake.open_esm_datastore(\"https://gitlab.dkrz.de/data-infrastructure-services/intake-esm/-/raw/master/esm-collections/cloud-access/dkrz_cmip6_disk.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['activity_id', 'institution_id', 'source_id', 'experiment_id',\n", + " 'member_id', 'table_id', 'variable_id', 'grid_label', 'dcpp_init_year',\n", + " 'version', 'time_range', 'path', 'opendap_url', 'project',\n", + " 'simulation_id', 'grid_id', 'frequency', 'time_reduction', 'long_name',\n", + " 'units', 'realm', 'level_type', 'time_min', 'time_max', 'format',\n", + " 'uri'],\n", + " dtype='object')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "col.df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what is inside the intake catalog. The underlying database is given as a pandas dataframe which we can access with \"col.df\". Then, \"col.df.head()\" shows us the first rows of the table of the catalog." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This catalog contains all datasets of the CMIP6 archive at DKRZ. In the next step we narrow the results down by choosing a model and variable." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.2 Browse the Intake Catalog\n", + "In this example we chose the Max-Planck Earth System Model in High Resolution Mode (\"MPI-ESM1-2-HR\") and the maximum temperature near surface (\"tasmax\") as variable. We also choose an experiment. CMIP6 comprises several kinds of experiments. Each experiment has various simulation members. 
You can find more information in the [CMIP6 Model and Experiment Documentation](https://pcmdi.llnl.gov/CMIP6/Guide/dataUsers.html#5-model-and-experiment-documentation)." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
activity_idinstitution_idsource_idexperiment_idmember_idtable_idvariable_idgrid_labeldcpp_init_yearversion...frequencytime_reductionlong_nameunitsrealmlevel_typetime_mintime_maxformaturi
0CMIPMPI-MMPI-ESM1-2-HR1pctCO2r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN18500101.018541231netcdf/work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E...
1CMIPMPI-MMPI-ESM1-2-HR1pctCO2r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN18550101.018591231netcdf/work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E...
2CMIPMPI-MMPI-ESM1-2-HR1pctCO2r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN18600101.018641231netcdf/work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E...
3CMIPMPI-MMPI-ESM1-2-HR1pctCO2r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN18650101.018691231netcdf/work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E...
4CMIPMPI-MMPI-ESM1-2-HR1pctCO2r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN18700101.018741231netcdf/work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E...
..................................................................
648ScenarioMIPDKRZMPI-ESM1-2-HRssp585r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN20800101.020841231netcdf/work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ...
649ScenarioMIPDKRZMPI-ESM1-2-HRssp585r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN20850101.020891231netcdf/work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ...
650ScenarioMIPDKRZMPI-ESM1-2-HRssp585r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN20900101.020941231netcdf/work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ...
651ScenarioMIPDKRZMPI-ESM1-2-HRssp585r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN20950101.020991231netcdf/work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ...
652ScenarioMIPDKRZMPI-ESM1-2-HRssp585r1i1p1f1daytasmaxgnNaNv20190710...daymaximumDaily Maximum Near-Surface Air TemperatureKatmosNaN21000101.021001231netcdf/work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ...
\n", + "

653 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " activity_id institution_id source_id experiment_id member_id \\\n", + "0 CMIP MPI-M MPI-ESM1-2-HR 1pctCO2 r1i1p1f1 \n", + "1 CMIP MPI-M MPI-ESM1-2-HR 1pctCO2 r1i1p1f1 \n", + "2 CMIP MPI-M MPI-ESM1-2-HR 1pctCO2 r1i1p1f1 \n", + "3 CMIP MPI-M MPI-ESM1-2-HR 1pctCO2 r1i1p1f1 \n", + "4 CMIP MPI-M MPI-ESM1-2-HR 1pctCO2 r1i1p1f1 \n", + ".. ... ... ... ... ... \n", + "648 ScenarioMIP DKRZ MPI-ESM1-2-HR ssp585 r1i1p1f1 \n", + "649 ScenarioMIP DKRZ MPI-ESM1-2-HR ssp585 r1i1p1f1 \n", + "650 ScenarioMIP DKRZ MPI-ESM1-2-HR ssp585 r1i1p1f1 \n", + "651 ScenarioMIP DKRZ MPI-ESM1-2-HR ssp585 r1i1p1f1 \n", + "652 ScenarioMIP DKRZ MPI-ESM1-2-HR ssp585 r1i1p1f1 \n", + "\n", + " table_id variable_id grid_label dcpp_init_year version ... frequency \\\n", + "0 day tasmax gn NaN v20190710 ... day \n", + "1 day tasmax gn NaN v20190710 ... day \n", + "2 day tasmax gn NaN v20190710 ... day \n", + "3 day tasmax gn NaN v20190710 ... day \n", + "4 day tasmax gn NaN v20190710 ... day \n", + ".. ... ... ... ... ... ... ... \n", + "648 day tasmax gn NaN v20190710 ... day \n", + "649 day tasmax gn NaN v20190710 ... day \n", + "650 day tasmax gn NaN v20190710 ... day \n", + "651 day tasmax gn NaN v20190710 ... day \n", + "652 day tasmax gn NaN v20190710 ... day \n", + "\n", + " time_reduction long_name units realm \\\n", + "0 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + "1 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + "2 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + "3 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + "4 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + ".. ... ... ... ... 
\n", + "648 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + "649 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + "650 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + "651 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + "652 maximum Daily Maximum Near-Surface Air Temperature K atmos \n", + "\n", + " level_type time_min time_max format \\\n", + "0 NaN 18500101.0 18541231 netcdf \n", + "1 NaN 18550101.0 18591231 netcdf \n", + "2 NaN 18600101.0 18641231 netcdf \n", + "3 NaN 18650101.0 18691231 netcdf \n", + "4 NaN 18700101.0 18741231 netcdf \n", + ".. ... ... ... ... \n", + "648 NaN 20800101.0 20841231 netcdf \n", + "649 NaN 20850101.0 20891231 netcdf \n", + "650 NaN 20900101.0 20941231 netcdf \n", + "651 NaN 20950101.0 20991231 netcdf \n", + "652 NaN 21000101.0 21001231 netcdf \n", + "\n", + " uri \n", + "0 /work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E... \n", + "1 /work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E... \n", + "2 /work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E... \n", + "3 /work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E... \n", + "4 /work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E... \n", + ".. ... \n", + "648 /work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ... \n", + "649 /work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ... \n", + "650 /work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ... \n", + "651 /work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ... \n", + "652 /work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ... 
\n", + "\n", + "[653 rows x 26 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Store the name of the model we chose in a variable named \"climate_model\"\n", + "\n", + "climate_model = \"MPI-ESM1-2-HR\" # here we choose Max-Planck Institute's Earth System Model in high resolution\n", + "\n", + "# This is how we tell intake what data we want\n", + "\n", + "query = dict(\n", + " source_id = climate_model, # the model\n", + "# experiment_id = \"ssp370\",\n", + " member_id = \"r1i1p1f1\",\n", + " variable_id = \"tasmax\", # temperature at surface, maximum\n", + " table_id = \"day\", # daily maximum\n", + ")\n", + "\n", + "# Intake looks for the query we just defined in the catalog of the CMIP6 data pool at DKRZ\n", + "cat = col.search(**query)\n", + "\n", + "# Show query results\n", + "cat.df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The results of the query are like the list of results you get when you search for articles on the internet by writing keywords in your search engine (DuckDuckGo, Ecosia, Google,...). Thanks to the intake package, we did not need to know the path of each dataset, just selecting some keywords (the model name, the variable,...) was enough to obtain the results. If advanced users are still interested in the location of the data inside the DKRZ archive, intake also provides the path and the OpenDAP URL (see the last columns above). \n", + "\n", + "\n", + "Now we will find which file in the dataset contains our selected year so in the next section we can just load that specific file and not the whole dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.3 Find the Dataset That Contains the Year You Selected in Drop Down Menu Above" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
activity_idinstitution_idsource_idexperiment_idmember_idtable_idvariable_idgrid_labeldcpp_init_yearversion...long_nameunitsrealmlevel_typetime_mintime_maxformaturistart_yearend_year
0CMIPMPI-MMPI-ESM1-2-HR1pctCO2r1i1p1f1daytasmaxgnNaNv20190710...Daily Maximum Near-Surface Air TemperatureKatmosNaN18500101.018541231netcdf/work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E...18501854
1CMIPMPI-MMPI-ESM1-2-HR1pctCO2r1i1p1f1daytasmaxgnNaNv20190710...Daily Maximum Near-Surface Air TemperatureKatmosNaN18550101.018591231netcdf/work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E...18551859
2CMIPMPI-MMPI-ESM1-2-HR1pctCO2r1i1p1f1daytasmaxgnNaNv20190710...Daily Maximum Near-Surface Air TemperatureKatmosNaN18600101.018641231netcdf/work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E...18601864
\n", + "

3 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " activity_id institution_id source_id experiment_id member_id table_id \\\n", + "0 CMIP MPI-M MPI-ESM1-2-HR 1pctCO2 r1i1p1f1 day \n", + "1 CMIP MPI-M MPI-ESM1-2-HR 1pctCO2 r1i1p1f1 day \n", + "2 CMIP MPI-M MPI-ESM1-2-HR 1pctCO2 r1i1p1f1 day \n", + "\n", + " variable_id grid_label dcpp_init_year version ... \\\n", + "0 tasmax gn NaN v20190710 ... \n", + "1 tasmax gn NaN v20190710 ... \n", + "2 tasmax gn NaN v20190710 ... \n", + "\n", + " long_name units realm level_type \\\n", + "0 Daily Maximum Near-Surface Air Temperature K atmos NaN \n", + "1 Daily Maximum Near-Surface Air Temperature K atmos NaN \n", + "2 Daily Maximum Near-Surface Air Temperature K atmos NaN \n", + "\n", + " time_min time_max format \\\n", + "0 18500101.0 18541231 netcdf \n", + "1 18550101.0 18591231 netcdf \n", + "2 18600101.0 18641231 netcdf \n", + "\n", + " uri start_year end_year \n", + "0 /work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E... 1850 1854 \n", + "1 /work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E... 1855 1859 \n", + "2 /work/ik1017/CMIP6/data/CMIP6/CMIP/MPI-M/MPI-E... 
1860 1864 \n", + "\n", + "[3 rows x 27 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a copy of cat.df, thus further modifications do not affect it\n", + "\n", + "query_result_df = cat.df.copy() # new dataframe\n", + "\n", + "\n", + "# Each dataset contains many files, extract the initial and final year of each file\n", + "\n", + "query_result_df[\"start_year\"] = query_result_df[\"time_range\"].str[0:4].astype(int) # add column with start year\n", + "query_result_df[\"end_year\"] = query_result_df[\"time_range\"].str[9:13].astype(int) # add column with end year\n", + "\n", + "\n", + "# Delete the time range column\n", + "\n", + "query_result_df.drop(columns=[\"time_range\"], inplace = True) # \"inplace = False\" will drop the column in the view but not in the actual dataframe\n", + "query_result_df.iloc[0:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'http://esgf3.dkrz.de/thredds/dodsC/cmip6/CMIP/MPI-M/MPI-ESM1-2-HR/piControl/r1i1p1f1/day/tasmax/gn/v20190710/tasmax_day_MPI-ESM1-2-HR_piControl_r1i1p1f1_gn_20150101-20191231.nc'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the file that contains the year we selected in the drop down menu above, e.g. 
2015\n", + "\n", + "selected_file = query_result_df[(year_box.value >= query_result_df[\"start_year\"]) & (\n", + " year_box.value <= query_result_df[\"end_year\"])]\n", + "\n", + "\n", + "# Path of the file that contains the selected year\n", + "\n", + "selected_path = selected_file[\"opendap_url\"].values[0] \n", + "\n", + "\n", + "# Show the path of the file that contains the selected year\n", + "\n", + "selected_path" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloaded: tasmax_day_MPI-ESM1-2-HR_piControl_r1i1p1f1_gn_20150101-20191231.nc\n", + "Dataset loaded successfully.\n", + "Data for the selected year sliced successfully.\n" + ] + } + ], + "source": [ + "# This script dynamically constructs fileServer URLs and fetches data for loading into the model.\n", + "import os\n", + "import requests\n", + "import xarray as xr\n", + "\n", + "# Define the HTTP download function\n", + "def download_data(url, save_path):\n", + " \"\"\"Downloads data from an HTTP URL and saves it to a specified path.\"\"\"\n", + " response = requests.get(url, stream=True)\n", + " if response.status_code == 200:\n", + " with open(save_path, 'wb') as f:\n", + " for chunk in response.iter_content(chunk_size=8192):\n", + " f.write(chunk)\n", + " print(f\"Downloaded: {save_path}\")\n", + " else:\n", + " raise Exception(f\"Failed to download data. 
Status code: {response.status_code}\")\n", + "\n", + "# Function to fetch and load data based on the selected year\n", + "def fetch_and_load_data(selected_file):\n", + " \"\"\"Fetches the data file for the selected year and loads it.\"\"\"\n", + " # Extract the OPeNDAP URL\n", + " opendap_url = selected_file[\"opendap_url\"].values[0]\n", + "\n", + " # Construct the fileServer URL from the OPeNDAP URL\n", + " file_server_url = opendap_url.replace(\"dodsC\", \"fileServer\")\n", + "\n", + " # Extract the file name from the URL\n", + " save_file = os.path.basename(file_server_url)\n", + "\n", + " # Download the file\n", + " download_data(file_server_url, save_file)\n", + "\n", + " # Load the data using xarray\n", + " try:\n", + " ds = xr.open_dataset(save_file)\n", + " print(\"Dataset loaded successfully.\")\n", + " return ds\n", + " except Exception as e:\n", + " raise Exception(f\"Error loading dataset: {e}\")\n", + "\n", + "# Example selection logic for the year\n", + "selected_file = query_result_df[(year_box.value >= query_result_df[\"start_year\"]) & (\n", + " year_box.value <= query_result_df[\"end_year\"])]\n", + "\n", + "if not selected_file.empty:\n", + " # Fetch and load the dataset\n", + " ds_tasmax = fetch_and_load_data(selected_file)\n", + "\n", + " # Open variable \"tasmax\" over the whole time range\n", + " tasmax_xr = ds_tasmax[\"tasmax\"]\n", + "\n", + " # Define start and end time string\n", + " time_start = str(year_box.value) + \"-01-01T12:00:00.000000000\"\n", + " time_end = str(year_box.value) + \"-12-31T12:00:00.000000000\"\n", + "\n", + " # Slice selected year\n", + " tasmax_year_xr = tasmax_xr.loc[time_start:time_end, :, :]\n", + " print(\"Data for the selected year sliced successfully.\")\n", + "else:\n", + " print(\"No data file available for the selected year.\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 
Load the model data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'tasmax' (time: 365, lat: 192, lon: 384)> Size: 108MB\n",
+       "[26910720 values with dtype=float32]\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 3kB 2015-01-01T12:00:00 ... 2015-12-31T12:...\n",
+       "  * lat      (lat) float64 2kB -89.28 -88.36 -87.42 -86.49 ... 87.42 88.36 89.28\n",
+       "  * lon      (lon) float64 3kB 0.0 0.9375 1.875 2.812 ... 357.2 358.1 359.1\n",
+       "    height   float64 8B ...\n",
+       "Attributes:\n",
+       "    standard_name:  air_temperature\n",
+       "    long_name:      Daily Maximum Near-Surface Air Temperature\n",
+       "    comment:        maximum near-surface (usually, 2 meter) air temperature (...\n",
+       "    units:          K\n",
+       "    cell_methods:   area: mean time: maximum\n",
+       "    cell_measures:  area: areacella\n",
+       "    history:        2019-08-14T18:59:51Z altered by CMOR: Treated scalar dime...
" + ], + "text/plain": [ + " Size: 108MB\n", + "[26910720 values with dtype=float32]\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 3kB 2015-01-01T12:00:00 ... 2015-12-31T12:...\n", + " * lat (lat) float64 2kB -89.28 -88.36 -87.42 -86.49 ... 87.42 88.36 89.28\n", + " * lon (lon) float64 3kB 0.0 0.9375 1.875 2.812 ... 357.2 358.1 359.1\n", + " height float64 8B ...\n", + "Attributes:\n", + " standard_name: air_temperature\n", + " long_name: Daily Maximum Near-Surface Air Temperature\n", + " comment: maximum near-surface (usually, 2 meter) air temperature (...\n", + " units: K\n", + " cell_methods: area: mean time: maximum\n", + " cell_measures: area: areacella\n", + " history: 2019-08-14T18:59:51Z altered by CMOR: Treated scalar dime..." + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's have a look at the xarray data array\n", + "\n", + "tasmax_year_xr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see not only the numbers, but also information about it, such as long name, units, and the data history. This information is called metadata." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Compare Model Grid Cell with chosen Location" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Find nearest model coordinate by finding the index of the nearest grid point\n", + "\n", + "abslat = np.abs(tasmax_year_xr[\"lat\"] - location.latitude)\n", + "abslon = np.abs(tasmax_year_xr[\"lon\"] - location.longitude)\n", + "c = np.maximum(abslon, abslat)\n", + "\n", + "([xloc], [yloc]) = np.where(c == np.min(c)) # xloc and yloc are the indices of the neares model grid point" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Draw map again\n", + "\n", + "m = folium.Map(location=[location.latitude, location.longitude], zoom_start=8)\n", + "\n", + "\n", + "tooltip = location.latitude, location.longitude\n", + "folium.Marker(\n", + " [location.latitude, location.longitude],\n", + " tooltip=tooltip,\n", + " popup=\"Location selected by You\",\n", + ").add_to(m)\n", + "\n", + "\n", + "tooltip = float(tasmax_year_xr[\"lat\"][yloc].values), float(tasmax_year_xr[\"lon\"][xloc].values)\n", + "folium.Marker(\n", + " [tasmax_year_xr[\"lat\"][yloc], tasmax_year_xr[\"lon\"][xloc]],\n", + " tooltip=tooltip,\n", + " popup=\"Model Grid Cell Center\",\n", + ").add_to(m)\n", + "\n", + "\n", + "# Define coordinates of model grid cell (just for visualization)\n", + "\n", + "rect_lat1_model = (tasmax_year_xr[\"lat\"][yloc - 1] + tasmax_year_xr[\"lat\"][yloc]) / 2\n", + "rect_lon1_model = (tasmax_year_xr[\"lon\"][xloc - 1] + tasmax_year_xr[\"lon\"][xloc]) / 2\n", + "rect_lat2_model = (tasmax_year_xr[\"lat\"][yloc + 1] + tasmax_year_xr[\"lat\"][yloc]) / 2\n", + "rect_lon2_model = (tasmax_year_xr[\"lon\"][xloc + 1] + tasmax_year_xr[\"lon\"][xloc]) / 2\n", + "\n", + "\n", + "# Draw model grid cell\n", + "\n", + "folium.Rectangle(\n", + " bounds=[[rect_lat1_model, rect_lon1_model], [rect_lat2_model, rect_lon2_model]],\n", + " color=\"#ff7800\",\n", + " fill=True,\n", + " fill_color=\"#ffff00\",\n", + " fill_opacity=0.2,\n", + ").add_to(m)\n", + "\n", + "m" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Climate models have a finite resolution. Hence, models do not provide the data of a particular point, but the mean over a model grid cell. Keep this in mind when comparing model data with observed data (e.g. weather stations).\n", + "\n", + "\n", + "Now, we will visualize the daily maximum temperature time series of the model grid cell." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Draw Temperature Time Series and Count Summer days" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The definition of a summer day varies from region to region. According to the [German Weather Service](https://www.dwd.de/EN/ourservices/germanclimateatlas/explanations/elements/_functions/faqkarussel/sommertage.html), \"a summer day is a day on which the maximum air temperature is at least 25.0°C\". Depending on the place you selected, you might want to apply a different threshold to calculate the summer days index. " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + ":NdOverlay [Variable]\n", + " :Curve [index] (Temperature in °C)" + ] + }, + "execution_count": 14, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "ccaf0711-7739-4969-bd75-2644f2ee7e38" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "tasmax_year_place_xr = tasmax_year_xr[:, yloc, xloc] - 273.15 # Convert Kelvin to °C\n", + "tasmax_year_place_df = pd.DataFrame(index = tasmax_year_place_xr['time'].values, \n", + " columns = ['Model Temperature', 'Summer Day Threshold']) # create the dataframe\n", + "\n", + "tasmax_year_place_df.loc[:, 'Model Temperature'] = tasmax_year_place_xr.values # insert model data into the dataframe\n", + "tasmax_year_place_df.loc[:, 'Summer Day Threshold'] = 25 # insert the threshold into the dataframe\n", + "\n", + "\n", + "# Plot data and define title and legend\n", + "\n", + "tasmax_year_place_df.hvplot.line(y=['Model Temperature', 'Summer Day Threshold'], \n", + " value_label='Temperature in °C', legend='bottom', \n", + " title='Daily maximum Temperature near Surface for '+place_box.value, \n", + " height=500, width=620)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see, the maximum daily temperature is highly variable over the year. As we are using the mean temperature in a model grid cell, the number of summer days might be different from what you would expect at a single location." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "According to the German Weather Service definition, in the ssp370 scenario the MPI-ESM1-2-HR model shows 3 summer days for Hamburg in 2015.\n" + ] + } + ], + "source": [ + "# Summer days index calculation\n", + "\n", + "no_summer_days_model = tasmax_year_place_df[\"Model Temperature\"][tasmax_year_place_df[\"Model Temperature\"] > 25].size # count the number of summer days\n", + "\n", + "\n", + "# Print results in a sentence\n", + "\n", + "print(\"According to the German Weather Service definition, in the ssp370 scenario the \" \n", + " +climate_model +\" model shows \" +str(no_summer_days_model) +\" summer days for \" +str(place_box.value) \n", + " + \" in \" + str(year_box.value) +\".\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Try another location and year](#selection)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/single-user-d4science-dkrzwps/notebooks/temperature_threshold_analysis_new.ipynb:Zone.Identifier b/single-user-d4science-dkrzwps/notebooks/temperature_threshold_analysis_new.ipynb:Zone.Identifier new file mode 100644 index 0000000..a82967c --- /dev/null +++ b/single-user-d4science-dkrzwps/notebooks/temperature_threshold_analysis_new.ipynb:Zone.Identifier @@ -0,0 +1,4 @@ +[ZoneTransfer] +ZoneId=3 
+ReferrerUrl=https://mvre.diva.cloud.awi.de/user/khan@dkrz.de/lab/workspaces/auto-y/tree/work/temperature_threshold_analysis_new.ipynb +HostUrl=https://mvre.diva.cloud.awi.de/user/khan@dkrz.de/files/work/temperature_threshold_analysis_new.ipynb?_xsrf=MnwxOjB8MTA6MTczNzQ2NzcyMXw1Ol94c3JmfDE0ODpkbTU0YW5sZlJqQlZVa2RMWm0xemRrUTJOV2hxWmxWMGRWQkllVnBvVVZwQ1JuVTVkbkprYW1wME1EMDZOekEzWkRaak9UVmhNREppTURCbU1HUmtZalEzWkdRNE5HVmlObUZqT1RFM1l6YzJPR05rT0dNNVkyTTRZalV5TVdWbU1XVTFaREE0WWpVNVpEUXhNZz09fDgwN2UxYjNiMTUyMWQzNGY2ZjhmMWY2MWI4OTNjYzRiYjMzOThmZjNlNGUwNDY3MmE0ZTE4M2Y0MDkzZTNkNjQ