From 8249d5cc087677f87661321f28ecb998bc52d721 Mon Sep 17 00:00:00 2001
From: eurodatacube-submissions
<61697821+eurodatacube-submissions@users.noreply.github.com>
Date: Tue, 25 Oct 2022 17:12:44 +0200
Subject: [PATCH 1/2] Add notebooks/contributions/test_ccadi_UC3.ipynb for pull
request submission
---
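Reviewer note: the first processing cell boils down to normalising dates and station labels on both tables, then an inner pandas join on date/station/bottle/cast. A minimal sketch of that join with hypothetical one-row values, using the column names the notebook matches on (BOPO is the bottle/rosette-position field on the bottle side, flagged in the notebook for renaming to RosPos):

    import pandas as pd

    # hypothetical stand-ins for the ERDDAP nutrient table and the Hyrax bottle table
    nutrient = pd.DataFrame({"sample_date": ["2016-06-09"], "station": ["G110"],
                             "bottle": [7], "cast": [2], "DIC_um_l": [2100.0]})
    btl = pd.DataFrame({"time": ["2016-06-09"], "station": ["G110"],
                        "BOPO": [7], "cast": [2], "CTDTmp90": [1.8]})
    # the notebook first reformats both date columns to '%Y-%m-%d' and prefixes
    # purely numeric station labels with 'G' so the join keys agree
    merged = pd.merge(nutrient, btl, how="inner",
                      left_on=["sample_date", "station", "bottle", "cast"],
                      right_on=["time", "station", "BOPO", "cast"])
    print(merged)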
notebooks/contributions/test_ccadi_UC3.ipynb | 1 +
1 file changed, 1 insertion(+)
create mode 100644 notebooks/contributions/test_ccadi_UC3.ipynb
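Reviewer note: the carbonate-chemistry cell ultimately assembles one pyco2.sys() call from the merged CSV. A minimal sketch with made-up input values; the option codes shown are the notebook's widget defaults (LDK00 carbonic-acid constants = 10, D90a bisulfate = 1, LKB10 borate = 2, PF87 fluoride = 2, 2018 CODATA gas constant = 3):

    import PyCO2SYS as pyco2

    results = pyco2.sys(
        par1=2300.0, par1_type=1,   # total alkalinity, umol/kg
        par2=2100.0, par2_type=2,   # dissolved inorganic carbon, umol/kg
        salinity=33.0, temperature=1.8, pressure=10.0,
        total_silicate=5.0, total_phosphate=0.5,   # optional nutrients
        opt_k_carbonic=10, opt_k_bisulfate=1, opt_total_borate=2,
        opt_k_fluoride=2, opt_gas_constant=3,
    )
    print(results["pH"], results["pCO2"], results["saturation_aragonite"])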
diff --git a/notebooks/contributions/test_ccadi_UC3.ipynb b/notebooks/contributions/test_ccadi_UC3.ipynb
new file mode 100644
index 00000000..81f80eb7
--- /dev/null
+++ b/notebooks/contributions/test_ccadi_UC3.ipynb
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "code", "execution_count": 1, "id": "d3e94585-e44b-443f-bbd5-07dba42214e8", "metadata": {}, "outputs": [], "source": ["import urllib.request as request\n", "import h5py\n", "import os\n", "from ipywidgets import widgets, IntSlider, jslink, interact, interactive, fixed, interact_manual\n", "import markdown\n", "from erddapy import ERDDAP\n", "from ipyleaflet import Map, Marker, GeoData, ImageOverlay, basemaps, basemap_to_tiles, LayersControl, ScaleControl, FullScreenControl, WidgetControl\n", "# import pandas as pd\n", "import numpy as np\n", "# from IPython.display import display\n", "from netCDF4 import num2date\n", "# from datetime import datetime\n", "import geopandas as gpd\n", "########################################################\n", "import sys\n", "import pandas as pd\n", "import ipywidgets as widgets\n", "from IPython.display import Markdown, HTML, Javascript, display, Image\n", "import subprocess\n", "import csv\n", "from __future__ import print_function\n", "# from ipywidgets import interact, interactive, fixed, interact_manual\n", "import csv\n", "import re\n", "import warnings\n", "#from init import *\n", "warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)\n", "def printmd(string):\n", " display(Markdown(string))\n", "###############################################################\n", "from ftplib import FTP\n", "import multiprocessing as mlp\n", "import shutil\n", "import tempfile\n", "import urllib.request\n", "from datetime import datetime, timedelta\n", "\n", "from osgeo import gdal\n", "# import numpy as np\n", "from osgeo import ogr\n", "from osgeo import osr\n", "# import pandas as pd\n", "import pyproj\n", "\n", "from multiprocessing import Manager\n", "# from ipywidgets import widgets, IntSlider, jslink\n", "from ipyleaflet import Map, projections, GeoData, basemap_to_tiles, basemaps, WidgetControl, ScaleControl, FullScreenControl, LayersControl #, ImageOverlay, \n", "import geopandas as gpd\n", "import leafmap\n", "import altair as alt\n", "# import localtileserver # was needed localy to be able to add raster on the leafmap Map."]}, {"cell_type": "code", "execution_count": 2, "id": "ee1fb199-9539-48f5-a84c-4f393d963bdc", "metadata": {}, "outputs": [], "source": ["class merge_btl_nutrient:\n", " \n", " def get_btlfile(self, btl_url, wdir):\n", " request.urlretrieve(btl_url, os.path.join(wdir, \"btl.h5\"))\n", " return 0\n", "\n", " def btl_to_dataframe(self, wdir):\n", " f = h5py.File(os.path.join(wdir, \"btl.h5\"), 'r')\n", " df_btl = pd.DataFrame()\n", " for grp in f:\n", " tempo_list = []\n", " tempo_columns = []\n", " for c in f[grp]:\n", " tempo_columns.append(c)\n", " tempo_list.append(f[grp][c])\n", " list_array = np.transpose(np.array(tempo_list))\n", " tempo_df = pd.DataFrame(list_array, columns=tempo_columns)\n", " tempo_df['station'] = [f[grp].attrs['Station'].strip().split(' ')[-1]] * len(tempo_df)\n", " tempo_df['cast'] = [int(f[grp].attrs['Cast_Number'].strip())] * len(tempo_df)\n", " df_btl = pd.DataFrame.append(tempo_df, df_btl)\n", " f.close()\n", "# # added to extract a csv format of the btl.h5 data to send out to the GUI team\n", "# df_btl.to_csv(os.path.join(wdir, \"btl.csv\"), header=1, index=0)\n", " #######\n", " return df_btl\n", "\n", " def merge(self, df_nutrient, df_btl, file_ge_btl):\n", " ge_time_header = [i for i in list(df_nutrient.columns) if\n", " ((i.lower().__contains__('date')) or \n", " (i.lower().__contains__('time')))].pop()\n", " btl_time_header = [i for i in 
list(df_btl.columns) if\n", " ((i.lower().__contains__('date')) or \n", " (i.lower().__contains__('time')))].pop()\n", " ge_station_header = [i for i in list(df_nutrient.columns) if\n", " i.lower().__contains__('station')].pop()\n", " btl_station_header = [i for i in list(df_btl.columns) if\n", " i.lower().__contains__('station')].pop()\n", " ge_bottle_header = [i for i in list(df_nutrient.columns) if\n", " (i.lower().__contains__('bottle'))].pop()\n", " \"\"\" TODO: bopo should be replaced with RosPos when the data with the corrected variable name will be served on Hyrax.\"\"\"\n", " btl_bottle_header = [i for i in list(df_btl.columns) if\n", " i.lower().__contains__('bopo')].pop() \n", " ge_cast_header = [i for i in list(df_nutrient.columns) if\n", " i.lower().__contains__('cast')].pop()\n", " btl_cast_header = [i for i in list(df_btl.columns) if\n", " i.lower().__contains__('cast')].pop()\n", " ge_jointField = [ge_time_header, ge_station_header, ge_bottle_header, ge_cast_header]\n", " btl_jointField = [btl_time_header, btl_station_header, btl_bottle_header, btl_cast_header]\n", " \n", " #####################################\n", " df_nutrient_header = [h.split(' ')[0] for h in df_nutrient.columns]\n", " df_nutrient.columns = df_nutrient_header\n", " dfnutrient_to_merge = df_nutrient\n", " dfbtl_to_merge = df_btl\n", " dfbtl_to_merge = dfbtl_to_merge.drop(btl_time_header, axis=1)\n", " dfnutrient_to_merge = dfnutrient_to_merge.drop(ge_time_header, axis=1)\n", " dfnutrient_to_merge = dfnutrient_to_merge.drop(ge_station_header, axis=1)\n", " dfnutrient_to_merge[ge_time_header] = pd.to_datetime(df_nutrient[ge_time_header]).dt.strftime('%Y-%m-%d')\n", " # dfnutrient_to_merge[ge_time_header] = df_nutrient[ge_time_header].dt.strftime('%Y-%m-%d')\n", " u = []\n", " for i in df_nutrient[ge_station_header].values:\n", " if i.isdigit():\n", " u.append('G' + i)\n", " else:\n", " u.append(i)\n", " dfnutrient_to_merge[ge_station_header] = u\n", " hdf_time_units = \"days since 1970-01-01 00:00:00\"\n", " list_tmp = []\n", " import cftime\n", " for i in range(len(df_btl[btl_time_header])):\n", " u=num2date(df_btl[btl_time_header].values[i], hdf_time_units)\n", " u=cftime.DatetimeGregorian.strftime(u, '%Y-%m-%d')\n", " list_tmp.append(u)\n", " dfbtl_to_merge[btl_time_header] = list_tmp\n", " '''https://www.datasciencemadesimple.com/join-merge-data-frames-pandas-python/'''\n", " df = pd.merge(dfnutrient_to_merge, dfbtl_to_merge, how=\"inner\", left_on=ge_jointField, right_on=btl_jointField)\n", " df.to_csv(file_ge_btl, header=1, index=0)\n", " del df\n", " return 0\n", "\n", "\n", "class merging_gui_jupiter():\n", " # This is the class where the GUI is made\n", " def __init__(self):\n", " self.gridwindow={} # making an empty grid window\n", " self.vbox_widgets = [] # making an empty vertical box\n", " self.gridwindow['grid'] = widgets.GridspecLayout(1, 1)\n", " #####\n", " self.getBTLbutton = widgets.Button(description=\"retrieve\", layout=widgets.Layout(width='max-content'), button_style='info')\n", " self.getNutrientbutton = widgets.Button(description=\"retrieve\", layout=widgets.Layout(width='max-content'), button_style='info')\n", " self.Continuebutton = widgets.Button(description=\"Continue\", layout=widgets.Layout(width='max-content'))\n", " # BTL file retrieval\n", " layout = widgets.Layout(height='auto', width='125px')\n", " printmd('
# Merging bottle file with the nutrient file
')\n", " # read text\n", " f=open(\"md_texts/nutrient_btl_infos.md\",\"r\")\n", " fc=f.read()\n", " f.close()\n", " text_html1 = markdown.markdown(fc)\n", " del fc\n", " self.gridwindow['text1'] = widgets.HTML(text_html1)\n", "# # Reading the images of the CTD-Rosette ##########\n", " ctd_img = open(\"images/ctd-rosette.jpg\", \"rb\")\n", " ship_img = open(\"images/CCGSAmundsen.png\", 'rb')\n", " ctd = ctd_img.read()\n", " ship = ship_img.read()\n", " gridimage1 = widgets.Image(value=ship, format='jpg', width=300)\n", " Figure1 = widgets.Label(r'\\(\\textbf{Figure 1:}\\)'+' Canadian Coast Guard Ship ' + r'\\(\\textit{Amundsen}\\)', layout=widgets.Layout(height='auto', width='auto'))\n", " gridimage2 = widgets.Image(value=ctd, format='png', width=300)\n", " Figure2 = widgets.Label(r'\\(\\textbf{Figure 2:}\\)'+' CTD-Rosette', layout=widgets.Layout(height='auto', width='auto'))\n", " ship_img.close()\n", " ctd_img.close()\n", " image_vbox1 = widgets.VBox(children=[gridimage1, Figure1])\n", " image_vbox2 = widgets.VBox(children=[gridimage2, Figure2])\n", " self.gridwindow['image'] = widgets.HBox(children=[image_vbox1, image_vbox2])\n", "# # image_vbox = [[gridimage1, gridimage2], ['CCGS Amundsen', 'CTD-Rosette']]\n", "# gridwindow['image'] = widgets.HBox(children=[gridimage2, gridimage1])\n", " ###################################################\n", " self.vbox_widgets.append(self.gridwindow['text1'])\n", " self.vbox_widgets.append(self.gridwindow['image'])\n", " \n", " # read text\n", " f=open(\"md_texts/data_retrieval.md\",\"r\")\n", " fc=f.read()\n", " f.close()\n", " data_retrieval = markdown.markdown(fc)\n", " del fc\n", " self.gridwindow['data_retrieval'] = widgets.HTML(data_retrieval)\n", " self.vbox_widgets.append(self.gridwindow['data_retrieval'])\n", " ## Bottle file retrieval ######\n", " label = widgets.Label('Bottle files', layout=layout)\n", " self.BottleData = widgets.Text(\n", " value=\"http://jorvik.uwaterloo.ca:8080/opendap/data/CCADI/Amundsen_BTL_GreenEdge2016_LEG1.h5\",\n", " layout=widgets.Layout(width='50%')\n", " )\n", " self.gridwindow['bottle'] = widgets.HBox(children=[label, self.BottleData, self.getBTLbutton])\n", " self.vbox_widgets.append(self.gridwindow['bottle'])\n", " label = widgets.Label('Nutrient file', layout=layout)\n", " self.nutrientServer = widgets.Text(\n", " value=\"https://CanWINerddap.ad.umanitoba.ca/erddap\",\n", " layout=widgets.Layout(width='50%')\n", " )\n", " self.gridwindow['nutrientserver'] = widgets.HBox(children=[label, self.nutrientServer, self.getNutrientbutton])\n", " self.vbox_widgets.append(self.gridwindow['nutrientserver'])\n", " \n", " self.list0 = widgets.SelectMultiple(\n", " options=[\"Empty\"],\n", " value=[\"Empty\"],\n", " disabled=False\n", " )\n", "\n", " self.list1 = widgets.SelectMultiple(\n", " options=[\"Empty\"],\n", " value=[\"Empty\"],\n", " disabled=False\n", " )\n", " \n", " self.depthRange = widgets.FloatRangeSlider(\n", " value=[0, 0],\n", " min=0,\n", " max=5000,\n", " step=0.1,\n", " disabled=False,\n", " continuous_update=False,\n", " orientation='horizontal',\n", " readout=True,\n", " readout_format='.1f',\n", " )\n", " \n", " self.outputdir = \"2016_int_btl_csv\"\n", " self.gridwindow['grid'][0, 0] = widgets.VBox(children=self.vbox_widgets) # pass all the content of the vertical box into the left side of the grid\n", " \n", " self.getBTLbutton.on_click(self.getBTLdata)\n", " self.getNutrientbutton.on_click(self.getNutrientdata)\n", " display(self.gridwindow['grid'])\n", " self.Merge_Button=widgets.Button(\n", " 
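# merge trigger; clickMerge is bound to this button later, in continue_to_pyco2sys\n", "            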
description='Merge',\n", " disabled=False,\n", " button_style='', \n", " tooltip='Click me',\n", " icon=''\n", " )\n", " self.merge_btl_nutrient()\n", " \n", " \n", " ###########\n", " \n", " def continue_to_pyco2sys(self): \n", " def on_button_pyco2sys(b):\n", " continueprocess()\n", "\n", " outmerge=widgets.Output()\n", " @outmerge.capture()\n", " def continueprocess():\n", " checkInputfile()\n", "# self.continue_to_sic()\n", " \n", " gridwindow={}\n", " vbox_widgets = []\n", " gridwindow['grid'] = widgets.GridspecLayout(1,1)\n", " layout = widgets.Layout(height='auto', width='125px')\n", " f=open(\"md_texts/variable_Join_list.md\",\"r\")\n", " fc=f.read()\n", " f.close()\n", " text_var_sel = markdown.markdown(fc)\n", " del fc\n", " gridwindow['variable_selection'] = widgets.HTML(text_var_sel)\n", " vbox_widgets.append(gridwindow['variable_selection'])\n", " label = widgets.Label('Bottle_variables', layout=widgets.Layout(width='50%'))\n", " label = widgets.Label('Nutrient variables', layout=widgets.Layout(width='50%'))\n", " gridwindow['bottle variable list'] = widgets.HBox(children=[label, self.list0])\n", "\n", " gridwindow['nutrient variable list'] = widgets.HBox(children=[label, self.list1])\n", " gridwindow['var_list'] = widgets.HBox(children=[gridwindow['bottle variable list'], gridwindow['nutrient variable list']])\n", " vbox_widgets.append(gridwindow['var_list'])\n", " label = widgets.Label('Sample depth:', layout=layout)\n", " # read text\n", " f=open(\"md_texts/variable_meaning.md\",\"r\")\n", " fc=f.read()\n", " f.close()\n", " text_var = markdown.markdown(fc)\n", " del fc\n", " gridwindow['variable_meaning'] = widgets.HTML(text_var)\n", " # read text\n", " f=open(\"md_texts/sample_depth_range.md\",\"r\")\n", " fc=f.read()\n", " f.close()\n", " text_var_sel = markdown.markdown(fc)\n", " del fc\n", " gridwindow['sample_depth_range'] = widgets.HTML(text_var_sel)\n", " vbox_widgets.append(gridwindow['sample_depth_range'])\n", " gridwindow['Sample depth'] = widgets.HBox(children=[label, self.depthRange])\n", " vbox_widgets.append(gridwindow['Sample depth'])\n", "\n", " self.btl = pd.DataFrame()\n", " self.nutrient=pd.DataFrame()\n", " \n", " continue_button1=widgets.Button(\n", " description='Continue',\n", " disabled=False,\n", " button_style='', \n", " tooltip='Click me',\n", " icon=''\n", " )\n", " gridwindow['merge'] = widgets.HBox(children=[self.Merge_Button])\n", " gridwindow['to_pyco2sys'] = widgets.HBox(children=[gridwindow['merge'], continue_button1])\n", " vbox_widgets.append(gridwindow['to_pyco2sys'])\n", " gridwindow['grid'][0, 0] = widgets.VBox(children=vbox_widgets)\n", " self.Merge_Button.on_click(self.clickMerge)\n", " continue_button1.on_click(on_button_pyco2sys)\n", " display(gridwindow['grid'])\n", " display(outmerge)\n", " return 0 \n", "\n", "\n", " \n", " def merge_btl_nutrient(self): \n", " def on_button_continuemerge(b):\n", " continuemerge()\n", "\n", " out=widgets.Output()\n", " @out.capture()\n", " def continuemerge(): \n", " ###### transit to the PyCO2SYS #####\n", " self.continue_to_pyco2sys()\n", " \n", "\n", " self.continue_button=widgets.Button(\n", " description='continue',\n", " disabled=False,\n", " button_style='', \n", " tooltip='Click me',\n", " icon=''\n", " )\n", " self.gridwindow['continue'] = widgets.HBox(children=[self.continue_button])\n", " self.vbox_widgets.append(self.gridwindow['continue'])\n", " self.gridwindow['grid'][0, 0] = widgets.VBox(children=self.vbox_widgets)\n", " self.continue_button.on_click(on_button_continuemerge)\n", " 
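# show the captured Output area so continuemerge's widgets render below the button\n", "        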
display(out)\n", "\n", " def getBTLdata(self,a):\n", " if not os.path.exists(self.outputdir):\n", " os.makedirs(self.outputdir)\n", " merge_btl_nutrient().get_btlfile(self.BottleData.value, self.outputdir)\n", " df_btl=merge_btl_nutrient().btl_to_dataframe(self.outputdir)\n", " self.list0.options=tuple(df_btl.columns)\n", " self.list0.value=[\"BOPO\",\"CTDTmp90\",\"Cast_Number\",\"P_sal_CTD\",\"Pres_Z\",\"depth\",\"latitude\",\"longitude\",\"time\",\"station\"]\n", " self.list0.rows = 24\n", " self.depthRange.min = df_btl['depth'].min()\n", " self.depthRange.max = df_btl['depth'].max()\n", " self.depthRange.value = [self.depthRange.min, self.depthRange.max]\n", " self.getBTLbutton.description=\"Success!\"\n", " self.getBTLbutton.button_style='success'\n", " del df_btl\n", " return 0\n", " \n", " def getNutrientdata(self, a):\n", " if not os.path.exists(self.outputdir):\n", " os.makedirs(self.outputdir)\n", " e_DataSearch = ERDDAP(server=self.nutrientServer.value)\n", " result_search = e_DataSearch.get_search_url(search_for=\"greenedge\", response=\"csv\")\n", " self.datasetID = [k \n", " for k in pd.read_csv(result_search)[\"Dataset ID\"] \n", " if k.lower().__contains__(\"greenedge_nutrient\")].pop()\n", " #self.datasetID = pd.read_csv(result_search)[\"Dataset ID\"][0]\n", " #print(result_search)\n", "\n", " e_datafetch = ERDDAP(server=self.nutrientServer.value, protocol=\"tabledap\", response=\"csv\")\n", " e_datafetch.dataset_id = self.datasetID\n", "\n", " df_nutrient = e_datafetch.to_pandas(parse_dates=True)\n", " file_ge = os.path.join(self.outputdir, f'{self.datasetID}.csv') ## Nutrient file name \n", " df_nutrient.to_csv(file_ge, index=False, header=True)\n", " self.list1.options=df_nutrient.columns\n", " \"\"\" Adjustment done in order to look easily for the variables needed in the data field\"\"\"\n", " station = [k for k in df_nutrient.columns if k.lower().__contains__(\"station\")].pop()\n", " sample_date = [k for k in df_nutrient.columns if k.lower().__contains__(\"sample_date\")].pop()\n", " #sample_date = sample_date.split(\" \")[0]\n", " sample_depth = [k for k in df_nutrient.columns if k.lower().__contains__(\"sample_depth\")].pop()\n", " #sample_depth = sample_depth.split(\" \")[0]\n", " cast = [k for k in df_nutrient.columns if k.lower().__contains__(\"cast\")].pop()\n", " bottle = [k for k in df_nutrient.columns if k.lower().__contains__(\"bottle\")].pop()\n", " dic_um = [k for k in df_nutrient.columns if k.lower().__contains__(\"dic_um\")].pop()\n", " #dic_um = dic_um.split(\" \")[0]\n", " totalk = [k for k in df_nutrient.columns if k.lower().__contains__(\"totalk_l_um\")].pop()\n", " #totalk = totalk.split(\" \")[0]\n", " self.list1.value=[station, sample_date, sample_depth, cast, bottle,dic_um, totalk]\n", " self.list1.rows = 24\n", " self.getNutrientbutton.description=\"Success!\"\n", " self.getNutrientbutton.button_style='success'\n", " del df_nutrient\n", " return 0\n", "\n", " \n", " def clickMerge(self, a):\n", " file_ge_btl = os.path.join(self.outputdir, 'merged_btl_nutrient.csv') ## Merged file name to be fed to the PyCO2SYS\n", " objectsForMerging = merge_btl_nutrient()\n", " if os.path.exists(file_ge_btl):\n", " os.remove(file_ge_btl)\n", " df_btl = objectsForMerging.btl_to_dataframe(self.outputdir)\n", " df_btl.reset_index(drop=True, inplace=True)\n", " cond = (df_btl[\"depth\"].values[:]>=self.depthRange.value[0]) & (df_btl[\"depth\"].values[:]<=self.depthRange.value[1])\n", " df_btl = df_btl.loc[cond]\n", " df_nutrient = 
pd.read_csv(os.path.join(self.outputdir, f'{self.datasetID}.csv'), header=0) \n", " objectsForMerging.merge(df_nutrient=df_nutrient[list(self.list1.value)], df_btl=df_btl[list(self.list0.value)], file_ge_btl=file_ge_btl)\n", " del df_nutrient, df_btl\n", " else:\n", " df_btl = objectsForMerging.btl_to_dataframe(self.outputdir)\n", " df_btl.reset_index(drop=True, inplace=True)\n", " cond = (df_btl[\"depth\"].values[:]>=self.depthRange.value[0]) & (df_btl[\"depth\"].values[:]<=self.depthRange.value[1])\n", " df_btl = df_btl.loc[cond]\n", " df_nutrient = pd.read_csv(os.path.join(self.outputdir, f'{self.datasetID}.csv'), header=0) \n", " objectsForMerging.merge(df_nutrient=df_nutrient[list(self.list1.value)], df_btl=df_btl[list(self.list0.value)], file_ge_btl=file_ge_btl)\n", " del df_nutrient, df_btl\n", " self.Merge_Button.description=\"Done\"\n", " self.Merge_Button.button_style=\"success\"\n", " return 0\n", "\n"]}, {"cell_type": "code", "execution_count": 3, "id": "fde05a6e-c6d1-451e-96e2-055ef4fade3d", "metadata": {}, "outputs": [], "source": ["#Define the required parameters\n", "\n", "def checkInputfile():\n", " #Create an empty list for all the required parameters in the file. \n", " req_param_inFile=[]\n", " opt_param_inFile=[]\n", " \n", " pd.set_option('display.max_columns', 50)\n", " \n", " #-------------------INPUT FILE-------------------------------------------\n", " input_file =os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient.csv\") \n", " df = pd.read_csv(input_file)\n", " df=df.reset_index(drop=True)\n", " #------------------------------------------------------------------------\n", " \n", " #Checking for the stadardized names in the input file to automatically pull out all the required, optional and mandatory parameters present\n", " \n", " # 1. KEY PARAMETERS \n", " # Parameters- Total Alkalinity, DIC, PH, PCO2, fCO2, CO232, biCO2\n", " \n", " standardizedNames=['TotAlk_l_um_l','DIC_um_l','pH','pCO2','fCO2','CO232','biCO2'] #BODC standardized names\n", " fullNames=['Total alkalinity (umolkg1)','Dissolved inorganic carbon (umolkg1)','Partial pressure of carbon dioxide (pCO2) (uatm)',\\\n", " 'Fugacity of carbon dioxide (fCo2) (uatm)','Carbonate ion concentration (CO32) (umolkg1)','Bicarbonate ion (umol kg1)'] # Full name that will show up in widget\n", " \n", " \n", " for name, fname in zip(standardizedNames, fullNames): \n", " if name in df.columns:\n", " req_param_inFile.append(fname) #Append the names of all the key parameters in the input file\n", " \n", " \n", " # 2. OPTIONAL PARAMETERS \n", " # Parameters- SiOx, PO4, Ammonia, Sulfide\n", " \n", " standardizedNames=['SiOx_um_l','PO4_Filt_um_l','Ammonia','Sulfide'] #BODC standardized names\n", " fullNames=['Total Silicate (umolkg1)','Total Phosphate (SRP) (umolkg1)','Total Ammonia (umolkg1)','Total Sulfide (umolkg1)'] # Full name that will show up in widget\n", " \n", " for name, fname in zip(standardizedNames, fullNames): \n", " if name in df.columns:\n", " opt_param_inFile.append(fname) #Append the names of all the key parameters in the input file\n", " \n", " \n", " getUserParameters(df, req_param_inFile, opt_param_inFile) \n"]}, {"cell_type": "code", "execution_count": 4, "id": "85597719-a3eb-4f3f-928a-7eea387ba0c9", "metadata": {}, "outputs": [], "source": ["def getUserParameters(df,req_param_inFile,opt_param_inFile):\n", "\n", " #The user will be presented with the parameters automatically pulled out from the input file. They will have a chance to make changes to the selections. 
\n", " #Only exception is if there are only two req parameters in input file, they will not be able to make any changes/selctions in this case.\n", " \n", " #Key Parameters Widget\n", " req_param_user=widgets.SelectMultiple(\n", " options=req_param_inFile,\n", " #value=req_param_inFile,\n", " #description='Key Parameters:',\n", " disabled=False,\n", " )\n", " req_param_user.layout.margin='0.5% 0% 5% 0%'\n", " req_param_user.layout.width='20%'\n", " req_param_user.layout.height='70%'\n", "\n", "\n", " #Optional Parameters Widget\n", " opt_param_user=widgets.SelectMultiple(\n", " options=opt_param_inFile,\n", " value=opt_param_inFile,\n", " #description='Optional Parameters:',\n", " disabled=False,\n", " )\n", " opt_param_user.layout.margin='0.5% 0% 3% 0%'\n", " opt_param_user.layout.width='20%'\n", "\n", " cont_button1=widgets.Button(\n", " description='Continue',\n", " disabled=False,\n", " button_style='', \n", " tooltip='Click me',\n", " icon=''\n", " )\n", "\n", " \n", " # Onclick function for the first Continue button widget\n", " output = widgets.Output()\n", " @output.capture()\n", " def on_button_clicked(b):\n", " getConstants(df, req_param_user, opt_param_user, req_param_inFile, opt_param_inFile)\n", " \n", "\n", " # Key parameters, aka carbonate system parameters\n", " printmd('###
Carbonate System Parameters ###')\n", "    printmd('More information on these arguments can be found [here](https://pyco2sys.readthedocs.io/en/latest/co2sys_nd/#carbonate-system-parameters).')\n", "    # if there is only one key parameter in the input file, or none at all\n", "    if len(req_param_inFile)<2:\n", "        printmd(\"
**There are not enough key parameters to calculate the full carbonate system. Please check the input file and try again.**
\")\n", " sys.exit(-1)\n", " \n", " #If only two key parameters in the input file, automatically use those two\n", " if len(req_param_inFile)==2:\n", " printmd(\"
**The following key carbonate parameters were found in the input file and will be used in calculations.**
\")\n", "\n", " for name in req_param_inFile:\n", " printmd('- {}'.format(name))\n", " \n", " #If there are more than two key parameters in the input file, ask user to select any two\n", " if len(req_param_inFile)>2:\n", " printmd(\"
**The following key carbonate parameters were found in the input file. Choose any two parameters.**
\")\n", " display(req_param_user) #display widget\n", "\n", "\n", " # Optional parameters, aka Nutrients & solutes\n", " printmd('###
Nutrients and other solutes ###')\n", "    printmd('More information on these arguments can be found [here](https://pyco2sys.readthedocs.io/en/latest/co2sys_nd/#nutrients-and-other-solutes).')\n", "    \n", "    # If there is at least one optional parameter in the input file, display them and ask the user to select any of them. All are selected by default in the widget\n", "    if len(opt_param_inFile)>0:\n", "        printmd(\"
**The following nutrient parameters are in the input file. Choose any parameter(s).**\")\n", "        display(opt_param_user) #display widget\n", "    \n", "    \n", "    display(cont_button1) #display continue button\n", "    cont_button1.on_click(on_button_clicked) #Call onclick function\n", "    display(output) #display widget output when button is clicked\n", "    "]}, {"cell_type": "code", "execution_count": 5, "id": "b305cb57-05ea-42b0-927b-ca9e7ea0547b", "metadata": {}, "outputs": [], "source": ["def getConstants(df, req_param_user, opt_param_user, req_param_inFile, opt_param_inFile): \n", "    \n", "    # Constants\n", "    printmd('###
Settings for constants ###')\n", "    printmd('More information on these constants can be found [here](https://pyco2sys.readthedocs.io/en/latest/co2sys_nd/#settings). Default constants chosen based on [Jiang et al., 2022](https://www.frontiersin.org/articles/10.3389/fmars.2021.705638/full).')\n", "    #Widgets for the different constants\n", "    phstr = widgets.Output()\n", "    @phstr.capture()\n", "    def constStrings1():\n", "        printmd(\"
**Choose the pH scale:**\")\n", " \n", " constStrings1()\n", "\n", " option_list=['1. Total',\n", " '2. Seawater',\n", " '3. Free',\n", " '4. NBS, i.e. relative to NBS/NIST reference standards']\n", " \n", " #PH Scale\n", " phscale=widgets.RadioButtons(\n", " options=option_list, \n", " disabled=False,\n", " layout={'width': 'max-content'},\n", " )\n", " phscale.layout.margin='0.5% 1% 3% 0%'\n", " #phscale.layout.width='40%' \n", "\n", "\n", " # Carbonic Acid Dissociation\n", " k1k2str = widgets.Output()\n", " @k1k2str.capture()\n", " def constStrings2():\n", " printmd(\"**Choose the set of equilibrium constant parameterisations to model carbonic acid dissociation:**\")\n", " constStrings2()\n", "\n", " option_list=['1. RRV93 (0 < T < 45 \u00b0C, 5 < S < 45, Total scale, artificial seawater).',\n", " '2. GP89 (\u22121 < T < 40 \u00b0C, 10 < S < 50, Seawater scale, artificial seawater).',\n", " '3. H73a and H73b refit by DM87 (2 < T < 35 \u00b0C, 20 < S < 40, Seawater scale, artificial seawater).',\n", " '4. MCHP73 refit by DM87 (2 < T < 35 \u00b0C, 20 < S < 40, Seawater scale, real seawater).',\n", " '5. H73a, H73b and MCHP73 refit by DM87(2 < T < 35 \u00b0C, 20 < S < 40, Seawater scale, real seawater)',\n", " '6. MCHP73 aka \"GEOSECS\" (2 < T < 35 \u00b0C, 19 < S < 43, NBS scale, real seawater).',\n", " '7. MCHP73 without certain species aka \"Peng\" (2 < T < 35 \u00b0C, 19 < S < 43, NBS scale, real seawater).',\n", " '8. M79 (0 < T < 50 \u00b0C, S = 0, freshwater only).', \n", " '9. CW98 (2 < T < 30 \u00b0C, 0 < S < 40, NBS scale, real estuarine seawater).',\n", " '10. LDK00 (2 < T < 35 \u00b0C, 19 < S < 43, Total scale, real seawater).',\n", " '11. MM02 (0 < T < 45 \u00b0C, 5 < S < 42, Seawater scale, real seawater).',\n", " '12. MPL02 (\u22121.6 < T < 35 \u00b0C, 34 < S < 37, Seawater scale, field measurements).',\n", " '13. MGH06 (0 < T < 50 \u00b0C, 1 < S < 50, Seawater scale, real seawater).',\n", " '14. M10 (0 < T < 50 \u00b0C, 1 < S < 50, Seawater scale, real seawater).',\n", " '15. WMW14 (0 < T < 45 \u00b0C, 0 < S < 45, Seawater scale, real seawater).',\n", " '16. SLH20 (\u22121.67 < T < 31.80 \u00b0C, 30.73 < S < 37.57, Total scale, field measurements).',\n", " '17. SB21 (15 < T < 35 \u00b0C, 19.6 < S < 41, Total scale, real seawater).']\n", " \n", " k1k2=widgets.RadioButtons(\n", " options=option_list, \n", " value='10. LDK00 (2 < T < 35 \u00b0C, 19 < S < 43, Total scale, real seawater).',\n", " #rows=len(option_list),\n", " layout={'width': 'max-content'},\n", " disabled=False,\n", " )\n", " k1k2.layout.margin='0.5% 1% 3% 0%'\n", " #k1k2.layout.width='50%'\n", " #k1k2.layout.height='100%'\n", "\n", " # Bisulfate ion dissociation \n", " kso4str = widgets.Output()\n", " @kso4str.capture()\n", " def constStrings3():\n", " printmd(\"**Choose the equilibrium constant parameterisations to model bisulfate ion dissociation:**\")\n", " constStrings3()\n", "\n", " option_list=['1. D90a: Dickson (1990) J. Chem. Thermodyn.',\n", " '2. KRCB77: Khoo et al. (1977) Anal. Chem.',\n", " '3. WM13: Waters & Millero (2013) Mar. Chem./ WMW14: Waters et al. (2014) Mar. Chem.']\n", " \n", " kso4=widgets.RadioButtons(\n", " options=option_list, \n", " value='1. D90a: Dickson (1990) J. Chem. 
Thermodyn.',\n", " layout={'width': 'max-content'},\n", " #description='Parameter:',\n", " disabled=False,\n", " )\n", " kso4.layout.margin='0.5% 1% 3% 0%'\n", " #kso4.layout.width='40%'\n", "\n", " # Total borate \n", " bostr = widgets.Output()\n", " @bostr.capture()\n", " def constStrings4():\n", " printmd(\"**Choose which boron:salinity relationship to use to estimate total borate:**\")\n", " constStrings4()\n", "\n", " option_list=['1. U74: Uppstr\u00f6m (1974) DeepSea Res.',\n", " '2. LKB10: Lee et al. (2010) Geochim. Cosmochim. Acta']\n", " \n", " bo=widgets.RadioButtons(\n", " options=option_list, \n", " value='2. LKB10: Lee et al. (2010) Geochim. Cosmochim. Acta',\n", " layout={'width': 'max-content'},\n", " disabled=False,\n", " )\n", " bo.layout.margin='0.5% 1% 3% 0%'\n", " # bo.layout.width='40%'\n", " \n", " # hydrogen fluoride dissociation\n", " hfstr = widgets.Output()\n", " @hfstr.capture()\n", " def constStrings5():\n", " printmd(\"**Choose which which equilibrium constant parameterisation to use for hydrogen fluoride dissociation:**\")\n", " constStrings5()\n", "\n", " option_list=['1. DR79: Dickson & Riley (1979) Mar. Chem.',\n", " '2. PF87: Perez & Fraga (1987) Mar. Chem.']\n", " \n", " hf=widgets.RadioButtons(\n", " options=option_list, \n", " value='2. PF87: Perez & Fraga (1987) Mar. Chem.',\n", " layout={'width': 'max-content'},\n", " disabled=False,\n", " )\n", " hf.layout.margin='0.5% 1% 3% 0%'\n", " # hf.layout.width='40%' \n", "\n", " # opt_gas_constant\n", " gcstr = widgets.Output()\n", " @gcstr.capture()\n", " def constStrings6():\n", " printmd(\"**Choose which value to use for the gas constant:**\")\n", " constStrings6()\n", " option_list=['1. DOEv2',\n", " '2. DOEv3',\n", " '3. 2018 CODATA']\n", " \n", " gc=widgets.RadioButtons(\n", " options=option_list, \n", " value='3. 2018 CODATA',\n", " layout={'width': 'max-content'},\n", " disabled=False,\n", " )\n", " gc.layout.margin='0.5% 1% 3% 0%'\n", " # gc.layout.width='40%' \n", " \n", " #Continue button On-click function\n", " output = widgets.Output()\n", " @output.capture()\n", " def on_button_clicked(b): \n", " runPyco2sys(df,req_param_user, opt_param_user, req_param_inFile, opt_param_inFile, phscale, k1k2, kso4, bo, hf,gc)\n", "\n", " # Button widget\n", " button2=widgets.Button(\n", " description='Continue',\n", " disabled=False,\n", " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", " tooltip='Click me',\n", " icon=''\n", " )\n", "\n", " box2 = widgets.VBox([phstr,phscale, k1k2str, k1k2, kso4str, kso4, bostr, bo, hfstr, hf, gcstr,gc, button2])\n", " display(box2)\n", "\n", " button2.on_click(on_button_clicked)\n", " display(output)"]}, {"cell_type": "code", "execution_count": 6, "id": "a5b6eb4f-eaac-4575-becd-701685897e6c", "metadata": {}, "outputs": [], "source": ["def runPyco2sys(df,req_param_user, opt_param_user, req_param_inFile, opt_param_inFile, phscale, k1k2, kso4, bo, hf,gc):\n", "\n", "\n", " #Build the argument list for pyco2sys to ingest\n", " #The argument names are defined in the PyCO2 sys documentation.\n", " kwargs={}\n", " \n", "\n", " #KEY PARAMETERS\n", " \n", " # pyco2sys labels the two key parameters chosen as par 1 and par 2. 'par1check' checks if par1 has yet been assigned, otherwise it sets a key parameter to par 2\n", " par1check=\"False\"\n", " par2check=\"False\"\n", "\n", " \n", " #Check if the parameter was chosen by the user, or if it was automatically selected (only two parameters were in the file). 
In that case the variable would not be in 'req_param_user' from the widget.\n", " #If the varibale is to be used in the calculation, then get the data from the input file (saved as the data frame df)\n", " #The variable is then added to the arguments for pyco2sys\n", " \n", " substrings=['alkalinity','Dissolved inorganic carbon','pH','Partial pressure of carbon dioxide', 'Fugacity of carbon dioxide','Carbonate ion concentration', 'Bicarbonate Ion']\n", " standardizedNames=['TotAlk_l_um_l','DIC_um_l','pH','pCO2','fCO2','CO232','biCO2']\n", " types=[1,2,3,4,5,6,7] #These are the different types according to pyco2sys documentation \n", " \n", " \n", " for name, substr, t in zip(standardizedNames, substrings, types): # Loop through the substrings and the standardized names \n", " if par1check==\"False\" or par2check==\"False\": #Check if either par1 or par2 is false. We need at least two key paramaters (par1 and par2 represent these two parameters)\n", " if any(substr in string for string in req_param_inFile): #If it is in the input file\n", " if len(req_param_inFile)==2: #If it is one of only two key variables in the input file (here it would not be in 'req_param_user.value' as it would be automatically selected- no widget used)\n", " \n", " if par1check==\"True\": \n", " kwargs['par2']=df[name].to_numpy(dtype=float) #Get the data using the standardized name from the data frame (input file)\n", " kwargs['par2_type']=t\n", " par2check=\"True\"\n", " else:\n", " kwargs['par1']=df[name].to_numpy(dtype=float) \n", " kwargs['par1_type']=t\n", " par1check=\"True\"\n", "\n", " elif len(req_param_inFile)>2: # If there are more than two key parameters in the input file\n", " if any(substr in string for string in req_param_user.value): #If there are more than two key variables in the file, then check if the user actually selected this varibale\n", " if par1check==\"True\": \n", " kwargs['par2']=df[name].to_numpy(dtype=float) \n", " kwargs['par2_type']=t\n", " par2check=\"True\"\n", " else:\n", " kwargs['par1']=df[name].to_numpy(dtype=float) \n", " kwargs['par1_type']=t\n", " par1check=\"True\"\n", "\n", "\n", "\n", " \n", " # OPTIONAL PARAMETERS\n", " \n", " substrings_opt=['Silicate','Phosphate','Ammonia','Sulfide' ]\n", " standardizedNames=['SiOx_um_l','PO4_Filt_um_l','Ammonia','Sulfide']\n", " pyco2sysNames=['total_silicate','total_phosphate','total_phosphate','total_sulfide'] # Names that pyco2sys expects in the argument list\n", " \n", " \n", " if len(opt_param_inFile)>0: # if there is at least one optional paramter in the input file. 
\n", " for name, substr, pName in zip(standardizedNames, substrings_opt, pyco2sysNames): \n", " if any(substr in string for string in opt_param_user.value): # If the optional parameter was chosen by the user\n", " kwargs[pName]=df[name].to_numpy(dtype=float) # Get the data using the standardized name\n", " \n", "\n", " \n", " # MANDATORY PARAMETERS\n", "\n", " # Also check if the user wanted to use any other output temperature and pressure in the calculations.\n", " # If they did not, the temp_out and press_out would be 'nan'\n", " \n", " #Set the output temperature and pressure to nan\n", " Temperature_out=float('nan')\n", " Pressure_out=float('nan')\n", " \n", " Temperature=df['CTDTmp90'].to_numpy(dtype=float)\n", " kwargs['temperature']=Temperature\n", " #if np.isnan(Temperature_out)==False: \n", " kwargs['temperature_out']=Temperature_out\n", "\n", " Pressure=df['Pres_Z'].to_numpy(dtype=float)\n", " kwargs['pressure']=Pressure \n", " #if np.isnan(Pressure_out)==False: \n", " kwargs['temperature_out']=Pressure_out \n", "\n", " Salinity=df['P_sal_CTD'].to_numpy(dtype=float)\n", " kwargs['salinity']=Salinity \n", "\n", "\n", "\n", " #Get the values of the widget selections for the different CONSTANTS. All parameters are selected by default.\n", " \n", " k1k2Value=k1k2.value\n", " kso4Value=kso4.value\n", " boValue=bo.value\n", " hfValue=hf.value\n", " phscaleValue=phscale.value\n", " gcValue=gc.value\n", "\n", " kso4Value_temp=kso4Value[:2]\n", " boValue_temp=boValue[:2]\n", " hfValue_temp=hfValue[:2]\n", " k1k2Value_temp=k1k2Value[:2]\n", " phscaleValue_temp=phscaleValue[:2]\n", " gcValue_temp=gcValue[:2]\n", " \n", " if '.' in kso4Value_temp: \n", " kso4Value_temp=kso4Value_temp[:1]\n", " \n", " if '.' in boValue_temp: \n", " boValue_temp=boValue_temp[:1] \n", "\n", " if '.' in hfValue_temp: \n", " hfValue_temp=hfValue_temp[:1] \n", " \n", " if '.' in k1k2Value_temp: \n", " k1k2Value_temp=k1k2Value_temp[:1]\n", " \n", " if '.' in phscaleValue_temp: \n", " phscaleValue_temp=phscaleValue_temp[:1]\n", " \n", " if '.' in gcValue_temp:\n", " gcValue_temp=gcValue_temp[:1]\n", " \n", " k1k2Value=int(k1k2Value_temp)\n", " kso4Value=int(kso4Value_temp)\n", " boValue=int(boValue_temp)\n", " hfValue=int(hfValue_temp)\n", " phscaleValue=int(phscaleValue_temp)\n", " gcValue=int(gcValue_temp)\n", " \n", " #Add them as arguments for pyco2sys\n", " kwargs['opt_k_carbonic']=k1k2Value\n", " kwargs['opt_k_bisulfate']=kso4Value\n", " kwargs['opt_total_borate']=boValue\n", " kwargs['opt_k_fluoride']=hfValue\n", " kwargs['opt_gas_constant']=gcValue\n", "\n", "\n", " # Import PyCO2SYS\n", " import PyCO2SYS as pyco2\n", "\n", " # Run pyCO2SYS!\n", " output_dict = pyco2.sys(**kwargs)\n", "\n", " \n", " Output(df,output_dict)"]}, {"cell_type": "code", "execution_count": 7, "id": "8fafc9c0-74e1-4966-9e73-1a9f4b9b56c1", "metadata": {}, "outputs": [], "source": ["def Output(df,output_dict):\n", " \n", "# ## Added to make the \"continue button\" to continue the process into the Sea Ice Concentration retrieval: \n", " def on_button_sic(b):\n", " continue_sic()\n", "\n", " outsic=widgets.Output()\n", " @outsic.capture()\n", " def continue_sic():\n", " ccadi_uc3_mapping() # sea ice concentration processing\n", "\n", " ###########################################\n", " \n", " \n", " #The dictionary output_dict is uneven. Some elements are just a single int, string or float. 
The length of 'par1' will always be the longest length (it holds the first of the two key parameters from the input file)\n", " #For the values in the dictionary that are not single values, they are arrays. \n", " #To be able to create even data frames, we need to seperate the values in the array and create a list.\n", " #Otherise, the array is saved as one value for each key, instad of a list of multiple values.\n", " \n", " \n", " longlength=len(output_dict['par1'])\n", " newlist=[]\n", "\n", " for val in output_dict.values():\n", "\n", " #if value is a single integer\"\n", " if isinstance(val, int):\n", " list0=[val] * longlength\n", " newlist.append(list0)\n", "\n", " elif isinstance(val, str):\n", " list1=[val] * longlength\n", " newlist.append(list1)\n", "\n", " elif isinstance(val, float):\n", " list2=[val] * longlength\n", " newlist.append(list2)\n", " else:\n", " arr=val\n", " list3 = arr.tolist()\n", " newlist.append(list3)\n", " \n", " \n", " #Creae a new dict that has keys associated with a list of values, all of the same length. \n", " newdict={}\n", " i=0\n", " for key in output_dict.keys():\n", " newdict[key]=newlist[i]\n", " i=i+1\n", "\n", "\n", " #Create a new dataframe and save as csv.\n", " output_df=pd.DataFrame.from_dict(newdict)\n", "\n", " #Merge this resulting data frame with extra variables from the input file that were not used in calculations\n", " cols_to_use = df.columns.difference(output_df.columns) #variales that are different from those in output file\n", " input_subset=df[cols_to_use]\n", " merged_df = pd.concat([output_df,input_subset] , axis=1) #Merged dataframe\n", "\n", " \n", " # Organize data frame so that specific varibales are at the front\n", " front_metadata=['project name','platform name','Cruise', 'Station','sample date','TIME','latitude','longitude','sample depth','Cast','Bottle']\n", " \n", " #Loop through the list of metadata variables that should be at the front\n", " col_position=-1\n", " for var in front_metadata: \n", " for col in merged_df.columns: # Loop through all the columns in dataframe\n", " stripped_string = re.sub(\"[^0-9a-zA-Z]+\", \" \",col) # Strip the column headers of all non- laphanumeric characters\n", " if var.lower() in stripped_string.lower(): # Check for column name regardless of case\n", " popped_col_data=merged_df.pop(col) # Pop the column from daat frame\n", " col_position=col_position+1 # Find the next front position\n", " merged_df.insert(col_position, col, popped_col_data) # Place variable at position\n", "\n", " front_data=['saturation_aragonite', 'saturation_aragonite_out','saturation_calcite','saturation_calcite_out','pCO2','fCO2','bicarbonate','pH_total']\n", "\n", " #Loop through the list of calculated carbonate chemistry variables that should be at the front\n", " for var in front_data: \n", " for col in merged_df.columns: # Loop through all the columns in dataframe\n", " if col==var: \n", " popped_col_data=merged_df.pop(col) # Pop the column from daat frame\n", " col_position=col_position+1 # Find the next front position\n", " merged_df.insert(col_position, col, popped_col_data) # Place variable at position\n", " \n", " \n", " #Remove par1, par2, par1 and par2 types- added by pyco2, not needed by user \n", " merged_df.pop('par1')\n", " merged_df.pop('par2')\n", " merged_df.pop('par1_type')\n", " merged_df.pop('par2_type')\n", "\n", " #Remove duplicate columns from final data frame\n", " duplicateColumnNames = list()\n", "\n", " for x in range(merged_df.shape[1]): # Iterate over all the columns in 
dataframe\n", " col_name1= merged_df.columns[x] # Select column at xth index.\n", "\n", " for y in range(x + 1, merged_df.shape[1]): # Iterate over all the columns in DataFrame from (x+1)th index till end\n", " col_name2= merged_df.columns[y]\n", " \n", " if col_name1.lower()==col_name2.lower(): # Check if column names are the same regardless of case\n", " duplicateColumnNames.append(col_name1)\n", " continue\n", " \n", " if '.1' in col_name1: # Check if there is a duplicate (same case), pandas will save this with a .1 at the end of the duplicated variable\n", " col_name1_stripped=col_name1.strip('.1') # Remove .1 and check again for equality \n", " if col_name1_stripped.lower()==col_name2.lower():\n", " duplicateColumnNames.append(col_name1)\n", " continue\n", " \n", " if '.1' in col_name2:\n", " col_name2_stripped=col_name2.strip('.1')\n", " if col_name1.lower()==col_name2_stripped.lower():\n", " duplicateColumnNames.append(col_name2)\n", " \n", " merged_df = merged_df.drop(columns=duplicateColumnNames) #Drop all duplicates\n", " merged_df=merged_df.dropna(axis=1,how='all') #Drop all empty columns\n", "\n", " # OUTPUT FILE----------------------------------------------------------------------\n", " if os.path.isfile(os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"))==True: \n", " os.remove(os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"))\n", " merged_df.to_csv(os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"), index=False) \n", " # OUTPUT FILE----------------------------------------------------------------------\n", "\n", " \n", " printmd('**
PyCO2SYS ran successfully! Output file is saved as merged_btl_nutrient_pyco2sys.csv**')\n", "    printmd(\"
**Retrieving ice concentration now...**\")\n", " \n", " # add the \"continue button\" on the GUI ######################################\n", " gridwindow={}\n", " vbox_widgets = []\n", " gridwindow['grid'] = widgets.GridspecLayout(1,1)\n", "\n", " continue_button2=widgets.Button(\n", " description='Continue',\n", " disabled=False,\n", " button_style='', \n", " tooltip='Click me',\n", " icon=''\n", " )\n", " gridwindow['to_sic'] = widgets.HBox(children=[continue_button2])\n", " vbox_widgets.append(gridwindow['to_sic'])\n", " gridwindow['grid'][0, 0] = widgets.VBox(children=vbox_widgets)\n", " display(gridwindow['grid'])\n", " continue_button2.on_click(on_button_sic)\n", " display(outsic)\n", " ##################################################\n", "\n", " "]}, {"cell_type": "code", "execution_count": 8, "id": "ee45f378-f39f-4d89-86e1-5aa09db5cfec", "metadata": {}, "outputs": [], "source": ["L = mlp.Lock()\n", "class addSeaIceConcentration:\n", " def __init__(self, inputfile):\n", " self.inputfile = inputfile\n", "\n", " def createShapefile(self, df, output_shp, time_header, prglabel):\n", " ''' This function creates a shapefile from the geographical locations inside the input CSV file.\n", " The coordinate system used for the output shapetile is the World Geodetic System (WGS) 1984.\n", " Both Latitude and Longitude are in decimal degrees'''\n", " prglabel.value = 'Creating the final shapefile...'\n", " shpfile = os.path.join(os.path.dirname(self.inputfile), output_shp, output_shp + '.shp')\n", " if not os.path.exists(os.path.join(os.path.dirname(self.inputfile), output_shp)):\n", " os.makedirs(os.path.join(os.path.dirname(self.inputfile), output_shp))\n", " driver = ogr.GetDriverByName(\"ESRI Shapefile\")\n", " if os.path.exists(shpfile):\n", " driver.DeleteDataSource(shpfile)\n", " ds = driver.CreateDataSource(shpfile)\n", " spref = osr.SpatialReference()\n", " spref.ImportFromEPSG(4326)\n", " layer = ds.CreateLayer('StationsLocations', spref, ogr.wkbPoint)\n", " # create field to the layer\n", " for c in df.columns:\n", " u = list(df[c])\n", " vint = [i for i in u if isinstance(i, int)]\n", " vfloat = [i for i in u if isinstance(i, float)]\n", " if c.__contains__(time_header):\n", " layer.CreateField(ogr.FieldDefn(time_header, ogr.OFTDateTime))\n", " elif c.__contains__('CIS_dates'):\n", " layer.CreateField(ogr.FieldDefn('CIS_dates', ogr.OFTDateTime))\n", " elif c.__contains__('Time'):\n", " fieldname = ogr.FieldDefn('Time UTC', ogr.OFTString)\n", " fieldname.SetWidth(20)\n", " layer.CreateField(fieldname)\n", " elif len(u) == len(vint):\n", " df[c] = df[c].astype(float)\n", " ##############################################\n", " fieldname = ogr.FieldDefn(c, ogr.OFTInteger)\n", " fieldname.SetPrecision(0)\n", " layer.CreateField(fieldname)\n", " elif len(u) == len(vfloat):\n", " fieldname = ogr.FieldDefn(c, ogr.OFTReal)\n", " fieldname.SetPrecision(6)\n", " layer.CreateField(fieldname)\n", " else:\n", " df[c].astype(str)\n", " fieldname = ogr.FieldDefn(c, ogr.OFTString)\n", " fieldname.SetWidth(30)\n", " layer.CreateField(fieldname)\n", "\n", " c_fid = 0\n", " ld = layer.GetLayerDefn()\n", " lon = [i for i in df.columns if i.__contains__('longitude')].pop()\n", " lat = [i for i in df.columns if i.__contains__('latitude')].pop()\n", " for i in df.index:\n", " # create new point object\n", " point = ogr.Geometry(ogr.wkbPoint)\n", " point.AddPoint(float(df[lon].values[i]), float(df[lat].values[i]))\n", " # create new feature\n", " featureDfn = layer.GetLayerDefn()\n", " feature = 
ogr.Feature(featureDfn)\n", " feature.SetGeometry(point)\n", " for k in range(0, df.columns.__len__()):\n", " fieldName = ld.GetFieldDefn(k).GetName()\n", " feature.SetField(fieldName, df[df.columns[k]].values[i])\n", " c_fid += 1\n", " # add the new feature to the new layer\n", " layer.CreateFeature(feature)\n", " del layer, ds, df\n", " return 0\n", "\n", " def extractFromUniBremenAMSR2(self, prm):\n", " ''' Sea Ice Concentration (SIC) from Bremen University are downloaded. The SIC located at each geographical\n", " coordinate indicated in the input csv file are extracted and then added as an extra-column to the csv file. '''\n", " L.acquire()\n", " ddate = pd.to_datetime(prm[0])\n", " dlat = prm[1]\n", " dlon = prm[2]\n", " q = prm[3]\n", " m = datetime.strftime(ddate, '%b').lower() # extract the month in lowercase charachter\n", " y = datetime.strftime(ddate, '%Y') # extract the year\n", " with tempfile.TemporaryDirectory() as tmpDir:\n", " url = r'https://seaice.uni-bremen.de/data/amsr2/asi_daygrid_swath/n6250/{0}/{1}/Arctic/asi-AMSR2-n6250-{2}-v5.4.tif'.format(\n", " y, m, datetime.strftime(ddate, '%Y%m%d'))\n", " if not os.path.exists(os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n", " 'asi-AMSR2_{0}.tif'.format(\n", " datetime.strftime(ddate, '%Y%m%d')))):\n", " urllib.request.urlretrieve(url, os.path.join(tmpDir, 'asi-AMSR2.tif'))\n", " if (q == 'Y'):\n", " shutil.copy2(os.path.join(tmpDir, 'asi-AMSR2.tif'),\n", " os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n", " 'asi-AMSR2_{0}.tif'.format(\n", " datetime.strftime(ddate, '%Y%m%d'))))\n", " src_filename = os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n", " 'asi-AMSR2_{0}.tif'.format(\n", " datetime.strftime(ddate, '%Y%m%d')))\n", " else:\n", " src_filename = os.path.join(tmpDir, 'asi-AMSR2.tif')\n", " else:\n", " src_filename = os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n", " 'asi-AMSR2_{0}.tif'.format(\n", " datetime.strftime(ddate, '%Y%m%d')))\n", " try:\n", " outval = self.pointExtract(src_filename, dlat, dlon)\n", " prm[4].put(prm[5])\n", " except:\n", " outval = np.nan\n", " prm[4].put(prm[5])\n", " pass\n", " L.release()\n", " return ([float(outval), datetime.strftime(ddate.to_pydatetime(), '%Y-%m-%d')])\n", "\n", " def extractFromCSI(self, prm):\n", " '''For each acquisition date in the csv file, differences are calculated between each one of them and each one included\n", " in the CIS tar files. Then the tar file corresponding the minimum difference is choosen for the extraction\n", " of the Sea Ice Concentration. 
'''\n", " L.acquire() # this is needed to lock each process to let them running separately without writing in a same variable at the same time\n", " ddate = pd.to_datetime(prm[0]) # Acquisition date from the csv file.\n", " dlat = prm[1]\n", " dlon = prm[2]\n", " fcis = prm[3]\n", " CISRaster = prm[4]\n", "\n", " CIS_acquisition_times = [datetime.strptime(i.split('_')[2], '%Y%m%dT%H%MZ').date() for i in fcis]\n", " wq = np.array(CIS_acquisition_times)\n", " CIS_acquisition_times = list(np.unique(wq))\n", " csv_acquisition_time = datetime.strftime(ddate, '%Y%m%dT%H%M%S')\n", " sample_date = datetime.strptime(csv_acquisition_time, '%Y%m%dT%H%M%S').date()\n", " dt_abs = [abs(sample_date - each_date) for each_date in CIS_acquisition_times]\n", " closest_date = CIS_acquisition_times[dt_abs.index(min(dt_abs))]\n", " outraster = os.path.join(CISRaster, [i for i in fcis if i.__contains__(datetime.strftime(closest_date, '%Y%m%d'))][0][:-4] + '.tif')\n", " intval = self.pointExtract(outraster, dlat, dlon)\n", " outval = intval[0][0]\n", " prm[5].put(prm[6])\n", " L.release() # this releases the locked process\n", " return ([float(outval), datetime.strftime(closest_date, '%Y-%m-%d')])\n", "# return ([float(outval), datetime.strftime(CIS_acquisition_times[s[0][0]], '%Y-%m-%d')])\n", "\n", " def selectCISFiles(self, prm):\n", " '''This function select the filenames from the CIS acquired at the nearest time as each one of the within the\n", " input csv file. The output list of files will be used to download them. '''\n", " L.acquire()\n", " ddate = pd.to_datetime(prm[0])\n", " fcislist = prm[3]\n", " CIS_acquisition_times = [datetime.strptime(i.split('_')[2], '%Y%m%dT%H%MZ').date() for i in fcislist]\n", " wq = np.array(CIS_acquisition_times)\n", " CIS_acquisition_times = list(np.unique(wq))\n", " u = datetime.strftime(ddate, '%Y%m%dT%H%M%S') # sample date\n", " sample_date = datetime.strptime(u, '%Y%m%dT%H%M%S').date()\n", " dt_abs = [abs(sample_date - each_date) for each_date in CIS_acquisition_times]\n", " closest_date = CIS_acquisition_times[dt_abs.index(min(dt_abs))]\n", " prm[5].put(prm[6])\n", " L.release()\n", " return [i for i in fcislist if i.__contains__(datetime.strftime(closest_date, '%Y%m%d'))][0]\n", "\n", " def binaryretrieve(self, j):\n", " '''The connection to the FTP server of the Canadian Ice Service is done in this function.\n", " All the spatial coordinate are assumed to be within the region Eastern_Arctic and in the same year 2016\n", " as the case of the GreenEdge data. '''\n", " L.acquire()\n", " i = j[0]\n", " shp_for_UC3 = j[1]\n", " hostname = 'sidads.colorado.edu'\n", " ftp = FTP(hostname)\n", " ftp.login(user='anonymous', passwd='')\n", " ## This should be changed to be dynamically change depending on the region (here: Eastern_Arctic) and the year.\n", " # The User Guide from the Canadian Ice Service (https://nsidc.org/data/G02171/versions/1?qt-data_set_tabs=3#qt-data_set_tabs) discribe all the possible region names\n", " ftp.cwd('/pub/DATASETS/NOAA/G02171/Eastern_Arctic/2016/') \n", " if not os.path.exists(os.path.join(shp_for_UC3, i)):\n", " with open(os.path.join(shp_for_UC3, i), 'wb') as localfile:\n", " ftp.retrbinary('RETR ' + i, localfile.write, 1024)\n", " ftp.quit()\n", " j[2].put(j[0])\n", " L.release()\n", " return 0\n", "\n", " def fetchTarFromCIS(self, tarfile, shp_for_UC3, CISRaster, prglabel, pr, pStatus):\n", " ''' This function manage the file retrieval from the CIS and then call to another function to do the vector\n", " shapefiles into rasters. 
'''\n", "\n", " if not os.path.exists(shp_for_UC3):\n", " os.makedirs(shp_for_UC3)\n", " '''For now, we assume all the data in the csv file were acquired in the same year (2016) and from\n", " the same region (Eastern_Arctic) as the case of the GreenEdge data.'''\n", " pool = mlp.Pool(processes=mlp.cpu_count()-2)\n", " m = Manager()\n", " queue = m.Queue()\n", " tarfilelist = [[i, shp_for_UC3, queue] for i in tarfile]\n", " s = pool.map_async(self.binaryretrieve, tarfilelist)\n", " ##\n", " while True:\n", " if s.ready():\n", " break\n", " else:\n", " c1 = int(queue.qsize() * 100 / len(tarfilelist))\n", " pr.value=c1\n", " prglabel.value = 'Fetching files from the CIS server...'\n", " pStatus.value = f'{pr.value}%'\n", " ##\n", " del pool, s, queue, m\n", " # Start a new progressbar for the shapefile converstion\n", " m = Manager()\n", " queue = m.Queue()\n", " pr.value=0\n", " prglabel.value = 'Converting shapefiles into raster files...'\n", " pStatus.value = f'{pr.value}%'\n", " ###################################################################\n", " for f in tarfile:\n", " '''Here the *.shp file have already been extracted from the *.tar file, \n", " so we only need to point to it as shp_filename'''\n", " shutil.unpack_archive(os.path.join(shp_for_UC3, f), shp_for_UC3, f[-3:])\n", " shp_filename = os.path.join(shp_for_UC3, f[:-4] + '.shp')\n", " outraster = os.path.join(CISRaster, f[:-4] + '.tif')\n", " if not os.path.exists(outraster):\n", " self.makeRasterFromSHP(shp_filename, outraster, 100)\n", " queue.put(1)\n", " c1 = int(queue.qsize() * 100 / len(tarfilelist))\n", " pr.value=c1\n", " pStatus.value = f'{pr.value}%'\n", " del m, queue\n", " return 0\n", "\n", "\n", " def makeRasterFromSHP(self, shp_filename, outraster, pxlsize):\n", " ''' This function handle the transformation of the vector shapefile format into rasters format.\n", " The ogr python binding package is used to read the vector shepefile before their transfmation into raster.\n", " Here only the Field CT holding the Sea Ice Concentration data are rasterized.\n", " If needed, other Field amoung all of those included in the shapefile can be added as an additional band\n", " into the output raster. '''\n", " shpfile = ogr.Open(shp_filename)\n", " layer = shpfile.GetLayer()\n", " xmin, xmax, ymin, ymax = layer.GetExtent()\n", " cols = int((xmax - xmin) / pxlsize)\n", " rows = int((ymax - ymin) / pxlsize)\n", " rdrive = gdal.GetDriverByName('GTiff')\n", " ds = rdrive.Create(outraster, cols, rows, 1, gdal.GDT_Byte)\n", " ds.SetGeoTransform([xmin, pxlsize, 0, ymax, 0, -pxlsize])\n", " gdal.RasterizeLayer(ds, [1], layer, options=['ATTRIBUTE=CT'])\n", " ds.SetProjection(layer.GetSpatialRef().ExportToPrettyWkt())\n", " ds.GetRasterBand(1).SetNoDataValue(0)\n", " del ds, rdrive, shpfile, layer\n", " return 0\n", "\n", " def pointExtract(self, src_filename, dlat, dlon):\n", " ''' The extraction process is handled inside this function.\n", " The GDAL python binding package is used here to read the raster files needed for the extraction.\n", " The pyproj package is used to bring the geographical coordinates from the input csv to match the spatial\n", " coordinate system of the rasters in order to make the extraction of the right collocated pixel with the csv data. 
'''\n", " src_ds = gdal.Open(src_filename)\n", " gt = src_ds.GetGeoTransform()\n", " band = src_ds.GetRasterBand(1)\n", " proj = osr.SpatialReference(wkt=src_ds.GetProjection())\n", " #####\n", " wgs84 = pyproj.CRS(\"EPSG:4326\")\n", " rstProj = pyproj.CRS(proj.ExportToProj4())\n", " #####\n", " point = ogr.Geometry(ogr.wkbPoint)\n", " point.AddPoint(float(dlat), float(dlon)) # to make sure the corrdinates are not in string format\n", " mx, my = pyproj.Transformer.from_proj(wgs84, rstProj).transform(point.GetX(), point.GetY())\n", " px = int((mx - gt[0]) / gt[1]) # x pixel\n", " py = int((my - gt[3]) / gt[5]) # y pixel\n", " intval = band.ReadAsArray(px, py, 1, 1)\n", " del band, src_ds, point, proj, gt, mx, my, px, py\n", " return intval\n", "\n", " def getCISTarFileList(self, dlist, prglabel, pr, pStatus):\n", " ''' This function extract the file list needed for the extraction. It uses the acquisition year\n", " in the input csv file to locate the same year used in the FTP data endpoint in order to select the\n", " files to be downloaded. '''\n", " dl = pd.to_datetime(dlist)\n", " csv_year = [datetime.strftime(s, '%Y') for s in dl]\n", " csv_year = np.unique(csv_year)\n", " fcis_gen = []\n", " fcis = []\n", " m = Manager()\n", " queue = m.Queue()\n", " for y in csv_year:\n", " hostname = 'sidads.colorado.edu'\n", " ftp = FTP(hostname)\n", " ftp.login(user='anonymous', passwd='')\n", " ''' There is more regions to be considered. Here the region Easter_Arctic is directly selected. '''\n", " # TODO: Find a way to make automatic selection of the region of interest regarding the spatial extent of coordinate in the csv file used\n", " ftp.cwd('/pub/DATASETS/NOAA/G02171/Eastern_Arctic/{0}/'.format(y))\n", " files = ftp.nlst() # This extract all the files within the folder named with the YEAR in.\n", " ftp.quit()\n", " fcis_gen.append([i for i in files if i.__contains__('cis')])\n", " for i in fcis_gen:\n", " fcis = fcis + i\n", " queue.put(i)\n", " p = int(queue.qsize()*100/len(fcis_gen))\n", " pr.value=p\n", " prglabel.value = 'Building file list...'\n", " pStatus.value = f'{pr.value}%'\n", " del fcis_gen, m, queue\n", " return fcis\n", "\n", " def ExtractionPixelValues(self, param_to_extract, q, rstsource, prglabel, pr, pStatus):\n", " ''' * inputfile: Comma Separated Value (CSV) file with a header containning Date, Latitude, and Longitude.\n", " * param_to_extract: name of the parameter to be extracted from the raster. This will become the name of\n", " the new column that will be added to the initial csv file. Here it is about Sea_Ice_Concentration.\n", " The format of the Date in the csv file should be mm/dd/yyyy.\n", " The Latitude and Longitude Should be in full decimal format and their values are in the range [-180, 180].\n", "\n", " * The Extraction Process is don using parallel computing to accelerate the process. 
Parallel processing is\n", "            very useful here as the extraction of the data corresponding to each row is independent of the others.\n", "        '''\n", "\n", "        outfile = os.path.join(os.path.split(self.inputfile)[0],\n", "                               os.path.split(self.inputfile)[1][:-4] + '_{0}_{1}.csv'.format(param_to_extract,\n", "                                                                                             rstsource))\n", "        if os.path.exists(outfile):\n", "            os.remove(outfile)\n", "        df = pd.read_csv(self.inputfile, header=0, sep=',', parse_dates=True, skiprows=[1])\n", "        time_header=[i for i in df.columns if\n", "                     ((i.__contains__('Date')) or (i.__contains__('date')) or\n", "                      (i.__contains__('Time')) or (i.__contains__('time')))].pop()\n", "        if rstsource == 'BU':\n", "            m = Manager()\n", "            queue = m.Queue()\n", "            lon = [i for i in df.columns if i.__contains__('longitude')].pop()\n", "            lat = [i for i in df.columns if i.__contains__('latitude')].pop()\n", "            p = [[df[time_header][i], df[lat][i], df[lon][i], q, queue, i] for i in df.index]\n", "            if q == 'Y':\n", "                if not os.path.exists(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters')):\n", "                    os.makedirs(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'))\n", "            pool = mlp.Pool(mlp.cpu_count() - 2)\n", "            s = pool.map_async(self.extractFromUniBremenAMSR2, p)\n", "            ##\n", "            while True:\n", "                if s.ready():\n", "                    break\n", "                else:\n", "                    c1=int(queue.qsize()*100/len(p))\n", "                    pr.value=c1 \n", "                    pStatus.value = f'{pr.value}%'\n", "                    print(queue.qsize())\n", "            ##\n", "            a = np.array(s.get())\n", "            del s\n", "            b = np.transpose(a)\n", "            df[param_to_extract] = b[0]\n", "            df[time_header] = b[1]\n", "            del pool, m, queue\n", "        else:\n", "            CISRaster = os.path.join(os.path.dirname(self.inputfile), 'CISraster')\n", "            if not os.path.exists(CISRaster):\n", "                os.makedirs(CISRaster)\n", "            ''' Retrieval from the CIS server of the filename list corresponding to each record in the csv file '''\n", "            fcis = self.getCISTarFileList(df[time_header], prglabel, pr, pStatus)\n", "            m = Manager()\n", "            queue = m.Queue()\n", "            pr.value=0\n", "            ''' Selection of the shapefile with the closest acquisition time to each record of the csv file. 
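The progress-reporting idiom used throughout this class: each worker pushes a token onto a Manager queue, and the main process polls qsize() until map_async finishes. Stripped of the CIS specifics, the pattern is (a sketch):

import multiprocessing as mlp
from multiprocessing import Manager

def work(args):
    item, queue = args
    # ... the real per-row job goes here ...
    queue.put(1)  # one token per finished item
    return item

if __name__ == '__main__':
    m = Manager()
    queue = m.Queue()
    jobs = [(i, queue) for i in range(100)]
    with mlp.Pool(mlp.cpu_count() - 2) as pool:
        s = pool.map_async(work, jobs)
        while not s.ready():
            s.wait(0.5)  # avoid a busy loop
            print(f'{int(queue.qsize() * 100 / len(jobs))}%', end='\r')
        results = s.get()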
'''\n", " lon = [i for i in df.columns if i.__contains__('longitude')].pop()\n", " lat = [i for i in df.columns if i.__contains__('latitude')].pop()\n", " p = [[df[time_header][i], df[lat][i], df[lon][i], fcis, CISRaster, queue, i] for i in df.index]\n", " pool = mlp.Pool(mlp.cpu_count() - 2)\n", " imglist = pool.map_async(self.selectCISFiles, p)\n", " prglabel.value = 'Selecting CIS file...'\n", " while True:\n", " if imglist.ready():\n", " break\n", " else:\n", " c1=int(queue.qsize()*100/len(p))\n", " pr.value=c1\n", " pStatus.value = f'{pr.value}%'\n", " imgarray0 = np.array(imglist.get())\n", " imgarray1 = np.unique(imgarray0)\n", " imglist0 = list(imgarray1)\n", " CIS_shp = os.path.join(os.path.dirname(self.inputfile), 'CIS_shp')\n", " if not os.path.exists(CIS_shp):\n", " os.makedirs(CIS_shp)\n", " del imgarray0, imgarray1, pool, m, queue\n", " '''Fetching the files from the remote server'''\n", " self.fetchTarFromCIS(imglist0, CIS_shp, CISRaster, prglabel, pr, pStatus)\n", " pool = mlp.Pool(mlp.cpu_count() - 2)\n", " m = Manager()\n", " queue = m.Queue()\n", " p = [[df[time_header][i], df[lat][i], df[lon][i], fcis, CISRaster, queue, i] for i in df.index]\n", " s = pool.map_async(self.extractFromCSI, p)\n", " ##\n", " while True:\n", " if s.ready():\n", " break\n", " else:\n", " c1=int(queue.qsize()*100/len(p))\n", " pr.value=c1\n", " prglabel.value = 'Extracting SIC from newly created rasters...'\n", " pStatus.value = f'{pr.value}%'\n", " del imglist0\n", " ##\n", " a = np.array(s.get())\n", " del pool, m, queue\n", " b = np.transpose(a)\n", " df[param_to_extract] = b[0]\n", " df['CIS_dates'] = b[1]\n", " del s\n", " if q == 'N':\n", " shutil.rmtree(os.path.join(os.path.dirname(self.inputfile), 'CISraster'))\n", " shutil.rmtree(os.path.join(os.path.dirname(self.inputfile), 'CIS_shp'))\n", " df.to_csv(outfile,\n", " sep=',', index=False, header=1)\n", " output_shp = os.path.split(self.inputfile)[1][:-4] + '_{0}_{1}_SHP'.format(param_to_extract, rstsource) # Name of the output shapefile\n", " self.createShapefile(df, output_shp, time_header, prglabel)\n", " prglabel.value = 'Processing Finished!!'\n", " pStatus.value = f'{pr.value}%'\n", " output_gpkg = os.path.split(self.inputfile)[1][:-4] + '_{0}_{1}_gpkg'.format(param_to_extract, rstsource) # Name of the output Geopackage (gpkg) file\n", " if not os.path.exists(os.path.join(os.path.dirname(self.inputfile), output_gpkg)):\n", " os.makedirs(os.path.join(os.path.dirname(self.inputfile), output_gpkg))\n", " gpkgfile = os.path.join(os.path.dirname(self.inputfile), output_gpkg, output_gpkg + '.gpkg')\n", " os.system(f'ogr2ogr -a_srs EPSG:4326 -oo X_POSSIBLE_NAMES=Lon* -oo Y_POSSIBLE_NAMES=Lat* -f \"GPKG\" {gpkgfile} {outfile}') # Creates the gpkg file from the shapefile. This can be created directly from the csv.\n", " return output_shp\n", "\n", " def getSeaIceSource(self, workDir, r, gLocalCopy, prglabel, pr, pStatus):\n", " ''' The choice between the Sea Ice Data from the Canadian Sea Ice Service and Bremen University is handled\n", " in this function. When the choice is done, another function is called to take care of the process of\n", " extracting Sea Ice Concentration from the chosen data source. 
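The GeoPackage export above shells out to ogr2ogr with os.system; a slightly more defensive variant of the same command (a sketch using subprocess, so a non-zero exit raises instead of failing silently):

import subprocess

# gpkgfile and outfile are the same paths built just above.
subprocess.run(['ogr2ogr', '-a_srs', 'EPSG:4326',
                '-oo', 'X_POSSIBLE_NAMES=Lon*',
                '-oo', 'Y_POSSIBLE_NAMES=Lat*',
                '-f', 'GPKG', gpkgfile, outfile],
               check=True)  # raises CalledProcessError on failure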
'''\n", "        if not os.path.exists(self.inputfile):\n", "            print(\"Input file does not exist!\")\n", "            exit(-1)\n", "        if r == 1:\n", "            r = 'BU'\n", "        elif r == 2:\n", "            r = 'CIS'\n", "        else:\n", "            exit(-1)\n", "        if gLocalCopy == 'y':\n", "            gLocalCopy = 'Y'\n", "        elif gLocalCopy == 'n':\n", "            gLocalCopy = 'N'\n", "        t = datetime.now()\n", "        output_shp = self.ExtractionPixelValues('sea_ice_co', gLocalCopy, r, prglabel, pr, pStatus)\n", "        print(datetime.now() - t)\n", "        return output_shp\n", "\n", "class ccadi_uc3_mapping():\n", "    def __init__(self):\n", "        ## initiate the grid to display the contents of the page ###\n", "        self.gridwindow={}\n", "        self.vbox_widgets = []\n", "        self.gridwindow['grid'] = widgets.GridspecLayout(1,1)\n", "        \n", "        #####\n", "\n", "        # read text\n", "        f=open(\"md_texts/SeaIceConcentration.md\",\"r\")\n", "        fc=f.read()\n", "        f.close()\n", "        text_html1 = markdown.markdown(fc)\n", "        del fc\n", "        self.gridwindow['InfoSIC'] = widgets.HTML(text_html1)\n", "        self.vbox_widgets.append(self.gridwindow['InfoSIC'])\n", "        # Fields\n", "        self.wdField = widgets.Text(\n", "            value=os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"),\n", "            layout=widgets.Layout(width='max-content')\n", "        )\n", "\n", "        self.chkb1 = widgets.Checkbox(\n", "            value=False,\n", "            description='Canadian Ice Service',\n", "            disabled=False,\n", "            indent=False\n", "        )\n", "        self.chkb2 = widgets.Checkbox(\n", "            value=False,\n", "            description='Bremen University',\n", "            disabled=False,\n", "            indent=False\n", "        )\n", "        self.chkb3 = widgets.Checkbox(\n", "            value=False,\n", "            description='Keep a local copy of the raster images',\n", "            disabled=False,\n", "            indent=False\n", "        )\n", "        self.gridwindow['checkbox'] = widgets.VBox(children=[self.chkb1, self.chkb2, self.chkb3])\n", "        self.vbox_widgets.append(self.gridwindow['checkbox'])\n", "        self.prg = widgets.IntProgress(\n", "            value=0,\n", "            min=0,\n", "            max=100,\n", "            bar_style='success',\n", "            style={'bar_color': 'green'},\n", "            orientation='horizontal',\n", "            layout=widgets.Layout(width='800px')\n", "        )\n", "        self.status = widgets.Label(value=f'{self.prg.value}%', layout=widgets.Layout(width='max-content'))\n", "        self.prg_label = widgets.Label('', layout=widgets.Layout(width='max-content'))\n", "        self.vbox_widgets.append(self.prg_label)\n", "        self.gridwindow['progressbar'] = widgets.HBox(children=[self.prg, self.status])\n", "        self.vbox_widgets.append(self.gridwindow['progressbar'])\n", "        self.okButton = widgets.Button(description=\"OK\")\n", "        \n", "        self.okButton.on_click(self.clickOkbutton)\n", "        #####\n", "        \n", "        self.UC3_mapping()\n", "\n", "\n", "    def check_checkBox(self):\n", "        c = 0\n", "        value = ''\n", "        q = 'n'\n", "        if self.chkb1.value==True:\n", "            value = self.chkb1.description\n", "            c = 2\n", "        if self.chkb2.value==True:\n", "            value = self.chkb2.description\n", "            c = 1\n", "        if (self.chkb3.value==True):\n", "            q = 'y'\n", "        return c, q\n", "\n", "\n", "    def clickOkbutton(self, b):\n", "        self.prg.value=0\n", "        self.prg_label.value = 'Processing...'\n", "        workDir = os.path.dirname(self.wdField.value)\n", "        if not os.path.exists(workDir):\n", "            os.makedirs(workDir)\n", "        inputfile = self.wdField.value\n", "        r, q = self.check_checkBox()\n", "        output_shp = addSeaIceConcentration(inputfile).getSeaIceSource(workDir, r, q, self.prg_label, self.prg, self.status)\n", "        \n", "\n", "    def UC3_mapping(self): \n", "        def on_button_clicked(b):\n", "            showmap()\n", "\n", "        out=widgets.Output()\n", "        @out.capture()\n", "        def showmap():\n", "\n", "            
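The read-render-wrap steps for the md_texts/*.md snippets recur in several constructors here; a small helper (a sketch, not part of the notebook) would collapse them:

import markdown
import ipywidgets as widgets

def md_widget(path):
    # Render a markdown snippet to HTML and wrap it in a widget.
    with open(path, 'r') as f:
        return widgets.HTML(markdown.markdown(f.read()))

# e.g. self.gridwindow['InfoSIC'] = md_widget('md_texts/SeaIceConcentration.md')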
workDir=os.path.join(\"2016_int_btl_csv\")\n", "            shp=os.path.join(workDir, \"merged_btl_nutrient_pyco2sys_sea_ice_co_CIS_SHP\", \"merged_btl_nutrient_pyco2sys_sea_ice_co_CIS_SHP.shp\")\n", "            data_full=gpd.read_file(shp)\n", "            \n", "            # Create a Geo-id, which is needed by Folium (it needs a unique identifier for each row)\n", "            data_full['geoid'] = data_full.index.astype(str)\n", "\n", "#             dataf_0m=data.loc[np.round(data[\"sample_dep\"].values)==1]\n", "            dataf_10m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==10]\n", "            dataf_20m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==20]\n", "            dataf_30m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==30]\n", "            dataf_40m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==40]\n", "            dataf_50m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==50]\n", "            dataf_60m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==60]\n", "            dataf_70m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==70]\n", "            dataf_80m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==80]\n", "            dataf_90m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==90]\n", "            dataf_100m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==100]\n", "\n", "            ###\n", "\n", "            lonCent = (data_full.bounds.maxx + data_full.bounds.minx).mean()/2\n", "            latCent = (data_full.bounds.maxy + data_full.bounds.miny).mean()/2\n", "            # creating a map object\n", "            m = leafmap.folium.Map(location=(latCent,lonCent), projections=\"epsg3575\", zoom_start=6)\n", "            #rst = os.path.join(\"2016_int_btl_csv\",\"CISraster\",\"cis_SGRDREA_20160606T1800Z_pl_a.tif\")\n", "\n", "            ###\n", "            # Create the variable plot shown upon click on the stations on the map\n", "            def chart_func(df, st): #new function\n", "                chart_temp = alt.Chart(df).mark_line(color='red').transform_fold(\n", "                    fold=['CTDTmp90', 'sample_dep'], \n", "                    as_=['variable', 'value']).encode(\n", "                    x=alt.X('CTDTmp90:Q', \n", "                            axis=alt.Axis(title='Temperature (\u00b0C)', \n", "                                          titleColor='red'), \n", "                            scale=alt.Scale(domain=[df['CTDTmp90'].min(), \n", "                                                    df['CTDTmp90'].max()])),\n", "                    y=alt.Y('sample_dep:Q',\n", "                            axis=alt.Axis(title='Depth (m)'), \n", "                            scale=alt.Scale(reverse=True, \n", "                                            domain=[0, df['sample_dep'].max()])),\n", "                    color=alt.value('red')\n", "                )\n", "                chart_sal=alt.Chart(df).mark_line(color='green').transform_fold(\n", "                    fold=['P_sal_CTD', 'sample_dep'], \n", "                    as_=['variable', 'value']).encode(\n", "                    x=alt.X('P_sal_CTD:Q', \n", "                            axis=alt.Axis(title='Salinity', \n", "                                          titleColor='green'), \n", "                            scale=alt.Scale(domain=[df['P_sal_CTD'].min(), \n", "                                                    df['P_sal_CTD'].max()])),\n", "                    y=alt.Y('sample_dep:Q', \n", "                            axis=alt.Axis(title='Depth (m)'), \n", "                            scale=alt.Scale(reverse=True, \n", "                                            domain=[0, df['sample_dep'].max()])),\n", "                    color=alt.value('green')\n", "                )\n", "                ufchart=alt.layer(chart_temp, chart_sal, \n", "                                  title=f\"Vertical profile of Salinity and Temperature at Station: {st}\", \n", "                                  width=400, height=400).resolve_scale(x='independent').configure_axisTop(titleColor='green').configure_axisBottom(titleColor='red').resolve_legend(color='independent') \n", "                return ufchart.to_json()\n", "            \n", "            # extract unique coordinates\n", "            data_full=data_full.round({'latitude':3, 'longitude':3})\n", "            df=data_full[['latitude', 'longitude']].drop_duplicates()  # drop all duplicated coordinates and keep the row indexes\n", "            u=[]\n", "            for i in df.index:  # use the indexes (kept in the preceding lines) to build a new dataframe from df\n", "                u.append(data_full.values[i])\n", "            
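chart_func above layers two line marks over a shared, reversed depth axis and resolves the x scales independently so temperature and salinity each get their own axis. Reduced to its essentials with a toy profile (a sketch):

import altair as alt
import pandas as pd

toy = pd.DataFrame({'sample_dep': [10, 20, 30],        # invented profile values
                    'CTDTmp90': [-1.2, -1.0, -0.8],
                    'P_sal_CTD': [32.1, 32.6, 33.0]})
temp = alt.Chart(toy).mark_line(color='red').encode(
    x=alt.X('CTDTmp90:Q', axis=alt.Axis(title='Temperature (°C)')),
    y=alt.Y('sample_dep:Q', scale=alt.Scale(reverse=True)))
sal = alt.Chart(toy).mark_line(color='green').encode(
    x=alt.X('P_sal_CTD:Q', axis=alt.Axis(title='Salinity')),
    y=alt.Y('sample_dep:Q', scale=alt.Scale(reverse=True)))
profile = alt.layer(temp, sal).resolve_scale(x='independent')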
dg=pd.DataFrame(u, columns=data_full.columns)\n", " data_coord=dg[['station', 'latitude', 'longitude']]\n", " del dg\n", " full_profile = leafmap.folium.FeatureGroup(name=\"Full profiles\")\n", " for i, st in zip(df.index, data_coord['station'].values[:]):\n", " ds0=data_full[\n", " [\n", " 'sample_dep',\n", " 'P_sal_CTD', \n", " 'station', \n", " 'CTDTmp90', \n", " 'latitude', \n", " 'longitude']\n", " ].loc[\n", " data_full[\"station\"].values==st\n", " ]\n", " ds2=ds0.dropna().round({\n", " \"CTDTmp90\":2, \n", " \"P_sal_CTD\":2, \n", " 'latitude': 3, \n", " 'longitude':3})\n", " chart=chart_func(ds0, st)\n", " pp=leafmap.folium.Popup(max_width=600).add_child(leafmap.folium.VegaLite(chart, width=600))\n", " full_profile.add_child(leafmap.folium.CircleMarker(\n", " location=[data_full['latitude'].values[i], data_full['longitude'].values[i]], radius=6,\n", " popup=pp,\n", " ))\n", " full_profile.add_to(m)\n", " \n", " # Select only needed columns \n", " data_10m = dataf_10m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n", " \n", " # Add data near the sea surface: 10m\n", " leafmap.folium.features.GeoJson(dataf_10m,\n", " name='Data at 10m depth',\n", " style_function=lambda x: {'color':'transparent','fillColor':'transparent','weight':0},\n", " tooltip=leafmap.folium.features.GeoJsonTooltip(\n", " fields=[\n", " 'P_sal_CTD', \n", " 'station', \n", " 'sample_dep', \n", " 'CTDTmp90'],\n", " aliases = [\n", " 'Practical salinity from CTD', \n", " 'Station name', \n", " 'sample depth (m)',\n", " 'Temperature from CTD (\u00b0C)'\n", " ],\n", " sticky=False)\n", " ).add_to(m)\n", "\n", "\n", " # Select only needed columns\n", " data_20m = dataf_20m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n", " \n", " # Add data near the sea surface: 20m\n", " leafmap.folium.features.GeoJson(dataf_20m,\n", " name='Data at 20m depth',\n", " style_function=lambda x: {'color':'transparent','fillColor':'transparent','weight':0},\n", " tooltip=leafmap.folium.features.GeoJsonTooltip(\n", " fields=[\n", " 'P_sal_CTD', \n", " 'station', \n", " 'sample_dep', \n", " 'CTDTmp90'],\n", " aliases = [\n", " 'Practical salinity from CTD', \n", " 'Station name', \n", " 'sample depth (m)',\n", " 'Temperature from CTD (\u00b0C)'\n", " ],\n", " sticky=False)\n", " ).add_to(m)\n", " \n", " # Select only needed columns\n", " data_30m = dataf_30m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n", " \n", " # Add data near the sea surface: 30m\n", " leafmap.folium.features.GeoJson(dataf_30m,\n", " name='Data at 30m depth',\n", " style_function=lambda x: {\n", " 'color':'transparent',\n", " 'fillColor':'transparent',\n", " 'weight':0\n", " },\n", " tooltip=leafmap.folium.features.GeoJsonTooltip(\n", " fields=[\n", " 'P_sal_CTD', \n", " 'station', \n", " 'sample_dep', \n", " 'CTDTmp90'],\n", " aliases = [\n", " 'Practical salinity from CTD', \n", " 'Station name', \n", " 'sample depth (m)',\n", " 'Temperature from CTD (\u00b0C)'\n", " ],\n", " sticky=False)\n", " ).add_to(m)\n", "\n", " # Select only needed columns\n", " data_40m = dataf_40m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n", " \n", " # Add data near the sea surface: 40m\n", " leafmap.folium.features.GeoJson(dataf_40m,\n", " name='Data at 40m depth',\n", " style_function=lambda x: {\n", " 'color':'transparent',\n", " 'fillColor':'transparent',\n", " 'weight':0\n", " },\n", " tooltip=leafmap.folium.features.GeoJsonTooltip(\n", " fields=[\n", " 'P_sal_CTD', \n", " 
'station', \n", " 'sample_dep', \n", " 'CTDTmp90'],\n", " aliases = [\n", " 'Practical salinity from CTD', \n", " 'Station name', \n", " 'sample depth (m)',\n", " 'Temperature from CTD (\u00b0C)'\n", " ],\n", " sticky=False)\n", " ).add_to(m)\n", "# # Select only needed columns\n", " data_50m = dataf_50m[['geoid', 'P_sal_CTD', 'station', \n", " 'sample_dep', 'CTDTmp90', 'geometry', \n", " 'latitude', 'longitude']]\n", " \n", " # Add data near the sea surface: 50m\n", " leafmap.folium.features.GeoJson(dataf_50m,\n", " name='Data at 50m depth',\n", " style_function=lambda x: {\n", " 'color':'transparent',\n", " 'fillColor':'transparent',\n", " 'weight':0\n", " },\n", " tooltip=leafmap.folium.features.GeoJsonTooltip(\n", " fields=[\n", " 'P_sal_CTD', \n", " 'station', \n", " 'sample_dep', \n", " 'CTDTmp90'],\n", " aliases = [\n", " 'Practical salinity from CTD', \n", " 'Station name', \n", " 'sample depth (m)',\n", " 'Temperature from CTD (\u00b0C)'\n", " ],\n", " sticky=False)\n", " ).add_to(m)\n", "\n", "# ######################################################################################################################\n", "\n", " leafmap.folium.LayerControl().add_to(m)\n", " display(m)\n", " \n", " self.showmap_button=widgets.Button(\n", " description='Show Map',\n", " disabled=False,\n", " button_style='', \n", " tooltip='Click me',\n", " icon=''\n", " )\n", " self.gridwindow['ok_and_continue'] = widgets.HBox(children=[self.okButton, self.showmap_button])\n", " self.vbox_widgets.append(self.gridwindow['ok_and_continue'])\n", " \n", " self.gridwindow['grid'][0, 0] = widgets.VBox(children=self.vbox_widgets) #\n", " \n", " self.accordion0 = widgets.Accordion(\n", " children=[widgets.HBox(children = [self.gridwindow['grid'][0, 0]])]\n", " )\n", " self.accordion0.set_title(0, 'Adding Sea Ice Concentrations into the combined BTL_Nutrient file.')\n", " display(self.accordion0)\n", "\n", " self.showmap_button.on_click(on_button_clicked)\n", " display(out)\n", " \n", "\n", "\n", " "]}, {"cell_type": "code", "execution_count": 9, "id": "3c73708d-556c-49d8-add3-61098022648b", "metadata": {}, "outputs": [{"data": {"text/markdown": ["Merging bottle file with the nutrient file
"], "text/plain": [""]}, "metadata": {}, "output_type": "display_data"}, {"ename": "FileNotFoundError", "evalue": "[Errno 2] No such file or directory: 'md_texts/nutrient_btl_infos.md'", "output_type": "error", "traceback": ["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn [9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmerging_gui_jupiter\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn [2], line 96\u001b[0m, in \u001b[0;36mmerging_gui_jupiter.__init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 94\u001b[0m printmd(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMerging bottle file with the nutrient file
\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 95\u001b[0m \u001b[38;5;66;03m# read text\u001b[39;00m\n\u001b[0;32m---> 96\u001b[0m f\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmd_texts/nutrient_btl_infos.md\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 97\u001b[0m fc\u001b[38;5;241m=\u001b[39mf\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 98\u001b[0m f\u001b[38;5;241m.\u001b[39mclose()\n", "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'md_texts/nutrient_btl_infos.md'"]}], "source": ["merging_gui_jupiter()"]}], "metadata": {"kernelspec": {"display_name": "Python [conda env:edc-default-2022.10-14]", "language": "python", "name": "conda-env-edc-default-2022.10-14-py"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13"}, "widgets": {"application/vnd.jupyter.widget-state+json": {"state": {}, "version_major": 2, "version_minor": 0}}, "properties": {"id": "c250de70-680e-43e1-a62f-07f420e9a180", "license": null, "name": "UC3 Ocean Acidification Notebook", "requirements": [], "tags": ["Jupyter", "Polar"], "tosAgree": true, "type": "Jupyter Notebook", "version": "0.0.1", "description": "Ocean acidification in Baffin Bay", "authors": [{"id": "39a21097-0c47-4efa-afb8-21161b20e5ff", "name": "yanique.campbell@umanitoba.ca"}]}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
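A side note on the mapping cell in the notebook above: the per-depth GeoJson layers differ only in the depth value, so a future revision could generate them in a loop, for example (a sketch grounded in the same fields and aliases):

for depth in (10, 20, 30, 40, 50):
    subset = data_full.loc[np.round(data_full['sample_dep'].values) == depth]
    leafmap.folium.features.GeoJson(
        subset,
        name=f'Data at {depth}m depth',
        style_function=lambda x: {'color': 'transparent', 'fillColor': 'transparent', 'weight': 0},
        tooltip=leafmap.folium.features.GeoJsonTooltip(
            fields=['P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90'],
            aliases=['Practical salinity from CTD', 'Station name',
                     'sample depth (m)', 'Temperature from CTD (°C)'],
            sticky=False),
    ).add_to(m)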
From cd5f3e20f91bbeaf1bf361ca157872c2d9dca17d Mon Sep 17 00:00:00 2001
From: eurodatacube-submissions
<61697821+eurodatacube-submissions@users.noreply.github.com>
Date: Tue, 25 Oct 2022 17:13:49 +0200
Subject: [PATCH 2/2] Add executed notebook under
notebooks/contributions/test_ccadi_UC3.ipynb [skip ci]
---
notebooks/contributions/test_ccadi_UC3.ipynb | 2096 +++++++++++++++++-
1 file changed, 2095 insertions(+), 1 deletion(-)
diff --git a/notebooks/contributions/test_ccadi_UC3.ipynb b/notebooks/contributions/test_ccadi_UC3.ipynb
index 81f80eb7..19efa836 100644
--- a/notebooks/contributions/test_ccadi_UC3.ipynb
+++ b/notebooks/contributions/test_ccadi_UC3.ipynb
@@ -1 +1,2095 @@
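For reference while reading the merge code below: the bottle and nutrient tables are joined on a composite key (date, station, bottle, cast). A toy pd.merge with that shape (all values invented):

import pandas as pd

# Invented rows; the real column names are discovered from the headers at run time.
nutrient = pd.DataFrame({'sample_date': ['2016-06-06'], 'station': ['G110'],
                         'bottle': [3], 'cast': [1], 'no3': [4.2]})
btl = pd.DataFrame({'date': ['2016-06-06'], 'station': ['G110'],
                    'bopo': [3], 'cast': [1], 'CTDTmp90': [-1.1]})
merged = pd.merge(nutrient, btl, how='inner',
                  left_on=['sample_date', 'station', 'bottle', 'cast'],
                  right_on=['date', 'station', 'bopo', 'cast'])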
-{"cells": [{"cell_type": "code", "execution_count": 1, "id": "d3e94585-e44b-443f-bbd5-07dba42214e8", "metadata": {}, "outputs": [], "source": ["import urllib.request as request\n", "import h5py\n", "import os\n", "from ipywidgets import widgets, IntSlider, jslink, interact, interactive, fixed, interact_manual\n", "import markdown\n", "from erddapy import ERDDAP\n", "from ipyleaflet import Map, Marker, GeoData, ImageOverlay, basemaps, basemap_to_tiles, LayersControl, ScaleControl, FullScreenControl, WidgetControl\n", "# import pandas as pd\n", "import numpy as np\n", "# from IPython.display import display\n", "from netCDF4 import num2date\n", "# from datetime import datetime\n", "import geopandas as gpd\n", "########################################################\n", "import sys\n", "import pandas as pd\n", "import ipywidgets as widgets\n", "from IPython.display import Markdown, HTML, Javascript, display, Image\n", "import subprocess\n", "import csv\n", "from __future__ import print_function\n", "# from ipywidgets import interact, interactive, fixed, interact_manual\n", "import csv\n", "import re\n", "import warnings\n", "#from init import *\n", "warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)\n", "def printmd(string):\n", " display(Markdown(string))\n", "###############################################################\n", "from ftplib import FTP\n", "import multiprocessing as mlp\n", "import shutil\n", "import tempfile\n", "import urllib.request\n", "from datetime import datetime, timedelta\n", "\n", "from osgeo import gdal\n", "# import numpy as np\n", "from osgeo import ogr\n", "from osgeo import osr\n", "# import pandas as pd\n", "import pyproj\n", "\n", "from multiprocessing import Manager\n", "# from ipywidgets import widgets, IntSlider, jslink\n", "from ipyleaflet import Map, projections, GeoData, basemap_to_tiles, basemaps, WidgetControl, ScaleControl, FullScreenControl, LayersControl #, ImageOverlay, \n", "import geopandas as gpd\n", "import leafmap\n", "import altair as alt\n", "# import localtileserver # was needed localy to be able to add raster on the leafmap Map."]}, {"cell_type": "code", "execution_count": 2, "id": "ee1fb199-9539-48f5-a84c-4f393d963bdc", "metadata": {}, "outputs": [], "source": ["class merge_btl_nutrient:\n", " \n", " def get_btlfile(self, btl_url, wdir):\n", " request.urlretrieve(btl_url, os.path.join(wdir, \"btl.h5\"))\n", " return 0\n", "\n", " def btl_to_dataframe(self, wdir):\n", " f = h5py.File(os.path.join(wdir, \"btl.h5\"), 'r')\n", " df_btl = pd.DataFrame()\n", " for grp in f:\n", " tempo_list = []\n", " tempo_columns = []\n", " for c in f[grp]:\n", " tempo_columns.append(c)\n", " tempo_list.append(f[grp][c])\n", " list_array = np.transpose(np.array(tempo_list))\n", " tempo_df = pd.DataFrame(list_array, columns=tempo_columns)\n", " tempo_df['station'] = [f[grp].attrs['Station'].strip().split(' ')[-1]] * len(tempo_df)\n", " tempo_df['cast'] = [int(f[grp].attrs['Cast_Number'].strip())] * len(tempo_df)\n", " df_btl = pd.DataFrame.append(tempo_df, df_btl)\n", " f.close()\n", "# # added to extract a csv format of the btl.h5 data to send out to the GUI team\n", "# df_btl.to_csv(os.path.join(wdir, \"btl.csv\"), header=1, index=0)\n", " #######\n", " return df_btl\n", "\n", " def merge(self, df_nutrient, df_btl, file_ge_btl):\n", " ge_time_header = [i for i in list(df_nutrient.columns) if\n", " ((i.lower().__contains__('date')) or \n", " (i.lower().__contains__('time')))].pop()\n", " btl_time_header = [i for i in 
list(df_btl.columns) if\n", " ((i.lower().__contains__('date')) or \n", " (i.lower().__contains__('time')))].pop()\n", " ge_station_header = [i for i in list(df_nutrient.columns) if\n", " i.lower().__contains__('station')].pop()\n", " btl_station_header = [i for i in list(df_btl.columns) if\n", " i.lower().__contains__('station')].pop()\n", " ge_bottle_header = [i for i in list(df_nutrient.columns) if\n", " (i.lower().__contains__('bottle'))].pop()\n", " \"\"\" TODO: bopo should be replaced with RosPos when the data with the corrected variable name will be served on Hyrax.\"\"\"\n", " btl_bottle_header = [i for i in list(df_btl.columns) if\n", " i.lower().__contains__('bopo')].pop() \n", " ge_cast_header = [i for i in list(df_nutrient.columns) if\n", " i.lower().__contains__('cast')].pop()\n", " btl_cast_header = [i for i in list(df_btl.columns) if\n", " i.lower().__contains__('cast')].pop()\n", " ge_jointField = [ge_time_header, ge_station_header, ge_bottle_header, ge_cast_header]\n", " btl_jointField = [btl_time_header, btl_station_header, btl_bottle_header, btl_cast_header]\n", " \n", " #####################################\n", " df_nutrient_header = [h.split(' ')[0] for h in df_nutrient.columns]\n", " df_nutrient.columns = df_nutrient_header\n", " dfnutrient_to_merge = df_nutrient\n", " dfbtl_to_merge = df_btl\n", " dfbtl_to_merge = dfbtl_to_merge.drop(btl_time_header, axis=1)\n", " dfnutrient_to_merge = dfnutrient_to_merge.drop(ge_time_header, axis=1)\n", " dfnutrient_to_merge = dfnutrient_to_merge.drop(ge_station_header, axis=1)\n", " dfnutrient_to_merge[ge_time_header] = pd.to_datetime(df_nutrient[ge_time_header]).dt.strftime('%Y-%m-%d')\n", " # dfnutrient_to_merge[ge_time_header] = df_nutrient[ge_time_header].dt.strftime('%Y-%m-%d')\n", " u = []\n", " for i in df_nutrient[ge_station_header].values:\n", " if i.isdigit():\n", " u.append('G' + i)\n", " else:\n", " u.append(i)\n", " dfnutrient_to_merge[ge_station_header] = u\n", " hdf_time_units = \"days since 1970-01-01 00:00:00\"\n", " list_tmp = []\n", " import cftime\n", " for i in range(len(df_btl[btl_time_header])):\n", " u=num2date(df_btl[btl_time_header].values[i], hdf_time_units)\n", " u=cftime.DatetimeGregorian.strftime(u, '%Y-%m-%d')\n", " list_tmp.append(u)\n", " dfbtl_to_merge[btl_time_header] = list_tmp\n", " '''https://www.datasciencemadesimple.com/join-merge-data-frames-pandas-python/'''\n", " df = pd.merge(dfnutrient_to_merge, dfbtl_to_merge, how=\"inner\", left_on=ge_jointField, right_on=btl_jointField)\n", " df.to_csv(file_ge_btl, header=1, index=0)\n", " del df\n", " return 0\n", "\n", "\n", "class merging_gui_jupiter():\n", " # This is the class where the GUI is made\n", " def __init__(self):\n", " self.gridwindow={} # making an empty grid window\n", " self.vbox_widgets = [] # making an empty vertical box\n", " self.gridwindow['grid'] = widgets.GridspecLayout(1, 1)\n", " #####\n", " self.getBTLbutton = widgets.Button(description=\"retrieve\", layout=widgets.Layout(width='max-content'), button_style='info')\n", " self.getNutrientbutton = widgets.Button(description=\"retrieve\", layout=widgets.Layout(width='max-content'), button_style='info')\n", " self.Continuebutton = widgets.Button(description=\"Continue\", layout=widgets.Layout(width='max-content'))\n", " # BTL file retrieval\n", " layout = widgets.Layout(height='auto', width='125px')\n", " printmd('Merging bottle file with the nutrient file
')\n", " # read text\n", " f=open(\"md_texts/nutrient_btl_infos.md\",\"r\")\n", " fc=f.read()\n", " f.close()\n", " text_html1 = markdown.markdown(fc)\n", " del fc\n", " self.gridwindow['text1'] = widgets.HTML(text_html1)\n", "# # Reading the images of the CTD-Rosette ##########\n", " ctd_img = open(\"images/ctd-rosette.jpg\", \"rb\")\n", " ship_img = open(\"images/CCGSAmundsen.png\", 'rb')\n", " ctd = ctd_img.read()\n", " ship = ship_img.read()\n", " gridimage1 = widgets.Image(value=ship, format='jpg', width=300)\n", " Figure1 = widgets.Label(r'\\(\\textbf{Figure 1:}\\)'+' Canadian Coast Guard Ship ' + r'\\(\\textit{Amundsen}\\)', layout=widgets.Layout(height='auto', width='auto'))\n", " gridimage2 = widgets.Image(value=ctd, format='png', width=300)\n", " Figure2 = widgets.Label(r'\\(\\textbf{Figure 2:}\\)'+' CTD-Rosette', layout=widgets.Layout(height='auto', width='auto'))\n", " ship_img.close()\n", " ctd_img.close()\n", " image_vbox1 = widgets.VBox(children=[gridimage1, Figure1])\n", " image_vbox2 = widgets.VBox(children=[gridimage2, Figure2])\n", " self.gridwindow['image'] = widgets.HBox(children=[image_vbox1, image_vbox2])\n", "# # image_vbox = [[gridimage1, gridimage2], ['CCGS Amundsen', 'CTD-Rosette']]\n", "# gridwindow['image'] = widgets.HBox(children=[gridimage2, gridimage1])\n", " ###################################################\n", " self.vbox_widgets.append(self.gridwindow['text1'])\n", " self.vbox_widgets.append(self.gridwindow['image'])\n", " \n", " # read text\n", " f=open(\"md_texts/data_retrieval.md\",\"r\")\n", " fc=f.read()\n", " f.close()\n", " data_retrieval = markdown.markdown(fc)\n", " del fc\n", " self.gridwindow['data_retrieval'] = widgets.HTML(data_retrieval)\n", " self.vbox_widgets.append(self.gridwindow['data_retrieval'])\n", " ## Bottle file retrieval ######\n", " label = widgets.Label('Bottle files', layout=layout)\n", " self.BottleData = widgets.Text(\n", " value=\"http://jorvik.uwaterloo.ca:8080/opendap/data/CCADI/Amundsen_BTL_GreenEdge2016_LEG1.h5\",\n", " layout=widgets.Layout(width='50%')\n", " )\n", " self.gridwindow['bottle'] = widgets.HBox(children=[label, self.BottleData, self.getBTLbutton])\n", " self.vbox_widgets.append(self.gridwindow['bottle'])\n", " label = widgets.Label('Nutrient file', layout=layout)\n", " self.nutrientServer = widgets.Text(\n", " value=\"https://CanWINerddap.ad.umanitoba.ca/erddap\",\n", " layout=widgets.Layout(width='50%')\n", " )\n", " self.gridwindow['nutrientserver'] = widgets.HBox(children=[label, self.nutrientServer, self.getNutrientbutton])\n", " self.vbox_widgets.append(self.gridwindow['nutrientserver'])\n", " \n", " self.list0 = widgets.SelectMultiple(\n", " options=[\"Empty\"],\n", " value=[\"Empty\"],\n", " disabled=False\n", " )\n", "\n", " self.list1 = widgets.SelectMultiple(\n", " options=[\"Empty\"],\n", " value=[\"Empty\"],\n", " disabled=False\n", " )\n", " \n", " self.depthRange = widgets.FloatRangeSlider(\n", " value=[0, 0],\n", " min=0,\n", " max=5000,\n", " step=0.1,\n", " disabled=False,\n", " continuous_update=False,\n", " orientation='horizontal',\n", " readout=True,\n", " readout_format='.1f',\n", " )\n", " \n", " self.outputdir = \"2016_int_btl_csv\"\n", " self.gridwindow['grid'][0, 0] = widgets.VBox(children=self.vbox_widgets) # pass all the content of the vertical box into the left side of the grid\n", " \n", " self.getBTLbutton.on_click(self.getBTLdata)\n", " self.getNutrientbutton.on_click(self.getNutrientdata)\n", " display(self.gridwindow['grid'])\n", " self.Merge_Button=widgets.Button(\n", " 
description='Merge',\n", "            disabled=False,\n", "            button_style='', \n", "            tooltip='Click me',\n", "            icon=''\n", "        )\n", "        self.merge_btl_nutrient()\n", "        \n", "        \n", "    ###########\n", "    \n", "    def continue_to_pyco2sys(self): \n", "        def on_button_pyco2sys(b):\n", "            continueprocess()\n", "\n", "        outmerge=widgets.Output()\n", "        @outmerge.capture()\n", "        def continueprocess():\n", "            checkInputfile()\n", "#             self.continue_to_sic()\n", "            \n", "        gridwindow={}\n", "        vbox_widgets = []\n", "        gridwindow['grid'] = widgets.GridspecLayout(1,1)\n", "        layout = widgets.Layout(height='auto', width='125px')\n", "        f=open(\"md_texts/variable_Join_list.md\",\"r\")\n", "        fc=f.read()\n", "        f.close()\n", "        text_var_sel = markdown.markdown(fc)\n", "        del fc\n", "        gridwindow['variable_selection'] = widgets.HTML(text_var_sel)\n", "        vbox_widgets.append(gridwindow['variable_selection'])\n", "        label_btl = widgets.Label('Bottle variables', layout=widgets.Layout(width='50%'))\n", "        label_nutrient = widgets.Label('Nutrient variables', layout=widgets.Layout(width='50%'))\n", "        gridwindow['bottle variable list'] = widgets.HBox(children=[label_btl, self.list0])\n", "\n", "        gridwindow['nutrient variable list'] = widgets.HBox(children=[label_nutrient, self.list1])\n", "        gridwindow['var_list'] = widgets.HBox(children=[gridwindow['bottle variable list'], gridwindow['nutrient variable list']])\n", "        vbox_widgets.append(gridwindow['var_list'])\n", "        label = widgets.Label('Sample depth:', layout=layout)\n", "        # read text\n", "        f=open(\"md_texts/variable_meaning.md\",\"r\")\n", "        fc=f.read()\n", "        f.close()\n", "        text_var = markdown.markdown(fc)\n", "        del fc\n", "        gridwindow['variable_meaning'] = widgets.HTML(text_var)\n", "        # read text\n", "        f=open(\"md_texts/sample_depth_range.md\",\"r\")\n", "        fc=f.read()\n", "        f.close()\n", "        text_var_sel = markdown.markdown(fc)\n", "        del fc\n", "        gridwindow['sample_depth_range'] = widgets.HTML(text_var_sel)\n", "        vbox_widgets.append(gridwindow['sample_depth_range'])\n", "        gridwindow['Sample depth'] = widgets.HBox(children=[label, self.depthRange])\n", "        vbox_widgets.append(gridwindow['Sample depth'])\n", "\n", "        self.btl = pd.DataFrame()\n", "        self.nutrient=pd.DataFrame()\n", "        \n", "        continue_button1=widgets.Button(\n", "            description='Continue',\n", "            disabled=False,\n", "            button_style='', \n", "            tooltip='Click me',\n", "            icon=''\n", "        )\n", "        gridwindow['merge'] = widgets.HBox(children=[self.Merge_Button])\n", "        gridwindow['to_pyco2sys'] = widgets.HBox(children=[gridwindow['merge'], continue_button1])\n", "        vbox_widgets.append(gridwindow['to_pyco2sys'])\n", "        gridwindow['grid'][0, 0] = widgets.VBox(children=vbox_widgets)\n", "        self.Merge_Button.on_click(self.clickMerge)\n", "        continue_button1.on_click(on_button_pyco2sys)\n", "        display(gridwindow['grid'])\n", "        display(outmerge)\n", "        return 0 \n", "\n", "\n", "    \n", "    def merge_btl_nutrient(self): \n", "        def on_button_continuemerge(b):\n", "            continuemerge()\n", "\n", "        out=widgets.Output()\n", "        @out.capture()\n", "        def continuemerge(): \n", "            ###### transition to PyCO2SYS #####\n", "            self.continue_to_pyco2sys()\n", "            \n", "\n", "        self.continue_button=widgets.Button(\n", "            description='continue',\n", "            disabled=False,\n", "            button_style='', \n", "            tooltip='Click me',\n", "            icon=''\n", "        )\n", "        self.gridwindow['continue'] = widgets.HBox(children=[self.continue_button])\n", "        self.vbox_widgets.append(self.gridwindow['continue'])\n", "        self.gridwindow['grid'][0, 0] = widgets.VBox(children=self.vbox_widgets)\n", "        self.continue_button.on_click(on_button_continuemerge)\n", "        
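Every stage of this GUI advances the same way: an Output widget captures whatever the next stage displays, and a button's on_click triggers it. The idiom in isolation (a sketch):

import ipywidgets as widgets
from IPython.display import display

out = widgets.Output()

@out.capture()
def next_stage():
    print('widgets for the next stage render here')  # stand-in for the real content

btn = widgets.Button(description='Continue')
btn.on_click(lambda b: next_stage())
display(btn, out)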
display(out)\n", "\n", " def getBTLdata(self,a):\n", " if not os.path.exists(self.outputdir):\n", " os.makedirs(self.outputdir)\n", " merge_btl_nutrient().get_btlfile(self.BottleData.value, self.outputdir)\n", " df_btl=merge_btl_nutrient().btl_to_dataframe(self.outputdir)\n", " self.list0.options=tuple(df_btl.columns)\n", " self.list0.value=[\"BOPO\",\"CTDTmp90\",\"Cast_Number\",\"P_sal_CTD\",\"Pres_Z\",\"depth\",\"latitude\",\"longitude\",\"time\",\"station\"]\n", " self.list0.rows = 24\n", " self.depthRange.min = df_btl['depth'].min()\n", " self.depthRange.max = df_btl['depth'].max()\n", " self.depthRange.value = [self.depthRange.min, self.depthRange.max]\n", " self.getBTLbutton.description=\"Success!\"\n", " self.getBTLbutton.button_style='success'\n", " del df_btl\n", " return 0\n", " \n", " def getNutrientdata(self, a):\n", " if not os.path.exists(self.outputdir):\n", " os.makedirs(self.outputdir)\n", " e_DataSearch = ERDDAP(server=self.nutrientServer.value)\n", " result_search = e_DataSearch.get_search_url(search_for=\"greenedge\", response=\"csv\")\n", " self.datasetID = [k \n", " for k in pd.read_csv(result_search)[\"Dataset ID\"] \n", " if k.lower().__contains__(\"greenedge_nutrient\")].pop()\n", " #self.datasetID = pd.read_csv(result_search)[\"Dataset ID\"][0]\n", " #print(result_search)\n", "\n", " e_datafetch = ERDDAP(server=self.nutrientServer.value, protocol=\"tabledap\", response=\"csv\")\n", " e_datafetch.dataset_id = self.datasetID\n", "\n", " df_nutrient = e_datafetch.to_pandas(parse_dates=True)\n", " file_ge = os.path.join(self.outputdir, f'{self.datasetID}.csv') ## Nutrient file name \n", " df_nutrient.to_csv(file_ge, index=False, header=True)\n", " self.list1.options=df_nutrient.columns\n", " \"\"\" Adjustment done in order to look easily for the variables needed in the data field\"\"\"\n", " station = [k for k in df_nutrient.columns if k.lower().__contains__(\"station\")].pop()\n", " sample_date = [k for k in df_nutrient.columns if k.lower().__contains__(\"sample_date\")].pop()\n", " #sample_date = sample_date.split(\" \")[0]\n", " sample_depth = [k for k in df_nutrient.columns if k.lower().__contains__(\"sample_depth\")].pop()\n", " #sample_depth = sample_depth.split(\" \")[0]\n", " cast = [k for k in df_nutrient.columns if k.lower().__contains__(\"cast\")].pop()\n", " bottle = [k for k in df_nutrient.columns if k.lower().__contains__(\"bottle\")].pop()\n", " dic_um = [k for k in df_nutrient.columns if k.lower().__contains__(\"dic_um\")].pop()\n", " #dic_um = dic_um.split(\" \")[0]\n", " totalk = [k for k in df_nutrient.columns if k.lower().__contains__(\"totalk_l_um\")].pop()\n", " #totalk = totalk.split(\" \")[0]\n", " self.list1.value=[station, sample_date, sample_depth, cast, bottle,dic_um, totalk]\n", " self.list1.rows = 24\n", " self.getNutrientbutton.description=\"Success!\"\n", " self.getNutrientbutton.button_style='success'\n", " del df_nutrient\n", " return 0\n", "\n", " \n", " def clickMerge(self, a):\n", " file_ge_btl = os.path.join(self.outputdir, 'merged_btl_nutrient.csv') ## Merged file name to be fed to the PyCO2SYS\n", " objectsForMerging = merge_btl_nutrient()\n", " if os.path.exists(file_ge_btl):\n", " os.remove(file_ge_btl)\n", " df_btl = objectsForMerging.btl_to_dataframe(self.outputdir)\n", " df_btl.reset_index(drop=True, inplace=True)\n", " cond = (df_btl[\"depth\"].values[:]>=self.depthRange.value[0]) & (df_btl[\"depth\"].values[:]<=self.depthRange.value[1])\n", " df_btl = df_btl.loc[cond]\n", " df_nutrient = 
pd.read_csv(os.path.join(self.outputdir, f'{self.datasetID}.csv'), header=0) \n", "            objectsForMerging.merge(df_nutrient=df_nutrient[list(self.list1.value)], df_btl=df_btl[list(self.list0.value)], file_ge_btl=file_ge_btl)\n", "            del df_nutrient, df_btl\n", "        else:\n", "            df_btl = objectsForMerging.btl_to_dataframe(self.outputdir)\n", "            df_btl.reset_index(drop=True, inplace=True)\n", "            cond = (df_btl[\"depth\"].values[:]>=self.depthRange.value[0]) & (df_btl[\"depth\"].values[:]<=self.depthRange.value[1])\n", "            df_btl = df_btl.loc[cond]\n", "            df_nutrient = pd.read_csv(os.path.join(self.outputdir, f'{self.datasetID}.csv'), header=0) \n", "            objectsForMerging.merge(df_nutrient=df_nutrient[list(self.list1.value)], df_btl=df_btl[list(self.list0.value)], file_ge_btl=file_ge_btl)\n", "            del df_nutrient, df_btl\n", "        self.Merge_Button.description=\"Done\"\n", "        self.Merge_Button.button_style=\"success\"\n", "        return 0\n", "\n"]}, {"cell_type": "code", "execution_count": 3, "id": "fde05a6e-c6d1-451e-96e2-055ef4fade3d", "metadata": {}, "outputs": [], "source": ["#Define the required parameters\n", "\n", "def checkInputfile():\n", "    #Create an empty list for all the required parameters in the file. \n", "    req_param_inFile=[]\n", "    opt_param_inFile=[]\n", "    \n", "    pd.set_option('display.max_columns', 50)\n", "    \n", "    #-------------------INPUT FILE-------------------------------------------\n", "    input_file =os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient.csv\") \n", "    df = pd.read_csv(input_file)\n", "    df=df.reset_index(drop=True)\n", "    #------------------------------------------------------------------------\n", "    \n", "    #Checking for the standardized names in the input file to automatically pull out all the required, optional and mandatory parameters present\n", "    \n", "    # 1. KEY PARAMETERS \n", "    # Parameters- Total Alkalinity, DIC, PH, PCO2, fCO2, CO232, biCO2\n", "    \n", "    standardizedNames=['TotAlk_l_um_l','DIC_um_l','pH','pCO2','fCO2','CO232','biCO2'] #BODC standardized names\n", "    fullNames=['Total alkalinity (umolkg1)','Dissolved inorganic carbon (umolkg1)','pH','Partial pressure of carbon dioxide (pCO2) (uatm)',\\\n", "               'Fugacity of carbon dioxide (fCo2) (uatm)','Carbonate ion concentration (CO32) (umolkg1)','Bicarbonate ion (umol kg1)'] # Full names that will show up in the widget, one per standardized name\n", "    \n", "    \n", "    for name, fname in zip(standardizedNames, fullNames): \n", "        if name in df.columns:\n", "            req_param_inFile.append(fname) #Append the names of all the key parameters in the input file\n", "    \n", "    \n", "    # 2. OPTIONAL PARAMETERS \n", "    # Parameters- SiOx, PO4, Ammonia, Sulfide\n", "    \n", "    standardizedNames=['SiOx_um_l','PO4_Filt_um_l','Ammonia','Sulfide'] #BODC standardized names\n", "    fullNames=['Total Silicate (umolkg1)','Total Phosphate (SRP) (umolkg1)','Total Ammonia (umolkg1)','Total Sulfide (umolkg1)'] # Full name that will show up in widget\n", "    \n", "    for name, fname in zip(standardizedNames, fullNames): \n", "        if name in df.columns:\n", "            opt_param_inFile.append(fname) #Append the names of all the optional parameters in the input file\n", "    \n", "    \n", "    getUserParameters(df, req_param_inFile, opt_param_inFile)  \n"]}, {"cell_type": "code", "execution_count": 4, "id": "85597719-a3eb-4f3f-928a-7eea387ba0c9", "metadata": {}, "outputs": [], "source": ["def getUserParameters(df,req_param_inFile,opt_param_inFile):\n", "\n", "    #The user will be presented with the parameters automatically pulled out from the input file. They will have a chance to make changes to the selections. 
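For readers tracing getNutrientdata in the merging GUI above: the erddapy round trip, reduced to a sketch (the notebook keeps dataset IDs containing 'greenedge_nutrient'):

import pandas as pd
from erddapy import ERDDAP

server = 'https://CanWINerddap.ad.umanitoba.ca/erddap'
search_url = ERDDAP(server=server).get_search_url(search_for='greenedge', response='csv')
dataset_id = [k for k in pd.read_csv(search_url)['Dataset ID']
              if 'greenedge_nutrient' in k.lower()].pop()
fetch = ERDDAP(server=server, protocol='tabledap', response='csv')
fetch.dataset_id = dataset_id
df_nutrient = fetch.to_pandas(parse_dates=True)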
\n", " #Only exception is if there are only two req parameters in input file, they will not be able to make any changes/selctions in this case.\n", " \n", " #Key Parameters Widget\n", " req_param_user=widgets.SelectMultiple(\n", " options=req_param_inFile,\n", " #value=req_param_inFile,\n", " #description='Key Parameters:',\n", " disabled=False,\n", " )\n", " req_param_user.layout.margin='0.5% 0% 5% 0%'\n", " req_param_user.layout.width='20%'\n", " req_param_user.layout.height='70%'\n", "\n", "\n", " #Optional Parameters Widget\n", " opt_param_user=widgets.SelectMultiple(\n", " options=opt_param_inFile,\n", " value=opt_param_inFile,\n", " #description='Optional Parameters:',\n", " disabled=False,\n", " )\n", " opt_param_user.layout.margin='0.5% 0% 3% 0%'\n", " opt_param_user.layout.width='20%'\n", "\n", " cont_button1=widgets.Button(\n", " description='Continue',\n", " disabled=False,\n", " button_style='', \n", " tooltip='Click me',\n", " icon=''\n", " )\n", "\n", " \n", " # Onclick function for the first Continue button widget\n", " output = widgets.Output()\n", " @output.capture()\n", " def on_button_clicked(b):\n", " getConstants(df, req_param_user, opt_param_user, req_param_inFile, opt_param_inFile)\n", " \n", "\n", " # Key parameters, aka carbonate system parameters\n", " printmd('###
Carbonate System Parameters ###') \n", " printmd('More information on these arguments an be found [here](https://pyco2sys.readthedocs.io/en/latest/co2sys_nd/#carbonate-system-parameters).')\n", " #if there is only one or no key parameters in input file\n", " if len(req_param_inFile)<2:\n", " printmd(\"
**There are not enough key parameters for calculation of the full carbonate system. Please check input file and try again.**
\")\n", " sys.exit(-1)\n", " \n", " #If only two key parameters in the input file, automatically use those two\n", " if len(req_param_inFile)==2:\n", " printmd(\"
**The following key carbonate parameters were found in the input file and will be used in calculations.**
\")\n", "\n", " for name in req_param_inFile:\n", " printmd('- {}'.format(name))\n", " \n", " #If there are more than two key parameters in the input file, ask user to select any two\n", " if len(req_param_inFile)>2:\n", " printmd(\"
**The following key carbonate parameters were found in the input file. Choose any two parameters.**
\")\n", " display(req_param_user) #display widget\n", "\n", "\n", " # Optional parameters, aka Nutrients & solutes\n", " printmd('###
Nutrients and other solutes ###') \n", " printmd('More information on these arguments an be found [here](https://pyco2sys.readthedocs.io/en/latest/co2sys_nd/#nutrients-and-other-solutes).')\n", " \n", " # If there is at least one opt parameter in file, display them and ask user to select any of them. All are automatically selected in the widget\n", " if len(opt_param_inFile)>0:\n", " printmd(\"
**The following nutrient parameters are in the input file. Choose any parameter(s).**\")\n", " display(opt_param_user) #display widget\n", " \n", " \n", " display(cont_button1) #display continue button\n", " cont_button1.on_click(on_button_clicked) #Call onclick function\n", " display(output) #display widget ouput when button is clicked\n", " "]}, {"cell_type": "code", "execution_count": 5, "id": "b305cb57-05ea-42b0-927b-ca9e7ea0547b", "metadata": {}, "outputs": [], "source": ["def getConstants(df, req_param_user, opt_param_user, req_param_inFile, opt_param_inFile): \n", " \n", " # Constants\n", " printmd('###
Settings for constants ###') \n", " printmd('More information on these constants an be found [here](https://pyco2sys.readthedocs.io/en/latest/co2sys_nd/#settings). Default constants chosen based on [Jiang et al., 2022](https://www.frontiersin.org/articles/10.3389/fmars.2021.705638/full).')\n", " #Widgets for the different constants\n", " phstr = widgets.Output()\n", " @phstr.capture()\n", " def constStrings1():\n", " printmd(\"
**Choose the pH scale:**\")\n", " \n", " constStrings1()\n", "\n", " option_list=['1. Total',\n", " '2. Seawater',\n", " '3. Free',\n", " '4. NBS, i.e. relative to NBS/NIST reference standards']\n", " \n", " #PH Scale\n", " phscale=widgets.RadioButtons(\n", " options=option_list, \n", " disabled=False,\n", " layout={'width': 'max-content'},\n", " )\n", " phscale.layout.margin='0.5% 1% 3% 0%'\n", " #phscale.layout.width='40%' \n", "\n", "\n", " # Carbonic Acid Dissociation\n", " k1k2str = widgets.Output()\n", " @k1k2str.capture()\n", " def constStrings2():\n", " printmd(\"**Choose the set of equilibrium constant parameterisations to model carbonic acid dissociation:**\")\n", " constStrings2()\n", "\n", " option_list=['1. RRV93 (0 < T < 45 \u00b0C, 5 < S < 45, Total scale, artificial seawater).',\n", " '2. GP89 (\u22121 < T < 40 \u00b0C, 10 < S < 50, Seawater scale, artificial seawater).',\n", " '3. H73a and H73b refit by DM87 (2 < T < 35 \u00b0C, 20 < S < 40, Seawater scale, artificial seawater).',\n", " '4. MCHP73 refit by DM87 (2 < T < 35 \u00b0C, 20 < S < 40, Seawater scale, real seawater).',\n", " '5. H73a, H73b and MCHP73 refit by DM87(2 < T < 35 \u00b0C, 20 < S < 40, Seawater scale, real seawater)',\n", " '6. MCHP73 aka \"GEOSECS\" (2 < T < 35 \u00b0C, 19 < S < 43, NBS scale, real seawater).',\n", " '7. MCHP73 without certain species aka \"Peng\" (2 < T < 35 \u00b0C, 19 < S < 43, NBS scale, real seawater).',\n", " '8. M79 (0 < T < 50 \u00b0C, S = 0, freshwater only).', \n", " '9. CW98 (2 < T < 30 \u00b0C, 0 < S < 40, NBS scale, real estuarine seawater).',\n", " '10. LDK00 (2 < T < 35 \u00b0C, 19 < S < 43, Total scale, real seawater).',\n", " '11. MM02 (0 < T < 45 \u00b0C, 5 < S < 42, Seawater scale, real seawater).',\n", " '12. MPL02 (\u22121.6 < T < 35 \u00b0C, 34 < S < 37, Seawater scale, field measurements).',\n", " '13. MGH06 (0 < T < 50 \u00b0C, 1 < S < 50, Seawater scale, real seawater).',\n", " '14. M10 (0 < T < 50 \u00b0C, 1 < S < 50, Seawater scale, real seawater).',\n", " '15. WMW14 (0 < T < 45 \u00b0C, 0 < S < 45, Seawater scale, real seawater).',\n", " '16. SLH20 (\u22121.67 < T < 31.80 \u00b0C, 30.73 < S < 37.57, Total scale, field measurements).',\n", " '17. SB21 (15 < T < 35 \u00b0C, 19.6 < S < 41, Total scale, real seawater).']\n", " \n", " k1k2=widgets.RadioButtons(\n", " options=option_list, \n", " value='10. LDK00 (2 < T < 35 \u00b0C, 19 < S < 43, Total scale, real seawater).',\n", " #rows=len(option_list),\n", " layout={'width': 'max-content'},\n", " disabled=False,\n", " )\n", " k1k2.layout.margin='0.5% 1% 3% 0%'\n", " #k1k2.layout.width='50%'\n", " #k1k2.layout.height='100%'\n", "\n", " # Bisulfate ion dissociation \n", " kso4str = widgets.Output()\n", " @kso4str.capture()\n", " def constStrings3():\n", " printmd(\"**Choose the equilibrium constant parameterisations to model bisulfate ion dissociation:**\")\n", " constStrings3()\n", "\n", " option_list=['1. D90a: Dickson (1990) J. Chem. Thermodyn.',\n", " '2. KRCB77: Khoo et al. (1977) Anal. Chem.',\n", " '3. WM13: Waters & Millero (2013) Mar. Chem./ WMW14: Waters et al. (2014) Mar. Chem.']\n", " \n", " kso4=widgets.RadioButtons(\n", " options=option_list, \n", " value='1. D90a: Dickson (1990) J. Chem. 
Thermodyn.',\n", " layout={'width': 'max-content'},\n", " #description='Parameter:',\n", " disabled=False,\n", " )\n", " kso4.layout.margin='0.5% 1% 3% 0%'\n", " #kso4.layout.width='40%'\n", "\n", " # Total borate \n", " bostr = widgets.Output()\n", " @bostr.capture()\n", " def constStrings4():\n", " printmd(\"**Choose which boron:salinity relationship to use to estimate total borate:**\")\n", " constStrings4()\n", "\n", " option_list=['1. U74: Uppstr\u00f6m (1974) DeepSea Res.',\n", " '2. LKB10: Lee et al. (2010) Geochim. Cosmochim. Acta']\n", " \n", " bo=widgets.RadioButtons(\n", " options=option_list, \n", " value='2. LKB10: Lee et al. (2010) Geochim. Cosmochim. Acta',\n", " layout={'width': 'max-content'},\n", " disabled=False,\n", " )\n", " bo.layout.margin='0.5% 1% 3% 0%'\n", " # bo.layout.width='40%'\n", " \n", " # hydrogen fluoride dissociation\n", " hfstr = widgets.Output()\n", " @hfstr.capture()\n", " def constStrings5():\n", " printmd(\"**Choose which which equilibrium constant parameterisation to use for hydrogen fluoride dissociation:**\")\n", " constStrings5()\n", "\n", " option_list=['1. DR79: Dickson & Riley (1979) Mar. Chem.',\n", " '2. PF87: Perez & Fraga (1987) Mar. Chem.']\n", " \n", " hf=widgets.RadioButtons(\n", " options=option_list, \n", " value='2. PF87: Perez & Fraga (1987) Mar. Chem.',\n", " layout={'width': 'max-content'},\n", " disabled=False,\n", " )\n", " hf.layout.margin='0.5% 1% 3% 0%'\n", " # hf.layout.width='40%' \n", "\n", " # opt_gas_constant\n", " gcstr = widgets.Output()\n", " @gcstr.capture()\n", " def constStrings6():\n", " printmd(\"**Choose which value to use for the gas constant:**\")\n", " constStrings6()\n", " option_list=['1. DOEv2',\n", " '2. DOEv3',\n", " '3. 2018 CODATA']\n", " \n", " gc=widgets.RadioButtons(\n", " options=option_list, \n", " value='3. 2018 CODATA',\n", " layout={'width': 'max-content'},\n", " disabled=False,\n", " )\n", " gc.layout.margin='0.5% 1% 3% 0%'\n", " # gc.layout.width='40%' \n", " \n", " #Continue button On-click function\n", " output = widgets.Output()\n", " @output.capture()\n", " def on_button_clicked(b): \n", " runPyco2sys(df,req_param_user, opt_param_user, req_param_inFile, opt_param_inFile, phscale, k1k2, kso4, bo, hf,gc)\n", "\n", " # Button widget\n", " button2=widgets.Button(\n", " description='Continue',\n", " disabled=False,\n", " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", " tooltip='Click me',\n", " icon=''\n", " )\n", "\n", " box2 = widgets.VBox([phstr,phscale, k1k2str, k1k2, kso4str, kso4, bostr, bo, hfstr, hf, gcstr,gc, button2])\n", " display(box2)\n", "\n", " button2.on_click(on_button_clicked)\n", " display(output)"]}, {"cell_type": "code", "execution_count": 6, "id": "a5b6eb4f-eaac-4575-becd-701685897e6c", "metadata": {}, "outputs": [], "source": ["def runPyco2sys(df,req_param_user, opt_param_user, req_param_inFile, opt_param_inFile, phscale, k1k2, kso4, bo, hf,gc):\n", "\n", "\n", " #Build the argument list for pyco2sys to ingest\n", " #The argument names are defined in the PyCO2 sys documentation.\n", " kwargs={}\n", " \n", "\n", " #KEY PARAMETERS\n", " \n", " # pyco2sys labels the two key parameters chosen as par 1 and par 2. 'par1check' checks if par1 has yet been assigned, otherwise it sets a key parameter to par 2\n", " par1check=\"False\"\n", " par2check=\"False\"\n", "\n", " \n", " #Check if the parameter was chosen by the user, or if it was automatically selected (only two parameters were in the file). 
In that case the variable would not be in 'req_param_user' from the widget.\n", "    #If the variable is to be used in the calculation, then get the data from the input file (saved as the data frame df)\n", "    #The variable is then added to the arguments for pyco2sys\n", "    \n", "    substrings=['alkalinity','Dissolved inorganic carbon','pH','Partial pressure of carbon dioxide', 'Fugacity of carbon dioxide','Carbonate ion concentration', 'Bicarbonate Ion']\n", "    standardizedNames=['TotAlk_l_um_l','DIC_um_l','pH','pCO2','fCO2','CO232','biCO2']\n", "    types=[1,2,3,4,5,6,7] #These are the different types according to the pyco2sys documentation \n", "    \n", "    \n", "    for name, substr, t in zip(standardizedNames, substrings, types): # Loop through the substrings and the standardized names \n", "        if par1check==\"False\" or par2check==\"False\": #Check if either par1 or par2 is false. We need at least two key parameters (par1 and par2 represent these two parameters)\n", "            if any(substr in string for string in req_param_inFile): #If it is in the input file\n", "                if len(req_param_inFile)==2: #If it is one of only two key variables in the input file (here it would not be in 'req_param_user.value' as it would be automatically selected- no widget used)\n", "                    \n", "                    if par1check==\"True\": \n", "                        kwargs['par2']=df[name].to_numpy(dtype=float) #Get the data using the standardized name from the data frame (input file)\n", "                        kwargs['par2_type']=t\n", "                        par2check=\"True\"\n", "                    else:\n", "                        kwargs['par1']=df[name].to_numpy(dtype=float) \n", "                        kwargs['par1_type']=t\n", "                        par1check=\"True\"\n", "\n", "                elif len(req_param_inFile)>2: # If there are more than two key parameters in the input file\n", "                    if any(substr in string for string in req_param_user.value): #If there are more than two key variables in the file, then check if the user actually selected this variable\n", "                        if par1check==\"True\": \n", "                            kwargs['par2']=df[name].to_numpy(dtype=float) \n", "                            kwargs['par2_type']=t\n", "                            par2check=\"True\"\n", "                        else:\n", "                            kwargs['par1']=df[name].to_numpy(dtype=float) \n", "                            kwargs['par1_type']=t\n", "                            par1check=\"True\"\n", "\n", "\n", "\n", "    \n", "    # OPTIONAL PARAMETERS\n", "    \n", "    substrings_opt=['Silicate','Phosphate','Ammonia','Sulfide' ]\n", "    standardizedNames=['SiOx_um_l','PO4_Filt_um_l','Ammonia','Sulfide']\n", "    pyco2sysNames=['total_silicate','total_phosphate','total_ammonia','total_sulfide'] # Names that pyco2sys expects in the argument list\n", "    \n", "    \n", "    if len(opt_param_inFile)>0: # if there is at least one optional parameter in the input file. 
\n", "        for name, substr, pName in zip(standardizedNames, substrings_opt, pyco2sysNames): \n", "            if any(substr in string for string in opt_param_user.value): # If the optional parameter was chosen by the user\n", "                kwargs[pName]=df[name].to_numpy(dtype=float) # Get the data using the standardized name\n", "            \n", "\n", "    \n", "    # MANDATORY PARAMETERS\n", "\n", "    # Also check if the user wanted to use any other output temperature and pressure in the calculations.\n", "    # If they did not, temp_out and press_out stay 'nan'\n", "    \n", "    #Set the output temperature and pressure to nan\n", "    Temperature_out=float('nan')\n", "    Pressure_out=float('nan')\n", "    \n", "    Temperature=df['CTDTmp90'].to_numpy(dtype=float)\n", "    kwargs['temperature']=Temperature\n", "    #if np.isnan(Temperature_out)==False: \n", "    kwargs['temperature_out']=Temperature_out\n", "\n", "    Pressure=df['Pres_Z'].to_numpy(dtype=float)\n", "    kwargs['pressure']=Pressure  \n", "    #if np.isnan(Pressure_out)==False: \n", "    kwargs['pressure_out']=Pressure_out  \n", "\n", "    Salinity=df['P_sal_CTD'].to_numpy(dtype=float)\n", "    kwargs['salinity']=Salinity  \n", "\n", "\n", "\n", "    #Get the values of the widget selections for the different CONSTANTS. All parameters are selected by default.\n", "    \n", "    k1k2Value=k1k2.value\n", "    kso4Value=kso4.value\n", "    boValue=bo.value\n", "    hfValue=hf.value\n", "    phscaleValue=phscale.value\n", "    gcValue=gc.value\n", "\n", "    kso4Value_temp=kso4Value[:2]\n", "    boValue_temp=boValue[:2]\n", "    hfValue_temp=hfValue[:2]\n", "    k1k2Value_temp=k1k2Value[:2]\n", "    phscaleValue_temp=phscaleValue[:2]\n", "    gcValue_temp=gcValue[:2]\n", "    \n", "    if '.' in kso4Value_temp: \n", "        kso4Value_temp=kso4Value_temp[:1]\n", "    \n", "    if '.' in boValue_temp: \n", "        boValue_temp=boValue_temp[:1]  \n", "\n", "    if '.' in hfValue_temp: \n", "        hfValue_temp=hfValue_temp[:1]  \n", "    \n", "    if '.' in k1k2Value_temp: \n", "        k1k2Value_temp=k1k2Value_temp[:1]\n", "    \n", "    if '.' in phscaleValue_temp: \n", "        phscaleValue_temp=phscaleValue_temp[:1]\n", "    \n", "    if '.' in gcValue_temp:\n", "        gcValue_temp=gcValue_temp[:1]\n", "    \n", "    k1k2Value=int(k1k2Value_temp)\n", "    kso4Value=int(kso4Value_temp)\n", "    boValue=int(boValue_temp)\n", "    hfValue=int(hfValue_temp)\n", "    phscaleValue=int(phscaleValue_temp)\n", "    gcValue=int(gcValue_temp)\n", "    \n", "    #Add them as arguments for pyco2sys\n", "    kwargs['opt_pH_scale']=phscaleValue\n", "    kwargs['opt_k_carbonic']=k1k2Value\n", "    kwargs['opt_k_bisulfate']=kso4Value\n", "    kwargs['opt_total_borate']=boValue\n", "    kwargs['opt_k_fluoride']=hfValue\n", "    kwargs['opt_gas_constant']=gcValue\n", "\n", "\n", "    # Import PyCO2SYS\n", "    import PyCO2SYS as pyco2\n", "\n", "    # Run pyCO2SYS!\n", "    output_dict = pyco2.sys(**kwargs)\n", "\n", "    \n", "    Output(df,output_dict)"]}, {"cell_type": "code", "execution_count": 7, "id": "8fafc9c0-74e1-4966-9e73-1a9f4b9b56c1", "metadata": {}, "outputs": [], "source": ["def Output(df,output_dict):\n", "    \n", "#     ## Added to make the \"continue button\" continue the process into the Sea Ice Concentration retrieval: \n", "    def on_button_sic(b):\n", "        continue_sic()\n", "\n", "    outsic=widgets.Output()\n", "    @outsic.capture()\n", "    def continue_sic():\n", "        ccadi_uc3_mapping() # sea ice concentration processing\n", "\n", "    ###########################################\n", "    \n", "    \n", "    #The dictionary output_dict is uneven. Some elements are just a single int, string or float. 
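The kwargs assembled above feed a single pyco2.sys call. A self-contained miniature with invented values, using total alkalinity (type 1) and DIC (type 2) as the two key parameters:

import PyCO2SYS as pyco2

result = pyco2.sys(
    par1=2300.0, par1_type=1,   # total alkalinity, umol/kg (invented value)
    par2=2100.0, par2_type=2,   # dissolved inorganic carbon, umol/kg (invented value)
    salinity=33.0, temperature=-1.0, pressure=10.0,
    opt_k_carbonic=10,          # LDK00, the notebook's default
    opt_total_borate=2,         # LKB10, the notebook's default
    opt_gas_constant=3,         # 2018 CODATA, the notebook's default
)
print(result['pH_total'], result['saturation_aragonite'])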
The length of 'par1' will always be the longest length (it holds the first of the two key parameters from the input file)\n", " #For the values in the dictionary that are not single values, they are arrays. \n", " #To be able to create even data frames, we need to seperate the values in the array and create a list.\n", " #Otherise, the array is saved as one value for each key, instad of a list of multiple values.\n", " \n", " \n", " longlength=len(output_dict['par1'])\n", " newlist=[]\n", "\n", " for val in output_dict.values():\n", "\n", " #if value is a single integer\"\n", " if isinstance(val, int):\n", " list0=[val] * longlength\n", " newlist.append(list0)\n", "\n", " elif isinstance(val, str):\n", " list1=[val] * longlength\n", " newlist.append(list1)\n", "\n", " elif isinstance(val, float):\n", " list2=[val] * longlength\n", " newlist.append(list2)\n", " else:\n", " arr=val\n", " list3 = arr.tolist()\n", " newlist.append(list3)\n", " \n", " \n", " #Creae a new dict that has keys associated with a list of values, all of the same length. \n", " newdict={}\n", " i=0\n", " for key in output_dict.keys():\n", " newdict[key]=newlist[i]\n", " i=i+1\n", "\n", "\n", " #Create a new dataframe and save as csv.\n", " output_df=pd.DataFrame.from_dict(newdict)\n", "\n", " #Merge this resulting data frame with extra variables from the input file that were not used in calculations\n", " cols_to_use = df.columns.difference(output_df.columns) #variales that are different from those in output file\n", " input_subset=df[cols_to_use]\n", " merged_df = pd.concat([output_df,input_subset] , axis=1) #Merged dataframe\n", "\n", " \n", " # Organize data frame so that specific varibales are at the front\n", " front_metadata=['project name','platform name','Cruise', 'Station','sample date','TIME','latitude','longitude','sample depth','Cast','Bottle']\n", " \n", " #Loop through the list of metadata variables that should be at the front\n", " col_position=-1\n", " for var in front_metadata: \n", " for col in merged_df.columns: # Loop through all the columns in dataframe\n", " stripped_string = re.sub(\"[^0-9a-zA-Z]+\", \" \",col) # Strip the column headers of all non- laphanumeric characters\n", " if var.lower() in stripped_string.lower(): # Check for column name regardless of case\n", " popped_col_data=merged_df.pop(col) # Pop the column from daat frame\n", " col_position=col_position+1 # Find the next front position\n", " merged_df.insert(col_position, col, popped_col_data) # Place variable at position\n", "\n", " front_data=['saturation_aragonite', 'saturation_aragonite_out','saturation_calcite','saturation_calcite_out','pCO2','fCO2','bicarbonate','pH_total']\n", "\n", " #Loop through the list of calculated carbonate chemistry variables that should be at the front\n", " for var in front_data: \n", " for col in merged_df.columns: # Loop through all the columns in dataframe\n", " if col==var: \n", " popped_col_data=merged_df.pop(col) # Pop the column from daat frame\n", " col_position=col_position+1 # Find the next front position\n", " merged_df.insert(col_position, col, popped_col_data) # Place variable at position\n", " \n", " \n", " #Remove par1, par2, par1 and par2 types- added by pyco2, not needed by user \n", " merged_df.pop('par1')\n", " merged_df.pop('par2')\n", " merged_df.pop('par1_type')\n", " merged_df.pop('par2_type')\n", "\n", " #Remove duplicate columns from final data frame\n", " duplicateColumnNames = list()\n", "\n", " for x in range(merged_df.shape[1]): # Iterate over all the columns in 
dataframe\n", " col_name1= merged_df.columns[x] # Select column at xth index.\n", "\n", " for y in range(x + 1, merged_df.shape[1]): # Iterate over all the columns in DataFrame from (x+1)th index till end\n", " col_name2= merged_df.columns[y]\n", " \n", " if col_name1.lower()==col_name2.lower(): # Check if column names are the same regardless of case\n", " duplicateColumnNames.append(col_name1)\n", " continue\n", " \n", " if '.1' in col_name1: # Check if there is a duplicate (same case), pandas will save this with a .1 at the end of the duplicated variable\n", " col_name1_stripped=col_name1.strip('.1') # Remove .1 and check again for equality \n", " if col_name1_stripped.lower()==col_name2.lower():\n", " duplicateColumnNames.append(col_name1)\n", " continue\n", " \n", " if '.1' in col_name2:\n", " col_name2_stripped=col_name2.strip('.1')\n", " if col_name1.lower()==col_name2_stripped.lower():\n", " duplicateColumnNames.append(col_name2)\n", " \n", " merged_df = merged_df.drop(columns=duplicateColumnNames) #Drop all duplicates\n", " merged_df=merged_df.dropna(axis=1,how='all') #Drop all empty columns\n", "\n", " # OUTPUT FILE----------------------------------------------------------------------\n", " if os.path.isfile(os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"))==True: \n", " os.remove(os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"))\n", " merged_df.to_csv(os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"), index=False) \n", " # OUTPUT FILE----------------------------------------------------------------------\n", "\n", " \n", " printmd('**
PCO2sys ran successfully! Output file is saved as merged_btl_nutrient_pyco2sys.csv**')\n", " printmd(\"
**Retrieving ice concentration now...**\")\n", "    \n", "    # add the \"continue button\" on the GUI ######################################\n", "    gridwindow={}\n", "    vbox_widgets = []\n", "    gridwindow['grid'] = widgets.GridspecLayout(1,1)\n", "\n",
"    continue_button2=widgets.Button(\n", "        description='Continue',\n", "        disabled=False,\n", "        button_style='', \n", "        tooltip='Click me',\n", "        icon=''\n", "    )\n", "    gridwindow['to_sic'] = widgets.HBox(children=[continue_button2])\n", "    vbox_widgets.append(gridwindow['to_sic'])\n", "    gridwindow['grid'][0, 0] = widgets.VBox(children=vbox_widgets)\n", "    display(gridwindow['grid'])\n", "    continue_button2.on_click(on_button_sic)\n", "    display(outsic)\n", "    ##################################################\n", "\n", "    "]}, {"cell_type": "code", "execution_count": 8, "id": "ee45f378-f39f-4d89-86e1-5aa09db5cfec", "metadata": {}, "outputs": [], "source": ["L = mlp.Lock()\n", "class addSeaIceConcentration:\n", "    def __init__(self, inputfile):\n", "        self.inputfile = inputfile\n", "\n",
"    def createShapefile(self, df, output_shp, time_header, prglabel):\n", "        ''' This function creates a shapefile from the geographical locations inside the input CSV file.\n", "        The coordinate system used for the output shapefile is the World Geodetic System (WGS) 1984.\n", "        Both Latitude and Longitude are in decimal degrees'''\n", "        prglabel.value = 'Creating the final shapefile...'\n", "        shpfile = os.path.join(os.path.dirname(self.inputfile), output_shp, output_shp + '.shp')\n", "        if not os.path.exists(os.path.join(os.path.dirname(self.inputfile), output_shp)):\n", "            os.makedirs(os.path.join(os.path.dirname(self.inputfile), output_shp))\n", "        driver = ogr.GetDriverByName(\"ESRI Shapefile\")\n", "        if os.path.exists(shpfile):\n", "            driver.DeleteDataSource(shpfile)\n", "        ds = driver.CreateDataSource(shpfile)\n", "        spref = osr.SpatialReference()\n", "        spref.ImportFromEPSG(4326)\n", "        layer = ds.CreateLayer('StationsLocations', spref, ogr.wkbPoint)\n", "        # create fields in the layer\n", "        for c in df.columns:\n", "            u = list(df[c])\n", "            vint = [i for i in u if isinstance(i, int)]\n", "            vfloat = [i for i in u if isinstance(i, float)]\n", "            if c.__contains__(time_header):\n", "                layer.CreateField(ogr.FieldDefn(time_header, ogr.OFTDateTime))\n", "            elif c.__contains__('CIS_dates'):\n", "                layer.CreateField(ogr.FieldDefn('CIS_dates', ogr.OFTDateTime))\n", "            elif c.__contains__('Time'):\n", "                fieldname = ogr.FieldDefn('Time UTC', ogr.OFTString)\n", "                fieldname.SetWidth(20)\n", "                layer.CreateField(fieldname)\n", "            elif len(u) == len(vint):\n", "                df[c] = df[c].astype(float)\n", "                ##############################################\n", "                fieldname = ogr.FieldDefn(c, ogr.OFTInteger)\n", "                fieldname.SetPrecision(0)\n", "                layer.CreateField(fieldname)\n", "            elif len(u) == len(vfloat):\n", "                fieldname = ogr.FieldDefn(c, ogr.OFTReal)\n", "                fieldname.SetPrecision(6)\n", "                layer.CreateField(fieldname)\n", "            else:\n", "                df[c].astype(str)\n", "                fieldname = ogr.FieldDefn(c, ogr.OFTString)\n", "                fieldname.SetWidth(30)\n", "                layer.CreateField(fieldname)\n", "\n",
"        c_fid = 0\n", "        ld = layer.GetLayerDefn()\n", "        lon = [i for i in df.columns if i.__contains__('longitude')].pop()\n", "        lat = [i for i in df.columns if i.__contains__('latitude')].pop()\n", "        for i in df.index:\n", "            # create new point object\n", "            point = ogr.Geometry(ogr.wkbPoint)\n", "            point.AddPoint(float(df[lon].values[i]), float(df[lat].values[i]))\n", "            # create new feature\n", "            featureDfn = layer.GetLayerDefn()\n", "            feature = ogr.Feature(featureDfn)\n", "            feature.SetGeometry(point)\n", "            for k in range(0, df.columns.__len__()):\n", "                fieldName = ld.GetFieldDefn(k).GetName()\n", "                feature.SetField(fieldName, df[df.columns[k]].values[i])\n", "            c_fid += 1\n", "            # add the new feature to the new layer\n", "            layer.CreateFeature(feature)\n", "        del layer, ds, df\n", "        return 0\n", "\n",
"    def extractFromUniBremenAMSR2(self, prm):\n", "        ''' Sea Ice Concentration (SIC) data from Bremen University are downloaded. The SIC located at each geographical\n", "        coordinate indicated in the input csv file is extracted and then added as an extra column to the csv file. '''\n", "        L.acquire()\n", "        ddate = pd.to_datetime(prm[0])\n", "        dlat = prm[1]\n", "        dlon = prm[2]\n", "        q = prm[3]\n", "        m = datetime.strftime(ddate, '%b').lower() # extract the month in lowercase characters\n", "        y = datetime.strftime(ddate, '%Y') # extract the year\n", "        with tempfile.TemporaryDirectory() as tmpDir:\n", "            url = r'https://seaice.uni-bremen.de/data/amsr2/asi_daygrid_swath/n6250/{0}/{1}/Arctic/asi-AMSR2-n6250-{2}-v5.4.tif'.format(\n", "                y, m, datetime.strftime(ddate, '%Y%m%d'))\n", "            if not os.path.exists(os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n", "                                               'asi-AMSR2_{0}.tif'.format(\n", "                                                   datetime.strftime(ddate, '%Y%m%d')))):\n", "                urllib.request.urlretrieve(url, os.path.join(tmpDir, 'asi-AMSR2.tif'))\n", "                if (q == 'Y'):\n", "                    shutil.copy2(os.path.join(tmpDir, 'asi-AMSR2.tif'),\n", "                                 os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n", "                                              'asi-AMSR2_{0}.tif'.format(\n", "                                                  datetime.strftime(ddate, '%Y%m%d'))))\n", "                    src_filename = os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n", "                                                'asi-AMSR2_{0}.tif'.format(\n", "                                                    datetime.strftime(ddate, '%Y%m%d')))\n", "                else:\n", "                    src_filename = os.path.join(tmpDir, 'asi-AMSR2.tif')\n", "            else:\n", "                src_filename = os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n", "                                            'asi-AMSR2_{0}.tif'.format(\n", "                                                datetime.strftime(ddate, '%Y%m%d')))\n", "            try:\n", "                outval = self.pointExtract(src_filename, dlat, dlon)\n", "                prm[4].put(prm[5])\n", "            except Exception:\n", "                outval = np.nan\n", "                prm[4].put(prm[5])\n", "        L.release()\n", "        return ([float(outval), datetime.strftime(ddate.to_pydatetime(), '%Y-%m-%d')])\n", "\n",
"    def extractFromCSI(self, prm):\n", "        '''For each acquisition date in the csv file, differences are calculated between it and each date included\n", "        in the CIS tar files. Then the tar file corresponding to the minimum difference is chosen for the extraction\n", "        of the Sea Ice Concentration. '''\n", "        L.acquire() # this lock lets the processes run separately without writing to the same variable at the same time\n", "        ddate = pd.to_datetime(prm[0]) # Acquisition date from the csv file.\n", "        dlat = prm[1]\n", "        dlon = prm[2]\n", "        fcis = prm[3]\n", "        CISRaster = prm[4]\n", "\n", "        CIS_acquisition_times = [datetime.strptime(i.split('_')[2], '%Y%m%dT%H%MZ').date() for i in fcis]\n", "        wq = np.array(CIS_acquisition_times)\n", "        CIS_acquisition_times = list(np.unique(wq))\n", "        csv_acquisition_time = datetime.strftime(ddate, '%Y%m%dT%H%M%S')\n", "        sample_date = datetime.strptime(csv_acquisition_time, '%Y%m%dT%H%M%S').date()\n", "        dt_abs = [abs(sample_date - each_date) for each_date in CIS_acquisition_times]\n", "        closest_date = CIS_acquisition_times[dt_abs.index(min(dt_abs))]\n", "        outraster = os.path.join(CISRaster, [i for i in fcis if i.__contains__(datetime.strftime(closest_date, '%Y%m%d'))][0][:-4] + '.tif')\n", "        intval = self.pointExtract(outraster, dlat, dlon)\n", "        outval = intval[0][0]\n", "        prm[5].put(prm[6])\n", "        L.release() # this releases the locked process\n", "        return ([float(outval), datetime.strftime(closest_date, '%Y-%m-%d')])\n", "#        return ([float(outval), datetime.strftime(CIS_acquisition_times[s[0][0]], '%Y-%m-%d')])\n", "\n",
"    def selectCISFiles(self, prm):\n", "        '''This function selects the CIS filenames acquired nearest in time to each record within the\n", "        input csv file. The output list of files will be used to download them. '''\n", "        L.acquire()\n", "        ddate = pd.to_datetime(prm[0])\n", "        fcislist = prm[3]\n", "        CIS_acquisition_times = [datetime.strptime(i.split('_')[2], '%Y%m%dT%H%MZ').date() for i in fcislist]\n", "        wq = np.array(CIS_acquisition_times)\n", "        CIS_acquisition_times = list(np.unique(wq))\n", "        u = datetime.strftime(ddate, '%Y%m%dT%H%M%S') # sample date\n", "        sample_date = datetime.strptime(u, '%Y%m%dT%H%M%S').date()\n", "        dt_abs = [abs(sample_date - each_date) for each_date in CIS_acquisition_times]\n", "        closest_date = CIS_acquisition_times[dt_abs.index(min(dt_abs))]\n", "        prm[5].put(prm[6])\n", "        L.release()\n", "        return [i for i in fcislist if i.__contains__(datetime.strftime(closest_date, '%Y%m%d'))][0]\n", "\n",
"    def binaryretrieve(self, j):\n", "        '''The connection to the FTP server of the Canadian Ice Service is done in this function.\n", "        All the spatial coordinates are assumed to be within the region Eastern_Arctic and in the same year (2016)\n", "        as the GreenEdge data. '''\n", "        L.acquire()\n", "        i = j[0]\n", "        shp_for_UC3 = j[1]\n", "        hostname = 'sidads.colorado.edu'\n", "        ftp = FTP(hostname)\n", "        ftp.login(user='anonymous', passwd='')\n", "        ## This should be changed to vary dynamically depending on the region (here: Eastern_Arctic) and the year.\n", "        # The User Guide from the Canadian Ice Service (https://nsidc.org/data/G02171/versions/1?qt-data_set_tabs=3#qt-data_set_tabs) describes all the possible region names\n", "        ftp.cwd('/pub/DATASETS/NOAA/G02171/Eastern_Arctic/2016/') \n", "        if not os.path.exists(os.path.join(shp_for_UC3, i)):\n", "            with open(os.path.join(shp_for_UC3, i), 'wb') as localfile:\n", "                ftp.retrbinary('RETR ' + i, localfile.write, 1024)\n", "        ftp.quit()\n", "        j[2].put(j[0])\n", "        L.release()\n", "        return 0\n", "\n",
"    def fetchTarFromCIS(self, tarfile, shp_for_UC3, CISRaster, prglabel, pr, pStatus):\n", "        ''' This function manages the file retrieval from the CIS and then calls another function to convert the vector\n", "        shapefiles into rasters. '''\n", "\n", "        if not os.path.exists(shp_for_UC3):\n", "            os.makedirs(shp_for_UC3)\n", "        '''For now, we assume all the data in the csv file were acquired in the same year (2016) and from\n", "        the same region (Eastern_Arctic) as the GreenEdge data.'''\n", "        pool = mlp.Pool(processes=mlp.cpu_count()-2)\n", "        m = Manager()\n", "        queue = m.Queue()\n", "        tarfilelist = [[i, shp_for_UC3, queue] for i in tarfile]\n", "        s = pool.map_async(self.binaryretrieve, tarfilelist)\n", "        ##\n", "        while True:\n", "            if s.ready():\n", "                break\n", "            else:\n", "                c1 = int(queue.qsize() * 100 / len(tarfilelist))\n", "                pr.value=c1\n", "                prglabel.value = 'Fetching files from the CIS server...'\n", "                pStatus.value = f'{pr.value}%'\n", "        ##\n", "        del pool, s, queue, m\n", "        # Start a new progressbar for the shapefile conversion\n", "        m = Manager()\n", "        queue = m.Queue()\n", "        pr.value=0\n", "        prglabel.value = 'Converting shapefiles into raster files...'\n", "        pStatus.value = f'{pr.value}%'\n", "        ###################################################################\n", "        for f in tarfile:\n", "            '''Extract the *.shp files from each *.tar archive, \n", "            then point to the extracted file as shp_filename'''\n", "            shutil.unpack_archive(os.path.join(shp_for_UC3, f), shp_for_UC3, f[-3:])\n", "            shp_filename = os.path.join(shp_for_UC3, f[:-4] + '.shp')\n", "            outraster = os.path.join(CISRaster, f[:-4] + '.tif')\n", "            if not os.path.exists(outraster):\n", "                self.makeRasterFromSHP(shp_filename, outraster, 100)\n", "            queue.put(1)\n", "            c1 = int(queue.qsize() * 100 / len(tarfilelist))\n", "            pr.value=c1\n", "            pStatus.value = f'{pr.value}%'\n", "        del m, queue\n", "        return 0\n", "\n", "\n",
"    def makeRasterFromSHP(self, shp_filename, outraster, pxlsize):\n", "        ''' This function handles the transformation of the vector shapefile format into raster format.\n", "        The ogr python binding package is used to read the vector shapefile before its transformation into a raster.\n", "        Here only the field CT, holding the Sea Ice Concentration data, is rasterized.\n", "        If needed, other fields among all of those included in the shapefile can be added as additional bands\n", "        into the output raster. '''\n", "        shpfile = ogr.Open(shp_filename)\n", "        layer = shpfile.GetLayer()\n", "        xmin, xmax, ymin, ymax = layer.GetExtent()\n", "        cols = int((xmax - xmin) / pxlsize)\n", "        rows = int((ymax - ymin) / pxlsize)\n", "        rdrive = gdal.GetDriverByName('GTiff')\n", "        ds = rdrive.Create(outraster, cols, rows, 1, gdal.GDT_Byte)\n", "        ds.SetGeoTransform([xmin, pxlsize, 0, ymax, 0, -pxlsize])\n", "        gdal.RasterizeLayer(ds, [1], layer, options=['ATTRIBUTE=CT'])\n", "        ds.SetProjection(layer.GetSpatialRef().ExportToPrettyWkt())\n", "        ds.GetRasterBand(1).SetNoDataValue(0)\n", "        del ds, rdrive, shpfile, layer\n", "        return 0\n", "\n",
"    def pointExtract(self, src_filename, dlat, dlon):\n", "        ''' The extraction process is handled inside this function.\n", "        The GDAL python binding package is used here to read the raster files needed for the extraction.\n", "        The pyproj package is used to bring the geographical coordinates from the input csv into the spatial\n", "        coordinate system of the rasters, in order to extract the pixel collocated with each csv record. '''\n", "        src_ds = gdal.Open(src_filename)\n", "        gt = src_ds.GetGeoTransform()\n", "        band = src_ds.GetRasterBand(1)\n", "        proj = osr.SpatialReference(wkt=src_ds.GetProjection())\n", "        #####\n", "        wgs84 = pyproj.CRS(\"EPSG:4326\")\n", "        rstProj = pyproj.CRS(proj.ExportToProj4())\n", "        #####\n", "        point = ogr.Geometry(ogr.wkbPoint)\n", "        point.AddPoint(float(dlat), float(dlon)) # to make sure the coordinates are not in string format\n", "        mx, my = pyproj.Transformer.from_proj(wgs84, rstProj).transform(point.GetX(), point.GetY())\n", "        px = int((mx - gt[0]) / gt[1]) # x pixel\n", "        py = int((my - gt[3]) / gt[5]) # y pixel\n", "        intval = band.ReadAsArray(px, py, 1, 1)\n", "        del band, src_ds, point, proj, gt, mx, my, px, py\n", "        return intval\n", "\n",
"    def getCISTarFileList(self, dlist, prglabel, pr, pStatus):\n", "        ''' This function extracts the file list needed for the extraction. It uses the acquisition year\n", "        in the input csv file to locate the same year used in the FTP data endpoint in order to select the\n", "        files to be downloaded. '''\n", "        dl = pd.to_datetime(dlist)\n", "        csv_year = [datetime.strftime(s, '%Y') for s in dl]\n", "        csv_year = np.unique(csv_year)\n", "        fcis_gen = []\n", "        fcis = []\n", "        m = Manager()\n", "        queue = m.Queue()\n", "        for y in csv_year:\n", "            hostname = 'sidads.colorado.edu'\n", "            ftp = FTP(hostname)\n", "            ftp.login(user='anonymous', passwd='')\n", "            ''' There are more regions to be considered. Here the region Eastern_Arctic is selected directly. '''\n", "            # TODO: Find a way to make automatic selection of the region of interest regarding the spatial extent of the coordinates in the csv file used\n", "            ftp.cwd('/pub/DATASETS/NOAA/G02171/Eastern_Arctic/{0}/'.format(y))\n", "            files = ftp.nlst() # This lists all the files within the folder named after the year.\n", "            ftp.quit()\n", "            fcis_gen.append([i for i in files if i.__contains__('cis')])\n", "        for i in fcis_gen:\n", "            fcis = fcis + i\n", "            queue.put(i)\n", "            p = int(queue.qsize()*100/len(fcis_gen))\n", "            pr.value=p\n", "            prglabel.value = 'Building file list...'\n", "            pStatus.value = f'{pr.value}%'\n", "        del fcis_gen, m, queue\n", "        return fcis\n", "\n",
"    def ExtractionPixelValues(self, param_to_extract, q, rstsource, prglabel, pr, pStatus):\n", "        ''' * inputfile: Comma Separated Value (CSV) file with a header containing Date, Latitude, and Longitude.\n", "        * param_to_extract: name of the parameter to be extracted from the raster. This will become the name of\n", "        the new column that will be added to the initial csv file. Here it is about Sea_Ice_Concentration.\n", "        The format of the Date in the csv file should be mm/dd/yyyy.\n", "        The Latitude and Longitude should be in full decimal format and their values are in the range [-180, 180].\n", "\n", "        * The extraction process is done using parallel computing to accelerate it. Parallel processing is\n", "        very useful here as the extraction of the data corresponding to each row is independent of the others.\n", "        '''\n", "\n",
"        outfile = os.path.join(os.path.split(self.inputfile)[0],\n", "                               os.path.split(self.inputfile)[1][:-4] + '_{0}_{1}.csv'.format(param_to_extract,\n", "                                                                                             rstsource))\n", "        if os.path.exists(outfile):\n", "            os.remove(outfile)\n", "        df = pd.read_csv(self.inputfile, header=0, sep=',', parse_dates=True, skiprows=[1])\n", "        time_header=[i for i in df.columns if\n", "                     ((i.__contains__('Date')) or (i.__contains__('date')) or\n", "                      (i.__contains__('Time')) or (i.__contains__('time')))].pop()\n", "        if rstsource == 'BU':\n", "            m = Manager()\n", "            queue = m.Queue()\n", "            lon = [i for i in df.columns if i.__contains__('longitude')].pop()\n", "            lat = [i for i in df.columns if i.__contains__('latitude')].pop()\n", "            p = [[df[time_header][i], df[lat][i], df[lon][i], q, queue, i] for i in df.index]\n", "            if q == 'Y':\n", "                if not os.path.exists(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters')):\n", "                    os.makedirs(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'))\n", "            pool = mlp.Pool(mlp.cpu_count() - 2)\n", "            s = pool.map_async(self.extractFromUniBremenAMSR2, p)\n", "            ##\n", "            while True:\n", "                if s.ready():\n", "                    break\n", "                else:\n", "                    c1=int(queue.qsize()*100/len(p))\n", "                    pr.value=c1 \n", "                    pStatus.value = f'{pr.value}%'\n", "                    print(queue.qsize())\n", "            ##\n", "            a = np.array(s.get())\n", "            del s\n", "            b = np.transpose(a)\n", "            df[param_to_extract] = b[0]\n", "            df[time_header] = b[1]\n", "            del pool, m, queue\n", "        else:\n", "            CISRaster = os.path.join(os.path.dirname(self.inputfile), 'CISraster')\n", "            if not os.path.exists(CISRaster):\n", "                os.makedirs(CISRaster)\n", "            ''' Retrieval from the CIS server of the filename list corresponding to each dataset in the csv file'''\n", "            fcis = self.getCISTarFileList(df[time_header], prglabel, pr, pStatus)\n", "            m = Manager()\n", "            queue = m.Queue()\n", "            pr.value=0\n", "            ''' Selection of each shapefile with the closest acquisition time to each record of the csv file. '''\n", "            lon = [i for i in df.columns if i.__contains__('longitude')].pop()\n", "            lat = [i for i in df.columns if i.__contains__('latitude')].pop()\n", "            p = [[df[time_header][i], df[lat][i], df[lon][i], fcis, CISRaster, queue, i] for i in df.index]\n", "            pool = mlp.Pool(mlp.cpu_count() - 2)\n", "            imglist = pool.map_async(self.selectCISFiles, p)\n", "            prglabel.value = 'Selecting CIS file...'\n", "            while True:\n", "                if imglist.ready():\n", "                    break\n", "                else:\n", "                    c1=int(queue.qsize()*100/len(p))\n", "                    pr.value=c1\n", "                    pStatus.value = f'{pr.value}%'\n", "            imgarray0 = np.array(imglist.get())\n", "            imgarray1 = np.unique(imgarray0)\n", "            imglist0 = list(imgarray1)\n", "            CIS_shp = os.path.join(os.path.dirname(self.inputfile), 'CIS_shp')\n", "            if not os.path.exists(CIS_shp):\n", "                os.makedirs(CIS_shp)\n", "            del imgarray0, imgarray1, pool, m, queue\n", "            '''Fetching the files from the remote server'''\n", "            self.fetchTarFromCIS(imglist0, CIS_shp, CISRaster, prglabel, pr, pStatus)\n", "            pool = mlp.Pool(mlp.cpu_count() - 2)\n", "            m = Manager()\n", "            queue = m.Queue()\n", "            p = [[df[time_header][i], df[lat][i], df[lon][i], fcis, CISRaster, queue, i] for i in df.index]\n", "            s = pool.map_async(self.extractFromCSI, p)\n", "            ##\n", "            while True:\n", "                if s.ready():\n", "                    break\n", "                else:\n", "                    c1=int(queue.qsize()*100/len(p))\n", "                    pr.value=c1\n", "                    prglabel.value = 'Extracting SIC from newly created rasters...'\n", "                    pStatus.value = f'{pr.value}%'\n", "            del imglist0\n", "            ##\n", "            a = np.array(s.get())\n", "            del pool, m, queue\n", "            b = np.transpose(a)\n", "            df[param_to_extract] = b[0]\n", "            df['CIS_dates'] = b[1]\n", "            del s\n", "            if q == 'N':\n", "                shutil.rmtree(os.path.join(os.path.dirname(self.inputfile), 'CISraster'))\n", "                shutil.rmtree(os.path.join(os.path.dirname(self.inputfile), 'CIS_shp'))\n", "        df.to_csv(outfile,\n", "                  sep=',', index=False, header=1)\n", "        output_shp = os.path.split(self.inputfile)[1][:-4] + '_{0}_{1}_SHP'.format(param_to_extract, rstsource) # Name of the output shapefile\n", "        self.createShapefile(df, output_shp, time_header, prglabel)\n", "        prglabel.value = 'Processing Finished!!'\n", "        pStatus.value = f'{pr.value}%'\n", "        output_gpkg = os.path.split(self.inputfile)[1][:-4] + '_{0}_{1}_gpkg'.format(param_to_extract, rstsource) # Name of the output Geopackage (gpkg) file\n", "        if not os.path.exists(os.path.join(os.path.dirname(self.inputfile), output_gpkg)):\n", "            os.makedirs(os.path.join(os.path.dirname(self.inputfile), output_gpkg))\n", "        gpkgfile = os.path.join(os.path.dirname(self.inputfile), output_gpkg, output_gpkg + '.gpkg')\n", "        os.system(f'ogr2ogr -a_srs EPSG:4326 -oo X_POSSIBLE_NAMES=Lon* -oo Y_POSSIBLE_NAMES=Lat* -f \"GPKG\" {gpkgfile} {outfile}') # Creates the gpkg file directly from the output csv\n", "        return output_shp\n", "\n",
"    def getSeaIceSource(self, workDir, r, gLocalCopy, prglabel, pr, pStatus):\n", "        ''' The choice between the Sea Ice data from the Canadian Ice Service and Bremen University is handled\n", "        in this function. Once the choice is made, another function is called to take care of the process of\n", "        extracting Sea Ice Concentration from the chosen data source. 
'''\n", "        if not os.path.exists(self.inputfile):\n", "            print(\"Input file does not exist!\")\n", "            exit(-1)\n", "        if r == 1:\n", "            r = 'BU'\n", "        elif r == 2:\n", "            r = 'CIS'\n", "        else:\n", "            exit(-1)\n", "        if gLocalCopy == 'y':\n", "            gLocalCopy = 'Y'\n", "        elif gLocalCopy == 'n':\n", "            gLocalCopy = 'N'\n", "        t = datetime.now()\n", "        output_shp = self.ExtractionPixelValues('sea_ice_co', gLocalCopy, r, prglabel, pr, pStatus)\n", "        print(datetime.now() - t)\n", "        return output_shp\n", "\n",
"class ccadi_uc3_mapping():\n", "    def __init__(self):\n", "        ## initiate the grid to display the contents of the page ###\n", "        self.gridwindow={}\n", "        self.vbox_widgets = []\n", "        self.gridwindow['grid'] = widgets.GridspecLayout(1,1)\n", "        \n", "        #####\n", "\n", "        # read text\n", "        f=open(\"md_texts/SeaIceConcentration.md\",\"r\")\n", "        fc=f.read()\n", "        f.close()\n", "        text_html1 = markdown.markdown(fc)\n", "        del fc\n", "        self.gridwindow['InfoSIC'] = widgets.HTML(text_html1)\n", "        self.vbox_widgets.append(self.gridwindow['InfoSIC'])\n", "        # Fields\n", "        self.wdField = widgets.Text(\n", "            value=os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"),\n", "            layout=widgets.Layout(width='max-content')\n", "        )\n", "\n",
"        self.chkb1 = widgets.Checkbox(\n", "            value=False,\n", "            description='Canadian Ice Service',\n", "            disabled=False,\n", "            indent=False\n", "        )\n", "        self.chkb2 = widgets.Checkbox(\n", "            value=False,\n", "            description='Bremen University',\n", "            disabled=False,\n", "            indent=False\n", "        )\n", "        self.chkb3 = widgets.Checkbox(\n", "            value=False,\n", "            description='Keep a local copy of the raster images',\n", "            disabled=False,\n", "            indent=False\n", "        )\n", "        self.gridwindow['checkbox'] = widgets.VBox(children=[self.chkb1, self.chkb2, self.chkb3])\n", "        self.vbox_widgets.append(self.gridwindow['checkbox'])\n", "        self.prg = widgets.IntProgress(\n", "            value=0,\n", "            min=0,\n", "            max=100,\n", "            bar_style='success',\n", "            style={'bar_color': 'green'},\n", "            orientation='horizontal',\n", "            layout=widgets.Layout(width='800px')\n", "        )\n", "        self.status = widgets.Label(value=f'{self.prg.value}%', layout=widgets.Layout(width='max-content'))\n", "        self.prg_label = widgets.Label('', layout=widgets.Layout(width='max-content'))\n", "        self.vbox_widgets.append(self.prg_label)\n", "        self.gridwindow['progressbar'] = widgets.HBox(children=[self.prg, self.status])\n", "        self.vbox_widgets.append(self.gridwindow['progressbar'])\n", "        self.okButton = widgets.Button(description=\"OK\")\n", "        \n", "        self.okButton.on_click(self.clickOkbutton)\n", "        #####\n", "        \n", "        self.UC3_mapping()\n", "\n", "\n",
"    def check_checkBox(self):\n", "        c = 0\n", "        value = ''\n", "        q = 'n'\n", "        if self.chkb1.value==True:\n", "            value = self.chkb1.description\n", "            c = 2\n", "        if self.chkb2.value==True:\n", "            value = self.chkb2.description\n", "            c = 1\n", "        if (self.chkb3.value==True):\n", "            q = 'y'\n", "        return c, q\n", "\n", "\n",
"    def clickOkbutton(self, b):\n", "        self.prg.value=0\n", "        self.prg_label.value = 'Processing...'\n", "        workDir = os.path.dirname(self.wdField.value)\n", "        if not os.path.exists(workDir):\n", "            os.makedirs(workDir)\n", "        inputfile = self.wdField.value\n", "        r, q = self.check_checkBox()\n", "        output_shp = addSeaIceConcentration(inputfile).getSeaIceSource(workDir, r, q, self.prg_label, self.prg, self.status)\n", "        \n", "\n",
"    def UC3_mapping(self): \n", "        def on_button_clicked(b):\n", "            showmap()\n", "\n", "        out=widgets.Output()\n", "        @out.capture()\n", "        def showmap():\n", "\n", "            workDir=os.path.join(\"2016_int_btl_csv\")\n", "            shp=os.path.join(workDir, \"merged_btl_nutrient_pyco2sys_sea_ice_co_CIS_SHP\", \"merged_btl_nutrient_pyco2sys_sea_ice_co_CIS_SHP.shp\")\n", "            data_full=gpd.read_file(shp)\n", "            \n", "            # Create a geoid, which Folium needs as a unique identifier for each row\n", "            data_full['geoid'] = data_full.index.astype(str)\n", "\n", "#            dataf_0m=data.loc[np.round(data[\"sample_dep\"].values)==1]\n", "            dataf_10m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==10]\n", "            dataf_20m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==20]\n", "            dataf_30m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==30]\n", "            dataf_40m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==40]\n", "            dataf_50m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==50]\n", "            dataf_60m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==60]\n", "            dataf_70m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==70]\n", "            dataf_80m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==80]\n", "            dataf_90m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==90]\n", "            dataf_100m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==100]\n", "\n", "            ###\n", "\n",
"            lonCent = (data_full.bounds.maxx + data_full.bounds.minx).mean()/2\n", "            latCent = (data_full.bounds.maxy + data_full.bounds.miny).mean()/2\n", "            # creating a map object\n", "            m = leafmap.folium.Map(location=(latCent,lonCent), projections=\"epsg3575\", zoom_start=6)\n", "            #rst = os.path.join(\"2016_int_btl_csv\",\"CISraster\",\"cis_SGRDREA_20160606T1800Z_pl_a.tif\")\n", "\n", "            ###\n", "            # Create the variable plot shown upon click on the stations on the map\n", "            def chart_func(df, st): #new function\n", "                chart_temp = alt.Chart(df).mark_line(color='red').transform_fold(\n", "                    fold=['CTDTmp90', 'sample_dep'], \n", "                    as_=['variable', 'value']).encode(\n", "                    x=alt.X('CTDTmp90:Q', \n", "                            axis=alt.Axis(title='Temperature (\u00b0C)', \n", "                                          titleColor='red'), \n", "                            scale=alt.Scale(domain=[df['CTDTmp90'].min(), \n", "                                                    df['CTDTmp90'].max()])),\n", "                    y=alt.Y('sample_dep:Q',\n", "                            axis=alt.Axis(title='Depth (m)'), \n", "                            scale=alt.Scale(reverse=True, \n", "                                            domain=[0, df['sample_dep'].max()])),\n", "                    color=alt.value('red')\n", "                )\n", "                chart_sal=alt.Chart(df).mark_line(color='green').transform_fold(\n", "                    fold=['P_sal_CTD', 'sample_dep'], \n", "                    as_=['variable', 'value']).encode(\n", "                    x=alt.X('P_sal_CTD:Q', \n", "                            axis=alt.Axis(title='Salinity', \n", "                                          titleColor='green'), \n", "                            scale=alt.Scale(domain=[df['P_sal_CTD'].min(), \n", "                                                    df['P_sal_CTD'].max()])),\n", "                    y=alt.Y('sample_dep:Q', \n", "                            axis=alt.Axis(title='Depth (m)'), \n", "                            scale=alt.Scale(reverse=True, \n", "                                            domain=[0, df['sample_dep'].max()])),\n", "                    color=alt.value('green')\n", "                )\n", "                ufchart=alt.layer(chart_temp, chart_sal, \n", "                                  title=f\"Vertical profile of Salinity and Temperature at Station: {st}\", \n", "                                  width=400, height=400).resolve_scale(x='independent').configure_axisTop(titleColor='green').configure_axisBottom(titleColor='red').resolve_legend(color='independent') \n", "                return ufchart.to_json()\n", "            \n",
"            # extract unique coordinates\n", "            data_full=data_full.round({'latitude':3, 'longitude':3})\n", "            df=data_full[['latitude', 'longitude']].drop_duplicates() # drop all duplicated coordinates and keep the row indexes\n", "            u=[]\n", "            for i in df.index: # use the indexes (kept in the preceding lines) to build a new dataframe from df\n", "                u.append(data_full.values[i])\n",
"            dg=pd.DataFrame(u, columns=data_full.columns)\n", "            data_coord=dg[['station', 'latitude', 'longitude']]\n", "            del dg\n", "            full_profile = leafmap.folium.FeatureGroup(name=\"Full profiles\")\n", "            for i, st in zip(df.index, data_coord['station'].values[:]):\n", "                ds0=data_full[\n", "                    [\n", "                        'sample_dep',\n", "                        'P_sal_CTD', \n", "                        'station', \n", "                        'CTDTmp90', \n", "                        'latitude', \n", "                        'longitude']\n", "                ].loc[\n", "                    data_full[\"station\"].values==st\n", "                ]\n", "                ds2=ds0.dropna().round({\n", "                    \"CTDTmp90\":2, \n", "                    \"P_sal_CTD\":2, \n", "                    'latitude': 3, \n", "                    'longitude':3})\n", "                chart=chart_func(ds2, st)\n", "                pp=leafmap.folium.Popup(max_width=600).add_child(leafmap.folium.VegaLite(chart, width=600))\n", "                full_profile.add_child(leafmap.folium.CircleMarker(\n", "                    location=[data_full['latitude'].values[i], data_full['longitude'].values[i]], radius=6,\n", "                    popup=pp,\n", "                ))\n", "            full_profile.add_to(m)\n", "            \n",
"            # Select only the needed columns \n", "            data_10m = dataf_10m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n", "            \n", "            # Add the data at 10m depth\n", "            leafmap.folium.features.GeoJson(data_10m,\n", "                name='Data at 10m depth',\n", "                style_function=lambda x: {'color':'transparent','fillColor':'transparent','weight':0},\n", "                tooltip=leafmap.folium.features.GeoJsonTooltip(\n", "                    fields=[\n", "                        'P_sal_CTD', \n", "                        'station', \n", "                        'sample_dep', \n", "                        'CTDTmp90'],\n", "                    aliases = [\n", "                        'Practical salinity from CTD', \n", "                        'Station name', \n", "                        'sample depth (m)',\n", "                        'Temperature from CTD (\u00b0C)'\n", "                    ],\n", "                    sticky=False)\n", "            ).add_to(m)\n", "\n", "\n",
"            # Select only the needed columns\n", "            data_20m = dataf_20m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n", "            \n", "            # Add the data at 20m depth\n", "            leafmap.folium.features.GeoJson(data_20m,\n", "                name='Data at 20m depth',\n", "                style_function=lambda x: {'color':'transparent','fillColor':'transparent','weight':0},\n", "                tooltip=leafmap.folium.features.GeoJsonTooltip(\n", "                    fields=[\n", "                        'P_sal_CTD', \n", "                        'station', \n", "                        'sample_dep', \n", "                        'CTDTmp90'],\n", "                    aliases = [\n", "                        'Practical salinity from CTD', \n", "                        'Station name', \n", "                        'sample depth (m)',\n", "                        'Temperature from CTD (\u00b0C)'\n", "                    ],\n", "                    sticky=False)\n", "            ).add_to(m)\n", "            \n",
"            # Select only the needed columns\n", "            data_30m = dataf_30m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n", "            \n", "            # Add the data at 30m depth\n", "            leafmap.folium.features.GeoJson(data_30m,\n", "                name='Data at 30m depth',\n", "                style_function=lambda x: {\n", "                    'color':'transparent',\n", "                    'fillColor':'transparent',\n", "                    'weight':0\n", "                },\n", "                tooltip=leafmap.folium.features.GeoJsonTooltip(\n", "                    fields=[\n", "                        'P_sal_CTD', \n", "                        'station', \n", "                        'sample_dep', \n", "                        'CTDTmp90'],\n", "                    aliases = [\n", "                        'Practical salinity from CTD', \n", "                        'Station name', \n", "                        'sample depth (m)',\n", "                        'Temperature from CTD (\u00b0C)'\n", "                    ],\n", "                    sticky=False)\n", "            ).add_to(m)\n", "\n",
"            # Select only the needed columns\n", "            data_40m = dataf_40m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n", "            \n", "            # Add the data at 40m depth\n", "            leafmap.folium.features.GeoJson(data_40m,\n", "                name='Data at 40m depth',\n", "                style_function=lambda x: {\n", "                    'color':'transparent',\n", "                    'fillColor':'transparent',\n", "                    'weight':0\n", "                },\n", "                tooltip=leafmap.folium.features.GeoJsonTooltip(\n", "                    fields=[\n", "                        'P_sal_CTD', \n", "                        'station', \n", "                        'sample_dep', \n", "                        'CTDTmp90'],\n", "                    aliases = [\n", "                        'Practical salinity from CTD', \n", "                        'Station name', \n", "                        'sample depth (m)',\n", "                        'Temperature from CTD (\u00b0C)'\n", "                    ],\n", "                    sticky=False)\n", "            ).add_to(m)\n",
"            # Select only the needed columns\n", "            data_50m = dataf_50m[['geoid', 'P_sal_CTD', 'station', \n", "                                  'sample_dep', 'CTDTmp90', 'geometry', \n", "                                  'latitude', 'longitude']]\n", "            \n", "            # Add the data at 50m depth\n", "            leafmap.folium.features.GeoJson(data_50m,\n", "                name='Data at 50m depth',\n", "                style_function=lambda x: {\n", "                    'color':'transparent',\n", "                    'fillColor':'transparent',\n", "                    'weight':0\n", "                },\n", "                tooltip=leafmap.folium.features.GeoJsonTooltip(\n", "                    fields=[\n", "                        'P_sal_CTD', \n", "                        'station', \n", "                        'sample_dep', \n", "                        'CTDTmp90'],\n", "                    aliases = [\n", "                        'Practical salinity from CTD', \n", "                        'Station name', \n", "                        'sample depth (m)',\n", "                        'Temperature from CTD (\u00b0C)'\n", "                    ],\n", "                    sticky=False)\n", "            ).add_to(m)\n", "\n",
"# ######################################################################################################################\n", "\n", "            leafmap.folium.LayerControl().add_to(m)\n", "            display(m)\n", "        \n", "        self.showmap_button=widgets.Button(\n", "            description='Show Map',\n", "            disabled=False,\n", "            button_style='', \n", "            tooltip='Click me',\n", "            icon=''\n", "        )\n", "        self.gridwindow['ok_and_continue'] = widgets.HBox(children=[self.okButton, self.showmap_button])\n", "        self.vbox_widgets.append(self.gridwindow['ok_and_continue'])\n", "        \n", "        self.gridwindow['grid'][0, 0] = widgets.VBox(children=self.vbox_widgets) #\n", "        \n", "        self.accordion0 = widgets.Accordion(\n", "            children=[widgets.HBox(children = [self.gridwindow['grid'][0, 0]])]\n", "        )\n", "        self.accordion0.set_title(0, 'Adding Sea Ice Concentrations into the combined BTL_Nutrient file.')\n", "        display(self.accordion0)\n", "\n", "        self.showmap_button.on_click(on_button_clicked)\n", "        display(out)\n", "    \n", "\n", "\n", "    "]}, {"cell_type": "code", "execution_count": 9, "id": "3c73708d-556c-49d8-add3-61098022648b", "metadata": {}, "outputs": [{"data": {"text/markdown": ["Merging bottle file with the nutrient file
"], "text/plain": [""]}, "metadata": {}, "output_type": "display_data"}, {"ename": "FileNotFoundError", "evalue": "[Errno 2] No such file or directory: 'md_texts/nutrient_btl_infos.md'", "output_type": "error", "traceback": ["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn [9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmerging_gui_jupiter\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn [2], line 96\u001b[0m, in \u001b[0;36mmerging_gui_jupiter.__init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 94\u001b[0m printmd(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMerging bottle file with the nutrient file
\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 95\u001b[0m \u001b[38;5;66;03m# read text\u001b[39;00m\n\u001b[0;32m---> 96\u001b[0m f\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmd_texts/nutrient_btl_infos.md\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 97\u001b[0m fc\u001b[38;5;241m=\u001b[39mf\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 98\u001b[0m f\u001b[38;5;241m.\u001b[39mclose()\n", "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'md_texts/nutrient_btl_infos.md'"]}], "source": ["merging_gui_jupiter()"]}], "metadata": {"kernelspec": {"display_name": "Python [conda env:edc-default-2022.10-14]", "language": "python", "name": "conda-env-edc-default-2022.10-14-py"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13"}, "widgets": {"application/vnd.jupyter.widget-state+json": {"state": {}, "version_major": 2, "version_minor": 0}}, "properties": {"id": "c250de70-680e-43e1-a62f-07f420e9a180", "license": null, "name": "UC3 Ocean Acidification Notebook", "requirements": [], "tags": ["Jupyter", "Polar"], "tosAgree": true, "type": "Jupyter Notebook", "version": "0.0.1", "description": "Ocean acidification in Baffin Bay", "authors": [{"id": "39a21097-0c47-4efa-afb8-21161b20e5ff", "name": "yanique.campbell@umanitoba.ca"}]}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
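Note on the PyCO2SYS step in the notebook above: once the widget callbacks have filled kwargs, the whole carbonate-system computation reduces to the single pyco2.sys(**kwargs) call. A minimal, self-contained sketch of such a call, assuming total alkalinity and DIC as the two measured parameters (the numeric values are illustrative only, not GreenEdge data):

    import PyCO2SYS as pyco2

    output_dict = pyco2.sys(
        par1=2300.0, par1_type=1,  # total alkalinity, umol/kg (illustrative)
        par2=2100.0, par2_type=2,  # dissolved inorganic carbon, umol/kg (illustrative)
        salinity=33.0, temperature=1.5, pressure=10.0,
        opt_k_carbonic=10, opt_k_bisulfate=1, opt_total_borate=1,
        opt_k_fluoride=1, opt_gas_constant=3,
    )
    print(output_dict['pH_total'], output_dict['saturation_aragonite'])

The returned dictionary mixes scalars with per-sample arrays, which is why the Output() function expands every scalar to the length of 'par1' before building the merged data frame.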
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "f838aabf",
+ "metadata": {
+ "tags": [
+ "papermill-error-cell-tag"
+ ]
+ },
+ "source": [
+ "An Exception was encountered at 'In [2]'."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "a386f33c",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2022-10-25T15:13:46.725702Z",
+ "iopub.status.busy": "2022-10-25T15:13:46.724959Z",
+ "iopub.status.idle": "2022-10-25T15:13:46.798800Z",
+ "shell.execute_reply": "2022-10-25T15:13:46.798045Z"
+ },
+ "papermill": {
+ "duration": 0.085581,
+ "end_time": "2022-10-25T15:13:46.802172",
+ "exception": false,
+ "start_time": "2022-10-25T15:13:46.716591",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "This notebook is compatible with this base image version (user-2022.10-14)."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from eoxhub import check_compatibility\n",
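+    "# check that the kernel's base image matches the one this notebook was built against\n",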
+ "check_compatibility(\"user-2022.10-14\", dependencies=[])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "203291f0",
+ "metadata": {
+ "tags": [
+ "papermill-error-cell-tag"
+ ]
+ },
+ "source": [
+ "Execution using papermill encountered an exception here and stopped:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "d3e94585-e44b-443f-bbd5-07dba42214e8",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2022-10-25T15:13:46.821047Z",
+ "iopub.status.busy": "2022-10-25T15:13:46.820451Z",
+ "iopub.status.idle": "2022-10-25T15:13:47.850221Z",
+ "shell.execute_reply": "2022-10-25T15:13:47.849006Z"
+ },
+ "papermill": {
+ "duration": 1.040875,
+ "end_time": "2022-10-25T15:13:47.852075",
+ "exception": true,
+ "start_time": "2022-10-25T15:13:46.811200",
+ "status": "failed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "ename": "ModuleNotFoundError",
+ "evalue": "No module named 'erddapy'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn [2], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mipywidgets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m widgets, IntSlider, jslink, interact, interactive, fixed, interact_manual\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmarkdown\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merddapy\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ERDDAP\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mipyleaflet\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Map, Marker, GeoData, ImageOverlay, basemaps, basemap_to_tiles, LayersControl, ScaleControl, FullScreenControl, WidgetControl\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# import pandas as pd\u001b[39;00m\n",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'erddapy'"
+ ]
+ }
+ ],
+ "source": [
+ "import urllib.request as request\n",
+ "import h5py\n",
+ "import os\n",
+ "from ipywidgets import widgets, IntSlider, jslink, interact, interactive, fixed, interact_manual\n",
+ "import markdown\n",
+ "from erddapy import ERDDAP\n",
+ "from ipyleaflet import Map, Marker, GeoData, ImageOverlay, basemaps, basemap_to_tiles, LayersControl, ScaleControl, FullScreenControl, WidgetControl\n",
+ "# import pandas as pd\n",
+ "import numpy as np\n",
+ "# from IPython.display import display\n",
+ "from netCDF4 import num2date\n",
+ "# from datetime import datetime\n",
+ "import geopandas as gpd\n",
+ "########################################################\n",
+ "import sys\n",
+ "import pandas as pd\n",
+ "import ipywidgets as widgets\n",
+ "from IPython.display import Markdown, HTML, Javascript, display, Image\n",
+ "import subprocess\n",
+ "import csv\n",
+ "from __future__ import print_function\n",
+ "# from ipywidgets import interact, interactive, fixed, interact_manual\n",
+ "import csv\n",
+ "import re\n",
+ "import warnings\n",
+ "#from init import *\n",
+ "warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)\n",
+ "def printmd(string):\n",
+ " display(Markdown(string))\n",
+ "###############################################################\n",
+ "from ftplib import FTP\n",
+ "import multiprocessing as mlp\n",
+ "import shutil\n",
+ "import tempfile\n",
+ "import urllib.request\n",
+ "from datetime import datetime, timedelta\n",
+ "\n",
+ "from osgeo import gdal\n",
+ "# import numpy as np\n",
+ "from osgeo import ogr\n",
+ "from osgeo import osr\n",
+ "# import pandas as pd\n",
+ "import pyproj\n",
+ "\n",
+ "from multiprocessing import Manager\n",
+ "# from ipywidgets import widgets, IntSlider, jslink\n",
+ "from ipyleaflet import Map, projections, GeoData, basemap_to_tiles, basemaps, WidgetControl, ScaleControl, FullScreenControl, LayersControl #, ImageOverlay, \n",
+ "import geopandas as gpd\n",
+ "import leafmap\n",
+ "import altair as alt\n",
+ "# import localtileserver # was needed localy to be able to add raster on the leafmap Map."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ee1fb199-9539-48f5-a84c-4f393d963bdc",
+ "metadata": {
+ "papermill": {
+ "duration": null,
+ "end_time": null,
+ "exception": null,
+ "start_time": null,
+ "status": "pending"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "class merge_btl_nutrient:\n",
+ " \n",
+ " def get_btlfile(self, btl_url, wdir):\n",
+ " request.urlretrieve(btl_url, os.path.join(wdir, \"btl.h5\"))\n",
+ " return 0\n",
+ "\n",
+ " def btl_to_dataframe(self, wdir):\n",
+ " f = h5py.File(os.path.join(wdir, \"btl.h5\"), 'r')\n",
+ " df_btl = pd.DataFrame()\n",
+ " for grp in f:\n",
+ " tempo_list = []\n",
+ " tempo_columns = []\n",
+ " for c in f[grp]:\n",
+ " tempo_columns.append(c)\n",
+ " tempo_list.append(f[grp][c])\n",
+ " list_array = np.transpose(np.array(tempo_list))\n",
+ " tempo_df = pd.DataFrame(list_array, columns=tempo_columns)\n",
+ " tempo_df['station'] = [f[grp].attrs['Station'].strip().split(' ')[-1]] * len(tempo_df)\n",
+ " tempo_df['cast'] = [int(f[grp].attrs['Cast_Number'].strip())] * len(tempo_df)\n",
+    "            df_btl = pd.concat([tempo_df, df_btl])\n",
+ " f.close()\n",
+ "# # added to extract a csv format of the btl.h5 data to send out to the GUI team\n",
+ "# df_btl.to_csv(os.path.join(wdir, \"btl.csv\"), header=1, index=0)\n",
+ " #######\n",
+ " return df_btl\n",
+ "\n",
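+    "    # btl_to_dataframe above assumes each top-level HDF5 group is one CTD cast whose\n",
+    "    # datasets are equal-length 1-D columns; a quick way to inspect that layout is,\n",
+    "    # e.g. (hypothetical group name):\n",
+    "    #     with h5py.File(os.path.join(wdir, 'btl.h5'), 'r') as f:\n",
+    "    #         print(list(f['cast_1']))  # column names inside one cast\n",
+    "\n",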
+ " def merge(self, df_nutrient, df_btl, file_ge_btl):\n",
+ " ge_time_header = [i for i in list(df_nutrient.columns) if\n",
+ " ((i.lower().__contains__('date')) or \n",
+ " (i.lower().__contains__('time')))].pop()\n",
+ " btl_time_header = [i for i in list(df_btl.columns) if\n",
+ " ((i.lower().__contains__('date')) or \n",
+ " (i.lower().__contains__('time')))].pop()\n",
+ " ge_station_header = [i for i in list(df_nutrient.columns) if\n",
+ " i.lower().__contains__('station')].pop()\n",
+ " btl_station_header = [i for i in list(df_btl.columns) if\n",
+ " i.lower().__contains__('station')].pop()\n",
+ " ge_bottle_header = [i for i in list(df_nutrient.columns) if\n",
+ " (i.lower().__contains__('bottle'))].pop()\n",
+    "        \"\"\" TODO: bopo should be replaced with RosPos once the data with the corrected variable name are served on Hyrax.\"\"\"\n",
+ " btl_bottle_header = [i for i in list(df_btl.columns) if\n",
+ " i.lower().__contains__('bopo')].pop() \n",
+ " ge_cast_header = [i for i in list(df_nutrient.columns) if\n",
+ " i.lower().__contains__('cast')].pop()\n",
+ " btl_cast_header = [i for i in list(df_btl.columns) if\n",
+ " i.lower().__contains__('cast')].pop()\n",
+ " ge_jointField = [ge_time_header, ge_station_header, ge_bottle_header, ge_cast_header]\n",
+ " btl_jointField = [btl_time_header, btl_station_header, btl_bottle_header, btl_cast_header]\n",
+ " \n",
+ " #####################################\n",
+ " df_nutrient_header = [h.split(' ')[0] for h in df_nutrient.columns]\n",
+ " df_nutrient.columns = df_nutrient_header\n",
+ " dfnutrient_to_merge = df_nutrient\n",
+ " dfbtl_to_merge = df_btl\n",
+ " dfbtl_to_merge = dfbtl_to_merge.drop(btl_time_header, axis=1)\n",
+ " dfnutrient_to_merge = dfnutrient_to_merge.drop(ge_time_header, axis=1)\n",
+ " dfnutrient_to_merge = dfnutrient_to_merge.drop(ge_station_header, axis=1)\n",
+ " dfnutrient_to_merge[ge_time_header] = pd.to_datetime(df_nutrient[ge_time_header]).dt.strftime('%Y-%m-%d')\n",
+ " # dfnutrient_to_merge[ge_time_header] = df_nutrient[ge_time_header].dt.strftime('%Y-%m-%d')\n",
+ " u = []\n",
+ " for i in df_nutrient[ge_station_header].values:\n",
+ " if i.isdigit():\n",
+ " u.append('G' + i)\n",
+ " else:\n",
+ " u.append(i)\n",
+ " dfnutrient_to_merge[ge_station_header] = u\n",
+ " hdf_time_units = \"days since 1970-01-01 00:00:00\"\n",
+ " list_tmp = []\n",
+ " import cftime\n",
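+    "        # num2date turns the bottle file's epoch-day floats (days since 1970-01-01)\n",
+    "        # into cftime datetimes, formatted below as 'YYYY-MM-DD' strings so the join\n",
+    "        # key matches the nutrient file's date format\n",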
+ " for i in range(len(df_btl[btl_time_header])):\n",
+ " u=num2date(df_btl[btl_time_header].values[i], hdf_time_units)\n",
+ " u=cftime.DatetimeGregorian.strftime(u, '%Y-%m-%d')\n",
+ " list_tmp.append(u)\n",
+ " dfbtl_to_merge[btl_time_header] = list_tmp\n",
+ " '''https://www.datasciencemadesimple.com/join-merge-data-frames-pandas-python/'''\n",
+ " df = pd.merge(dfnutrient_to_merge, dfbtl_to_merge, how=\"inner\", left_on=ge_jointField, right_on=btl_jointField)\n",
+ " df.to_csv(file_ge_btl, header=1, index=0)\n",
+ " del df\n",
+ " return 0\n",
+ "\n",
+ "\n",
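+    "# A minimal usage sketch of the class above (hypothetical merged-file name; df_nutrient\n",
+    "# is a pandas DataFrame already loaded from the nutrient file served on ERDDAP):\n",
+    "#   m = merge_btl_nutrient()\n",
+    "#   m.get_btlfile('http://jorvik.uwaterloo.ca:8080/opendap/data/CCADI/Amundsen_BTL_GreenEdge2016_LEG1.h5', '2016_int_btl_csv')\n",
+    "#   df_btl = m.btl_to_dataframe('2016_int_btl_csv')\n",
+    "#   m.merge(df_nutrient, df_btl, os.path.join('2016_int_btl_csv', 'merged_btl_nutrient.csv'))\n",
+    "\n",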
+ "class merging_gui_jupiter():\n",
+ " # This is the class where the GUI is made\n",
+ " def __init__(self):\n",
+ " self.gridwindow={} # making an empty grid window\n",
+ " self.vbox_widgets = [] # making an empty vertical box\n",
+ " self.gridwindow['grid'] = widgets.GridspecLayout(1, 1)\n",
+ " #####\n",
+ " self.getBTLbutton = widgets.Button(description=\"retrieve\", layout=widgets.Layout(width='max-content'), button_style='info')\n",
+ " self.getNutrientbutton = widgets.Button(description=\"retrieve\", layout=widgets.Layout(width='max-content'), button_style='info')\n",
+ " self.Continuebutton = widgets.Button(description=\"Continue\", layout=widgets.Layout(width='max-content'))\n",
+ " # BTL file retrieval\n",
+ " layout = widgets.Layout(height='auto', width='125px')\n",
+ " printmd('Merging bottle file with the nutrient file
')\n",
+ " # read text\n",
+ " f=open(\"md_texts/nutrient_btl_infos.md\",\"r\")\n",
+ " fc=f.read()\n",
+ " f.close()\n",
+ " text_html1 = markdown.markdown(fc)\n",
+ " del fc\n",
+ " self.gridwindow['text1'] = widgets.HTML(text_html1)\n",
+ "# # Reading the images of the CTD-Rosette ##########\n",
+ " ctd_img = open(\"images/ctd-rosette.jpg\", \"rb\")\n",
+ " ship_img = open(\"images/CCGSAmundsen.png\", 'rb')\n",
+ " ctd = ctd_img.read()\n",
+ " ship = ship_img.read()\n",
+    "        gridimage1 = widgets.Image(value=ship, format='png', width=300)\n",
+ " Figure1 = widgets.Label(r'\\(\\textbf{Figure 1:}\\)'+' Canadian Coast Guard Ship ' + r'\\(\\textit{Amundsen}\\)', layout=widgets.Layout(height='auto', width='auto'))\n",
+    "        gridimage2 = widgets.Image(value=ctd, format='jpg', width=300)\n",
+ " Figure2 = widgets.Label(r'\\(\\textbf{Figure 2:}\\)'+' CTD-Rosette', layout=widgets.Layout(height='auto', width='auto'))\n",
+ " ship_img.close()\n",
+ " ctd_img.close()\n",
+ " image_vbox1 = widgets.VBox(children=[gridimage1, Figure1])\n",
+ " image_vbox2 = widgets.VBox(children=[gridimage2, Figure2])\n",
+ " self.gridwindow['image'] = widgets.HBox(children=[image_vbox1, image_vbox2])\n",
+ "# # image_vbox = [[gridimage1, gridimage2], ['CCGS Amundsen', 'CTD-Rosette']]\n",
+ "# gridwindow['image'] = widgets.HBox(children=[gridimage2, gridimage1])\n",
+ " ###################################################\n",
+ " self.vbox_widgets.append(self.gridwindow['text1'])\n",
+ " self.vbox_widgets.append(self.gridwindow['image'])\n",
+ " \n",
+ " # read text\n",
+ " f=open(\"md_texts/data_retrieval.md\",\"r\")\n",
+ " fc=f.read()\n",
+ " f.close()\n",
+ " data_retrieval = markdown.markdown(fc)\n",
+ " del fc\n",
+ " self.gridwindow['data_retrieval'] = widgets.HTML(data_retrieval)\n",
+ " self.vbox_widgets.append(self.gridwindow['data_retrieval'])\n",
+ " ## Bottle file retrieval ######\n",
+ " label = widgets.Label('Bottle files', layout=layout)\n",
+ " self.BottleData = widgets.Text(\n",
+ " value=\"http://jorvik.uwaterloo.ca:8080/opendap/data/CCADI/Amundsen_BTL_GreenEdge2016_LEG1.h5\",\n",
+ " layout=widgets.Layout(width='50%')\n",
+ " )\n",
+ " self.gridwindow['bottle'] = widgets.HBox(children=[label, self.BottleData, self.getBTLbutton])\n",
+ " self.vbox_widgets.append(self.gridwindow['bottle'])\n",
+ " label = widgets.Label('Nutrient file', layout=layout)\n",
+ " self.nutrientServer = widgets.Text(\n",
+ " value=\"https://CanWINerddap.ad.umanitoba.ca/erddap\",\n",
+ " layout=widgets.Layout(width='50%')\n",
+ " )\n",
+ " self.gridwindow['nutrientserver'] = widgets.HBox(children=[label, self.nutrientServer, self.getNutrientbutton])\n",
+ " self.vbox_widgets.append(self.gridwindow['nutrientserver'])\n",
+ " \n",
+ " self.list0 = widgets.SelectMultiple(\n",
+ " options=[\"Empty\"],\n",
+ " value=[\"Empty\"],\n",
+ " disabled=False\n",
+ " )\n",
+ "\n",
+ " self.list1 = widgets.SelectMultiple(\n",
+ " options=[\"Empty\"],\n",
+ " value=[\"Empty\"],\n",
+ " disabled=False\n",
+ " )\n",
+ " \n",
+ " self.depthRange = widgets.FloatRangeSlider(\n",
+ " value=[0, 0],\n",
+ " min=0,\n",
+ " max=5000,\n",
+ " step=0.1,\n",
+ " disabled=False,\n",
+ " continuous_update=False,\n",
+ " orientation='horizontal',\n",
+ " readout=True,\n",
+ " readout_format='.1f',\n",
+ " )\n",
+ " \n",
+ " self.outputdir = \"2016_int_btl_csv\"\n",
+ " self.gridwindow['grid'][0, 0] = widgets.VBox(children=self.vbox_widgets) # pass all the content of the vertical box into the left side of the grid\n",
+ " \n",
+ " self.getBTLbutton.on_click(self.getBTLdata)\n",
+ " self.getNutrientbutton.on_click(self.getNutrientdata)\n",
+ " display(self.gridwindow['grid'])\n",
+ " self.Merge_Button=widgets.Button(\n",
+ " description='Merge',\n",
+ " disabled=False,\n",
+ " button_style='', \n",
+ " tooltip='Click me',\n",
+ " icon=''\n",
+ " )\n",
+ " self.merge_btl_nutrient()\n",
+ " \n",
+ " \n",
+ " ###########\n",
+ " \n",
+ " def continue_to_pyco2sys(self): \n",
+ " def on_button_pyco2sys(b):\n",
+ " continueprocess()\n",
+ "\n",
+ " outmerge=widgets.Output()\n",
+ " @outmerge.capture()\n",
+ " def continueprocess():\n",
+ " checkInputfile()\n",
+ "# self.continue_to_sic()\n",
+ " \n",
+ " gridwindow={}\n",
+ " vbox_widgets = []\n",
+ " gridwindow['grid'] = widgets.GridspecLayout(1,1)\n",
+ " layout = widgets.Layout(height='auto', width='125px')\n",
+ " f=open(\"md_texts/variable_Join_list.md\",\"r\")\n",
+ " fc=f.read()\n",
+ " f.close()\n",
+ " text_var_sel = markdown.markdown(fc)\n",
+ " del fc\n",
+ " gridwindow['variable_selection'] = widgets.HTML(text_var_sel)\n",
+ " vbox_widgets.append(gridwindow['variable_selection'])\n",
+ " label = widgets.Label('Bottle_variables', layout=widgets.Layout(width='50%'))\n",
+ " label = widgets.Label('Nutrient variables', layout=widgets.Layout(width='50%'))\n",
+ " gridwindow['bottle variable list'] = widgets.HBox(children=[label, self.list0])\n",
+ "\n",
+ " gridwindow['nutrient variable list'] = widgets.HBox(children=[label, self.list1])\n",
+ " gridwindow['var_list'] = widgets.HBox(children=[gridwindow['bottle variable list'], gridwindow['nutrient variable list']])\n",
+ " vbox_widgets.append(gridwindow['var_list'])\n",
+ " label = widgets.Label('Sample depth:', layout=layout)\n",
+ " # read text\n",
+ " f=open(\"md_texts/variable_meaning.md\",\"r\")\n",
+ " fc=f.read()\n",
+ " f.close()\n",
+ " text_var = markdown.markdown(fc)\n",
+ " del fc\n",
+ " gridwindow['variable_meaning'] = widgets.HTML(text_var)\n",
+ " # read text\n",
+ " f=open(\"md_texts/sample_depth_range.md\",\"r\")\n",
+ " fc=f.read()\n",
+ " f.close()\n",
+ " text_var_sel = markdown.markdown(fc)\n",
+ " del fc\n",
+ " gridwindow['sample_depth_range'] = widgets.HTML(text_var_sel)\n",
+ " vbox_widgets.append(gridwindow['sample_depth_range'])\n",
+ " gridwindow['Sample depth'] = widgets.HBox(children=[label, self.depthRange])\n",
+ " vbox_widgets.append(gridwindow['Sample depth'])\n",
+ "\n",
+ " self.btl = pd.DataFrame()\n",
+ " self.nutrient=pd.DataFrame()\n",
+ " \n",
+ " continue_button1=widgets.Button(\n",
+ " description='Continue',\n",
+ " disabled=False,\n",
+ " button_style='', \n",
+ " tooltip='Click me',\n",
+ " icon=''\n",
+ " )\n",
+ " gridwindow['merge'] = widgets.HBox(children=[self.Merge_Button])\n",
+ " gridwindow['to_pyco2sys'] = widgets.HBox(children=[gridwindow['merge'], continue_button1])\n",
+ " vbox_widgets.append(gridwindow['to_pyco2sys'])\n",
+ " gridwindow['grid'][0, 0] = widgets.VBox(children=vbox_widgets)\n",
+ " self.Merge_Button.on_click(self.clickMerge)\n",
+ " continue_button1.on_click(on_button_pyco2sys)\n",
+ " display(gridwindow['grid'])\n",
+ " display(outmerge)\n",
+ " return 0 \n",
+ "\n",
+ "\n",
+ " \n",
+ " def merge_btl_nutrient(self): \n",
+ " def on_button_continuemerge(b):\n",
+ " continuemerge()\n",
+ "\n",
+ " out=widgets.Output()\n",
+ " @out.capture()\n",
+ " def continuemerge(): \n",
+ " ###### transit to the PyCO2SYS #####\n",
+ " self.continue_to_pyco2sys()\n",
+ " \n",
+ "\n",
+ " self.continue_button=widgets.Button(\n",
+ " description='continue',\n",
+ " disabled=False,\n",
+ " button_style='', \n",
+ " tooltip='Click me',\n",
+ " icon=''\n",
+ " )\n",
+ " self.gridwindow['continue'] = widgets.HBox(children=[self.continue_button])\n",
+ " self.vbox_widgets.append(self.gridwindow['continue'])\n",
+ " self.gridwindow['grid'][0, 0] = widgets.VBox(children=self.vbox_widgets)\n",
+ " self.continue_button.on_click(on_button_continuemerge)\n",
+ " display(out)\n",
+ "\n",
+ " def getBTLdata(self,a):\n",
+ " if not os.path.exists(self.outputdir):\n",
+ " os.makedirs(self.outputdir)\n",
+ " merge_btl_nutrient().get_btlfile(self.BottleData.value, self.outputdir)\n",
+ " df_btl=merge_btl_nutrient().btl_to_dataframe(self.outputdir)\n",
+ " self.list0.options=tuple(df_btl.columns)\n",
+ " self.list0.value=[\"BOPO\",\"CTDTmp90\",\"Cast_Number\",\"P_sal_CTD\",\"Pres_Z\",\"depth\",\"latitude\",\"longitude\",\"time\",\"station\"]\n",
+ " self.list0.rows = 24\n",
+ " self.depthRange.min = df_btl['depth'].min()\n",
+ " self.depthRange.max = df_btl['depth'].max()\n",
+ " self.depthRange.value = [self.depthRange.min, self.depthRange.max]\n",
+ " self.getBTLbutton.description=\"Success!\"\n",
+ " self.getBTLbutton.button_style='success'\n",
+ " del df_btl\n",
+ " return 0\n",
+ " \n",
+ " def getNutrientdata(self, a):\n",
+ " if not os.path.exists(self.outputdir):\n",
+ " os.makedirs(self.outputdir)\n",
+ " e_DataSearch = ERDDAP(server=self.nutrientServer.value)\n",
+ " result_search = e_DataSearch.get_search_url(search_for=\"greenedge\", response=\"csv\")\n",
+ " self.datasetID = [k \n",
+ " for k in pd.read_csv(result_search)[\"Dataset ID\"] \n",
+ " if k.lower().__contains__(\"greenedge_nutrient\")].pop()\n",
+ " #self.datasetID = pd.read_csv(result_search)[\"Dataset ID\"][0]\n",
+ " #print(result_search)\n",
+ "\n",
+ " e_datafetch = ERDDAP(server=self.nutrientServer.value, protocol=\"tabledap\", response=\"csv\")\n",
+ " e_datafetch.dataset_id = self.datasetID\n",
+ "\n",
+ " df_nutrient = e_datafetch.to_pandas(parse_dates=True)\n",
+ " file_ge = os.path.join(self.outputdir, f'{self.datasetID}.csv') ## Nutrient file name \n",
+ " df_nutrient.to_csv(file_ge, index=False, header=True)\n",
+ " self.list1.options=df_nutrient.columns\n",
+ " \"\"\" Adjustment done in order to look easily for the variables needed in the data field\"\"\"\n",
+ " station = [k for k in df_nutrient.columns if k.lower().__contains__(\"station\")].pop()\n",
+ " sample_date = [k for k in df_nutrient.columns if k.lower().__contains__(\"sample_date\")].pop()\n",
+ " #sample_date = sample_date.split(\" \")[0]\n",
+ " sample_depth = [k for k in df_nutrient.columns if k.lower().__contains__(\"sample_depth\")].pop()\n",
+ " #sample_depth = sample_depth.split(\" \")[0]\n",
+ " cast = [k for k in df_nutrient.columns if k.lower().__contains__(\"cast\")].pop()\n",
+ " bottle = [k for k in df_nutrient.columns if k.lower().__contains__(\"bottle\")].pop()\n",
+ " dic_um = [k for k in df_nutrient.columns if k.lower().__contains__(\"dic_um\")].pop()\n",
+ " #dic_um = dic_um.split(\" \")[0]\n",
+ " totalk = [k for k in df_nutrient.columns if k.lower().__contains__(\"totalk_l_um\")].pop()\n",
+ " #totalk = totalk.split(\" \")[0]\n",
+ " self.list1.value=[station, sample_date, sample_depth, cast, bottle,dic_um, totalk]\n",
+ " self.list1.rows = 24\n",
+ " self.getNutrientbutton.description=\"Success!\"\n",
+ " self.getNutrientbutton.button_style='success'\n",
+ " del df_nutrient\n",
+ " return 0\n",
+ "\n",
+ " \n",
+ " def clickMerge(self, a):\n",
+ " file_ge_btl = os.path.join(self.outputdir, 'merged_btl_nutrient.csv') ## Merged file name to be fed to the PyCO2SYS\n",
+ " objectsForMerging = merge_btl_nutrient()\n",
+ " if os.path.exists(file_ge_btl):\n",
+ " os.remove(file_ge_btl)\n",
+ " df_btl = objectsForMerging.btl_to_dataframe(self.outputdir)\n",
+ " df_btl.reset_index(drop=True, inplace=True)\n",
+ " cond = (df_btl[\"depth\"].values[:]>=self.depthRange.value[0]) & (df_btl[\"depth\"].values[:]<=self.depthRange.value[1])\n",
+ " df_btl = df_btl.loc[cond]\n",
+ " df_nutrient = pd.read_csv(os.path.join(self.outputdir, f'{self.datasetID}.csv'), header=0) \n",
+ " objectsForMerging.merge(df_nutrient=df_nutrient[list(self.list1.value)], df_btl=df_btl[list(self.list0.value)], file_ge_btl=file_ge_btl)\n",
+ " del df_nutrient, df_btl\n",
+ " else:\n",
+ " df_btl = objectsForMerging.btl_to_dataframe(self.outputdir)\n",
+ " df_btl.reset_index(drop=True, inplace=True)\n",
+ " cond = (df_btl[\"depth\"].values[:]>=self.depthRange.value[0]) & (df_btl[\"depth\"].values[:]<=self.depthRange.value[1])\n",
+ " df_btl = df_btl.loc[cond]\n",
+ " df_nutrient = pd.read_csv(os.path.join(self.outputdir, f'{self.datasetID}.csv'), header=0) \n",
+ " objectsForMerging.merge(df_nutrient=df_nutrient[list(self.list1.value)], df_btl=df_btl[list(self.list0.value)], file_ge_btl=file_ge_btl)\n",
+ " del df_nutrient, df_btl\n",
+ " self.Merge_Button.description=\"Done\"\n",
+ " self.Merge_Button.button_style=\"success\"\n",
+ " return 0\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fde05a6e-c6d1-451e-96e2-055ef4fade3d",
+ "metadata": {
+ "papermill": {
+ "duration": null,
+ "end_time": null,
+ "exception": null,
+ "start_time": null,
+ "status": "pending"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "#Define the required parameters\n",
+ "\n",
+ "def checkInputfile():\n",
+ " #Create an empty list for all the required parameters in the file. \n",
+ " req_param_inFile=[]\n",
+ " opt_param_inFile=[]\n",
+ " \n",
+ " pd.set_option('display.max_columns', 50)\n",
+ " \n",
+ " #-------------------INPUT FILE-------------------------------------------\n",
+ " input_file =os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient.csv\") \n",
+ " df = pd.read_csv(input_file)\n",
+ " df=df.reset_index(drop=True)\n",
+ " #------------------------------------------------------------------------\n",
+ " \n",
+ " #Checking for the stadardized names in the input file to automatically pull out all the required, optional and mandatory parameters present\n",
+ " \n",
+ " # 1. KEY PARAMETERS \n",
+ " # Parameters- Total Alkalinity, DIC, PH, PCO2, fCO2, CO232, biCO2\n",
+ " \n",
+ " standardizedNames=['TotAlk_l_um_l','DIC_um_l','pH','pCO2','fCO2','CO232','biCO2'] #BODC standardized names\n",
+ " fullNames=['Total alkalinity (umolkg1)','Dissolved inorganic carbon (umolkg1)','Partial pressure of carbon dioxide (pCO2) (uatm)',\\\n",
+ " 'Fugacity of carbon dioxide (fCo2) (uatm)','Carbonate ion concentration (CO32) (umolkg1)','Bicarbonate ion (umol kg1)'] # Full name that will show up in widget\n",
+ " \n",
+ " \n",
+ " for name, fname in zip(standardizedNames, fullNames): \n",
+ " if name in df.columns:\n",
+ " req_param_inFile.append(fname) #Append the names of all the key parameters in the input file\n",
+ " \n",
+ " \n",
+ " # 2. OPTIONAL PARAMETERS \n",
+ " # Parameters- SiOx, PO4, Ammonia, Sulfide\n",
+ " \n",
+ " standardizedNames=['SiOx_um_l','PO4_Filt_um_l','Ammonia','Sulfide'] #BODC standardized names\n",
+ " fullNames=['Total Silicate (umolkg1)','Total Phosphate (SRP) (umolkg1)','Total Ammonia (umolkg1)','Total Sulfide (umolkg1)'] # Full name that will show up in widget\n",
+ " \n",
+ " for name, fname in zip(standardizedNames, fullNames): \n",
+ " if name in df.columns:\n",
+ " opt_param_inFile.append(fname) #Append the names of all the key parameters in the input file\n",
+ " \n",
+ " \n",
+ " getUserParameters(df, req_param_inFile, opt_param_inFile) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "85597719-a3eb-4f3f-928a-7eea387ba0c9",
+ "metadata": {
+ "papermill": {
+ "duration": null,
+ "end_time": null,
+ "exception": null,
+ "start_time": null,
+ "status": "pending"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def getUserParameters(df,req_param_inFile,opt_param_inFile):\n",
+ "\n",
+ " #The user will be presented with the parameters automatically pulled out from the input file. They will have a chance to make changes to the selections. \n",
+ " #Only exception is if there are only two req parameters in input file, they will not be able to make any changes/selctions in this case.\n",
+ " \n",
+ " #Key Parameters Widget\n",
+ " req_param_user=widgets.SelectMultiple(\n",
+ " options=req_param_inFile,\n",
+ " #value=req_param_inFile,\n",
+ " #description='Key Parameters:',\n",
+ " disabled=False,\n",
+ " )\n",
+ " req_param_user.layout.margin='0.5% 0% 5% 0%'\n",
+ " req_param_user.layout.width='20%'\n",
+ " req_param_user.layout.height='70%'\n",
+ "\n",
+ "\n",
+ " #Optional Parameters Widget\n",
+ " opt_param_user=widgets.SelectMultiple(\n",
+ " options=opt_param_inFile,\n",
+ " value=opt_param_inFile,\n",
+ " #description='Optional Parameters:',\n",
+ " disabled=False,\n",
+ " )\n",
+ " opt_param_user.layout.margin='0.5% 0% 3% 0%'\n",
+ " opt_param_user.layout.width='20%'\n",
+ "\n",
+ " cont_button1=widgets.Button(\n",
+ " description='Continue',\n",
+ " disabled=False,\n",
+ " button_style='', \n",
+ " tooltip='Click me',\n",
+ " icon=''\n",
+ " )\n",
+ "\n",
+ " \n",
+ " # Onclick function for the first Continue button widget\n",
+ " output = widgets.Output()\n",
+ " @output.capture()\n",
+ " def on_button_clicked(b):\n",
+ " getConstants(df, req_param_user, opt_param_user, req_param_inFile, opt_param_inFile)\n",
+ " \n",
+ "\n",
+ " # Key parameters, aka carbonate system parameters\n",
+ " printmd('###
Carbonate System Parameters ###') \n",
+ " printmd('More information on these arguments an be found [here](https://pyco2sys.readthedocs.io/en/latest/co2sys_nd/#carbonate-system-parameters).')\n",
+ " #if there is only one or no key parameters in input file\n",
+ " if len(req_param_inFile)<2:\n",
+ " printmd(\"
**There are not enough key parameters for calculation of the full carbonate system. Please check input file and try again.**
\")\n",
+ " sys.exit(-1)\n",
+ " \n",
+ " #If only two key parameters in the input file, automatically use those two\n",
+ " if len(req_param_inFile)==2:\n",
+ " printmd(\"
**The following key carbonate parameters were found in the input file and will be used in calculations.**
\")\n",
+ "\n",
+ " for name in req_param_inFile:\n",
+ " printmd('- {}'.format(name))\n",
+ " \n",
+ " #If there are more than two key parameters in the input file, ask user to select any two\n",
+ " if len(req_param_inFile)>2:\n",
+ " printmd(\"
**The following key carbonate parameters were found in the input file. Choose any two parameters.**
\")\n",
+ " display(req_param_user) #display widget\n",
+ "\n",
+ "\n",
+ " # Optional parameters, aka Nutrients & solutes\n",
+ " printmd('###
Nutrients and other solutes ###') \n",
+ " printmd('More information on these arguments an be found [here](https://pyco2sys.readthedocs.io/en/latest/co2sys_nd/#nutrients-and-other-solutes).')\n",
+ " \n",
+ " # If there is at least one opt parameter in file, display them and ask user to select any of them. All are automatically selected in the widget\n",
+ " if len(opt_param_inFile)>0:\n",
+ " printmd(\"
**The following nutrient parameters are in the input file. Choose any parameter(s).**\")\n",
+ " display(opt_param_user) #display widget\n",
+ " \n",
+ " \n",
+ " display(cont_button1) #display continue button\n",
+ " cont_button1.on_click(on_button_clicked) #Call onclick function\n",
+ " display(output) #display widget ouput when button is clicked\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b305cb57-05ea-42b0-927b-ca9e7ea0547b",
+ "metadata": {
+ "papermill": {
+ "duration": null,
+ "end_time": null,
+ "exception": null,
+ "start_time": null,
+ "status": "pending"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def getConstants(df, req_param_user, opt_param_user, req_param_inFile, opt_param_inFile): \n",
+ " \n",
+ " # Constants\n",
+ " printmd('###
Settings for constants ###') \n",
+ " printmd('More information on these constants an be found [here](https://pyco2sys.readthedocs.io/en/latest/co2sys_nd/#settings). Default constants chosen based on [Jiang et al., 2022](https://www.frontiersin.org/articles/10.3389/fmars.2021.705638/full).')\n",
+ " #Widgets for the different constants\n",
+ " phstr = widgets.Output()\n",
+ " @phstr.capture()\n",
+ " def constStrings1():\n",
+ " printmd(\"
**Choose the pH scale:**\")\n",
+ " \n",
+ " constStrings1()\n",
+ "\n",
+ " option_list=['1. Total',\n",
+ " '2. Seawater',\n",
+ " '3. Free',\n",
+ " '4. NBS, i.e. relative to NBS/NIST reference standards']\n",
+ " \n",
+ " #PH Scale\n",
+ " phscale=widgets.RadioButtons(\n",
+ " options=option_list, \n",
+ " disabled=False,\n",
+ " layout={'width': 'max-content'},\n",
+ " )\n",
+ " phscale.layout.margin='0.5% 1% 3% 0%'\n",
+ " #phscale.layout.width='40%' \n",
+ "\n",
+ "\n",
+ " # Carbonic Acid Dissociation\n",
+ " k1k2str = widgets.Output()\n",
+ " @k1k2str.capture()\n",
+ " def constStrings2():\n",
+ " printmd(\"**Choose the set of equilibrium constant parameterisations to model carbonic acid dissociation:**\")\n",
+ " constStrings2()\n",
+ "\n",
+ " option_list=['1. RRV93 (0 < T < 45 °C, 5 < S < 45, Total scale, artificial seawater).',\n",
+ " '2. GP89 (−1 < T < 40 °C, 10 < S < 50, Seawater scale, artificial seawater).',\n",
+ " '3. H73a and H73b refit by DM87 (2 < T < 35 °C, 20 < S < 40, Seawater scale, artificial seawater).',\n",
+ " '4. MCHP73 refit by DM87 (2 < T < 35 °C, 20 < S < 40, Seawater scale, real seawater).',\n",
+ " '5. H73a, H73b and MCHP73 refit by DM87(2 < T < 35 °C, 20 < S < 40, Seawater scale, real seawater)',\n",
+ " '6. MCHP73 aka \"GEOSECS\" (2 < T < 35 °C, 19 < S < 43, NBS scale, real seawater).',\n",
+ " '7. MCHP73 without certain species aka \"Peng\" (2 < T < 35 °C, 19 < S < 43, NBS scale, real seawater).',\n",
+ " '8. M79 (0 < T < 50 °C, S = 0, freshwater only).', \n",
+ " '9. CW98 (2 < T < 30 °C, 0 < S < 40, NBS scale, real estuarine seawater).',\n",
+ " '10. LDK00 (2 < T < 35 °C, 19 < S < 43, Total scale, real seawater).',\n",
+ " '11. MM02 (0 < T < 45 °C, 5 < S < 42, Seawater scale, real seawater).',\n",
+ " '12. MPL02 (−1.6 < T < 35 °C, 34 < S < 37, Seawater scale, field measurements).',\n",
+ " '13. MGH06 (0 < T < 50 °C, 1 < S < 50, Seawater scale, real seawater).',\n",
+ " '14. M10 (0 < T < 50 °C, 1 < S < 50, Seawater scale, real seawater).',\n",
+ " '15. WMW14 (0 < T < 45 °C, 0 < S < 45, Seawater scale, real seawater).',\n",
+ " '16. SLH20 (−1.67 < T < 31.80 °C, 30.73 < S < 37.57, Total scale, field measurements).',\n",
+ " '17. SB21 (15 < T < 35 °C, 19.6 < S < 41, Total scale, real seawater).']\n",
+ " \n",
+ " k1k2=widgets.RadioButtons(\n",
+ " options=option_list, \n",
+ " value='10. LDK00 (2 < T < 35 °C, 19 < S < 43, Total scale, real seawater).',\n",
+ " #rows=len(option_list),\n",
+ " layout={'width': 'max-content'},\n",
+ " disabled=False,\n",
+ " )\n",
+ " k1k2.layout.margin='0.5% 1% 3% 0%'\n",
+ " #k1k2.layout.width='50%'\n",
+ " #k1k2.layout.height='100%'\n",
+ "\n",
+ " # Bisulfate ion dissociation \n",
+ " kso4str = widgets.Output()\n",
+ " @kso4str.capture()\n",
+ " def constStrings3():\n",
+ " printmd(\"**Choose the equilibrium constant parameterisations to model bisulfate ion dissociation:**\")\n",
+ " constStrings3()\n",
+ "\n",
+ " option_list=['1. D90a: Dickson (1990) J. Chem. Thermodyn.',\n",
+ " '2. KRCB77: Khoo et al. (1977) Anal. Chem.',\n",
+ " '3. WM13: Waters & Millero (2013) Mar. Chem./ WMW14: Waters et al. (2014) Mar. Chem.']\n",
+ " \n",
+ " kso4=widgets.RadioButtons(\n",
+ " options=option_list, \n",
+ " value='1. D90a: Dickson (1990) J. Chem. Thermodyn.',\n",
+ " layout={'width': 'max-content'},\n",
+ " #description='Parameter:',\n",
+ " disabled=False,\n",
+ " )\n",
+ " kso4.layout.margin='0.5% 1% 3% 0%'\n",
+ " #kso4.layout.width='40%'\n",
+ "\n",
+ " # Total borate \n",
+ " bostr = widgets.Output()\n",
+ " @bostr.capture()\n",
+ " def constStrings4():\n",
+ " printmd(\"**Choose which boron:salinity relationship to use to estimate total borate:**\")\n",
+ " constStrings4()\n",
+ "\n",
+ " option_list=['1. U74: Uppström (1974) DeepSea Res.',\n",
+ " '2. LKB10: Lee et al. (2010) Geochim. Cosmochim. Acta']\n",
+ " \n",
+ " bo=widgets.RadioButtons(\n",
+ " options=option_list, \n",
+ " value='2. LKB10: Lee et al. (2010) Geochim. Cosmochim. Acta',\n",
+ " layout={'width': 'max-content'},\n",
+ " disabled=False,\n",
+ " )\n",
+ " bo.layout.margin='0.5% 1% 3% 0%'\n",
+ " # bo.layout.width='40%'\n",
+ " \n",
+ " # hydrogen fluoride dissociation\n",
+ " hfstr = widgets.Output()\n",
+ " @hfstr.capture()\n",
+ " def constStrings5():\n",
+ " printmd(\"**Choose which which equilibrium constant parameterisation to use for hydrogen fluoride dissociation:**\")\n",
+ " constStrings5()\n",
+ "\n",
+ " option_list=['1. DR79: Dickson & Riley (1979) Mar. Chem.',\n",
+ " '2. PF87: Perez & Fraga (1987) Mar. Chem.']\n",
+ " \n",
+ " hf=widgets.RadioButtons(\n",
+ " options=option_list, \n",
+ " value='2. PF87: Perez & Fraga (1987) Mar. Chem.',\n",
+ " layout={'width': 'max-content'},\n",
+ " disabled=False,\n",
+ " )\n",
+ " hf.layout.margin='0.5% 1% 3% 0%'\n",
+ " # hf.layout.width='40%' \n",
+ "\n",
+ " # opt_gas_constant\n",
+ " gcstr = widgets.Output()\n",
+ " @gcstr.capture()\n",
+ " def constStrings6():\n",
+ " printmd(\"**Choose which value to use for the gas constant:**\")\n",
+ " constStrings6()\n",
+ " option_list=['1. DOEv2',\n",
+ " '2. DOEv3',\n",
+ " '3. 2018 CODATA']\n",
+ " \n",
+ " gc=widgets.RadioButtons(\n",
+ " options=option_list, \n",
+ " value='3. 2018 CODATA',\n",
+ " layout={'width': 'max-content'},\n",
+ " disabled=False,\n",
+ " )\n",
+ " gc.layout.margin='0.5% 1% 3% 0%'\n",
+ " # gc.layout.width='40%' \n",
+ " \n",
+ " #Continue button On-click function\n",
+ " output = widgets.Output()\n",
+ " @output.capture()\n",
+ " def on_button_clicked(b): \n",
+ " runPyco2sys(df,req_param_user, opt_param_user, req_param_inFile, opt_param_inFile, phscale, k1k2, kso4, bo, hf,gc)\n",
+ "\n",
+ " # Button widget\n",
+ " button2=widgets.Button(\n",
+ " description='Continue',\n",
+ " disabled=False,\n",
+ " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n",
+ " tooltip='Click me',\n",
+ " icon=''\n",
+ " )\n",
+ "\n",
+ " box2 = widgets.VBox([phstr,phscale, k1k2str, k1k2, kso4str, kso4, bostr, bo, hfstr, hf, gcstr,gc, button2])\n",
+ " display(box2)\n",
+ "\n",
+ " button2.on_click(on_button_clicked)\n",
+ " display(output)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5b6eb4f-eaac-4575-becd-701685897e6c",
+ "metadata": {
+ "papermill": {
+ "duration": null,
+ "end_time": null,
+ "exception": null,
+ "start_time": null,
+ "status": "pending"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def runPyco2sys(df,req_param_user, opt_param_user, req_param_inFile, opt_param_inFile, phscale, k1k2, kso4, bo, hf,gc):\n",
+ "\n",
+ "\n",
+ " #Build the argument list for pyco2sys to ingest\n",
+ " #The argument names are defined in the PyCO2 sys documentation.\n",
+ " kwargs={}\n",
+ " \n",
+ "\n",
+ " #KEY PARAMETERS\n",
+ " \n",
+ " # pyco2sys labels the two key parameters chosen as par 1 and par 2. 'par1check' checks if par1 has yet been assigned, otherwise it sets a key parameter to par 2\n",
+ " par1check=\"False\"\n",
+ " par2check=\"False\"\n",
+ "\n",
+ " \n",
+ " #Check if the parameter was chosen by the user, or if it was automatically selected (only two parameters were in the file). In that case the variable would not be in 'req_param_user' from the widget.\n",
+ " #If the varibale is to be used in the calculation, then get the data from the input file (saved as the data frame df)\n",
+ " #The variable is then added to the arguments for pyco2sys\n",
+ " \n",
+ " substrings=['alkalinity','Dissolved inorganic carbon','pH','Partial pressure of carbon dioxide', 'Fugacity of carbon dioxide','Carbonate ion concentration', 'Bicarbonate Ion']\n",
+ " standardizedNames=['TotAlk_l_um_l','DIC_um_l','pH','pCO2','fCO2','CO232','biCO2']\n",
+ " types=[1,2,3,4,5,6,7] #These are the different types according to pyco2sys documentation \n",
+ " \n",
+ " \n",
+ " for name, substr, t in zip(standardizedNames, substrings, types): # Loop through the substrings and the standardized names \n",
+ " if par1check==\"False\" or par2check==\"False\": #Check if either par1 or par2 is false. We need at least two key paramaters (par1 and par2 represent these two parameters)\n",
+ " if any(substr in string for string in req_param_inFile): #If it is in the input file\n",
+ " if len(req_param_inFile)==2: #If it is one of only two key variables in the input file (here it would not be in 'req_param_user.value' as it would be automatically selected- no widget used)\n",
+ " \n",
+ " if par1check==\"True\": \n",
+ " kwargs['par2']=df[name].to_numpy(dtype=float) #Get the data using the standardized name from the data frame (input file)\n",
+ " kwargs['par2_type']=t\n",
+ " par2check=\"True\"\n",
+ " else:\n",
+ " kwargs['par1']=df[name].to_numpy(dtype=float) \n",
+ " kwargs['par1_type']=t\n",
+ " par1check=\"True\"\n",
+ "\n",
+ " elif len(req_param_inFile)>2: # If there are more than two key parameters in the input file\n",
+ " if any(substr in string for string in req_param_user.value): #If there are more than two key variables in the file, then check if the user actually selected this varibale\n",
+ " if par1check==\"True\": \n",
+ " kwargs['par2']=df[name].to_numpy(dtype=float) \n",
+ " kwargs['par2_type']=t\n",
+ " par2check=\"True\"\n",
+ " else:\n",
+ " kwargs['par1']=df[name].to_numpy(dtype=float) \n",
+ " kwargs['par1_type']=t\n",
+ " par1check=\"True\"\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " # OPTIONAL PARAMETERS\n",
+ " \n",
+ " substrings_opt=['Silicate','Phosphate','Ammonia','Sulfide' ]\n",
+ " standardizedNames=['SiOx_um_l','PO4_Filt_um_l','Ammonia','Sulfide']\n",
+ " pyco2sysNames=['total_silicate','total_phosphate','total_phosphate','total_sulfide'] # Names that pyco2sys expects in the argument list\n",
+ " \n",
+ " \n",
+ " if len(opt_param_inFile)>0: # if there is at least one optional paramter in the input file. \n",
+ " for name, substr, pName in zip(standardizedNames, substrings_opt, pyco2sysNames): \n",
+ " if any(substr in string for string in opt_param_user.value): # If the optional parameter was chosen by the user\n",
+ " kwargs[pName]=df[name].to_numpy(dtype=float) # Get the data using the standardized name\n",
+ " \n",
+ "\n",
+ " \n",
+ " # MANDATORY PARAMETERS\n",
+ "\n",
+ " # Also check if the user wanted to use any other output temperature and pressure in the calculations.\n",
+ " # If they did not, the temp_out and press_out would be 'nan'\n",
+ " \n",
+ " #Set the output temperature and pressure to nan\n",
+ " Temperature_out=float('nan')\n",
+ " Pressure_out=float('nan')\n",
+ " \n",
+ " Temperature=df['CTDTmp90'].to_numpy(dtype=float)\n",
+ " kwargs['temperature']=Temperature\n",
+ " #if np.isnan(Temperature_out)==False: \n",
+ " kwargs['temperature_out']=Temperature_out\n",
+ "\n",
+ " Pressure=df['Pres_Z'].to_numpy(dtype=float)\n",
+ " kwargs['pressure']=Pressure \n",
+ " #if np.isnan(Pressure_out)==False: \n",
+ " kwargs['temperature_out']=Pressure_out \n",
+ "\n",
+ " Salinity=df['P_sal_CTD'].to_numpy(dtype=float)\n",
+ " kwargs['salinity']=Salinity \n",
+ "\n",
+ "\n",
+ "\n",
+ " #Get the values of the widget selections for the different CONSTANTS. All parameters are selected by default.\n",
+ " \n",
+ " k1k2Value=k1k2.value\n",
+ " kso4Value=kso4.value\n",
+ " boValue=bo.value\n",
+ " hfValue=hf.value\n",
+ " phscaleValue=phscale.value\n",
+ " gcValue=gc.value\n",
+ "\n",
+ " kso4Value_temp=kso4Value[:2]\n",
+ " boValue_temp=boValue[:2]\n",
+ " hfValue_temp=hfValue[:2]\n",
+ " k1k2Value_temp=k1k2Value[:2]\n",
+ " phscaleValue_temp=phscaleValue[:2]\n",
+ " gcValue_temp=gcValue[:2]\n",
+ " \n",
+ " if '.' in kso4Value_temp: \n",
+ " kso4Value_temp=kso4Value_temp[:1]\n",
+ " \n",
+ " if '.' in boValue_temp: \n",
+ " boValue_temp=boValue_temp[:1] \n",
+ "\n",
+ " if '.' in hfValue_temp: \n",
+ " hfValue_temp=hfValue_temp[:1] \n",
+ " \n",
+ " if '.' in k1k2Value_temp: \n",
+ " k1k2Value_temp=k1k2Value_temp[:1]\n",
+ " \n",
+ " if '.' in phscaleValue_temp: \n",
+ " phscaleValue_temp=phscaleValue_temp[:1]\n",
+ " \n",
+ " if '.' in gcValue_temp:\n",
+ " gcValue_temp=gcValue_temp[:1]\n",
+ " \n",
+ " k1k2Value=int(k1k2Value_temp)\n",
+ " kso4Value=int(kso4Value_temp)\n",
+ " boValue=int(boValue_temp)\n",
+ " hfValue=int(hfValue_temp)\n",
+ " phscaleValue=int(phscaleValue_temp)\n",
+ " gcValue=int(gcValue_temp)\n",
+ " \n",
+ " #Add them as arguments for pyco2sys\n",
+ " kwargs['opt_k_carbonic']=k1k2Value\n",
+ " kwargs['opt_k_bisulfate']=kso4Value\n",
+ " kwargs['opt_total_borate']=boValue\n",
+ " kwargs['opt_k_fluoride']=hfValue\n",
+ " kwargs['opt_gas_constant']=gcValue\n",
+ "\n",
+ "\n",
+ " # Import PyCO2SYS\n",
+ " import PyCO2SYS as pyco2\n",
+ "\n",
+ " # Run pyCO2SYS!\n",
+ " output_dict = pyco2.sys(**kwargs)\n",
+ "\n",
+ " \n",
+ " Output(df,output_dict)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8fafc9c0-74e1-4966-9e73-1a9f4b9b56c1",
+ "metadata": {
+ "papermill": {
+ "duration": null,
+ "end_time": null,
+ "exception": null,
+ "start_time": null,
+ "status": "pending"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def Output(df,output_dict):\n",
+ " \n",
+ "# ## Added to make the \"continue button\" to continue the process into the Sea Ice Concentration retrieval: \n",
+ " def on_button_sic(b):\n",
+ " continue_sic()\n",
+ "\n",
+ " outsic=widgets.Output()\n",
+ " @outsic.capture()\n",
+ " def continue_sic():\n",
+ " ccadi_uc3_mapping() # sea ice concentration processing\n",
+ "\n",
+ " ###########################################\n",
+ " \n",
+ " \n",
+ " #The dictionary output_dict is uneven. Some elements are just a single int, string or float. The length of 'par1' will always be the longest length (it holds the first of the two key parameters from the input file)\n",
+ " #For the values in the dictionary that are not single values, they are arrays. \n",
+ " #To be able to create even data frames, we need to seperate the values in the array and create a list.\n",
+ " #Otherise, the array is saved as one value for each key, instad of a list of multiple values.\n",
+ " \n",
+ " \n",
+ " longlength=len(output_dict['par1'])\n",
+ " newlist=[]\n",
+ "\n",
+ " for val in output_dict.values():\n",
+ "\n",
+ " #if value is a single integer\"\n",
+ " if isinstance(val, int):\n",
+ " list0=[val] * longlength\n",
+ " newlist.append(list0)\n",
+ "\n",
+ " elif isinstance(val, str):\n",
+ " list1=[val] * longlength\n",
+ " newlist.append(list1)\n",
+ "\n",
+ " elif isinstance(val, float):\n",
+ " list2=[val] * longlength\n",
+ " newlist.append(list2)\n",
+ " else:\n",
+ " arr=val\n",
+ " list3 = arr.tolist()\n",
+ " newlist.append(list3)\n",
+ " \n",
+ " \n",
+ " #Creae a new dict that has keys associated with a list of values, all of the same length. \n",
+ " newdict={}\n",
+ " i=0\n",
+ " for key in output_dict.keys():\n",
+ " newdict[key]=newlist[i]\n",
+ " i=i+1\n",
+ "\n",
+ "\n",
+ " #Create a new dataframe and save as csv.\n",
+ " output_df=pd.DataFrame.from_dict(newdict)\n",
+ "\n",
+ " #Merge this resulting data frame with extra variables from the input file that were not used in calculations\n",
+ " cols_to_use = df.columns.difference(output_df.columns) #variales that are different from those in output file\n",
+ " input_subset=df[cols_to_use]\n",
+ " merged_df = pd.concat([output_df,input_subset] , axis=1) #Merged dataframe\n",
+ "\n",
+ " \n",
+ " # Organize data frame so that specific varibales are at the front\n",
+ " front_metadata=['project name','platform name','Cruise', 'Station','sample date','TIME','latitude','longitude','sample depth','Cast','Bottle']\n",
+ " \n",
+ " #Loop through the list of metadata variables that should be at the front\n",
+ " col_position=-1\n",
+ " for var in front_metadata: \n",
+ " for col in merged_df.columns: # Loop through all the columns in dataframe\n",
+ " stripped_string = re.sub(\"[^0-9a-zA-Z]+\", \" \",col) # Strip the column headers of all non- laphanumeric characters\n",
+ " if var.lower() in stripped_string.lower(): # Check for column name regardless of case\n",
+ " popped_col_data=merged_df.pop(col) # Pop the column from daat frame\n",
+ " col_position=col_position+1 # Find the next front position\n",
+ " merged_df.insert(col_position, col, popped_col_data) # Place variable at position\n",
+ "\n",
+ " front_data=['saturation_aragonite', 'saturation_aragonite_out','saturation_calcite','saturation_calcite_out','pCO2','fCO2','bicarbonate','pH_total']\n",
+ "\n",
+ " #Loop through the list of calculated carbonate chemistry variables that should be at the front\n",
+ " for var in front_data: \n",
+ " for col in merged_df.columns: # Loop through all the columns in dataframe\n",
+ " if col==var: \n",
+ " popped_col_data=merged_df.pop(col) # Pop the column from daat frame\n",
+ " col_position=col_position+1 # Find the next front position\n",
+ " merged_df.insert(col_position, col, popped_col_data) # Place variable at position\n",
+ " \n",
+ " \n",
+ " #Remove par1, par2, par1 and par2 types- added by pyco2, not needed by user \n",
+ " merged_df.pop('par1')\n",
+ " merged_df.pop('par2')\n",
+ " merged_df.pop('par1_type')\n",
+ " merged_df.pop('par2_type')\n",
+ "\n",
+ " #Remove duplicate columns from final data frame\n",
+ " duplicateColumnNames = list()\n",
+ "\n",
+ " for x in range(merged_df.shape[1]): # Iterate over all the columns in dataframe\n",
+ " col_name1= merged_df.columns[x] # Select column at xth index.\n",
+ "\n",
+ " for y in range(x + 1, merged_df.shape[1]): # Iterate over all the columns in DataFrame from (x+1)th index till end\n",
+ " col_name2= merged_df.columns[y]\n",
+ " \n",
+ " if col_name1.lower()==col_name2.lower(): # Check if column names are the same regardless of case\n",
+ " duplicateColumnNames.append(col_name1)\n",
+ " continue\n",
+ " \n",
+ " if '.1' in col_name1: # Check if there is a duplicate (same case), pandas will save this with a .1 at the end of the duplicated variable\n",
+ " col_name1_stripped=col_name1.strip('.1') # Remove .1 and check again for equality \n",
+ " if col_name1_stripped.lower()==col_name2.lower():\n",
+ " duplicateColumnNames.append(col_name1)\n",
+ " continue\n",
+ " \n",
+ " if '.1' in col_name2:\n",
+ " col_name2_stripped=col_name2.strip('.1')\n",
+ " if col_name1.lower()==col_name2_stripped.lower():\n",
+ " duplicateColumnNames.append(col_name2)\n",
+ " \n",
+ " merged_df = merged_df.drop(columns=duplicateColumnNames) #Drop all duplicates\n",
+ " merged_df=merged_df.dropna(axis=1,how='all') #Drop all empty columns\n",
+ "\n",
+ " # OUTPUT FILE----------------------------------------------------------------------\n",
+ " if os.path.isfile(os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"))==True: \n",
+ " os.remove(os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"))\n",
+ " merged_df.to_csv(os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"), index=False) \n",
+ " # OUTPUT FILE----------------------------------------------------------------------\n",
+ "\n",
+ " \n",
+ " printmd('**
PCO2sys ran successfully! Output file is saved as merged_btl_nutrient_pyco2sys.csv**')\n",
+ " printmd(\"
**Retrieving ice concentration now...**\")\n",
+ " \n",
+ " # add the \"continue button\" on the GUI ######################################\n",
+ " gridwindow={}\n",
+ " vbox_widgets = []\n",
+ " gridwindow['grid'] = widgets.GridspecLayout(1,1)\n",
+ "\n",
+ " continue_button2=widgets.Button(\n",
+ " description='Continue',\n",
+ " disabled=False,\n",
+ " button_style='', \n",
+ " tooltip='Click me',\n",
+ " icon=''\n",
+ " )\n",
+ " gridwindow['to_sic'] = widgets.HBox(children=[continue_button2])\n",
+ " vbox_widgets.append(gridwindow['to_sic'])\n",
+ " gridwindow['grid'][0, 0] = widgets.VBox(children=vbox_widgets)\n",
+ " display(gridwindow['grid'])\n",
+ " continue_button2.on_click(on_button_sic)\n",
+ " display(outsic)\n",
+ " ##################################################\n",
+ "\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ee45f378-f39f-4d89-86e1-5aa09db5cfec",
+ "metadata": {
+ "papermill": {
+ "duration": null,
+ "end_time": null,
+ "exception": null,
+ "start_time": null,
+ "status": "pending"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "L = mlp.Lock()\n",
+ "class addSeaIceConcentration:\n",
+ " def __init__(self, inputfile):\n",
+ " self.inputfile = inputfile\n",
+ "\n",
+ " def createShapefile(self, df, output_shp, time_header, prglabel):\n",
+ " ''' This function creates a shapefile from the geographical locations inside the input CSV file.\n",
+ " The coordinate system used for the output shapetile is the World Geodetic System (WGS) 1984.\n",
+ " Both Latitude and Longitude are in decimal degrees'''\n",
+ " prglabel.value = 'Creating the final shapefile...'\n",
+ " shpfile = os.path.join(os.path.dirname(self.inputfile), output_shp, output_shp + '.shp')\n",
+ " if not os.path.exists(os.path.join(os.path.dirname(self.inputfile), output_shp)):\n",
+ " os.makedirs(os.path.join(os.path.dirname(self.inputfile), output_shp))\n",
+ " driver = ogr.GetDriverByName(\"ESRI Shapefile\")\n",
+ " if os.path.exists(shpfile):\n",
+ " driver.DeleteDataSource(shpfile)\n",
+ " ds = driver.CreateDataSource(shpfile)\n",
+ " spref = osr.SpatialReference()\n",
+ " spref.ImportFromEPSG(4326)\n",
+ " layer = ds.CreateLayer('StationsLocations', spref, ogr.wkbPoint)\n",
+ " # create field to the layer\n",
+ " for c in df.columns:\n",
+ " u = list(df[c])\n",
+ " vint = [i for i in u if isinstance(i, int)]\n",
+ " vfloat = [i for i in u if isinstance(i, float)]\n",
+ " if c.__contains__(time_header):\n",
+ " layer.CreateField(ogr.FieldDefn(time_header, ogr.OFTDateTime))\n",
+ " elif c.__contains__('CIS_dates'):\n",
+ " layer.CreateField(ogr.FieldDefn('CIS_dates', ogr.OFTDateTime))\n",
+ " elif c.__contains__('Time'):\n",
+ " fieldname = ogr.FieldDefn('Time UTC', ogr.OFTString)\n",
+ " fieldname.SetWidth(20)\n",
+ " layer.CreateField(fieldname)\n",
+ " elif len(u) == len(vint):\n",
+ " df[c] = df[c].astype(float)\n",
+ " ##############################################\n",
+ " fieldname = ogr.FieldDefn(c, ogr.OFTInteger)\n",
+ " fieldname.SetPrecision(0)\n",
+ " layer.CreateField(fieldname)\n",
+ " elif len(u) == len(vfloat):\n",
+ " fieldname = ogr.FieldDefn(c, ogr.OFTReal)\n",
+ " fieldname.SetPrecision(6)\n",
+ " layer.CreateField(fieldname)\n",
+ " else:\n",
+ " df[c].astype(str)\n",
+ " fieldname = ogr.FieldDefn(c, ogr.OFTString)\n",
+ " fieldname.SetWidth(30)\n",
+ " layer.CreateField(fieldname)\n",
+ "\n",
+ " c_fid = 0\n",
+ " ld = layer.GetLayerDefn()\n",
+ " lon = [i for i in df.columns if i.__contains__('longitude')].pop()\n",
+ " lat = [i for i in df.columns if i.__contains__('latitude')].pop()\n",
+ " for i in df.index:\n",
+ " # create new point object\n",
+ " point = ogr.Geometry(ogr.wkbPoint)\n",
+ " point.AddPoint(float(df[lon].values[i]), float(df[lat].values[i]))\n",
+ " # create new feature\n",
+ " featureDfn = layer.GetLayerDefn()\n",
+ " feature = ogr.Feature(featureDfn)\n",
+ " feature.SetGeometry(point)\n",
+ " for k in range(0, df.columns.__len__()):\n",
+ " fieldName = ld.GetFieldDefn(k).GetName()\n",
+ " feature.SetField(fieldName, df[df.columns[k]].values[i])\n",
+ " c_fid += 1\n",
+ " # add the new feature to the new layer\n",
+ " layer.CreateFeature(feature)\n",
+ " del layer, ds, df\n",
+ " return 0\n",
+ "\n",
+ " def extractFromUniBremenAMSR2(self, prm):\n",
+ " ''' Sea Ice Concentration (SIC) from Bremen University are downloaded. The SIC located at each geographical\n",
+ " coordinate indicated in the input csv file are extracted and then added as an extra-column to the csv file. '''\n",
+ " L.acquire()\n",
+ " ddate = pd.to_datetime(prm[0])\n",
+ " dlat = prm[1]\n",
+ " dlon = prm[2]\n",
+ " q = prm[3]\n",
+ " m = datetime.strftime(ddate, '%b').lower() # extract the month in lowercase charachter\n",
+ " y = datetime.strftime(ddate, '%Y') # extract the year\n",
+ " with tempfile.TemporaryDirectory() as tmpDir:\n",
+ " url = r'https://seaice.uni-bremen.de/data/amsr2/asi_daygrid_swath/n6250/{0}/{1}/Arctic/asi-AMSR2-n6250-{2}-v5.4.tif'.format(\n",
+ " y, m, datetime.strftime(ddate, '%Y%m%d'))\n",
+ " if not os.path.exists(os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n",
+ " 'asi-AMSR2_{0}.tif'.format(\n",
+ " datetime.strftime(ddate, '%Y%m%d')))):\n",
+ " urllib.request.urlretrieve(url, os.path.join(tmpDir, 'asi-AMSR2.tif'))\n",
+ " if (q == 'Y'):\n",
+ " shutil.copy2(os.path.join(tmpDir, 'asi-AMSR2.tif'),\n",
+ " os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n",
+ " 'asi-AMSR2_{0}.tif'.format(\n",
+ " datetime.strftime(ddate, '%Y%m%d'))))\n",
+ " src_filename = os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n",
+ " 'asi-AMSR2_{0}.tif'.format(\n",
+ " datetime.strftime(ddate, '%Y%m%d')))\n",
+ " else:\n",
+ " src_filename = os.path.join(tmpDir, 'asi-AMSR2.tif')\n",
+ " else:\n",
+ " src_filename = os.path.join(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'),\n",
+ " 'asi-AMSR2_{0}.tif'.format(\n",
+ " datetime.strftime(ddate, '%Y%m%d')))\n",
+ " try:\n",
+ " outval = self.pointExtract(src_filename, dlat, dlon)\n",
+ " prm[4].put(prm[5])\n",
+ " except:\n",
+ " outval = np.nan\n",
+ " prm[4].put(prm[5])\n",
+ " pass\n",
+ " L.release()\n",
+ " return ([float(outval), datetime.strftime(ddate.to_pydatetime(), '%Y-%m-%d')])\n",
+ "\n",
+ " def extractFromCSI(self, prm):\n",
+ " '''For each acquisition date in the csv file, differences are calculated between each one of them and each one included\n",
+ " in the CIS tar files. Then the tar file corresponding the minimum difference is choosen for the extraction\n",
+ " of the Sea Ice Concentration. '''\n",
+ " L.acquire() # this is needed to lock each process to let them running separately without writing in a same variable at the same time\n",
+ " ddate = pd.to_datetime(prm[0]) # Acquisition date from the csv file.\n",
+ " dlat = prm[1]\n",
+ " dlon = prm[2]\n",
+ " fcis = prm[3]\n",
+ " CISRaster = prm[4]\n",
+ "\n",
+ " CIS_acquisition_times = [datetime.strptime(i.split('_')[2], '%Y%m%dT%H%MZ').date() for i in fcis]\n",
+ " wq = np.array(CIS_acquisition_times)\n",
+ " CIS_acquisition_times = list(np.unique(wq))\n",
+ " csv_acquisition_time = datetime.strftime(ddate, '%Y%m%dT%H%M%S')\n",
+ " sample_date = datetime.strptime(csv_acquisition_time, '%Y%m%dT%H%M%S').date()\n",
+ " dt_abs = [abs(sample_date - each_date) for each_date in CIS_acquisition_times]\n",
+ " closest_date = CIS_acquisition_times[dt_abs.index(min(dt_abs))]\n",
+ " outraster = os.path.join(CISRaster, [i for i in fcis if i.__contains__(datetime.strftime(closest_date, '%Y%m%d'))][0][:-4] + '.tif')\n",
+ " intval = self.pointExtract(outraster, dlat, dlon)\n",
+ " outval = intval[0][0]\n",
+ " prm[5].put(prm[6])\n",
+ " L.release() # this releases the locked process\n",
+ " return ([float(outval), datetime.strftime(closest_date, '%Y-%m-%d')])\n",
+ "# return ([float(outval), datetime.strftime(CIS_acquisition_times[s[0][0]], '%Y-%m-%d')])\n",
+ "\n",
+ " def selectCISFiles(self, prm):\n",
+ " '''This function select the filenames from the CIS acquired at the nearest time as each one of the within the\n",
+ " input csv file. The output list of files will be used to download them. '''\n",
+ " L.acquire()\n",
+ " ddate = pd.to_datetime(prm[0])\n",
+ " fcislist = prm[3]\n",
+ " CIS_acquisition_times = [datetime.strptime(i.split('_')[2], '%Y%m%dT%H%MZ').date() for i in fcislist]\n",
+ " wq = np.array(CIS_acquisition_times)\n",
+ " CIS_acquisition_times = list(np.unique(wq))\n",
+ " u = datetime.strftime(ddate, '%Y%m%dT%H%M%S') # sample date\n",
+ " sample_date = datetime.strptime(u, '%Y%m%dT%H%M%S').date()\n",
+ " dt_abs = [abs(sample_date - each_date) for each_date in CIS_acquisition_times]\n",
+ " closest_date = CIS_acquisition_times[dt_abs.index(min(dt_abs))]\n",
+ " prm[5].put(prm[6])\n",
+ " L.release()\n",
+ " return [i for i in fcislist if i.__contains__(datetime.strftime(closest_date, '%Y%m%d'))][0]\n",
+ "\n",
+ " def binaryretrieve(self, j):\n",
+ " '''The connection to the FTP server of the Canadian Ice Service is done in this function.\n",
+ " All the spatial coordinate are assumed to be within the region Eastern_Arctic and in the same year 2016\n",
+ " as the case of the GreenEdge data. '''\n",
+ " L.acquire()\n",
+ " i = j[0]\n",
+ " shp_for_UC3 = j[1]\n",
+ " hostname = 'sidads.colorado.edu'\n",
+ " ftp = FTP(hostname)\n",
+ " ftp.login(user='anonymous', passwd='')\n",
+ " ## This should be changed to be dynamically change depending on the region (here: Eastern_Arctic) and the year.\n",
+ " # The User Guide from the Canadian Ice Service (https://nsidc.org/data/G02171/versions/1?qt-data_set_tabs=3#qt-data_set_tabs) discribe all the possible region names\n",
+ " ftp.cwd('/pub/DATASETS/NOAA/G02171/Eastern_Arctic/2016/') \n",
+ " if not os.path.exists(os.path.join(shp_for_UC3, i)):\n",
+ " with open(os.path.join(shp_for_UC3, i), 'wb') as localfile:\n",
+ " ftp.retrbinary('RETR ' + i, localfile.write, 1024)\n",
+ " ftp.quit()\n",
+ " j[2].put(j[0])\n",
+ " L.release()\n",
+ " return 0\n",
+ "\n",
+ " def fetchTarFromCIS(self, tarfile, shp_for_UC3, CISRaster, prglabel, pr, pStatus):\n",
+ " ''' This function manage the file retrieval from the CIS and then call to another function to do the vector\n",
+ " shapefiles into rasters. '''\n",
+ "\n",
+ " if not os.path.exists(shp_for_UC3):\n",
+ " os.makedirs(shp_for_UC3)\n",
+ " '''For now, we assume all the data in the csv file were acquired in the same year (2016) and from\n",
+ " the same region (Eastern_Arctic) as the case of the GreenEdge data.'''\n",
+ " pool = mlp.Pool(processes=mlp.cpu_count()-2)\n",
+ " m = Manager()\n",
+ " queue = m.Queue()\n",
+ " tarfilelist = [[i, shp_for_UC3, queue] for i in tarfile]\n",
+ " s = pool.map_async(self.binaryretrieve, tarfilelist)\n",
+ " ##\n",
+ " while True:\n",
+ " if s.ready():\n",
+ " break\n",
+ " else:\n",
+ " c1 = int(queue.qsize() * 100 / len(tarfilelist))\n",
+ " pr.value=c1\n",
+ " prglabel.value = 'Fetching files from the CIS server...'\n",
+ " pStatus.value = f'{pr.value}%'\n",
+ " ##\n",
+ " del pool, s, queue, m\n",
+ " # Start a new progressbar for the shapefile converstion\n",
+ " m = Manager()\n",
+ " queue = m.Queue()\n",
+ " pr.value=0\n",
+ " prglabel.value = 'Converting shapefiles into raster files...'\n",
+ " pStatus.value = f'{pr.value}%'\n",
+ " ###################################################################\n",
+ " for f in tarfile:\n",
+ " '''Here the *.shp file have already been extracted from the *.tar file, \n",
+ " so we only need to point to it as shp_filename'''\n",
+ " shutil.unpack_archive(os.path.join(shp_for_UC3, f), shp_for_UC3, f[-3:])\n",
+ " shp_filename = os.path.join(shp_for_UC3, f[:-4] + '.shp')\n",
+ " outraster = os.path.join(CISRaster, f[:-4] + '.tif')\n",
+ " if not os.path.exists(outraster):\n",
+ " self.makeRasterFromSHP(shp_filename, outraster, 100)\n",
+ " queue.put(1)\n",
+ " c1 = int(queue.qsize() * 100 / len(tarfilelist))\n",
+ " pr.value=c1\n",
+ " pStatus.value = f'{pr.value}%'\n",
+ " del m, queue\n",
+ " return 0\n",
+ "\n",
+ "\n",
+ " def makeRasterFromSHP(self, shp_filename, outraster, pxlsize):\n",
+ " ''' This function handle the transformation of the vector shapefile format into rasters format.\n",
+ " The ogr python binding package is used to read the vector shepefile before their transfmation into raster.\n",
+ " Here only the Field CT holding the Sea Ice Concentration data are rasterized.\n",
+ " If needed, other Field amoung all of those included in the shapefile can be added as an additional band\n",
+ " into the output raster. '''\n",
+ " shpfile = ogr.Open(shp_filename)\n",
+ " layer = shpfile.GetLayer()\n",
+ " xmin, xmax, ymin, ymax = layer.GetExtent()\n",
+ " cols = int((xmax - xmin) / pxlsize)\n",
+ " rows = int((ymax - ymin) / pxlsize)\n",
+ " rdrive = gdal.GetDriverByName('GTiff')\n",
+ " ds = rdrive.Create(outraster, cols, rows, 1, gdal.GDT_Byte)\n",
+ " ds.SetGeoTransform([xmin, pxlsize, 0, ymax, 0, -pxlsize])\n",
+ " gdal.RasterizeLayer(ds, [1], layer, options=['ATTRIBUTE=CT'])\n",
+ " ds.SetProjection(layer.GetSpatialRef().ExportToPrettyWkt())\n",
+ " ds.GetRasterBand(1).SetNoDataValue(0)\n",
+ " del ds, rdrive, shpfile, layer\n",
+ " return 0\n",
+ "\n",
+ " def pointExtract(self, src_filename, dlat, dlon):\n",
+ " ''' The extraction process is handled inside this function.\n",
+ " The GDAL python binding package is used here to read the raster files needed for the extraction.\n",
+ " The pyproj package is used to bring the geographical coordinates from the input csv to match the spatial\n",
+ " coordinate system of the rasters in order to make the extraction of the right collocated pixel with the csv data. '''\n",
+ " src_ds = gdal.Open(src_filename)\n",
+ " gt = src_ds.GetGeoTransform()\n",
+ " band = src_ds.GetRasterBand(1)\n",
+ " proj = osr.SpatialReference(wkt=src_ds.GetProjection())\n",
+ " #####\n",
+ " wgs84 = pyproj.CRS(\"EPSG:4326\")\n",
+ " rstProj = pyproj.CRS(proj.ExportToProj4())\n",
+ " #####\n",
+ " point = ogr.Geometry(ogr.wkbPoint)\n",
+ " point.AddPoint(float(dlat), float(dlon)) # to make sure the corrdinates are not in string format\n",
+ " mx, my = pyproj.Transformer.from_proj(wgs84, rstProj).transform(point.GetX(), point.GetY())\n",
+ " px = int((mx - gt[0]) / gt[1]) # x pixel\n",
+ " py = int((my - gt[3]) / gt[5]) # y pixel\n",
+ " intval = band.ReadAsArray(px, py, 1, 1)\n",
+ " del band, src_ds, point, proj, gt, mx, my, px, py\n",
+ " return intval\n",
+ "\n",
+ " def getCISTarFileList(self, dlist, prglabel, pr, pStatus):\n",
+ " ''' This function extract the file list needed for the extraction. It uses the acquisition year\n",
+ " in the input csv file to locate the same year used in the FTP data endpoint in order to select the\n",
+ " files to be downloaded. '''\n",
+ " dl = pd.to_datetime(dlist)\n",
+ " csv_year = [datetime.strftime(s, '%Y') for s in dl]\n",
+ " csv_year = np.unique(csv_year)\n",
+ " fcis_gen = []\n",
+ " fcis = []\n",
+ " m = Manager()\n",
+ " queue = m.Queue()\n",
+ " for y in csv_year:\n",
+ " hostname = 'sidads.colorado.edu'\n",
+ " ftp = FTP(hostname)\n",
+ " ftp.login(user='anonymous', passwd='')\n",
+ " ''' There is more regions to be considered. Here the region Easter_Arctic is directly selected. '''\n",
+ " # TODO: Find a way to make automatic selection of the region of interest regarding the spatial extent of coordinate in the csv file used\n",
+ " ftp.cwd('/pub/DATASETS/NOAA/G02171/Eastern_Arctic/{0}/'.format(y))\n",
+ " files = ftp.nlst() # This extract all the files within the folder named with the YEAR in.\n",
+ " ftp.quit()\n",
+ " fcis_gen.append([i for i in files if i.__contains__('cis')])\n",
+ " for i in fcis_gen:\n",
+ " fcis = fcis + i\n",
+ " queue.put(i)\n",
+ " p = int(queue.qsize()*100/len(fcis_gen))\n",
+ " pr.value=p\n",
+ " prglabel.value = 'Building file list...'\n",
+ " pStatus.value = f'{pr.value}%'\n",
+ " del fcis_gen, m, queue\n",
+ " return fcis\n",
+ "\n",
+ " def ExtractionPixelValues(self, param_to_extract, q, rstsource, prglabel, pr, pStatus):\n",
+ " ''' * inputfile: Comma Separated Value (CSV) file with a header containning Date, Latitude, and Longitude.\n",
+ " * param_to_extract: name of the parameter to be extracted from the raster. This will become the name of\n",
+ " the new column that will be added to the initial csv file. Here it is about Sea_Ice_Concentration.\n",
+ " The format of the Date in the csv file should be mm/dd/yyyy.\n",
+ " The Latitude and Longitude Should be in full decimal format and their values are in the range [-180, 180].\n",
+ "\n",
+ " * The Extraction Process is don using parallel computing to accelerate the process. Parallel processing is\n",
+ " very usefull here as the Extraction of the data coresponding to each rows are independent from each other.\n",
+ " '''\n",
+ "\n",
+ " outfile = os.path.join(os.path.split(self.inputfile)[0],\n",
+ " os.path.split(self.inputfile)[1][:-4] + '_{0}_{1}.csv'.format(param_to_extract,\n",
+ " rstsource))\n",
+ " if os.path.exists(outfile):\n",
+ " os.remove(outfile)\n",
+ " df = pd.read_csv(self.inputfile, header=0, sep=',', parse_dates=True, skiprows=[1])\n",
+ " time_header=[i for i in df.columns if\n",
+ " ((i.__contains__('Date')) or (i.__contains__('date')) or\n",
+ " (i.__contains__('Time')) or (i.__contains__('time')))].pop()\n",
+ " if rstsource == 'BU':\n",
+ " m = Manager()\n",
+ " queue = m.Queue()\n",
+ " lon = [i for i in df.columns if i.__contains__('longitude')].pop()\n",
+ " lat = [i for i in df.columns if i.__contains__('latitude')].pop()\n",
+ " p = [[df[time_header][i], df[lat][i], df[lon][i], q, queue, i] for i in df.index]\n",
+ " if q == 'Y':\n",
+ " if not os.path.exists(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters')):\n",
+ " os.makedirs(os.path.join(os.path.dirname(self.inputfile), 'BU_rasters'))\n",
+ " pool = mlp.Pool(mlp.cpu_count() - 2)\n",
+ " s = pool.map_async(self.extractFromUniBremenAMSR2, p)\n",
+ " ##\n",
+ " while True:\n",
+ " if s.ready():\n",
+ " break\n",
+ " else:\n",
+ " c1=int(queue.qsize()*100/len(p))\n",
+ " pr.value=c1 \n",
+ " pStatus.value = f'{pr.value}%'\n",
+ " print(queue.qsize())\n",
+ " ##\n",
+ " a = np.array(s.get())\n",
+ " del s\n",
+ " b = np.transpose(a)\n",
+ " df[param_to_extract] = b[0]\n",
+ " df[time_header] = b[1]\n",
+ " del pool, m, queue\n",
+ " else:\n",
+ " CISRaster = os.path.join(os.path.dirname(self.inputfile), 'CISraster')\n",
+ " if not os.path.exists(CISRaster):\n",
+ " os.makedirs(CISRaster)\n",
+ " ''' Retrieval from the CIS server of filename list corresponding to each dataset in the csv file'''\n",
+ " fcis = self.getCISTarFileList(df[time_header], prglabel, pr, pStatus)\n",
+ " m = Manager()\n",
+ " queue = m.Queue()\n",
+ " pr.value=0\n",
+ " ''' Selection of each shapefile with a closest acquisition time to each dataset of the csv file. '''\n",
+ " lon = [i for i in df.columns if i.__contains__('longitude')].pop()\n",
+ " lat = [i for i in df.columns if i.__contains__('latitude')].pop()\n",
+ " p = [[df[time_header][i], df[lat][i], df[lon][i], fcis, CISRaster, queue, i] for i in df.index]\n",
+ " pool = mlp.Pool(mlp.cpu_count() - 2)\n",
+ " imglist = pool.map_async(self.selectCISFiles, p)\n",
+ " prglabel.value = 'Selecting CIS file...'\n",
+ " while True:\n",
+ " if imglist.ready():\n",
+ " break\n",
+ " else:\n",
+ " c1=int(queue.qsize()*100/len(p))\n",
+ " pr.value=c1\n",
+ " pStatus.value = f'{pr.value}%'\n",
+ " imgarray0 = np.array(imglist.get())\n",
+ " imgarray1 = np.unique(imgarray0)\n",
+ " imglist0 = list(imgarray1)\n",
+ " CIS_shp = os.path.join(os.path.dirname(self.inputfile), 'CIS_shp')\n",
+ " if not os.path.exists(CIS_shp):\n",
+ " os.makedirs(CIS_shp)\n",
+ " del imgarray0, imgarray1, pool, m, queue\n",
+ " '''Fetching the files from the remote server'''\n",
+ " self.fetchTarFromCIS(imglist0, CIS_shp, CISRaster, prglabel, pr, pStatus)\n",
+ " pool = mlp.Pool(mlp.cpu_count() - 2)\n",
+ " m = Manager()\n",
+ " queue = m.Queue()\n",
+ " p = [[df[time_header][i], df[lat][i], df[lon][i], fcis, CISRaster, queue, i] for i in df.index]\n",
+ " s = pool.map_async(self.extractFromCSI, p)\n",
+ " ##\n",
+ " while True:\n",
+ " if s.ready():\n",
+ " break\n",
+ " else:\n",
+ " c1=int(queue.qsize()*100/len(p))\n",
+ " pr.value=c1\n",
+ " prglabel.value = 'Extracting SIC from newly created rasters...'\n",
+ " pStatus.value = f'{pr.value}%'\n",
+ " del imglist0\n",
+ " ##\n",
+ " a = np.array(s.get())\n",
+ " del pool, m, queue\n",
+ " b = np.transpose(a)\n",
+ " df[param_to_extract] = b[0]\n",
+ " df['CIS_dates'] = b[1]\n",
+ " del s\n",
+ " if q == 'N':\n",
+ " shutil.rmtree(os.path.join(os.path.dirname(self.inputfile), 'CISraster'))\n",
+ " shutil.rmtree(os.path.join(os.path.dirname(self.inputfile), 'CIS_shp'))\n",
+ " df.to_csv(outfile,\n",
+ " sep=',', index=False, header=1)\n",
+ " output_shp = os.path.split(self.inputfile)[1][:-4] + '_{0}_{1}_SHP'.format(param_to_extract, rstsource) # Name of the output shapefile\n",
+ " self.createShapefile(df, output_shp, time_header, prglabel)\n",
+ " prglabel.value = 'Processing Finished!!'\n",
+ " pStatus.value = f'{pr.value}%'\n",
+ " output_gpkg = os.path.split(self.inputfile)[1][:-4] + '_{0}_{1}_gpkg'.format(param_to_extract, rstsource) # Name of the output Geopackage (gpkg) file\n",
+ " if not os.path.exists(os.path.join(os.path.dirname(self.inputfile), output_gpkg)):\n",
+ " os.makedirs(os.path.join(os.path.dirname(self.inputfile), output_gpkg))\n",
+ " gpkgfile = os.path.join(os.path.dirname(self.inputfile), output_gpkg, output_gpkg + '.gpkg')\n",
+ " os.system(f'ogr2ogr -a_srs EPSG:4326 -oo X_POSSIBLE_NAMES=Lon* -oo Y_POSSIBLE_NAMES=Lat* -f \"GPKG\" {gpkgfile} {outfile}') # Creates the gpkg file from the shapefile. This can be created directly from the csv.\n",
+ " return output_shp\n",
+ "\n",
+ " def getSeaIceSource(self, workDir, r, gLocalCopy, prglabel, pr, pStatus):\n",
+ " ''' The choice between the Sea Ice Data from the Canadian Sea Ice Service and Bremen University is handled\n",
+ " in this function. When the choice is done, another function is called to take care of the process of\n",
+ " extracting Sea Ice Concentration from the chosen data source. '''\n",
+ " if not os.path.exists(self.inputfile):\n",
+ " print(\"Input file not exists !\")\n",
+ " exit(-1)\n",
+ " if r == 1:\n",
+ " r = 'BU'\n",
+ " elif r == 2:\n",
+ " r = 'CIS'\n",
+ " else:\n",
+ " exit(-1)\n",
+ " if gLocalCopy == 'y':\n",
+ " gLocalCopy = 'Y'\n",
+ " elif gLocalCopy == 'n':\n",
+ " gLocalCopy = 'N'\n",
+ " t = datetime.now()\n",
+ " output_shp = self.ExtractionPixelValues('sea_ice_co', gLocalCopy, r, prglabel, pr, pStatus)\n",
+ " print(datetime.now() - t)\n",
+ " return output_shp\n",
+ "\n",
+ "class ccadi_uc3_mapping():\n",
+ " def __init__(self):\n",
+ " ## initiate the grid to display the contents of the page ###\n",
+ " self.gridwindow={}\n",
+ " self.vbox_widgets = []\n",
+ " self.gridwindow['grid'] = widgets.GridspecLayout(1,1)\n",
+ " \n",
+ " #####\n",
+ "\n",
+ " # read text\n",
+ " f=open(\"md_texts/SeaIceConcentration.md\",\"r\")\n",
+ " fc=f.read()\n",
+ " f.close()\n",
+ " text_html1 = markdown.markdown(fc)\n",
+ " del fc\n",
+ " self.gridwindow['InfoSIC'] = widgets.HTML(text_html1)\n",
+ " self.vbox_widgets.append(self.gridwindow['InfoSIC'])\n",
+ " # Fields\n",
+ " self.wdField = widgets.Text(\n",
+ " value=os.path.join(\"2016_int_btl_csv\", \"merged_btl_nutrient_pyco2sys.csv\"),\n",
+ " layout=widgets.Layout(width='max-content')\n",
+ " )\n",
+ "\n",
+ " self.chkb1 = widgets.Checkbox(\n",
+ " value=False,\n",
+ " description='Canadian Ice Service',\n",
+ " disabled=False,\n",
+ " indent=False\n",
+ " )\n",
+ " self.chkb2 = widgets.Checkbox(\n",
+ " value=False,\n",
+ " description='Bremen University',\n",
+ " disabled=False,\n",
+ " indent=False\n",
+ " )\n",
+ " self.chkb3 = widgets.Checkbox(\n",
+ " value=False,\n",
+ " description='Keep a local copy of the raster images',\n",
+ " disabled=False,\n",
+ " indent=False\n",
+ " )\n",
+ " self.gridwindow['checkbox'] = widgets.VBox(children=[self.chkb1, self.chkb2, self.chkb3])\n",
+ " self.vbox_widgets.append(self.gridwindow['checkbox'])\n",
+ " self.prg = widgets.IntProgress(\n",
+ " value=0,\n",
+ " min=0,\n",
+ " max=100,\n",
+ " bar_style='success',\n",
+ " style={'bar_color': 'green'},\n",
+ " orientation='horizontal',\n",
+ " layout=widgets.Layout(width='800px')\n",
+ " )\n",
+ " self.status = widgets.Label(value=f'{self.prg.value}%', layout=widgets.Layout(width='max-content'))\n",
+ " self.prg_label = widgets.Label('', layout=widgets.Layout(width='max-content'))\n",
+ " self.vbox_widgets.append(self.prg_label)\n",
+ " self.gridwindow['progressbar'] = widgets.HBox(children=[self.prg, self.status])\n",
+ " self.vbox_widgets.append(self.gridwindow['progressbar'])\n",
+ " self.okButton = widgets.Button(description=\"OK\")\n",
+ " \n",
+ " self.okButton.on_click(self.clickOkbutton)\n",
+ " #####\n",
+ " \n",
+ " self.UC3_mapping()\n",
+ "\n",
+ "\n",
+ " def check_checkBox(self):\n",
+ " c = 0\n",
+ " value = ''\n",
+ " q = 'n'\n",
+ " if self.chkb1.value==True:\n",
+ " value = self.chkb1.description\n",
+ " c = 2\n",
+ " if self.chkb2.value==True:\n",
+ " value = self.chkb2.description\n",
+ " c = 1\n",
+ " if (self.chkb3.value==True):\n",
+ " q = 'y'\n",
+ " return c, q\n",
+ "\n",
+ "\n",
+ " def clickOkbutton(self, b):\n",
+ " self.prg.value=0\n",
+ " self.prg_label.value = 'Processing...'\n",
+ " workDir = os.path.dirname(self.wdField.value)\n",
+ " if not os.path.exists(workDir):\n",
+ " os.makedirs(workDir)\n",
+ " inputfile = self.wdField.value\n",
+ " r, q = self.check_checkBox()\n",
+ " output_shp = addSeaIceConcentration(inputfile).getSeaIceSource(workDir, r, q, self.prg_label, self.prg, self.status)\n",
+ " \n",
+ "\n",
+ " def UC3_mapping(self): \n",
+ " def on_button_clicked(b):\n",
+ " showmap()\n",
+ "\n",
+ " out=widgets.Output()\n",
+ " @out.capture()\n",
+ " def showmap():\n",
+ "\n",
+ " workDir=os.path.join(\"2016_int_btl_csv\")\n",
+ " shp=os.path.join(workDir, \"merged_btl_nutrient_pyco2sys_sea_ice_co_CIS_SHP\", \"merged_btl_nutrient_pyco2sys_sea_ice_co_CIS_SHP.shp\")\n",
+ " data_full=gpd.read_file(shp)\n",
+ " \n",
+ " # Create a Geo-id which is needed by the Folium (it needs to have a unique identifier for each row)\n",
+ " data_full['geoid'] = data_full.index.astype(str)\n",
+ "\n",
+ "# dataf_0m=data.loc[np.round(data[\"sample_dep\"].values)==1]\n",
+ " dataf_10m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==10]\n",
+ " dataf_20m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==20]\n",
+ " dataf_30m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==30]\n",
+ " dataf_40m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==40]\n",
+ " dataf_50m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==50]\n",
+ " dataf_60m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==60]\n",
+ " dataf_70m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==70]\n",
+ " dataf_80m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==80]\n",
+ " dataf_90m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==90]\n",
+ " dataf_100m=data_full.loc[np.round(data_full[\"sample_dep\"].values)==100]\n",
+ "\n",
+ " ###\n",
+ "\n",
+ " lonCent = (data_full.bounds.maxx + data_full.bounds.minx).mean()/2\n",
+ " latCent = (data_full.bounds.maxy + data_full.bounds.miny).mean()/2\n",
+ " # creating a map object\n",
+ " m = leafmap.folium.Map(location=(latCent,lonCent), projections=\"epsg3575\", zoom_start=6)\n",
+ " #rst = os.path.join(\"2016_int_btl_csv\",\"CISraster\",\"cis_SGRDREA_20160606T1800Z_pl_a.tif\")\n",
+ "\n",
+ " ###\n",
+ " # Create the variable plot upon click on the stations on the map\n",
+ " def chart_func(df, st): #new function\n",
+ " chart_temp = alt.Chart(df).mark_line(color='red').transform_fold(\n",
+ " fold=['CTDTmp90', 'sample_dep'], \n",
+ " as_=['variable', 'value']).encode(\n",
+ " x=alt.X('CTDTmp90:Q', \n",
+ " axis=alt.Axis(title='Temperature (°C)', \n",
+ " titleColor='red'), \n",
+ " scale=alt.Scale(domain=[df['CTDTmp90'].min(), \n",
+ " df['CTDTmp90'].max()])),\n",
+ " y=alt.Y('sample_dep:Q',\n",
+ " axis=alt.Axis(title='Depth (m)'), \n",
+ " scale=alt.Scale(reverse=True, \n",
+ " domain=[0, df['sample_dep'].max()])),\n",
+ " color=alt.value('red')\n",
+ " )\n",
+ " chart_sal=alt.Chart(df).mark_line(color='green').transform_fold(\n",
+ " fold=['P_sal_CTD', 'sample_dep'], \n",
+ " as_=['variable', 'value']).encode(\n",
+ " x=alt.X('P_sal_CTD:Q', \n",
+ " axis=alt.Axis(title='Salinity', \n",
+ " titleColor='green'), \n",
+ " scale=alt.Scale(domain=[df['P_sal_CTD'].min(), \n",
+ " df['P_sal_CTD'].max()])),\n",
+ " y=alt.Y('sample_dep:Q', \n",
+ " axis=alt.Axis(title='Depth (m)'), \n",
+ " scale=alt.Scale(reverse=True, \n",
+ " domain=[0, df['sample_dep'].max()])),\n",
+ " color=alt.value('green')\n",
+ " )\n",
+ " ufchart=alt.layer(chart_temp, chart_sal, \n",
+ " title=f\"Vertical profil of Salinity and Temperature at Station: {st}\", \n",
+ " width=400, height=400).resolve_scale(x='independent').configure_axisTop(titleColor='green').configure_axisBottom(titleColor='red').resolve_legend(color='independent') \n",
+ " return ufchart.to_json()\n",
+ " \n",
+ " # extract unique coordinates\n",
+ " data_full=data_full.round({'latitude':3, 'longitude':3})\n",
+ " df=data_full[['latitude', 'longitude']].drop_duplicates() # drop all duplicated coordinates and keep the row indexes\n",
+ " u=[]\n",
+ " for i in df.index: # use the indexes (kept in the precedent lines) to build a new dataframe from df\n",
+ " u.append(data_full.values[i])\n",
+ " dg=pd.DataFrame(u, columns=data_full.columns)\n",
+ " data_coord=dg[['station', 'latitude', 'longitude']]\n",
+ " del dg\n",
+ " full_profile = leafmap.folium.FeatureGroup(name=\"Full profiles\")\n",
+ " for i, st in zip(df.index, data_coord['station'].values[:]):\n",
+ " ds0=data_full[\n",
+ " [\n",
+ " 'sample_dep',\n",
+ " 'P_sal_CTD', \n",
+ " 'station', \n",
+ " 'CTDTmp90', \n",
+ " 'latitude', \n",
+ " 'longitude']\n",
+ " ].loc[\n",
+ " data_full[\"station\"].values==st\n",
+ " ]\n",
+ " ds2=ds0.dropna().round({\n",
+ " \"CTDTmp90\":2, \n",
+ " \"P_sal_CTD\":2, \n",
+ " 'latitude': 3, \n",
+ " 'longitude':3})\n",
+ " chart=chart_func(ds0, st)\n",
+ " pp=leafmap.folium.Popup(max_width=600).add_child(leafmap.folium.VegaLite(chart, width=600))\n",
+ " full_profile.add_child(leafmap.folium.CircleMarker(\n",
+ " location=[data_full['latitude'].values[i], data_full['longitude'].values[i]], radius=6,\n",
+ " popup=pp,\n",
+ " ))\n",
+ " full_profile.add_to(m)\n",
+ " \n",
+ " # Select only needed columns \n",
+ " data_10m = dataf_10m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n",
+ " \n",
+ " # Add data near the sea surface: 10m\n",
+ " leafmap.folium.features.GeoJson(dataf_10m,\n",
+ " name='Data at 10m depth',\n",
+ " style_function=lambda x: {'color':'transparent','fillColor':'transparent','weight':0},\n",
+ " tooltip=leafmap.folium.features.GeoJsonTooltip(\n",
+ " fields=[\n",
+ " 'P_sal_CTD', \n",
+ " 'station', \n",
+ " 'sample_dep', \n",
+ " 'CTDTmp90'],\n",
+ " aliases = [\n",
+ " 'Practical salinity from CTD', \n",
+ " 'Station name', \n",
+ " 'sample depth (m)',\n",
+ " 'Temperature from CTD (°C)'\n",
+ " ],\n",
+ " sticky=False)\n",
+ " ).add_to(m)\n",
+ "\n",
+ "\n",
+ " # Select only needed columns\n",
+ " data_20m = dataf_20m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n",
+ " \n",
+ " # Add data near the sea surface: 20m\n",
+ " leafmap.folium.features.GeoJson(dataf_20m,\n",
+ " name='Data at 20m depth',\n",
+ " style_function=lambda x: {'color':'transparent','fillColor':'transparent','weight':0},\n",
+ " tooltip=leafmap.folium.features.GeoJsonTooltip(\n",
+ " fields=[\n",
+ " 'P_sal_CTD', \n",
+ " 'station', \n",
+ " 'sample_dep', \n",
+ " 'CTDTmp90'],\n",
+ " aliases = [\n",
+ " 'Practical salinity from CTD', \n",
+ " 'Station name', \n",
+ " 'sample depth (m)',\n",
+ " 'Temperature from CTD (°C)'\n",
+ " ],\n",
+ " sticky=False)\n",
+ " ).add_to(m)\n",
+ " \n",
+ " # Select only needed columns\n",
+ " data_30m = dataf_30m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n",
+ " \n",
+ " # Add data near the sea surface: 30m\n",
+ " leafmap.folium.features.GeoJson(dataf_30m,\n",
+ " name='Data at 30m depth',\n",
+ " style_function=lambda x: {\n",
+ " 'color':'transparent',\n",
+ " 'fillColor':'transparent',\n",
+ " 'weight':0\n",
+ " },\n",
+ " tooltip=leafmap.folium.features.GeoJsonTooltip(\n",
+ " fields=[\n",
+ " 'P_sal_CTD', \n",
+ " 'station', \n",
+ " 'sample_dep', \n",
+ " 'CTDTmp90'],\n",
+ " aliases = [\n",
+ " 'Practical salinity from CTD', \n",
+ " 'Station name', \n",
+ " 'sample depth (m)',\n",
+ " 'Temperature from CTD (°C)'\n",
+ " ],\n",
+ " sticky=False)\n",
+ " ).add_to(m)\n",
+ "\n",
+ " # Select only needed columns\n",
+ " data_40m = dataf_40m[['geoid', 'P_sal_CTD', 'station', 'sample_dep', 'CTDTmp90', 'geometry']]\n",
+ " \n",
+ " # Add data near the sea surface: 40m\n",
+ " leafmap.folium.features.GeoJson(dataf_40m,\n",
+ " name='Data at 40m depth',\n",
+ " style_function=lambda x: {\n",
+ " 'color':'transparent',\n",
+ " 'fillColor':'transparent',\n",
+ " 'weight':0\n",
+ " },\n",
+ " tooltip=leafmap.folium.features.GeoJsonTooltip(\n",
+ " fields=[\n",
+ " 'P_sal_CTD', \n",
+ " 'station', \n",
+ " 'sample_dep', \n",
+ " 'CTDTmp90'],\n",
+ " aliases = [\n",
+ " 'Practical salinity from CTD', \n",
+ " 'Station name', \n",
+ " 'sample depth (m)',\n",
+ " 'Temperature from CTD (°C)'\n",
+ " ],\n",
+ " sticky=False)\n",
+ " ).add_to(m)\n",
+ "# # Select only needed columns\n",
+ " data_50m = dataf_50m[['geoid', 'P_sal_CTD', 'station', \n",
+ " 'sample_dep', 'CTDTmp90', 'geometry', \n",
+ " 'latitude', 'longitude']]\n",
+ " \n",
+ " # Add data near the sea surface: 50m\n",
+ " leafmap.folium.features.GeoJson(dataf_50m,\n",
+ " name='Data at 50m depth',\n",
+ " style_function=lambda x: {\n",
+ " 'color':'transparent',\n",
+ " 'fillColor':'transparent',\n",
+ " 'weight':0\n",
+ " },\n",
+ " tooltip=leafmap.folium.features.GeoJsonTooltip(\n",
+ " fields=[\n",
+ " 'P_sal_CTD', \n",
+ " 'station', \n",
+ " 'sample_dep', \n",
+ " 'CTDTmp90'],\n",
+ " aliases = [\n",
+ " 'Practical salinity from CTD', \n",
+ " 'Station name', \n",
+ " 'sample depth (m)',\n",
+ " 'Temperature from CTD (°C)'\n",
+ " ],\n",
+ " sticky=False)\n",
+ " ).add_to(m)\n",
+ "\n",
+ "# ######################################################################################################################\n",
+ "\n",
+ " leafmap.folium.LayerControl().add_to(m)\n",
+ " display(m)\n",
+ " \n",
+ " self.showmap_button=widgets.Button(\n",
+ " description='Show Map',\n",
+ " disabled=False,\n",
+ " button_style='', \n",
+ " tooltip='Click me',\n",
+ " icon=''\n",
+ " )\n",
+ " self.gridwindow['ok_and_continue'] = widgets.HBox(children=[self.okButton, self.showmap_button])\n",
+ " self.vbox_widgets.append(self.gridwindow['ok_and_continue'])\n",
+ " \n",
+ " self.gridwindow['grid'][0, 0] = widgets.VBox(children=self.vbox_widgets) #\n",
+ " \n",
+ " self.accordion0 = widgets.Accordion(\n",
+ " children=[widgets.HBox(children = [self.gridwindow['grid'][0, 0]])]\n",
+ " )\n",
+ " self.accordion0.set_title(0, 'Adding Sea Ice Concentrations into the combined BTL_Nutrient file.')\n",
+ " display(self.accordion0)\n",
+ "\n",
+ " self.showmap_button.on_click(on_button_clicked)\n",
+ " display(out)\n",
+ " \n",
+ "\n",
+ "\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3c73708d-556c-49d8-add3-61098022648b",
+ "metadata": {
+ "papermill": {
+ "duration": null,
+ "end_time": null,
+ "exception": null,
+ "start_time": null,
+ "status": "pending"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "merging_gui_jupiter()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python [conda env:edc-default-2022.10-14]",
+ "language": "python",
+ "name": "conda-env-edc-default-2022.10-14-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ },
+ "papermill": {
+ "default_parameters": {},
+ "duration": 3.240193,
+ "end_time": "2022-10-25T15:13:48.375680",
+ "environment_variables": {},
+ "exception": true,
+ "input_path": "/tmp/tmpidqkx9fb",
+ "output_path": "/tmp/notebook_output.ipynb",
+ "parameters": {},
+ "start_time": "2022-10-25T15:13:45.135487",
+ "version": "2.3.4"
+ },
+ "properties": {
+ "authors": [
+ {
+ "id": "39a21097-0c47-4efa-afb8-21161b20e5ff",
+ "name": "yanique.campbell@umanitoba.ca"
+ }
+ ],
+ "description": "Ocean acidification in Baffin Bay",
+ "id": "c250de70-680e-43e1-a62f-07f420e9a180",
+ "license": null,
+ "name": "UC3 Ocean Acidification Notebook",
+ "requirements": [],
+ "tags": [
+ "Jupyter",
+ "Polar"
+ ],
+ "tosAgree": true,
+ "type": "Jupyter Notebook",
+ "version": "0.0.1"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "state": {},
+ "version_major": 2,
+ "version_minor": 0
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file