From 5d3148ea681380fb726a74977fd4d36024ca28ea Mon Sep 17 00:00:00 2001
From: Phil Owen <19691521+PhillipsOwen@users.noreply.github.com>
Date: Mon, 4 Nov 2024 12:57:43 -0500
Subject: [PATCH] making a number of log statements info to debug level

---
 src/common/generate_urls_from_times.py |   4 +-
 src/common/geopoints_url.py            | 149 ++++++++++++-------------
 src/common/utilities.py                |  10 +-
 3 files changed, 78 insertions(+), 85 deletions(-)

diff --git a/src/common/generate_urls_from_times.py b/src/common/generate_urls_from_times.py
index 6fa40ff..14ebd6a 100644
--- a/src/common/generate_urls_from_times.py
+++ b/src/common/generate_urls_from_times.py
@@ -510,7 +510,7 @@ def build_url_list_from_yaml_and_times(self, ensemble='nowcast')->list:
                 url = construct_url_from_yaml( config, time, self.instance_name, ensemble, self.grid_name, hurricane_yaml_year=self.hurricane_yaml_year, hurricane_yaml_source=self.hurricane_yaml_source )
                 if url not in urls:
                     urls.append(url)
-        logger.info('Constructed %s urls of ensemble %s based on the YML', urls, ensemble)
+        logger.debug('Constructed %s urls of ensemble %s based on the YML', urls, ensemble)
         return urls

 # Approach Used by ADDA
@@ -568,7 +568,7 @@ def main(args):
     config_name=args.config_name if args.config_name is not None else os.path.join(os.path.dirname(__file__), '../config', 'url_framework.yml')

     # Set up IO env
-    logger.info("Product Level Working in %s.", os.getcwd())
+    logger.debug("Product Level Working in %s.", os.getcwd())

     if args.instance_name is not None:
         logger.debug('Ignoring args.instance_name for the testing sequence')

diff --git a/src/common/geopoints_url.py b/src/common/geopoints_url.py
index afb26e4..b6034dd 100644
--- a/src/common/geopoints_url.py
+++ b/src/common/geopoints_url.py
@@ -1,4 +1,4 @@
-'''
+"""
 MIT License

 Copyright (c) 2022,2023,2024 Renaissance Computing Institute
@@ -8,33 +8,29 @@
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-'''

-"""
 Time series extraction

 Authors: Jeffrey L. Tilson, Brian O. Blanton 8/2024
 """
-#!/usr/bin/env python
-# coding: utf-8
-# pylint: skip-file
-
 import sys
 import pandas as pd
-import numpy as np
 import time as tm
 import src.common.utilities as utilities
 import src.common.generate_urls_from_times as genurls
 from argparse import ArgumentParser

+# pylint: skip-file
+
 # create a logger
 logger = utilities.logger

 # Define some basic mappings for URL to variable names. Can override using CI variables
-var_mapper={'fort':'zeta','swan':'swan_HS'}
+var_mapper = {'fort': 'zeta', 'swan': 'swan_HS'}
+

-def guess_variable_name(url)->str:
+def guess_variable_name(url) -> str:
     """
     Simply search the given URL for occurrences of either fort or swan. Choose the variable appropriately.
     User may always override using --variable_name

     Parameters:
         url: (str) valid urls
     Returns:
         varname: (str). Guess whether varname is zeta or swan_HS based on url nomenclature and specifications in the var_mapper dict
     """
-    varname=None
-    for key,value in var_mapper.items():
+    varname = None
+    for key, value in var_mapper.items():
         if isinstance(key, str) and key.casefold() in url.casefold():
-            varname=value
+            varname = value
             break
     return varname

-def strip_ensemble_from_url(urls)->str:
+
+def strip_ensemble_from_url(urls) -> str:
     """
     We mandate that the URLs input to this fetcher are those used to access the TDS server used in APSViz.
     The "ensemble" information will be in position .split('/')[-2]
     eg. 'http://tds.renci.org/thredds/dodsC/2021/nam/2021052318/hsofs/hatteras.renci.org/hsofs-nam-bob-2021/nowcast/fort.63.nc'
@@ -64,11 +61,12 @@
     url = grab_first_url_from_urllist(urls)
     try:
         words = url.split('/')
-        ensemble=words[-2] # Usually nowcast, forecast, etc.
+        ensemble = words[-2]  # Usually nowcast, forecast, etc.
     except IndexError as e:
         logger.exception(f'strip_ensemble_from_url Unexpected failure try next:')
     return ensemble

+
 def first_true(iterable, default=False, pred=None):
     """
     itertools recipe found in the Python 3 docs
@@ -83,7 +81,8 @@
     """
     return next(filter(pred, iterable), default)

-def grab_first_url_from_urllist(urls)->str:
+
+def grab_first_url_from_urllist(urls) -> str:
     """
     eg. 'http://tds.renci.org/thredds/dodsC/2021/nam/2021052318/hsofs/hatteras.renci.org/hsofs-nam-bob-2021/nowcast/fort.63.nc'

@@ -98,59 +97,60 @@ def grab_first_url_from_urllist(urls)->str:
     url = first_true(urls)
     return url

+
 def main(args):
-    variable_name=args.variable_name
-    url=args.url
-    lon=args.lon
-    lat=args.lat
-    nearest_neighbors=args.kmax
-    ndays=args.ndays # Look back/forward
-
-    logger.info('Input URL word is %s',url)
+    variable_name = args.variable_name
+    url = args.url
+    lon = args.lon
+    lat = args.lat
+    nearest_neighbors = args.kmax
+    ndays = args.ndays  # Look back/forward
+
+    logger.info('Input URL word is %s', url)

     if variable_name is None:
-        variable_name=guess_variable_name(url)
+        variable_name = guess_variable_name(url)
     if variable_name is None:
         logger.error('Variable name invalid or not identified')
         sys.exit(1)
-    logger.info(f' Identified variable name is {variable_name}')
+    logger.debug(f' Identified variable name is {variable_name}')

-    ensemble=strip_ensemble_from_url([url])
-    if args.ensemble is not None: # Else use the ensemble present in the input URL. Allow us to input a forecast but choose the nowcast
+    ensemble = strip_ensemble_from_url([url])
+    if args.ensemble is not None:  # Else use the ensemble present in the input URL. Allow us to input a forecast but choose the nowcast
         ensemble = args.ensemble
-    logger.info(f'Input URL ensemble determined to be {ensemble}')
+    logger.debug(f'Input URL ensemble determined to be {ensemble}')

     # Try to set up proper header names for ADCIRC/SWAN and for nowcast/forecast
-    dataproduct='Forecast'
-    if ensemble=='nowcast':
-        dataproduct='Nowcast'
+    dataproduct = 'Forecast'
+    if ensemble == 'nowcast':
+        dataproduct = 'Nowcast'

     # Now figure out data source: adcirc or swan
-    datasrc='APS'
-    if variable_name=='swan_HS':
-        datasrc='SWAN'
-    headername=f'{datasrc} {dataproduct}'
-    logger.info(f' Header name defined to be {headername}')
+    datasrc = 'APS'
+    if variable_name == 'swan_HS':
+        datasrc = 'SWAN'
+    headername = f'{datasrc} {dataproduct}'
+    logger.debug(f' Header name defined to be {headername}')

     if ndays <= 0:
-        logger.info(f'Build list of URLs to fetch: ndays lookback is {ndays}')
-        rpl = genurls.generate_urls_from_times(url=url,timein=None, timeout=None, ndays=ndays, grid_name=None, instance_name=None, config_name=None)
+        logger.debug(f'Build list of URLs to fetch: ndays lookback is {ndays}')
+        rpl = genurls.generate_urls_from_times(url=url, timein=None, timeout=None, ndays=ndays, grid_name=None, instance_name=None, config_name=None)
         new_urls = rpl.build_url_list_from_template_url_and_offset(ensemble=ensemble)
-        logger.debug('New URL list %s', new_urls)
+        logger.info('New URL list %s', new_urls)
     else:
-        new_urls=[url]
-    logger.info('Number of URLs to try and process is: %s', len(new_urls))
+        new_urls = [url]
+    logger.debug('Number of URLs to try and process is: %s', len(new_urls))

-    logger.debug('Lon: %s, Lat: %s', lon, lat)
+    logger.info('Lon: %s, Lat: %s', lon, lat)
     logger.debug('Selected nearest_neighbors value is: %s', nearest_neighbors)

-    if len(new_urls) ==0:
+    if len(new_urls) == 0:
         logger.error('No URLs identified given the input URL: %s. Abort', url)
         sys.exit(1)

-    data_list=list()
-    exclude_list=list()
+    data_list = list()
+    exclude_list = list()

-    t0=tm.time()
+    t0 = tm.time()
     for url in new_urls:
         logger.debug('URL: %s', url)
         try:
@@ -159,73 +159,66 @@ def main(args):
             #df_product_metadata.to_csv(f'Product_meta.csv',header=args.keep_headers)
             data_list.append(df_product_data)
             exclude_list.append(df_excluded)
-        except (OSError,FileNotFoundError):
+        except (OSError, FileNotFoundError):
             logger.warning('Current URL was not found: %s. Try another...', url)
             pass
-    logger.info('Fetching Runtime was: %s seconds', tm.time()-t0)
+    logger.info('Fetching Runtime was: %s seconds', tm.time() - t0)

     # If absolutely nothing comes back, return None
     try:
-        df=pd.concat(data_list,axis=0)
-        df.columns=[headername]
-        df = (df.reset_index()
-              .drop_duplicates(subset='index', keep='last')
-              .set_index('index').sort_index())
-        df_excluded=pd.concat(exclude_list,axis=0)
+        df = pd.concat(data_list, axis=0)
+        df.columns = [headername]
+        df = (df.reset_index().drop_duplicates(subset='index', keep='last').set_index('index').sort_index())
+        df_excluded = pd.concat(exclude_list, axis=0)
         df.index = df.index.strftime('%Y-%m-%d %H:%M:%S')
-        df.index.name='time'
+        df.index.name = 'time'
         logger.debug('Dimension of final data array: %s', df.shape)
         logger.debug('Dimension of excluded URL list array: %s', df_excluded.shape)
     except ValueError:
         logger.info('No data found for the specified lon/lat pair. Return None')
-        df=None
+        df = None

     # Final data outputs
     # df.to_csv('Product_data_geopoints.csv')
     # df_excluded.to_csv('Product_excluded_geopoints.csv')
-
-    logger.info('Finished. Runtime was: %s seconds', tm.time()-t0)
+
+    logger.info('Finished. Runtime was: %s seconds', tm.time() - t0)
     return df

+
 if __name__ == '__main__':
-    ret_val=0
+    ret_val = 0

     try:
         parser = ArgumentParser()
-        parser.add_argument('--lon', action='store', dest='lon', default=None, type=float,
-                            help='lon: longitude value for time series extraction')
-        parser.add_argument('--lat', action='store', dest='lat', default=None, type=float,
-                            help='lat: latitude value for time series extraction')
+        parser.add_argument('--lon', action='store', dest='lon', default=None, type=float, help='lon: longitude value for time series extraction')
+        parser.add_argument('--lat', action='store', dest='lat', default=None, type=float, help='lat: latitude value for time series extraction')
         parser.add_argument('--variable_name', action='store', dest='variable_name', default=None, type=str,
-                            help='Optional variable name of interest from the supplied url')
-        parser.add_argument('--kmax', action='store', dest='kmax', default=10, type=int,
-                            help='nearest_neighbors values when performing the Query')
+                            help='Optional variable name of interest from the supplied url')
+        parser.add_argument('--kmax', action='store', dest='kmax', default=10, type=int, help='nearest_neighbors values when performing the Query')
         parser.add_argument('--alt_urlsource', action='store', dest='alt_urlsource', default=None, type=str,
-                            help='Alternative location for the ADCIRC data - NOTE specific formatting requirements exist')
-        parser.add_argument('--url', action='store', dest='url', default=None, type=str,
-                            help='Specify FQ URL')
-        parser.add_argument('--keep_headers', action='store_true', default=True,
-                            help='Boolean: Indicates to add header names to output files')
+                            help='Alternative location for the ADCIRC data - NOTE specific formatting requirements exist')
+        parser.add_argument('--url', action='store', dest='url', default=None, type=str, help='Specify FQ URL')
+        parser.add_argument('--keep_headers', action='store_true', default=True, help='Boolean: Indicates to add header names to output files')
         parser.add_argument('--ensemble', action='store', dest='ensemble', default=None, type=str,
-                            help='Choose overriding ensemble such as nowcast. Else internal code extracts from the URL')
+                            help='Choose overriding ensemble such as nowcast. Else internal code extracts from the URL')
         parser.add_argument('--ndays', action='store', dest='ndays', default=0, type=int,
-                            help='ndays to scan: Default=0, <0 means look back. >0 means look forward')
+                            help='ndays to scan: Default=0, <0 means look back. >0 means look forward')
         args = parser.parse_args()

         # log the input args
-        logger.debug('input args: %s',args)
+        logger.debug('input args: %s', args)

         # Call the runner
         df = main(args)

         if df is not None:
-            logger.debug('Final output df:%s:%s',df.head(5),df.shape)
+            logger.debug('Final output df:%s:%s', df.head(5), df.shape)
         else:
             logger.debug('Final output df is None: No data found')
     except Exception:
         logger.exception("Exit: exception occurred")
-        ret_val=1
+        ret_val = 1

     sys.exit(ret_val)
-

diff --git a/src/common/utilities.py b/src/common/utilities.py
index 3c3a88b..db0fa2f 100644
--- a/src/common/utilities.py
+++ b/src/common/utilities.py
@@ -257,7 +257,7 @@ def ComputeBasisRepresentation(xylist, agdict, agresults):
     agresults['final_weights']=final_weights
     agresults['final_jvals']=final_jvals
     agresults['final_status']=final_status
-    logger.debug('Compute of basis took: %s seconds', tm.time()-t0)
+    logger.info('Compute of basis took: %s seconds', tm.time()-t0)
     # Keep the list if the user needs to know after the fact
     outside_elements = np.argwhere(np.isnan(final_weights).all(axis=1)).ravel()
     agresults['outside_elements']=outside_elements
@@ -318,7 +318,7 @@ def WaterLevelSelection(t, data_list, final_weights):
             df_single[f'P{vertex}']=dataseries[vertex].values
             if df_single.count()[0] > 0 :  # df.notna().sum()
                 final_list.append(df_single)
-                logger.info('Inserted one chosen df_single with non nan values for index %s at count number %s', index,count)
+                logger.debug('Inserted one chosen df_single with non nan values for index %s at count number %s', index,count)
                 break
     logger.debug('Do Selection water series update')
     try:
@@ -386,7 +386,7 @@ def ConstructReducedWaterLevelData_from_ds(ds, agdict, agresults, variable_name=
     #logger.info('Selecting the weighted mean time series')
     #df_final=WaterLevelReductions(t, data_list, final_weights)

-    logger.info('Selecting the greedy alg: first in list with not all nans time series')
+    logger.debug('Selecting the greedy alg: first in list with not all nans time series')
     df_final=WaterLevelSelection(t, data_list, final_weights)

     t0=tm.time()
@@ -412,9 +412,9 @@ def Combined_pipeline(url, variable_name, lon, lat, nearest_neighbors=10):
     ds = f63_to_xr(url)
     agdict=get_adcirc_grid_from_ds(ds)
     agdict=attach_element_areas(agdict)
-    logger.debug('Compute_pipeline initiation: %s seconds', tm.time()-t0)
+    logger.info('Compute_pipeline initiation: %s seconds', tm.time()-t0)

-    logger.debug('Start annual KDTree pipeline LON: %s LAT: %s', geopoints[0][0], geopoints[0][1])
+    logger.info('Start annual KDTree pipeline LON: %s LAT: %s', geopoints[0][0], geopoints[0][1])
     agdict=ComputeTree(agdict)
     agresults=ComputeQuery(geopoints, agdict, kmax=nearest_neighbors)
     agresults=ComputeBasisRepresentation(geopoints, agdict, agresults)
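
Usage note (illustrative, not part of the commit): after this change the per-run diagnostics sit at DEBUG level, so they only appear when the shared logger is configured for DEBUG. Below is a minimal sketch of driving geopoints_url.main() with that verbosity restored. It assumes utilities.logger is a standard logging.Logger whose handlers pass DEBUG records, and that main() accepts any argparse-style Namespace (both consistent with the code above). The lon/lat values are hypothetical, and the URL is the TDS example quoted in strip_ensemble_from_url()'s docstring.

    import logging
    from argparse import Namespace

    import src.common.utilities as utilities
    import src.common.geopoints_url as geopoints_url

    # Surface the messages this commit demoted from info to debug
    # (assumes the attached handlers also allow DEBUG records through).
    utilities.logger.setLevel(logging.DEBUG)

    args = Namespace(
        lon=-75.55, lat=35.22,   # hypothetical geopoint for the extraction
        variable_name=None,      # None lets guess_variable_name() infer 'zeta' from 'fort'
        kmax=10,                 # nearest_neighbors count used by the query
        alt_urlsource=None,
        url='http://tds.renci.org/thredds/dodsC/2021/nam/2021052318/hsofs/hatteras.renci.org/hsofs-nam-bob-2021/nowcast/fort.63.nc',
        keep_headers=True,
        ensemble=None,           # None keeps the ensemble parsed from the URL ('nowcast')
        ndays=0,                 # default; <0 looks back, >0 looks forward
    )
    df = geopoints_url.main(args)  # DataFrame of extracted values, or None if nothing found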