From 77e92aa1158a4082f913f0cfb71218a9f9b12f1a Mon Sep 17 00:00:00 2001
From: Ian Fenty
Date: Sat, 12 Oct 2024 22:53:30 +0000
Subject: [PATCH 1/4] rewrote use of TemporaryDirectory. Now everything inside
 a "with" scope

---
 .../ecco_generate_dataproducts.py             | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/processing/src/ecco_dataset_production/ecco_generate_dataproducts.py b/processing/src/ecco_dataset_production/ecco_generate_dataproducts.py
index 4cabdb9..c6b5173 100644
--- a/processing/src/ecco_dataset_production/ecco_generate_dataproducts.py
+++ b/processing/src/ecco_dataset_production/ecco_generate_dataproducts.py
@@ -92,13 +92,20 @@ def ecco_make_granule( task, cfg,
 
             merged_variable_dataset_with_all_metadata.to_netcdf(
                 this_task['granule'], encoding=encoding)
         else:
-            tmpdir = tempfile.TemporaryDirectory()
-            _src = os.path.basename(this_task['granule'])
-            _dest = this_task['granule']
-            merged_variable_dataset_with_all_metadata.to_netcdf(
-                os.path.join(tmpdir,_src), encoding=encoding)
-            log.info('uploading %s to %s', os.path.join(tmpdir,_src), _dest)
-            ecco_aws_s3_cp.aws_s3_cp( src=os.path.join(tmpdir,_src), dest=_dest, **kwargs)
+
+            with tempfile.TemporaryDirectory() as tmpdir:
+                log.info('temporary directory created: %s', tmpdir)
+
+                _src = os.path.basename(this_task['granule'])
+                _dest = this_task['granule']
+
+                merged_variable_dataset_with_all_metadata.to_netcdf(
+                    os.path.join(tmpdir,_src), encoding=encoding)
+
+                log.info('uploading %s to %s', os.path.join(tmpdir,_src), _dest)
+                ecco_aws_s3_cp.aws_s3_cp( src=os.path.join(tmpdir,_src), dest=_dest, **kwargs)
+                # temporary directory will self-destruct at end of with block
+
 
     log.info('... done')
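For reference, the context-manager pattern adopted above, reduced to a standalone
sketch (illustrative only, not project code): entered via "with",
tempfile.TemporaryDirectory() yields the directory path as a str and removes the
whole tree when the block exits. The pre-patch code passed the TemporaryDirectory
object itself to os.path.join(), which fails because the object is not path-like;
the "with" form avoids that and makes cleanup deterministic instead of relying on
the object's finalizer.

    import os
    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        # tmpdir is a plain str, so os.path.join works
        target = os.path.join(tmpdir, 'granule.nc')
        with open(target, 'wb') as f:
            f.write(b'...')     # stand-in for the to_netcdf() call above
        # upload 'target' here, while the directory still exists
    # the directory and its contents are gone once the block exits
    assert not os.path.exists(tmpdir)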
From d3420762c2d55b592e740c29f02acca5c7a9cceb Mon Sep 17 00:00:00 2001
From: Ian Fenty
Date: Sun, 13 Oct 2024 17:51:12 +0000
Subject: [PATCH 2/4] bash script for running edp_generate_dataproducts using
 a single process from a directory with task jsons; and a python routine to
 split a task list json file into n pieces

---
 .../utils/edp_generate_dataproducts_single.sh | 24 +++++++
 .../utils/split_task_json.py                  | 70 +++++++++++++++++++
 2 files changed, 94 insertions(+)
 create mode 100755 processing/src/ecco_dataset_production/utils/edp_generate_dataproducts_single.sh
 create mode 100644 processing/src/ecco_dataset_production/utils/split_task_json.py

diff --git a/processing/src/ecco_dataset_production/utils/edp_generate_dataproducts_single.sh b/processing/src/ecco_dataset_production/utils/edp_generate_dataproducts_single.sh
new file mode 100755
index 0000000..62707b8
--- /dev/null
+++ b/processing/src/ecco_dataset_production/utils/edp_generate_dataproducts_single.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+
+# Runs edp_generate_dataproducts on a single json task file (extension .json).
+# Takes one argument: the name of the json task file to process.
+# Launch one instance of this script per task file to run several in parallel.
+
+
+EDP_root_dir=/home/jpluser/edp
+CFGFILE="${EDP_root_dir}/ECCO-Dataset-Production/processing/configs/product_generation_config_updated.yaml"
+
+# task file name is given as the first argument
+TASKFILE=${1}
+
+echo $TASKFILE
+
+#KEYGEN='/usr/local/bin/aws-login.darwin.amd64'
+#PROFILE='saml-pub'
+
+edp_generate_dataproducts --tasklist ${TASKFILE} --cfgfile ${CFGFILE} --log DEBUG
+#> LOG_$TASKFILE.log 2> LOG_$TASKFILE.log
+
+# #--keygen ${KEYGEN} \
+# #--profile ${PROFILE} \

diff --git a/processing/src/ecco_dataset_production/utils/split_task_json.py b/processing/src/ecco_dataset_production/utils/split_task_json.py
new file mode 100644
index 0000000..de08c18
--- /dev/null
+++ b/processing/src/ecco_dataset_production/utils/split_task_json.py
@@ -0,0 +1,70 @@
+import json
+import math
+import argparse
+
+def split_json(input_file, num_files, output_base):
+    """
+    Splits a JSON file into multiple smaller JSON files.
+
+    Arguments:
+    - input_file (str): Path to the input JSON file to split.
+    - num_files (int): Number of output JSON files to create.
+    - output_base (str): Base name for the output files. Each file will be named
+      as `output_base_001.json`, `output_base_002.json`, etc.
+
+    The script reads a JSON file, divides it into `num_files` parts, and writes
+    each part into a new JSON file. Each new file will contain a roughly equal
+    number of entries from the original JSON file.
+    """
+
+    # Load the original JSON file
+    with open(input_file, 'r') as infile:
+        data = json.load(infile)
+
+    # Total number of entries in the input JSON
+    n = len(data)
+
+    # Calculate the number of entries per file (ceiling division to ensure all data is covered)
+    entries_per_file = math.ceil(n / num_files)
+
+    # Split the data and write to new JSON files
+    for i in range(num_files):
+        start_index = i * entries_per_file
+        end_index = start_index + entries_per_file
+
+        # Create a subset of the data for the current split
+        subset = data[start_index:end_index]
+
+        # Output filename with zero-padded numbers (e.g., output_base_001.json)
+        output_filename = f'{output_base}_{i+1:03}.json'
+
+        # Write the subset to the output file
+        with open(output_filename, 'w') as outfile:
+            json.dump(subset, outfile, indent=4)
+
+    print(f"Split into {num_files} files.")
+
+# Set up argument parsing
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description='Split a JSON file into multiple smaller files.'
+    )
+
+    # Positional argument for input file
+    parser.add_argument('input_file', type=str,
+                        help='Path to the input JSON file to be split.')
+
+    # Positional argument for number of files to split into
+    parser.add_argument('num_files', type=int,
+                        help='Number of output JSON files to create.')
+
+    # Positional argument for output base name
+    parser.add_argument('output_base', type=str,
+                        help='Base name for the output files (e.g., "output" will result in "output_001.json", "output_002.json", etc.)')
+
+    # Parse the command-line arguments
+    args = parser.parse_args()
+
+    # Call the function to split the JSON
+    split_json(args.input_file, args.num_files, args.output_base)
+
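The two utilities above are meant to be used together: split a large task list
into pieces, then hand each piece to its own edp_generate_dataproducts process.
A rough driver sketch (assumptions: the split files land in the working
directory, the file names and the parallelism of 4 are illustrative, and the
pieces can safely run concurrently):

    import glob
    import subprocess

    from split_task_json import split_json

    # split one big task list into tasks_part_001.json ... tasks_part_004.json
    split_json('SSH_native_latlon_mon_mean_tasks.json', 4, 'tasks_part')

    # launch one edp_generate_dataproducts process per piece
    procs = [subprocess.Popen(
                 ['edp_generate_dataproducts',
                  '--tasklist', taskfile,
                  '--cfgfile', 'product_generation_config_updated.yaml',
                  '--log', 'DEBUG'])
             for taskfile in sorted(glob.glob('tasks_part_*.json'))]

    for p in procs:
        p.wait()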
From 46b8adc3f3b9a1d22cb70f0a7be88e8100ee8a75 Mon Sep 17 00:00:00 2001
From: Ian Fenty
Date: Sun, 13 Oct 2024 18:17:10 +0000
Subject: [PATCH 3/4] updated the test to generate ssh native and latlon
 granules locally. now it works

---
 tests/SSH_native_latlon_local/README.txt      |  7 +++
 .../SSH_native_latlon_mon_mean_tasks.json.sav | 48 +++++++++----------
 ...ob_task_list_SSH_native_latlon_mon_mean.sh | 24 ++++++----
 ...dataproducts_SSH_native_latlon_mon_mean.sh | 10 ++--
 4 files changed, 51 insertions(+), 38 deletions(-)
 create mode 100644 tests/SSH_native_latlon_local/README.txt

diff --git a/tests/SSH_native_latlon_local/README.txt b/tests/SSH_native_latlon_local/README.txt
new file mode 100644
index 0000000..c706d21
--- /dev/null
+++ b/tests/SSH_native_latlon_local/README.txt
@@ -0,0 +1,7 @@
+# two steps
+
+# 1. create task list
+./edp_create_job_task_list_SSH_native_latlon_mon_mean.sh SSH_native_latlon_mon_mean_jobs.txt SSH_native_latlon_mon_mean_tasks.json.sav
+
+# 2. generate data products from task list
+./edp_generate_dataproducts_SSH_native_latlon_mon_mean.sh SSH_native_latlon_mon_mean_tasks.json.sav

diff --git a/tests/SSH_native_latlon_local/SSH_native_latlon_mon_mean_tasks.json.sav b/tests/SSH_native_latlon_local/SSH_native_latlon_mon_mean_tasks.json.sav
index 85568ec..f31c5e7 100644
--- a/tests/SSH_native_latlon_local/SSH_native_latlon_mon_mean_tasks.json.sav
+++ b/tests/SSH_native_latlon_local/SSH_native_latlon_mon_mean_tasks.json.sav
@@ -1,6 +1,6 @@
 [
   {
-    "granule": "./V4r4/native/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-01_ECCO_V4r4_native_llc0090.nc",
+    "granule": "./V4r4_sav_test/V4r4/native/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-01_ECCO_V4r4_native_llc0090.nc",
     "variables": {
       "SSH": [
         [
@@ -27,9 +27,9 @@
         ]
       ]
     },
-    "ecco_grid_loc": "../data/ecco_grids/V4r4/grid_ECCOV4r4",
-    "ecco_mapping_factors_loc": "../data/ecco_mapping_factors/V4r4",
-    "ecco_metadata_loc": "../../ECCO-v4-Configurations/ECCOv4 Release 4/metadata",
+    "ecco_grid_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_grids/V4r4/grid_ECCOV4r4",
+    "ecco_mapping_factors_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_mapping_factors/V4r4",
+    "ecco_metadata_loc": "../../..//ECCO-v4-Configurations/ECCOv4 Release 4/metadata",
     "dynamic_metadata": {
       "name": "dynamic sea surface height and model sea level anomaly",
       "dimension": "2D",
@@ -44,7 +44,7 @@
     }
   },
   {
-    "granule": "./V4r4/native/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-02_ECCO_V4r4_native_llc0090.nc",
+    "granule": "./V4r4_sav_test/V4r4/native/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-02_ECCO_V4r4_native_llc0090.nc",
     "variables": {
       "SSH": [
         [
@@ -71,9 +71,9 @@
         ]
       ]
     },
-    "ecco_grid_loc": "../data/ecco_grids/V4r4/grid_ECCOV4r4",
-    "ecco_mapping_factors_loc": "../data/ecco_mapping_factors/V4r4",
-    "ecco_metadata_loc": "../../ECCO-v4-Configurations/ECCOv4 Release 4/metadata",
+    "ecco_grid_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_grids/V4r4/grid_ECCOV4r4",
+    "ecco_mapping_factors_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_mapping_factors/V4r4",
+    "ecco_metadata_loc": "../../..//ECCO-v4-Configurations/ECCOv4 Release 4/metadata",
     "dynamic_metadata": {
       "name": "dynamic sea surface height and model sea level anomaly",
       "dimension": "2D",
@@ -88,7 +88,7 @@
     }
   },
   {
-    "granule": "./V4r4/native/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-03_ECCO_V4r4_native_llc0090.nc",
+    "granule": "./V4r4_sav_test/V4r4/native/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-03_ECCO_V4r4_native_llc0090.nc",
     "variables": {
       "SSH": [
         [
@@ -115,9 +115,9 @@
         ]
       ]
     },
-    "ecco_grid_loc": "../data/ecco_grids/V4r4/grid_ECCOV4r4",
-    "ecco_mapping_factors_loc": "../data/ecco_mapping_factors/V4r4",
-    "ecco_metadata_loc": "../../ECCO-v4-Configurations/ECCOv4 Release 4/metadata",
+    "ecco_grid_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_grids/V4r4/grid_ECCOV4r4",
+    "ecco_mapping_factors_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_mapping_factors/V4r4",
+    "ecco_metadata_loc": "../../..//ECCO-v4-Configurations/ECCOv4 Release 4/metadata",
     "dynamic_metadata": {
       "name": "dynamic sea surface height and model sea level anomaly",
       "dimension": "2D",
@@ -132,7 +132,7 @@
     }
   },
   {
-    "granule": "./V4r4/latlon/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-01_ECCO_V4r4_latlon_0p50deg.nc",
+    "granule": "./V4r4_sav_test/V4r4/latlon/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-01_ECCO_V4r4_latlon_0p50deg.nc",
"./V4r4_sav_test/V4r4/latlon/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-01_ECCO_V4r4_latlon_0p50deg.nc", "variables": { "SSH": [ [ @@ -153,9 +153,9 @@ ] ] }, - "ecco_grid_loc": "../data/ecco_grids/V4r4/grid_ECCOV4r4", - "ecco_mapping_factors_loc": "../data/ecco_mapping_factors/V4r4", - "ecco_metadata_loc": "../../ECCO-v4-Configurations/ECCOv4 Release 4/metadata", + "ecco_grid_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_grids/V4r4/grid_ECCOV4r4", + "ecco_mapping_factors_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_mapping_factors/V4r4", + "ecco_metadata_loc": "../../..//ECCO-v4-Configurations/ECCOv4 Release 4/metadata", "dynamic_metadata": { "name": "dynamic sea surface height", "dimension": "2D", @@ -170,7 +170,7 @@ } }, { - "granule": "./V4r4/latlon/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-02_ECCO_V4r4_latlon_0p50deg.nc", + "granule": "./V4r4_sav_test/V4r4/latlon/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-02_ECCO_V4r4_latlon_0p50deg.nc", "variables": { "SSH": [ [ @@ -191,9 +191,9 @@ ] ] }, - "ecco_grid_loc": "../data/ecco_grids/V4r4/grid_ECCOV4r4", - "ecco_mapping_factors_loc": "../data/ecco_mapping_factors/V4r4", - "ecco_metadata_loc": "../../ECCO-v4-Configurations/ECCOv4 Release 4/metadata", + "ecco_grid_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_grids/V4r4/grid_ECCOV4r4", + "ecco_mapping_factors_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_mapping_factors/V4r4", + "ecco_metadata_loc": "../../..//ECCO-v4-Configurations/ECCOv4 Release 4/metadata", "dynamic_metadata": { "name": "dynamic sea surface height", "dimension": "2D", @@ -208,7 +208,7 @@ } }, { - "granule": "./V4r4/latlon/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-03_ECCO_V4r4_latlon_0p50deg.nc", + "granule": "./V4r4_sav_test/V4r4/latlon/mon_mean/SEA_SURFACE_HEIGHT/SEA_SURFACE_HEIGHT_mon_mean_1992-03_ECCO_V4r4_latlon_0p50deg.nc", "variables": { "SSH": [ [ @@ -229,9 +229,9 @@ ] ] }, - "ecco_grid_loc": "../data/ecco_grids/V4r4/grid_ECCOV4r4", - "ecco_mapping_factors_loc": "../data/ecco_mapping_factors/V4r4", - "ecco_metadata_loc": "../../ECCO-v4-Configurations/ECCOv4 Release 4/metadata", + "ecco_grid_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_grids/V4r4/grid_ECCOV4r4", + "ecco_mapping_factors_loc": "../../..//ECCO-Dataset-Production/tests/data/ecco_mapping_factors/V4r4", + "ecco_metadata_loc": "../../..//ECCO-v4-Configurations/ECCOv4 Release 4/metadata", "dynamic_metadata": { "name": "dynamic sea surface height", "dimension": "2D", diff --git a/tests/SSH_native_latlon_local/edp_create_job_task_list_SSH_native_latlon_mon_mean.sh b/tests/SSH_native_latlon_local/edp_create_job_task_list_SSH_native_latlon_mon_mean.sh index 4fe1654..bbcff5c 100755 --- a/tests/SSH_native_latlon_local/edp_create_job_task_list_SSH_native_latlon_mon_mean.sh +++ b/tests/SSH_native_latlon_local/edp_create_job_task_list_SSH_native_latlon_mon_mean.sh @@ -1,16 +1,22 @@ #!/usr/bin/env bash -# test job task list generation using locally-stored ECCO results: +# create a job task list from a jobfile +# first argument is the name of the job task file (text) +# second argument is the name of the output task file (json) -ver='V4r4' + +EDP_root_dir=../../../ +echo ${EDP_root_dir} +ls ${EDP_root_dir} edp_create_job_task_list \ - --jobfile ./SSH_native_latlon_mon_mean_jobs.txt \ + --jobfile ${1} \ --ecco_source_root ../data/ecco_results/${ver} \ - --ecco_destination_root ./ \ - --ecco_grid_loc ../data/ecco_grids/${ver}/grid_ECCO${ver} \ 
-    --ecco_mapping_factors_loc ../data/ecco_mapping_factors/${ver} \
-    --ecco_metadata_loc '../../ECCO-v4-Configurations/ECCOv4 Release 4/metadata' \
-    --outfile SSH_native_latlon_mon_mean_tasks.json \
-    --cfgfile ../../processing/configs/product_generation_config_updated.yaml \
+    --ecco_destination_root "./V4r4_sav_test" \
+    --ecco_grid_loc "${EDP_root_dir}/ECCO-Dataset-Production/tests/data/ecco_grids/V4r4/grid_ECCOV4r4" \
+    --ecco_mapping_factors_loc "${EDP_root_dir}/ECCO-Dataset-Production/tests/data/ecco_mapping_factors/V4r4" \
+    --ecco_metadata_loc "${EDP_root_dir}/ECCO-v4-Configurations/ECCOv4 Release 4/metadata" \
+    --outfile "${2}" \
+    --cfgfile "${EDP_root_dir}/ECCO-Dataset-Production/processing/configs/product_generation_config_updated.yaml" \
     --log DEBUG
+

diff --git a/tests/SSH_native_latlon_local/edp_generate_dataproducts_SSH_native_latlon_mon_mean.sh b/tests/SSH_native_latlon_local/edp_generate_dataproducts_SSH_native_latlon_mon_mean.sh
index b214d69..02ccaf6 100755
--- a/tests/SSH_native_latlon_local/edp_generate_dataproducts_SSH_native_latlon_mon_mean.sh
+++ b/tests/SSH_native_latlon_local/edp_generate_dataproducts_SSH_native_latlon_mon_mean.sh
@@ -1,13 +1,13 @@
 #!/usr/bin/env bash
 
-TASKLIST=SSH_native_latlon_mon_mean_tasks.json
+#TASKLIST=SSH_native_latlon_mon_mean_tasks.json
 CFGFILE=../../processing/configs/product_generation_config_updated.yaml
 KEYGEN=/usr/local/bin/aws-login-pub.darwin.amd64
 PROFILE=saml-pub
 
 edp_generate_dataproducts \
-    --tasklist ${TASKLIST} \
+    --tasklist ${1} \
     --cfgfile ${CFGFILE} \
-    --log DEBUG \
-    --keygen ${KEYGEN} \
-    --profile ${PROFILE}
+    --log DEBUG
+# --keygen ${KEYGEN} \
+# --profile ${PROFILE}

From 61eb644588ad4b1b43c6aa17d465546937e0863e Mon Sep 17 00:00:00 2001
From: Ian Fenty
Date: Fri, 18 Oct 2024 12:32:39 +0000
Subject: [PATCH 4/4] several small changes to enable processing of snapshot
 granules

---
 .gitignore                                    |  1 +
 .../apps/create_job_task_list.py              | 53 ++++++++++++-----
 .../src/ecco_dataset_production/ecco_file.py  |  7 ++-
 .../ecco_generate_dataproducts.py             |  3 ++
 4 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5878fc0..94379f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ job_logs*.json
 *.DS_Store
 # emacs temporary files:
 *~
+*._*
 \#*\#

diff --git a/processing/src/ecco_dataset_production/apps/create_job_task_list.py b/processing/src/ecco_dataset_production/apps/create_job_task_list.py
index 64324c7..ac79556 100644
--- a/processing/src/ecco_dataset_production/apps/create_job_task_list.py
+++ b/processing/src/ecco_dataset_production/apps/create_job_task_list.py
@@ -7,6 +7,7 @@
 import importlib.resources
 import json
 import logging
+import numpy as np
 import os
 import pandas as pd
 import re
@@ -20,6 +21,7 @@
 from .. import ecco_metadata_store
 from .. import ecco_time
 from .. import metadata
+from pprint import pprint
 
 
 logging.basicConfig(
@@ -262,9 +264,7 @@ def create_job_task_list(
 
     Job = collections.namedtuple(
         'Job',['metadata_groupings_id','product_type','frequency','time_steps'])
-
     with open(jobfile,'r') as fh:
-
         for line in fh:
 
             #
@@ -303,7 +303,8 @@ def create_job_task_list(
                 time_coverage_duration = time_coverage_resolution = 'P1M'
                 dataset_description_head = 'This dataset contains monthly-averaged '
             elif job.frequency.lower() == 'snap':
-                #TODO: path/file_freq_pat
+                path_freq_pat = 'diags_inst'
+                file_freq_pat = 'day_snap'
                 time_long_name = 'snapshot time'
                 time_coverage_duration = time_coverage_resolution = 'PT0S'
                 dataset_description_head = 'This dataset contains instantaneous '
@@ -349,6 +350,7 @@ def create_job_task_list(
                     one_to_one = False
 
             if not one_to_one:
+                #('IAN not one_to_one')
 
                 # variable depends on component inputs; determine
                 # availability of input files and gather accordingly:
@@ -371,12 +373,14 @@ def create_job_task_list(
                     if isinstance(job.time_steps,str) and 'all'==job.time_steps.lower():
                         # get all possible time matches:
                         if aws.ecco_aws.is_s3_uri(ecco_source_root):
+
                             s3_key_pat = re.compile(
                                 s3_parts.path.strip('/')    # remove leading '/' from urlpath
                                 + '.*'                      # allow anything between path and filename
                                 + ecco_file.ECCOMDSFilestr(
                                     prefix=variable_input_component_key,
                                     averaging_period=file_freq_pat).re_filestr)
+
                             variable_input_component_files.extend(
                                 [os.path.join(
                                     urllib.parse.urlunparse(
@@ -489,13 +493,15 @@ def create_job_task_list(
                     variable_files = []
 
                     if aws.ecco_aws.is_s3_uri(ecco_source_root):
+                        prefix=os.path.join(
+                            s3_parts.path,
+                            path_freq_pat,
+                            '_'.join([variable,file_freq_pat]))
                         all_var_files_in_bucket = s3_list_files(
                             s3_client=s3c,
                             bucket=s3_parts.netloc,
-                            prefix=os.path.join(
-                                s3_parts.path,
-                                path_freq_pat,
-                                '_'.join([variable,file_freq_pat])))
+                            prefix=prefix)
+
                         if isinstance(job.time_steps,str) and 'all'==job.time_steps.lower():
                             #if 'all' == job.time_steps.lower():
@@ -551,6 +557,9 @@ def create_job_task_list(
                     variable_files.sort()
 
                     variable_files_as_list_of_lists = []
+
+                    print('number of files ', len(variable_files))
+
                     for f in variable_files:
                         if ecco_file.ECCOMDSFilestr(os.path.basename(f)).ext == 'data':
                             tmplist = [f]
@@ -607,13 +616,29 @@ def create_job_task_list(
                         # ECCOTask()'; subsequent operations using class functions.
                         task = {}
 
-                        tb,center_time = ecco_time.make_time_bounds_metadata(
-                            granule_time=time,
-                            model_start_time=cfg['model_start_time'],
-                            model_end_time=cfg['model_end_time'],
-                            model_timestep=cfg['model_timestep'],
-                            model_timestep_units=cfg['model_timestep_units'],
-                            averaging_period=job.frequency)
+                        model_start_time=cfg['model_start_time']
+                        model_end_time=cfg['model_end_time']
+                        model_timestep=cfg['model_timestep']
+                        model_timestep_units=cfg['model_timestep_units']
+
+                        if 'snap' in job.frequency.lower():
+                            mst = np.datetime64(model_start_time)
+                            td64 = np.timedelta64(int(time)*model_timestep, model_timestep_units)
+                            center_time = \
+                                mst + \
+                                td64
+                            tb = []
+                            tb.append(center_time)
+                            tb.append(center_time)
+
+                        else:
+                            tb,center_time = ecco_time.make_time_bounds_metadata(
+                                granule_time=time,
+                                model_start_time=model_start_time,
+                                model_end_time=model_end_time,
+                                model_timestep=model_timestep,
+                                model_timestep_units=model_timestep_units,
+                                averaging_period=job.frequency)
 
                         if file_freq_pat == 'mon_mean':
                             # in the case of monthly means, ensure file date stamp is

diff --git a/processing/src/ecco_dataset_production/ecco_file.py b/processing/src/ecco_dataset_production/ecco_file.py
index 721cc8b..477b227 100644
--- a/processing/src/ecco_dataset_production/ecco_file.py
+++ b/processing/src/ecco_dataset_production/ecco_file.py
@@ -44,10 +44,11 @@ def __init__(self,filestr=None,**kwargs):
 
         """
         if filestr:
+            #print('IAN filestr ', filestr)
            # use filestr to set all attributes (a little complicated because
            # ECCO variable names may include underscores):
            try:
-                re_so = re.search('_day_inst|_day_mean|_mon_mean',filestr)
+                re_so = re.search('_day_snap|_day_mean|_mon_mean',filestr)
                 self.prefix = filestr[:re_so.span()[0]]
                 self.averaging_period = filestr[re_so.span()[0]+1:re_so.span()[1]]
                 time_and_ext = filestr[re_so.span()[1]+1:]
@@ -151,10 +152,12 @@ def __init__(self,filestr=None,**kwargs):
 
         """
         if filestr:
+            #print('IAN ECCOGranuleFilestr filestr ', filestr)
+
            # use filestr to set all attributes (a little complicated because
            # ECCO variable names may include underscores):
            try:
-                re_so = re.search('_day_inst|_day_mean|_mon_mean',filestr)
+                re_so = re.search('_day_snap|_day_mean|_mon_mean',filestr)
                 self.prefix = filestr[:re_so.span()[0]]
                 self.averaging_period = filestr[re_so.span()[0]+1:re_so.span()[1]]
                 date_version_grid_type_grid_label_and_ext = filestr[re_so.span()[1]+1:]

diff --git a/processing/src/ecco_dataset_production/ecco_generate_dataproducts.py b/processing/src/ecco_dataset_production/ecco_generate_dataproducts.py
index c6b5173..ccaf6fd 100644
--- a/processing/src/ecco_dataset_production/ecco_generate_dataproducts.py
+++ b/processing/src/ecco_dataset_production/ecco_generate_dataproducts.py
@@ -36,6 +36,7 @@ def ecco_make_granule( task, cfg,
         task (dict):
         cfg
 
     """
+
     log = logging.getLogger('edp.'+__name__)
     if log_level:
         log.setLevel(log_level)
@@ -60,8 +61,10 @@ def ecco_make_granule( task, cfg,
             merged_variable_dataset = xr.merge(variable_datasets)
 
     elif this_task.is_native:
+
         log.info('generating %s ...', os.path.basename(this_task['granule']))
         for variable in this_task.variable_names:
+
             log.debug('... adding %s using:', variable)
             for infile in itertools.chain.from_iterable(this_task.variable_inputs(variable)):
                 log.debug('    %s', infile)
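For reference, the snapshot branch added to create_job_task_list.py computes a
granule's center time directly from the model step counter, and uses it for
both time bounds since a snapshot has no averaging window. The same arithmetic
in isolation, with hypothetical stand-ins for the cfg values:

    import numpy as np

    model_start_time = '1992-01-01T12:00:00'   # stand-in for cfg['model_start_time']
    model_timestep = 3600                      # stand-in for cfg['model_timestep']
    model_timestep_units = 's'                 # stand-in for cfg['model_timestep_units']
    time = '0000000732'                        # time step string from a snapshot filename

    center_time = np.datetime64(model_start_time) + \
        np.timedelta64(int(time) * model_timestep, model_timestep_units)

    tb = [center_time, center_time]            # time bounds collapse to the center time
    print(center_time)                         # 1992-02-01T00:00:00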