Merge pull request #92 from USEPA/release-v1.0.5

Release v1.0.5
USEPA · Aug 24, 2023 · e7e90a3
2 parents 21b74e4 + d6476aa
commit e7e90a3
Show file tree

Hide file tree

Showing 10 changed files with 114 additions and 67 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -31,14 +31,14 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        py-version: ['3.7', '3.8', '3.9', '3.10']
+        py-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     # general Python setup
     - name: Set up Python ${{ matrix.py-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v3
       with:
         python-version: ${{ matrix.py-version }}
 

diff --git a/lciafmt/__init__.py b/lciafmt/__init__.py
@@ -9,6 +9,7 @@
 
 import json
 import pkg_resources
+from typing import Union
 
 import pandas as pd
 
@@ -70,7 +71,8 @@ def get_class(name: str):
                 methods = m['methods']
             if n == name or c.value == name or mapping == name or name in methods.keys():
                 return c
-        util.log.error('Method not found')
+        util.log.warning(f'{name} is not a LCIAfmt Method')
+        return name
 
 
 def supported_methods() -> list:
@@ -156,7 +158,8 @@ def supported_mapping_systems() -> list:
     return fmap.supported_mapping_systems()
 
 
-def get_mapped_method(method_id, indicators=None, methods=None) -> pd.DataFrame:
+def get_mapped_method(method_id, indicators=None, methods=None,
+                      download_from_remote=False) -> pd.DataFrame:
     """Return a mapped method stored as parquet.
 
     If a mapped method does not exist locally, it is generated.
@@ -165,23 +168,31 @@ def get_mapped_method(method_id, indicators=None, methods=None) -> pd.DataFrame:
     :param indicators: list, if not None, return only those indicators passed
     :param methods: list, if not None, return only the version of the methods
         passed. Applies only to methods with multiple versions.
+    :param download_from_remote: bool, if True, download from remote before
+        generating method locally.
     :return: DataFrame of mapped method
     """
     method_id = util.check_as_class(method_id)
     mapped_method = util.read_method(method_id)
     if mapped_method is None:
-        util.log.info('generating ' + method_id.name)
-        method = get_method(method_id)
-        if 'mapping' in method_id.get_metadata():
-            mapping_system = method_id.get_metadata()['mapping']
-            case_insensitive = method_id.get_metadata()['case_insensitivity']
-            if case_insensitive:
-                method['Flowable'] = method['Flowable'].str.lower()
-            mapped_method = map_flows(method, system=mapping_system,
-                                      case_insensitive=case_insensitive)
-            mapped_method = util.collapse_indicators(mapped_method)
-        else:
-            mapped_method = method
+        if isinstance(method_id, str):
+            raise FileNotFoundError
+        elif download_from_remote:
+            util.download_method(method_id)
+            mapped_method = util.read_method(method_id)
+        if mapped_method is None:
+            util.log.info('generating ' + method_id.name)
+            method = get_method(method_id)
+            if 'mapping' in method_id.get_metadata():
+                mapping_system = method_id.get_metadata()['mapping']
+                case_insensitive = method_id.get_metadata()['case_insensitivity']
+                if case_insensitive:
+                    method['Flowable'] = method['Flowable'].str.lower()
+                mapped_method = map_flows(method, system=mapping_system,
+                                          case_insensitive=case_insensitive)
+                mapped_method = util.collapse_indicators(mapped_method)
+            else:
+                mapped_method = method
         util.store_method(mapped_method, method_id)
     if indicators is not None:
         mapped_method = mapped_method[mapped_method['Indicator'].isin(indicators)]
@@ -195,20 +206,30 @@ def get_mapped_method(method_id, indicators=None, methods=None) -> pd.DataFrame:
     return mapped_method
 
 
-def generate_endpoints(file: str, name=None, matching_fields=None) -> pd.DataFrame:
+def generate_endpoints(file: Union[str, pd.DataFrame],
+                       name=None,
+                       matching_fields=None,
+                       download_from_remote=False) -> pd.DataFrame:
     """Generate an endpoint method for a supplied file based on specs.
 
-    :param file: name of file in data folder, without extension, containing
-        endpoint data based on the format specs for endpoint files
+    :param file: str name of file in data folder, without extension, containing
+        endpoint data based on the format specs for endpoint files, or
+        pd.DataFrame
     :param name: str, optional str for naming the generated method
     :param matching_fields: list of fields on which to apply unique endpoint
         conversions, if None
+    :param download_from_remote: bool, if True, download from remote before
+        generating method locally.
     :return: DataFrame of endpoint method
     """
-    endpoints = pd.read_csv(util.datapath+"/"+file+".csv")
+    if isinstance(file, pd.DataFrame):
+        endpoints = file
+    else:
+        endpoints = pd.read_csv(util.datapath / f'{file}.csv')
     if matching_fields is None:
         matching_fields = ['Indicator']
-    method = ep.apply_endpoints(endpoints, matching_fields)
+    method = ep.apply_endpoints(endpoints, matching_fields,
+                                download_from_remote)
     if name is None:
         method['Method'] = file
     else:
@@ -218,6 +239,7 @@ def generate_endpoints(file: str, name=None, matching_fields=None) -> pd.DataFra
 
 def supported_indicators(method_id) -> list:
     """Return a list of indicators for the identified method_id."""
+    method_id = util.check_as_class(method_id)
     method = util.read_method(method_id)
     if method is not None:
         indicators = set(list(method['Indicator']))

diff --git a/lciafmt/endpoint.py b/lciafmt/endpoint.py
@@ -12,14 +12,16 @@
 from .util import log
 
 
-def apply_endpoints(endpoints, matching_fields):
+def apply_endpoints(endpoints, matching_fields, download_from_remote=False):
     """Generate an endpoint method in LCIAmethod format.
 
     Method contains endpoint factors based on conversion factors supplied in
     passed dataframe.
     :param endpoints: df which conforms to Endpoints specs
     :param matching_fields: list of fields on which to apply unique endpoint
         conversions
+    :param download_from_remote: bool, if True, download from remote before
+        generating method locally.
     """
     log.info('developing endpoint methods...')
 
@@ -32,7 +34,11 @@ def apply_endpoints(endpoints, matching_fields):
 
     for m in indicators['Method'].unique():
         method_indicators = indicators[indicators['Method'] == m]
-        mapped_method = lciafmt.get_mapped_method(m, methods=[m]).fillna('')
+        mapped_method = (
+            lciafmt.get_mapped_method(m,
+                                      methods=[m],
+                                      download_from_remote=download_from_remote)
+            .fillna(''))
         if 'Indicator' in matching_fields:
             mapped_method = mapped_method[mapped_method['Indicator'].isin(
                 list(method_indicators['Indicator']))]
@@ -64,6 +70,6 @@ def apply_endpoints(endpoints, matching_fields):
         endpoint_method_agg.sort_values(by=['Indicator', 'Flowable',
                                             'Context'], inplace=True)
 
-        method = method.append(endpoint_method_agg, ignore_index=True)
+        method = pd.concat([method, endpoint_method_agg], ignore_index=True)
 
     return method
diff --git a/lciafmt/ipcc.py b/lciafmt/ipcc.py
@@ -17,7 +17,7 @@ def get() -> pd.DataFrame:
     log.info("get method IPCC")
 
     filename = 'IPCC_GWP_values.csv'
-    df = pd.read_csv(datapath + filename)
+    df = pd.read_csv(datapath / filename)
     df['Indicator'] = df['AR'] + '-' + df['Parameter'].str.replace('GWP', '')
     df['Method'] = 'IPCC'
     df['Indicator unit'] = 'kg CO2 eq'

diff --git a/lciafmt/recipe.py b/lciafmt/recipe.py
@@ -34,7 +34,7 @@
         'sea water': 'water/sea water',
         'Sea water': 'water/sea water',
         'marine water': 'water/sea water'}
-flowables_split = pd.read_csv(datapath + 'ReCiPe2016_split.csv')
+flowables_split = pd.read_csv(datapath / 'ReCiPe2016_split.csv')
 
 
 def get(add_factors_for_missing_contexts=True, endpoint=True,
@@ -181,7 +181,7 @@ def _read_endpoints(file: str) -> pd.DataFrame:
     endpoint.loc[endpoint['EndpointUnit'].str.contains('species', case=False), 'EndpointUnit'] = 'species-year'
     endpoint.loc[endpoint['EndpointUnit'].str.contains('USD', case=False), 'EndpointUnit'] = 'USD2013'
 
-    endpoint_map = pd.read_csv(datapath + 'ReCiPe2016_endpoint_to_midpoint.csv')
+    endpoint_map = pd.read_csv(datapath / 'ReCiPe2016_endpoint_to_midpoint.csv')
     endpoint = endpoint.merge(endpoint_map, how="left", on='EndpointIndicator')
 
     # split into two dataframes

diff --git a/lciafmt/traci.py b/lciafmt/traci.py
@@ -18,8 +18,8 @@
     datapath
 
 
-flowables_replace = pd.read_csv(datapath+'TRACI_2.1_replacement.csv')
-flowables_split = pd.read_csv(datapath+'TRACI_2.1_split.csv')
+flowables_replace = pd.read_csv(datapath / 'TRACI_2.1_replacement.csv')
+flowables_split = pd.read_csv(datapath / 'TRACI_2.1_split.csv')
 
 
 def get(add_factors_for_missing_contexts=True, file=None,

diff --git a/lciafmt/util.py b/lciafmt/util.py
@@ -5,25 +5,26 @@
 This module contains common functions for processing LCIA methods
 """
 
-import os
-from os.path import join
 import sys
+
 import lciafmt
 import logging as log
 import pandas as pd
 import numpy as np
 import yaml
 import pkg_resources
+from pathlib import Path
 from esupy.processed_data_mgmt import Paths, FileMeta, load_preprocessed_output,\
-    write_df_to_file, write_metadata_to_file, download_from_remote
+    write_df_to_file, write_metadata_to_file, download_from_remote, \
+    mkdir_if_missing
 from esupy.util import get_git_hash
 from fedelemflowlist.globals import flow_list_specs
 
 
 # set version number of package, needs to be updated with setup.py
-pkg_version_number = '1.0.4'
-modulepath = os.path.dirname(os.path.realpath(__file__)).replace('\\', '/')
-datapath = modulepath + '/data/'
+pkg_version_number = '1.0.5'
+MODULEPATH = Path(__file__).resolve().parent
+datapath = MODULEPATH / 'data'
 
 log.basicConfig(level=log.INFO, format='%(asctime)s %(levelname)-8s %(message)s',
                 datefmt='%Y-%m-%d %H:%M:%S', stream=sys.stdout)
@@ -32,11 +33,11 @@
 write_format = "parquet"
 
 paths = Paths()
-paths.local_path = os.path.realpath(paths.local_path + "/lciafmt")
-outputpath = paths.local_path
+paths.local_path = paths.local_path / 'lciafmt'
+OUTPUTPATH = paths.local_path
 
 pkg = pkg_resources.get_distribution('lciafmt')
-git_hash = get_git_hash()
+GIT_HASH = get_git_hash()
 
 method_metadata = {
     'Name': '',
@@ -49,16 +50,18 @@
 
 def set_lcia_method_meta(method_id):
     lcia_method_meta = FileMeta()
-    if method_id is not None:
+    if isinstance(method_id, lciafmt.Method):
         lcia_method_meta.name_data = method_id.get_filename()
         lcia_method_meta.category = method_id.get_path()
-    else:
+    elif method_id is None:
         lcia_method_meta.name_data = ""
         lcia_method_meta.category = ""
+    else:
+        lcia_method_meta.name_data = method_id
     lcia_method_meta.tool = pkg.project_name
     lcia_method_meta.tool_version = pkg_version_number
     lcia_method_meta.ext = write_format
-    lcia_method_meta.git_hash = git_hash
+    lcia_method_meta.git_hash = GIT_HASH
     return lcia_method_meta
 
 
@@ -102,7 +105,7 @@ def aggregate_factors_for_primary_contexts(df) -> pd.DataFrame:
     indices = df['Context'].str.find('/')
     ignored_list = df['Indicator'].isin(ignored_categories)
     i = 0
-    for k in ignored_list.iteritems():
+    for k in ignored_list.items():
         if k[1]:
             indices.update(pd.Series([-1], index=[i]))
         i = i + 1
@@ -138,7 +141,7 @@ def aggregate_factors_for_primary_contexts(df) -> pd.DataFrame:
 
 def get_modification(source, name) -> pd.DataFrame:
     """Return a dataframe of modified CFs based on csv."""
-    modified_factors = pd.read_csv(datapath+"/"+source+"_"+name+".csv")
+    modified_factors = pd.read_csv(datapath / f'{source}_{name}.csv')
     return modified_factors
 
 
@@ -168,7 +171,7 @@ def check_as_class(method_id):
 
 
 def generate_method_description(name: str) -> str:
-    with open(join(datapath, "description.yaml")) as f:
+    with open(datapath / "description.yaml") as f:
         generic = yaml.safe_load(f)
     method_description = generic['description']
     method = check_as_class(name)
@@ -186,7 +189,7 @@ def generate_method_description(name: str) -> str:
             detailed_meta = '\n\n' + method_meta['methods'][name]
             method_description += detailed_meta
         except KeyError:
-            log.debug('%s not found in methods.json', name)
+            log.debug(f'{name} not found in methods.json')
     # Replace tagged fields
     if 'version' in method_meta:
         version = ' (v' + method_meta['version'] + ')'
@@ -223,14 +226,14 @@ def compile_metadata(method_id):
 def store_method(df, method_id, name=''):
     """Save the method as a dataframe to parquet file."""
     meta = set_lcia_method_meta(method_id)
-    method_path = outputpath + '/' + meta.category
+    method_path = OUTPUTPATH / meta.category
     if name != '':
         meta.name_data = name
-    elif meta.name_data == "":
+    elif meta.name_data == '':
         meta.name_data = df['Method'][0]
     meta.tool_meta = compile_metadata(method_id)
     try:
-        log.info('saving ' + meta.name_data + ' to ' + method_path)
+        log.info(f'saving {meta.name_data} to {method_path}')
         write_df_to_file(df, paths, meta)
         write_metadata_to_file(paths, meta)
     except:
@@ -241,14 +244,20 @@ def read_method(method_id):
     """Return the method stored in output."""
     meta = set_lcia_method_meta(method_id)
     method = load_preprocessed_output(meta, paths)
-    method_path = outputpath + '/' + meta.category
+    method_path = OUTPUTPATH / meta.category
     if method is None:
-        log.info(meta.name_data + ' not found in ' + method_path)
+        log.info(f'{meta.name_data} not found in {method_path}')
     else:
-        log.info('loaded ' + meta.name_data + ' from ' + method_path)
+        log.info(f'loaded {meta.name_data} from {method_path}')
     return method
 
 
+def download_method(method_id):
+    """Downloads the method from data commons."""
+    meta = set_lcia_method_meta(method_id)
+    download_from_remote(meta, paths)
+
+
 def save_json(method_id, mapped_data, method=None, name=''):
     """Save a method as json file in the outputpath.
 
@@ -265,11 +274,10 @@ def save_json(method_id, mapped_data, method=None, name=''):
     if method is not None:
         filename = method.replace('/', '_')
         mapped_data = mapped_data[mapped_data['Method'] == method]
-    path = outputpath+'/'+meta.category
-    os.makedirs(outputpath, exist_ok=True)
-    json_pack = path + '/' + filename + "_json_v" + meta.tool_version + ".zip"
-    if os.path.exists(json_pack):
-        os.remove(json_pack)
+    path = OUTPUTPATH / meta.category
+    mkdir_if_missing(OUTPUTPATH)
+    json_pack = path / f'{filename}_json_v{meta.tool_version}.zip'
+    json_pack.unlink(missing_ok=True)
     lciafmt.to_jsonld(mapped_data, json_pack)
 
 
@@ -291,8 +299,8 @@ def compare_to_remote(local_df, method_id):
     df_diff = df.query('`Characterization Factor` '
                        '!= `Characterization Factor_remote`')
     if len(df_diff) > 0:
-        path = os.path.join(paths.local_path, 'diff')
-        os.makedirs(path, exist_ok=True)
+        path = paths.local_path / 'diff'
+        mkdir_if_missing(path)
         log.info(f'Saving differences found in {method_id.name} '
                  f'versus remote to {path}')
         df_diff.to_csv(f'{path}/{method_id.name}_diff.csv', index=False)

diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
-git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List@v1.1.1#egg=fedelemflowlist
-git+https://github.com/USEPA/esupy@v0.2.2#egg=esupy
-olca-ipc>=0.0.12
+git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List.git#egg=fedelemflowlist
+git+https://github.com/USEPA/esupy.git#egg=esupy
+olca-ipc==0.0.12
 pandas>=0.23
 requests>=2.21.0
 openpyxl>=3.0.7