Skip to content

Commit

Permalink
Merge pull request #92 from USEPA/release-v1.0.5
Browse files Browse the repository at this point in the history
Release v1.0.5
  • Loading branch information
bl-young authored Aug 24, 2023
2 parents 21b74e4 + d6476aa commit e7e90a3
Show file tree
Hide file tree
Showing 10 changed files with 114 additions and 67 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
py-version: ['3.7', '3.8', '3.9', '3.10']
py-version: ['3.7', '3.8', '3.9', '3.10', '3.11']

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

# general Python setup
- name: Set up Python ${{ matrix.py-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.py-version }}

Expand Down
60 changes: 41 additions & 19 deletions lciafmt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import json
import pkg_resources
from typing import Union

import pandas as pd

Expand Down Expand Up @@ -70,7 +71,8 @@ def get_class(name: str):
methods = m['methods']
if n == name or c.value == name or mapping == name or name in methods.keys():
return c
util.log.error('Method not found')
util.log.warning(f'{name} is not a LCIAfmt Method')
return name


def supported_methods() -> list:
Expand Down Expand Up @@ -156,7 +158,8 @@ def supported_mapping_systems() -> list:
return fmap.supported_mapping_systems()


def get_mapped_method(method_id, indicators=None, methods=None) -> pd.DataFrame:
def get_mapped_method(method_id, indicators=None, methods=None,
download_from_remote=False) -> pd.DataFrame:
"""Return a mapped method stored as parquet.
If a mapped method does not exist locally, it is generated.
Expand All @@ -165,23 +168,31 @@ def get_mapped_method(method_id, indicators=None, methods=None) -> pd.DataFrame:
:param indicators: list, if not None, return only those indicators passed
:param methods: list, if not None, return only the version of the methods
passed. Applies only to methods with multiple versions.
:param download_from_remote: bool, if True, download from remote before
generating method locally.
:return: DataFrame of mapped method
"""
method_id = util.check_as_class(method_id)
mapped_method = util.read_method(method_id)
if mapped_method is None:
util.log.info('generating ' + method_id.name)
method = get_method(method_id)
if 'mapping' in method_id.get_metadata():
mapping_system = method_id.get_metadata()['mapping']
case_insensitive = method_id.get_metadata()['case_insensitivity']
if case_insensitive:
method['Flowable'] = method['Flowable'].str.lower()
mapped_method = map_flows(method, system=mapping_system,
case_insensitive=case_insensitive)
mapped_method = util.collapse_indicators(mapped_method)
else:
mapped_method = method
if isinstance(method_id, str):
raise FileNotFoundError
elif download_from_remote:
util.download_method(method_id)
mapped_method = util.read_method(method_id)
if mapped_method is None:
util.log.info('generating ' + method_id.name)
method = get_method(method_id)
if 'mapping' in method_id.get_metadata():
mapping_system = method_id.get_metadata()['mapping']
case_insensitive = method_id.get_metadata()['case_insensitivity']
if case_insensitive:
method['Flowable'] = method['Flowable'].str.lower()
mapped_method = map_flows(method, system=mapping_system,
case_insensitive=case_insensitive)
mapped_method = util.collapse_indicators(mapped_method)
else:
mapped_method = method
util.store_method(mapped_method, method_id)
if indicators is not None:
mapped_method = mapped_method[mapped_method['Indicator'].isin(indicators)]
Expand All @@ -195,20 +206,30 @@ def get_mapped_method(method_id, indicators=None, methods=None) -> pd.DataFrame:
return mapped_method


def generate_endpoints(file: str, name=None, matching_fields=None) -> pd.DataFrame:
def generate_endpoints(file: Union[str, pd.DataFrame],
name=None,
matching_fields=None,
download_from_remote=False) -> pd.DataFrame:
"""Generate an endpoint method for a supplied file based on specs.
:param file: name of file in data folder, without extension, containing
endpoint data based on the format specs for endpoint files
:param file: str name of file in data folder, without extension, containing
endpoint data based on the format specs for endpoint files, or
pd.DataFrame
:param name: str, optional str for naming the generated method
:param matching_fields: list of fields on which to apply unique endpoint
conversions, if None
:param download_from_remote: bool, if True, download from remote before
generating method locally.
:return: DataFrame of endpoint method
"""
endpoints = pd.read_csv(util.datapath+"/"+file+".csv")
if isinstance(file, pd.DataFrame):
endpoints = file
else:
endpoints = pd.read_csv(util.datapath / f'{file}.csv')
if matching_fields is None:
matching_fields = ['Indicator']
method = ep.apply_endpoints(endpoints, matching_fields)
method = ep.apply_endpoints(endpoints, matching_fields,
download_from_remote)
if name is None:
method['Method'] = file
else:
Expand All @@ -218,6 +239,7 @@ def generate_endpoints(file: str, name=None, matching_fields=None) -> pd.DataFra

def supported_indicators(method_id) -> list:
"""Return a list of indicators for the identified method_id."""
method_id = util.check_as_class(method_id)
method = util.read_method(method_id)
if method is not None:
indicators = set(list(method['Indicator']))
Expand Down
12 changes: 9 additions & 3 deletions lciafmt/endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
from .util import log


def apply_endpoints(endpoints, matching_fields):
def apply_endpoints(endpoints, matching_fields, download_from_remote=False):
"""Generate an endpoint method in LCIAmethod format.
Method contains endpoint factors based on conversion factors supplied in
passed dataframe.
:param endpoints: df which conforms to Endpoints specs
:param matching_fields: list of fields on which to apply unique endpoint
conversions
:param download_from_remote: bool, if True, download from remote before
generating method locally.
"""
log.info('developing endpoint methods...')

Expand All @@ -32,7 +34,11 @@ def apply_endpoints(endpoints, matching_fields):

for m in indicators['Method'].unique():
method_indicators = indicators[indicators['Method'] == m]
mapped_method = lciafmt.get_mapped_method(m, methods=[m]).fillna('')
mapped_method = (
lciafmt.get_mapped_method(m,
methods=[m],
download_from_remote=download_from_remote)
.fillna(''))
if 'Indicator' in matching_fields:
mapped_method = mapped_method[mapped_method['Indicator'].isin(
list(method_indicators['Indicator']))]
Expand Down Expand Up @@ -64,6 +70,6 @@ def apply_endpoints(endpoints, matching_fields):
endpoint_method_agg.sort_values(by=['Indicator', 'Flowable',
'Context'], inplace=True)

method = method.append(endpoint_method_agg, ignore_index=True)
method = pd.concat([method, endpoint_method_agg], ignore_index=True)

return method
2 changes: 1 addition & 1 deletion lciafmt/ipcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def get() -> pd.DataFrame:
log.info("get method IPCC")

filename = 'IPCC_GWP_values.csv'
df = pd.read_csv(datapath + filename)
df = pd.read_csv(datapath / filename)
df['Indicator'] = df['AR'] + '-' + df['Parameter'].str.replace('GWP', '')
df['Method'] = 'IPCC'
df['Indicator unit'] = 'kg CO2 eq'
Expand Down
4 changes: 2 additions & 2 deletions lciafmt/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
'sea water': 'water/sea water',
'Sea water': 'water/sea water',
'marine water': 'water/sea water'}
flowables_split = pd.read_csv(datapath + 'ReCiPe2016_split.csv')
flowables_split = pd.read_csv(datapath / 'ReCiPe2016_split.csv')


def get(add_factors_for_missing_contexts=True, endpoint=True,
Expand Down Expand Up @@ -181,7 +181,7 @@ def _read_endpoints(file: str) -> pd.DataFrame:
endpoint.loc[endpoint['EndpointUnit'].str.contains('species', case=False), 'EndpointUnit'] = 'species-year'
endpoint.loc[endpoint['EndpointUnit'].str.contains('USD', case=False), 'EndpointUnit'] = 'USD2013'

endpoint_map = pd.read_csv(datapath + 'ReCiPe2016_endpoint_to_midpoint.csv')
endpoint_map = pd.read_csv(datapath / 'ReCiPe2016_endpoint_to_midpoint.csv')
endpoint = endpoint.merge(endpoint_map, how="left", on='EndpointIndicator')

# split into two dataframes
Expand Down
4 changes: 2 additions & 2 deletions lciafmt/traci.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
datapath


flowables_replace = pd.read_csv(datapath+'TRACI_2.1_replacement.csv')
flowables_split = pd.read_csv(datapath+'TRACI_2.1_split.csv')
flowables_replace = pd.read_csv(datapath / 'TRACI_2.1_replacement.csv')
flowables_split = pd.read_csv(datapath / 'TRACI_2.1_split.csv')


def get(add_factors_for_missing_contexts=True, file=None,
Expand Down
66 changes: 37 additions & 29 deletions lciafmt/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,26 @@
This module contains common functions for processing LCIA methods
"""

import os
from os.path import join
import sys

import lciafmt
import logging as log
import pandas as pd
import numpy as np
import yaml
import pkg_resources
from pathlib import Path
from esupy.processed_data_mgmt import Paths, FileMeta, load_preprocessed_output,\
write_df_to_file, write_metadata_to_file, download_from_remote
write_df_to_file, write_metadata_to_file, download_from_remote, \
mkdir_if_missing
from esupy.util import get_git_hash
from fedelemflowlist.globals import flow_list_specs


# set version number of package; must be kept in sync with setup.py
pkg_version_number = '1.0.4'
modulepath = os.path.dirname(os.path.realpath(__file__)).replace('\\', '/')
datapath = modulepath + '/data/'
pkg_version_number = '1.0.5'
MODULEPATH = Path(__file__).resolve().parent
datapath = MODULEPATH / 'data'

log.basicConfig(level=log.INFO, format='%(asctime)s %(levelname)-8s %(message)s',
datefmt='%Y-%m-%d %H:%M:%S', stream=sys.stdout)
Expand All @@ -32,11 +33,11 @@
write_format = "parquet"

paths = Paths()
paths.local_path = os.path.realpath(paths.local_path + "/lciafmt")
outputpath = paths.local_path
paths.local_path = paths.local_path / 'lciafmt'
OUTPUTPATH = paths.local_path

pkg = pkg_resources.get_distribution('lciafmt')
git_hash = get_git_hash()
GIT_HASH = get_git_hash()

method_metadata = {
'Name': '',
Expand All @@ -49,16 +50,18 @@

def set_lcia_method_meta(method_id):
lcia_method_meta = FileMeta()
if method_id is not None:
if isinstance(method_id, lciafmt.Method):
lcia_method_meta.name_data = method_id.get_filename()
lcia_method_meta.category = method_id.get_path()
else:
elif method_id is None:
lcia_method_meta.name_data = ""
lcia_method_meta.category = ""
else:
lcia_method_meta.name_data = method_id
lcia_method_meta.tool = pkg.project_name
lcia_method_meta.tool_version = pkg_version_number
lcia_method_meta.ext = write_format
lcia_method_meta.git_hash = git_hash
lcia_method_meta.git_hash = GIT_HASH
return lcia_method_meta


Expand Down Expand Up @@ -102,7 +105,7 @@ def aggregate_factors_for_primary_contexts(df) -> pd.DataFrame:
indices = df['Context'].str.find('/')
ignored_list = df['Indicator'].isin(ignored_categories)
i = 0
for k in ignored_list.iteritems():
for k in ignored_list.items():
if k[1]:
indices.update(pd.Series([-1], index=[i]))
i = i + 1
Expand Down Expand Up @@ -138,7 +141,7 @@ def aggregate_factors_for_primary_contexts(df) -> pd.DataFrame:

def get_modification(source, name) -> pd.DataFrame:
"""Return a dataframe of modified CFs based on csv."""
modified_factors = pd.read_csv(datapath+"/"+source+"_"+name+".csv")
modified_factors = pd.read_csv(datapath / f'{source}_{name}.csv')
return modified_factors


Expand Down Expand Up @@ -168,7 +171,7 @@ def check_as_class(method_id):


def generate_method_description(name: str) -> str:
with open(join(datapath, "description.yaml")) as f:
with open(datapath / "description.yaml") as f:
generic = yaml.safe_load(f)
method_description = generic['description']
method = check_as_class(name)
Expand All @@ -186,7 +189,7 @@ def generate_method_description(name: str) -> str:
detailed_meta = '\n\n' + method_meta['methods'][name]
method_description += detailed_meta
except KeyError:
log.debug('%s not found in methods.json', name)
log.debug(f'{name} not found in methods.json')
# Replace tagged fields
if 'version' in method_meta:
version = ' (v' + method_meta['version'] + ')'
Expand Down Expand Up @@ -223,14 +226,14 @@ def compile_metadata(method_id):
def store_method(df, method_id, name=''):
"""Save the method as a dataframe to parquet file."""
meta = set_lcia_method_meta(method_id)
method_path = outputpath + '/' + meta.category
method_path = OUTPUTPATH / meta.category
if name != '':
meta.name_data = name
elif meta.name_data == "":
elif meta.name_data == '':
meta.name_data = df['Method'][0]
meta.tool_meta = compile_metadata(method_id)
try:
log.info('saving ' + meta.name_data + ' to ' + method_path)
log.info(f'saving {meta.name_data} to {method_path}')
write_df_to_file(df, paths, meta)
write_metadata_to_file(paths, meta)
except:
Expand All @@ -241,14 +244,20 @@ def read_method(method_id):
"""Return the method stored in output."""
meta = set_lcia_method_meta(method_id)
method = load_preprocessed_output(meta, paths)
method_path = outputpath + '/' + meta.category
method_path = OUTPUTPATH / meta.category
if method is None:
log.info(meta.name_data + ' not found in ' + method_path)
log.info(f'{meta.name_data} not found in {method_path}')
else:
log.info('loaded ' + meta.name_data + ' from ' + method_path)
log.info(f'loaded {meta.name_data} from {method_path}')
return method


def download_method(method_id):
    """Download the stored method for ``method_id`` from the remote source.

    File metadata for the method is built with ``set_lcia_method_meta`` and
    handed, together with the module-level ``paths``, to esupy's
    ``download_from_remote``. Nothing is returned; the outcome of the remote
    call is not inspected here.

    :param method_id: identifier passed through to ``set_lcia_method_meta``
    """
    # NOTE(review): the original docstring calls the remote "data commons";
    # the actual endpoint is decided inside esupy, not in this function.
    download_from_remote(set_lcia_method_meta(method_id), paths)


def save_json(method_id, mapped_data, method=None, name=''):
"""Save a method as json file in the outputpath.
Expand All @@ -265,11 +274,10 @@ def save_json(method_id, mapped_data, method=None, name=''):
if method is not None:
filename = method.replace('/', '_')
mapped_data = mapped_data[mapped_data['Method'] == method]
path = outputpath+'/'+meta.category
os.makedirs(outputpath, exist_ok=True)
json_pack = path + '/' + filename + "_json_v" + meta.tool_version + ".zip"
if os.path.exists(json_pack):
os.remove(json_pack)
path = OUTPUTPATH / meta.category
mkdir_if_missing(OUTPUTPATH)
json_pack = path / f'{filename}_json_v{meta.tool_version}.zip'
json_pack.unlink(missing_ok=True)
lciafmt.to_jsonld(mapped_data, json_pack)


Expand All @@ -291,8 +299,8 @@ def compare_to_remote(local_df, method_id):
df_diff = df.query('`Characterization Factor` '
'!= `Characterization Factor_remote`')
if len(df_diff) > 0:
path = os.path.join(paths.local_path, 'diff')
os.makedirs(path, exist_ok=True)
path = paths.local_path / 'diff'
mkdir_if_missing(path)
log.info(f'Saving differences found in {method_id.name} '
f'versus remote to {path}')
df_diff.to_csv(f'{path}/{method_id.name}_diff.csv', index=False)
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List@v1.1.1#egg=fedelemflowlist
git+https://github.com/USEPA/esupy@v0.2.2#egg=esupy
olca-ipc>=0.0.12
git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List.git#egg=fedelemflowlist
git+https://github.com/USEPA/esupy.git#egg=esupy
olca-ipc==0.0.12
pandas>=0.23
requests>=2.21.0
openpyxl>=3.0.7
Expand Down
Loading

0 comments on commit e7e90a3

Please sign in to comment.