300 change how we count functional/non-functional for the playbook (#309)
* fixed total json of pop dislocation post processing (#301) (#303)

* fixed total json of pop dislocation post processing (#301)

* changelog title change

---------

Co-authored-by: Chen Wang <[email protected]>

* Galveston Capital Shock and CGE (#280)

* First commit

* Create test_galvestoncge.py

* Update galvestoncge.py

* Update galvestoncge.py

* Draft update with new Nonethnic_CGE_Model

* Fixed whatever bug

* Remove files that shouldn't be committed

* Code to add missing sectors

* Update CHANGELOG.md

* Updated test and corrected city in code

* Catch infeasible and other wrong solver status

* Update modules.rst

* Update galvestoncge.py

---------

Co-authored-by: Chen Wang <[email protected]>

* rewrite cge post processing util (#297)

* rewrite

* changelog

* write better test

* adjust

* fix galveston

* rewrite test scripts for all 3 testbed cge

* space

* import correct galveston cge

---------

Co-authored-by: YONG WOOK KIM <[email protected]>

* add some temp test will remove later

* finally it's working....

* clean up the logic

* write proper test for joplin

* add galveston mcs

* write tests for joplin and galveston

* changelog

* fix pytest

* use the correct unique cluster or unique category

---------

Co-authored-by: Jong Lee <[email protected]>
Co-authored-by: Vismayak Mohanarajan <[email protected]>
Co-authored-by: YONG WOOK KIM <[email protected]>
4 people authored Apr 7, 2023
1 parent 073ea4f commit 7502488
Showing 3 changed files with 144 additions and 73 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
### Fixed
- CGE output post process util function [#298](https://github.com/IN-CORE/pyincore/issues/298)
- Population Dislocation utility function arbitrarily assumes there will be dislocated and non-dislocated [#301](https://github.com/IN-CORE/pyincore/issues/301)

- Functional vs non-functional calculation now based on failure samples [#300](https://github.com/IN-CORE/pyincore/issues/300)

## [1.9.0] - 2023-03-15

140 changes: 74 additions & 66 deletions pyincore/utils/dataprocessutil.py
@@ -7,16 +7,18 @@
import geopandas as gpd
import json
import pandas as pd
import numpy as np

from pyincore import Dataset, DataService
from pyincore import Dataset, DataService, IncoreClient
from functools import reduce


class DataProcessUtil:

@staticmethod
def get_mapped_result_from_analysis(client, inventory_id: str, dmg_result_dataset,
bldg_func_dataset, archetype_mapping_id: str,
groupby_col_name: str = "max_state", arch_col='archetype'
groupby_col_name: str = "max_state", arch_col="archetype"
):
"""Use this if you want to load results directly from the output files of the analysis, than storing the results
to data service and loading from there using ids.
@@ -43,7 +45,7 @@ def get_mapped_result_from_analysis(client, inventory_id: str, dmg_result_datase
dmg_result = dmg_result_dataset.get_dataframe_from_csv()

bldg_func_df = bldg_func_dataset.get_dataframe_from_csv()
bldg_func_df.rename(columns={'building_guid': 'guid'}, inplace=True)
bldg_func_df.rename(columns={"building_guid": "guid", "samples": "failure"}, inplace=True)

arch_mapping = Dataset.from_data_service(archetype_mapping_id, DataService(client)).get_dataframe_from_csv()
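A minimal calling sketch for this variant, assuming an authenticated IncoreClient and Dataset objects already produced by the damage and Monte Carlo analyses (the ids and dataset variable names below are placeholders, not real inputs):

from pyincore import IncoreClient
from pyincore.utils.dataprocessutil import DataProcessUtil

client = IncoreClient()
# dmg_result_dataset and bldg_func_dataset stand in for the output Dataset
# objects of prior analyses; both string ids below are hypothetical.
ret = DataProcessUtil.get_mapped_result_from_analysis(
    client, "inventory-dataset-id", dmg_result_dataset,
    bldg_func_dataset, "archetype-mapping-id")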

@@ -57,7 +59,7 @@
@staticmethod
def get_mapped_result_from_dataset_id(client, inventory_id: str, dmg_result_id: str, bldg_func_id,
archetype_mapping_id: str,
groupby_col_name: str = "max_state", arch_col='archetype'):
groupby_col_name: str = "max_state", arch_col="archetype"):
"""Use this if your damage results are already stored in the data service and you have their dataset ids.
All the inputs (except groupby_col_name) are dataset ids.
@@ -84,7 +86,7 @@ def get_mapped_result_from_dataset_id(client, inventory_id: str, dmg_result_id:

bldg_func_dataset = Dataset.from_data_service(bldg_func_id, DataService(client))
bldg_func_df = bldg_func_dataset.get_dataframe_from_csv()
bldg_func_df.rename(columns={'building_guid': 'guid'}, inplace=True)
bldg_func_df.rename(columns={"building_guid": "guid", "samples": "failure"}, inplace=True)

archtype_mapping_dataset = Dataset.from_data_service(archetype_mapping_id, DataService(client))
arch_mapping = archtype_mapping_dataset.get_dataframe_from_csv()
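The dataset-id variant takes the same inputs as ids of datasets already stored in the data service; a sketch with placeholder ids:

from pyincore import IncoreClient
from pyincore.utils.dataprocessutil import DataProcessUtil

client = IncoreClient()
# All four ids are placeholders for datasets stored in the data service.
ret = DataProcessUtil.get_mapped_result_from_dataset_id(
    client, "inventory-dataset-id", "dmg-result-dataset-id",
    "bldg-func-dataset-id", "archetype-mapping-dataset-id")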
@@ -100,7 +102,7 @@ def get_mapped_result_from_dataset_id(client, inventory_id: str, dmg_result_id:
def get_mapped_result_from_path(inventory_path: str, dmg_result_path: str,
func_result_path: str,
archetype_mapping_path: str,
groupby_col_name: str, arch_col='archetype'):
groupby_col_name: str, arch_col="archetype"):
"""
Args:
@@ -121,7 +123,7 @@ def get_mapped_result_from_path(inventory_path: str, dmg_result_path: str,
inventory = pd.DataFrame(gpd.read_file("zip://" + inventory_path))
dmg_result = pd.read_csv(dmg_result_path)
bldg_func_df = pd.read_csv(func_result_path)
bldg_func_df.rename(columns={'building_guid': 'guid'}, inplace=True)
bldg_func_df.rename(columns={"building_guid": "guid", "samples": "failure"}, inplace=True)
arch_mapping = pd.read_csv(archetype_mapping_path)

max_state_df = DataProcessUtil.get_max_damage_state(dmg_result)
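The path variant works entirely from local files; a sketch assuming a zipped shapefile inventory (it is read via "zip://" + path) and CSV results on disk, with hypothetical file names:

from pyincore.utils.dataprocessutil import DataProcessUtil

ret = DataProcessUtil.get_mapped_result_from_path(
    "buildings.zip", "building_dmg.csv", "sample_failure_state.csv",
    "archetype_mapping.csv", groupby_col_name="max_state")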
@@ -134,7 +136,7 @@ def get_mapped_result_from_path(inventory_path: str, dmg_result_path: str,

@staticmethod
def create_mapped_dmg_result(inventory, dmg_result, arch_mapping, groupby_col_name="max_state",
arch_col='archetype'):
arch_col="archetype"):
"""
Args:
@@ -147,26 +149,26 @@ def create_mapped_dmg_result(inventory, dmg_result, arch_mapping, groupby_col_na
"""
dmg_states = dmg_result[groupby_col_name].unique().tolist() # get unique damage states
dmg_merged = pd.merge(inventory, dmg_result, on='guid')
dmg_merged = pd.merge(inventory, dmg_result, on="guid")
mapped_df = pd.merge(dmg_merged, arch_mapping, on=arch_col)
unique_categories = arch_mapping.groupby(by=['cluster', 'category'], sort=False).count().reset_index()
unique_categories = arch_mapping.groupby(by=["cluster", "category"], sort=False).count().reset_index()

group_by = mapped_df.groupby(by=[groupby_col_name, 'cluster', 'category']).count().reset_index()
group_by = group_by.loc[:, ['guid', groupby_col_name, 'cluster', 'category']]
group_by.rename(columns={'guid': 'count'}, inplace=True)
group_by = mapped_df.groupby(by=[groupby_col_name, "cluster", "category"]).count().reset_index()
group_by = group_by.loc[:, ["guid", groupby_col_name, "cluster", "category"]]
group_by.rename(columns={"guid": "count"}, inplace=True)

pivot = group_by.pivot_table(values='count', index=['cluster', 'category'], columns=groupby_col_name,
pivot = group_by.pivot_table(values="count", index=["cluster", "category"], columns=groupby_col_name,
fill_value=0)

table = pd.DataFrame()
table[['category', 'cluster']] = unique_categories[['category', 'cluster']]
result_by_cluster = pd.merge(table, pivot, how='left', on=['cluster', 'category'])
table[["category", "cluster"]] = unique_categories[["category", "cluster"]]
result_by_cluster = pd.merge(table, pivot, how="left", on=["cluster", "category"])

# Add missing max damage states. Handles the case when no inventory falls under some damage states.
result_by_cluster = result_by_cluster.reindex(result_by_cluster.columns.union(
dmg_states, sort=False), axis=1, fill_value=0)

result_by_category = result_by_cluster.groupby(by=['category'], sort=False).sum(min_count=1).reset_index()
result_by_category = result_by_cluster.groupby(by=["category"], sort=False).sum(min_count=1).reset_index()

result_by_cluster[dmg_states] = result_by_cluster[dmg_states].fillna(-1).astype(int)
result_by_category[dmg_states] = result_by_category[dmg_states].fillna(-1).astype(int)
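For intuition, the cluster/category counting above boils down to a small pandas pattern; a self-contained sketch on made-up rows (all values invented):

import pandas as pd

# One row per building with its max damage state.
mapped_df = pd.DataFrame({
    "guid": ["b-1", "b-2", "b-3"],
    "cluster": ["Residential", "Residential", "Commercial"],
    "category": ["Building", "Building", "Building"],
    "max_state": ["DS_0", "DS_3", "DS_0"],
})
group_by = mapped_df.groupby(["max_state", "cluster", "category"]).count().reset_index()
group_by = group_by.rename(columns={"guid": "count"})
pivot = group_by.pivot_table(values="count", index=["cluster", "category"],
                             columns="max_state", fill_value=0)
print(pivot)  # one row per (cluster, category), one count column per damage state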
@@ -179,62 +181,68 @@ def create_mapped_dmg_result(inventory, dmg_result, arch_mapping, groupby_col_na
return {"by_cluster": json_by_cluster, "by_category": json_by_category}

@staticmethod
def create_mapped_func_result(inventory, bldg_func, arch_mapping, arch_col='archetype'):
def create_mapped_func_result(inventory, bldg_func, arch_mapping, arch_col="archetype"):
"""
Args:
inventory: dataframe representing the inventory
bldg_func: building func dataset
bldg_func: building func state dataset
arch_mapping: archetype mapping dataframe
arch_col: archetype column to use for the clustering
Returns:
ret_json: JSON of the results ordered by cluster and category.
"""
def _sum_average(series):
return reduce(lambda x, y: np.mean(x + y).round(0), series)

func_state = ["percent_functional", "percent_non_functional", "num_functional", "num_non_functional"]
func_merged = pd.merge(inventory, bldg_func, on='guid')

# unify mcs and bldg func naming
bldg_func.rename(columns={"building_guid": "guid", "samples": "failure"}, inplace=True)

func_merged = pd.merge(inventory, bldg_func, on="guid")
mapped_df = pd.merge(func_merged, arch_mapping, on=arch_col)
unique_categories = arch_mapping.groupby(by=['category'], sort=False, as_index=False).count()['category']
unique_cluster = arch_mapping.groupby(by=['cluster', 'category'], sort=False, as_index=False).count()[[
'cluster', 'category']]

# group by cluster
result_by_cluster = mapped_df.groupby(by=['cluster', 'category'], sort=False, as_index=False).agg(
{'guid': 'count',
'probability': 'mean'})
result_by_cluster.rename(columns={'guid': 'tot_count', 'probability': 'percent_functional'}, inplace=True)
result_by_cluster["percent_non_functional"] = 1 - result_by_cluster["percent_functional"]
result_by_cluster["num_functional"] = (result_by_cluster["tot_count"] * result_by_cluster[
"percent_functional"]).round(0)
result_by_cluster["num_non_functional"] = (result_by_cluster["tot_count"] * result_by_cluster[
"percent_non_functional"]).round(0)
result_by_cluster = result_by_cluster.drop('tot_count', 1)
result_by_cluster = pd.merge(unique_cluster, result_by_cluster, how='left', on=['cluster', 'category'])
# Add missing max damage states. Handles the case when no inventory falls under some damage states.
result_by_cluster = result_by_cluster.reindex(result_by_cluster.columns.union(
func_state, sort=False), axis=1, fill_value=0)
# replace NaN
result_by_cluster[func_state] = result_by_cluster[func_state].fillna(-1)
result_by_cluster[["num_functional", "num_non_functional"]] = result_by_cluster[["num_functional",
"num_non_functional"]].astype(
int)

# group by category
result_by_category = mapped_df.groupby(by=['category'], sort=False, as_index=False).agg({'guid': 'count',
'probability': 'mean'})
result_by_category.rename(columns={'guid': 'tot_count', 'probability': 'percent_functional'}, inplace=True)
result_by_category["percent_non_functional"] = 1 - result_by_category["percent_functional"]
result_by_category["num_functional"] = (
result_by_category["tot_count"] * result_by_category["percent_functional"]).round(0)
result_by_category["num_non_functional"] = (
result_by_category["tot_count"] * result_by_category["percent_non_functional"]).round(0)
result_by_category = result_by_category.drop('tot_count', 1)
result_by_category = pd.merge(unique_categories, result_by_category, how='left', on=['category'])
# replace NaN
result_by_category[func_state] = result_by_category[func_state].fillna(-1)
result_by_category[["num_functional", "num_non_functional"]] = result_by_category[
["num_functional", "num_non_functional"]].astype(int)
unique_categories = arch_mapping.groupby(by=["category"], sort=False, as_index=False).count()["category"]
unique_cluster = arch_mapping.groupby(by=["cluster", "category"], sort=False, as_index=False).count()[[
"cluster", "category"]]

mapped_df = mapped_df[["guid", "failure", "category", "cluster"]]
mapped_df["failure_array"] = mapped_df["failure"].apply(lambda x: np.array([int(x) for x in x.split(",")]))

def _group_by(by_column, unique):
# group by the requested columns (cluster and category, or category only)
result = mapped_df.groupby(by=by_column, sort=False, as_index=False).agg(
{"guid": "count", "failure_array": [_sum_average]})

# clean up
result.rename(columns={"guid": "tot_count", "failure_array": "num_functional"}, inplace=True)

# MCS failure samples: 0 = failed, 1 = not failed.
# Functionality samples: 0 = non-functional, 1 = functional.
result["num_non_functional"] = result["tot_count"].squeeze() - result["num_functional"].squeeze()
result["percent_functional"] = result["num_functional"].squeeze() / result["tot_count"].squeeze()
result["percent_non_functional"] = 1 - result["percent_functional"]

# flatten the tuple column names produced by the multi-function agg
result.columns = [x[0] if len(x) > 1 else x for x in result.columns]

# more clean up
result = pd.merge(unique, result, how="left", on=by_column)

# Add missing functional-state columns. Handles the case when no inventory falls into some states.
result = result.reindex(result.columns.union(func_state, sort=False), axis=1, fill_value=0)

# replace NaN
result[func_state] = result[func_state].fillna(-1)
result["tot_count"] = result["tot_count"].fillna(-1)
result[["num_functional", "num_non_functional"]] = result[["num_functional", "num_non_functional"]].astype(int)

return result

result_by_cluster = _group_by(by_column=["cluster", "category"], unique=unique_cluster)
result_by_category = _group_by(by_column=["category"], unique=unique_categories)

cluster_records = result_by_cluster.to_json(orient="records")
category_records = result_by_category.to_json(orient="records")
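The playbook change (#300) is easiest to see on made-up failure strings; a minimal sketch of what _sum_average computes for one group:

import numpy as np
from functools import reduce

# Two buildings, four Monte Carlo samples each: 1 = functional, 0 = not.
failure_strings = ["1,1,0,1", "0,1,1,1"]
arrays = [np.array([int(v) for v in s.split(",")]) for s in failure_strings]

# Element-wise sum across buildings, then mean over samples, rounded:
# the expected count of functional buildings in the group.
num_functional = reduce(lambda x, y: np.mean(x + y).round(0), arrays)
print(num_functional)  # 2.0 (mean functional count is 1.5, rounded half-to-even)
num_non_functional = len(arrays) - num_functional  # 0.0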
@@ -254,20 +262,20 @@ def get_max_damage_state(dmg_result):
pd.DataFrame: Pandas dataframe that has column GUID and column max_state.
"""
if all(column in dmg_result.columns for column in ['DS_0', 'DS_1', 'DS_2', 'DS_3']):
dmg_states = ['DS_0', 'DS_1', 'DS_2', 'DS_3']
elif all(column in dmg_result.columns for column in ['insignific', 'moderate', 'heavy', 'complete']):
dmg_states = ['insignific', 'moderate', 'heavy', 'complete']
if all(column in dmg_result.columns for column in ["DS_0", "DS_1", "DS_2", "DS_3"]):
dmg_states = ["DS_0", "DS_1", "DS_2", "DS_3"]
elif all(column in dmg_result.columns for column in ["insignific", "moderate", "heavy", "complete"]):
dmg_states = ["insignific", "moderate", "heavy", "complete"]
elif all(column in dmg_result.columns for column in ["ds-none", "ds-slight", "ds-moderat", "ds-extensi",
"ds-complet"]):
dmg_states = ["ds-none", "ds-slight", "ds-moderat", "ds-extensi", "ds-complet"]
else:
raise ValueError("Invalid damage state names. Cannot create mapped max damage state.")

guids = dmg_result[['guid']]
guids = dmg_result[["guid"]]
max_val = dmg_result[dmg_states].max(axis=1)
max_key = dmg_result[dmg_states].idxmax(axis=1)
dmg_concat = pd.concat([guids, max_val, max_key], axis=1)
dmg_concat.rename(columns={0: 'max_prob', 1: 'max_state'}, inplace=True)
dmg_concat.rename(columns={0: "max_prob", 1: "max_state"}, inplace=True)

return dmg_concat
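A quick sketch of get_max_damage_state on a hypothetical damage result:

import pandas as pd
from pyincore.utils.dataprocessutil import DataProcessUtil

dmg = pd.DataFrame({
    "guid": ["b-1", "b-2"],
    "DS_0": [0.6, 0.1],
    "DS_1": [0.2, 0.2],
    "DS_2": [0.1, 0.3],
    "DS_3": [0.1, 0.4],
})
print(DataProcessUtil.get_max_damage_state(dmg))
#   guid  max_prob max_state
# 0  b-1       0.6      DS_0
# 1  b-2       0.4      DS_3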