-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathutils.py
137 lines (113 loc) · 5.39 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""
Copyright (C) 2018 Bryn Pickering.
Licensed under the MIT License (see LICENSE file).
"""
import numpy as np
import pandas as pd
# Implement scenario reduction
def _kantorovich_distances(nu, weights):
    """Return the Kantorovich distance of every candidate (column) in `nu`.

    Each row of `nu` is a not-yet-chosen scenario; its contribution to a
    candidate's distance is weighted by that row's probability. With
    `weights=None` all rows count equally (plain column sums).
    """
    if weights is None:
        return nu.sum(axis=0)
    # `axis=0` aligns the weights with the row labels, so every row is scaled
    # by the probability of the scenario that row represents.
    return nu.mul(weights.reindex(nu.index), axis=0).sum(axis=0)


def get_reduced_scenarios(costs, scenarios, probabilities=None):
    """
    Reduce the number of scenarios used to describe an uncertain parameter by
    minimising the Kantorovich distance for a number of desired reduced
    scenarios (greedy forward selection).

    Parameters
    ----------
    costs : list
        Array of magnitudes related to the scenarios.
        Ideally, objective function value from a simplified LP optimisation
        would give the magnitudes (and are thus monetary costs associated with
        that scenario).
    scenarios : int
        Number of reduced scenarios to pick. At least one scenario is always
        returned, even if `scenarios` is smaller than 1.
    probabilities : list, default = None
        Array of probabilities of occurrence for each scenario in the original
        set (any 1D array-like, same length as `costs`).
        If `None` (or empty), scenarios are assumed to be equiprobable.

    Returns
    -------
    chosen_scenarios : list
        list of scenarios chosen to reduce total Kantorovich distances, in the
        order in which they were selected.
        Scenario number refers to index of scenario given in `costs`

    Raises
    ------
    ValueError
        If `costs` is empty or more scenarios are requested than exist.
    """
    cost_values = np.asarray(costs)
    n_costs = len(cost_values)
    if n_costs == 0:
        raise ValueError("`costs` must contain at least one scenario")
    if scenarios > n_costs:
        raise ValueError(
            "Cannot pick {} reduced scenarios from {} available scenarios"
            .format(scenarios, n_costs)
        )

    labels = list(range(n_costs))
    # Cost function: nu[j, k] = |cost_j - cost_k| for every pair of scenarios
    nu_prev = pd.DataFrame(
        np.abs(np.subtract.outer(cost_values, cost_values)),
        index=labels, columns=labels,
    )

    # `is None` rather than truthiness, so that NumPy arrays are accepted too
    if probabilities is None or len(probabilities) == 0:
        weights = None
    else:
        weights = pd.Series(
            np.asarray(probabilities, dtype=float).ravel(), index=labels
        )

    # Starting scenario = column with the lowest Kantorovich distance.
    # `idxmin` returns the first label on ties, so the pick is deterministic.
    chosen_scenarios = [_kantorovich_distances(nu_prev, weights).idxmin()]

    for _ in range(scenarios - 1):
        # Get the last loop's chosen scenario
        s_prev = chosen_scenarios[-1]

        # Distance from every remaining scenario to the previously chosen one
        dist_to_prev = nu_prev[s_prev].drop(s_prev)

        # Remove the row and column associated with the previously chosen scenario
        nu_i = nu_prev.drop(s_prev, axis=1).drop(s_prev, axis=0)

        # A remaining scenario can never be further from the reduced set than
        # it is from the previously chosen scenario, so cap each row at that
        # distance (broadcast down the rows).
        nu_i = pd.DataFrame(
            np.minimum(nu_i.values, dist_to_prev.values[:, np.newaxis]),
            index=nu_i.index, columns=nu_i.columns,
        )

        chosen_scenarios.append(_kantorovich_distances(nu_i, weights).idxmin())

        # Latest matrix is the 'previous' matrix in the next iteration
        nu_prev = nu_i

    return chosen_scenarios
def get_redistributed_probabilities(costs, reduced_scenarios, probabilities=None):
    """
    Redistribute probabilities from a full set of samples to a subset of chosen
    reduced scenarios.

    Every scenario that is not in the reduced set "gives" its probability to
    the reduced scenario whose cost is closest to its own; ties go to the
    reduced scenario listed first in `reduced_scenarios`.

    Parameters
    ----------
    costs : list
        Array of magnitudes of the full sample set. Ideally, objective function
        value from a simplified LP optimisation would
        give the magnitudes (and are thus monetary costs associated with that
        sample).
    reduced_scenarios : list
        list of indices from within `costs` that are the chosen reduced
        scenarios
    probabilities : list, default = None
        Array of probabilities of occurrence for each scenario in the original
        set (any 1D array-like, same length as `costs`).
        If `None` (or empty), scenarios are assumed to be equiprobable

    Returns
    -------
    scenario_df : pandas DataFrame
        Dataframe with scenario number as index and scenario cost ("cost"),
        reduced scenario to which probability is distributed
        ("reduced_scenario", stored as float), and redistributed probabilities
        ("probability") as columns. Scenarios outside the reduced set end up
        with a probability of zero.

    Raises
    ------
    ValueError
        If `reduced_scenarios` is empty.
    """
    if len(reduced_scenarios) == 0:
        raise ValueError("`reduced_scenarios` must contain at least one scenario")

    n_costs = len(costs)
    scenario_df = pd.DataFrame(
        {"cost": np.asarray(costs)}, index=list(range(n_costs))
    )
    reduced_costs = scenario_df.loc[reduced_scenarios, "cost"]

    # gaps[j, k] = |cost of scenario j - cost of k-th reduced scenario|
    gaps = np.abs(
        np.subtract.outer(scenario_df["cost"].values, reduced_costs.values)
    )
    # `argmin` returns the first minimum, which makes tie-breaking deterministic
    nearest = pd.Series(
        reduced_costs.index.values[gaps.argmin(axis=1)], index=scenario_df.index
    )
    # A reduced scenario always represents itself, even if another reduced
    # scenario happens to have exactly the same cost
    nearest.loc[reduced_scenarios] = reduced_scenarios
    scenario_df["reduced_scenario"] = nearest.astype(float)

    # `is None` rather than truthiness, so that NumPy arrays are accepted too
    if probabilities is None or len(probabilities) == 0:
        original = pd.Series(1 / n_costs, index=scenario_df.index)
    else:
        original = pd.Series(
            np.asarray(probabilities).ravel(), index=scenario_df.index
        )

    # Redistribute probabilities: each reduced scenario collects the
    # probability of every scenario mapped onto it (itself included);
    # all other scenarios are left with zero.
    received = original.groupby(nearest).sum()
    scenario_df["probability"] = received.reindex(scenario_df.index, fill_value=0)

    return scenario_df