-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathevaluate.py
280 lines (217 loc) · 11.1 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
import os
import yaml
import time
import argparse
import random
from typing import List, Union, Dict, Any, Union, Optional, Callable, Tuple
import tqdm
import networkx as nx
from src.data.evaluation_dataset import EVALUATION_DATASETS
from src.model.models import BUILDERS, MERGERS, QUERY_INTERPRETERS, INFERENCE_ORACLES, EVALUATORS
from src.causal.counterfactual_inference import Query
from src.model.singleton import handle_config_singleton
from src.visualisation.visualisation import save_graph_as_png
from src.utils.evaluation_utils import sem_equals
from json.decoder import JSONDecodeError
from langchain_core.exceptions import OutputParserException
from networkx.exception import NetworkXError, NetworkXUnfeasible
DEFAULT_EVALUATION_GRAPHS_SAVE_FOLDER = 'evaluation_logs'
def parse_args():
parser = argparse.ArgumentParser(description='Compute end-to-end causal discovery and inference on dataset with ground truth')
parser.add_argument('config', type=str, help='Path to the query configuration file')
return parser.parse_args()
def build_query(graph : nx.DiGraph, query_text : str, interpreter : Callable) -> Dict[str,Any]:
nodes = list(graph.nodes(data=True))
query = interpreter(query_text, nodes)
return {'target_node': query['target_variable'],
'observation_nodes': None, # no observations
'intervention_nodes': query['intervention_variable'],
'observation_values': None, # no observations
'intervention_values': query['intervention_new_value'],
}
def build_topological_graph(graph : nx.DiGraph) -> nx.DiGraph:
order = list(nx.topological_sort(graph))
node_relabel = {node: str(i) for i, node in enumerate(order)}
return nx.relabel_nodes(graph, node_relabel, copy=True)
def match_attributes(graph : nx.DiGraph, ref_graph : nx.DiGraph):
graph = graph.copy()
node_relabel = {}
for node in graph.nodes:
graph.nodes[node]['observed'] = True
for ref_node in ref_graph.nodes:
if node.lower() == ref_graph.nodes[ref_node]['description'].lower():
graph.nodes[node]['description'] = ref_graph.nodes[ref_node]['description']
node_relabel[node] = ref_node
break
nx.relabel_nodes(graph, node_relabel, copy=False)
return graph
def save_errors(output_path : str, iteration_name : str, error : str):
output_path_i = os.path.join(output_path, f'iteration_{iteration_name}')
os.makedirs(output_path_i, exist_ok=True)
with open(os.path.join(output_path_i, 'errors.txt'), 'w') as f:
f.write(error)
def process_item(item : Dict[str,Any],
builder : Callable,
oracle : Callable,
interpreter : Callable,
evaluator : Callable,
graph_traversal_cutoff : Optional[int] = None,
no_build_graph : bool = False
) -> Tuple[Dict[str,Any], nx.DiGraph, nx.DiGraph, nx.DiGraph]:
name = item['name']
text = item['text']
query_text = item['query']
gt_graph = item['gt_graph']
label = item['label']
# Build graph
if no_build_graph:
graph = gt_graph
else:
try:
graph = builder.build_graph(name, text)
except (KeyError, JSONDecodeError, OutputParserException, NetworkXError) as e:
raise e.__class__(f"Error building graph: {e}")
# Answer counterfactual queries (num_queries per graph)
query_config = build_query(graph, query_text, interpreter)
# Compute counterfactuals
try:
query = Query(graph, oracle, **query_config, traversal_cutoff=graph_traversal_cutoff, compute_counterfactuals=True)
answer, computation_graph = query()
except (KeyError, JSONDecodeError, OutputParserException) as e:
raise e.__class__(f"Error computing counterfactuals: {e}")
# Evaluate counterfactual graphs
score = {
'item' : item,
}
# Compute label accuracy
correct = sem_equals(answer, label)
score['correct'] = correct
score['answer'] = answer
score['label'] = label
score['question_property'] = item['question_property']
# Update ground truth graph
gt_graph = match_attributes(gt_graph, graph)
# Compute graph self-scores
# for g, name in [(gt_graph, "ground_truth"), (graph, "estimated"), (computation_graph, "counterfactual")]:
for g, g_name in [(graph, "estimated"), (computation_graph, "counterfactual")]:
try:
plausibility, confidence, explanation = evaluator.evaluate(g)
except (JSONDecodeError, OutputParserException, NetworkXUnfeasible) as e:
plausibility, confidence, explanation = None, None, None
g.graph['plausibility_score'] = plausibility
g.graph['plausibility_score_confidence'] = confidence
g.graph['plausibility_score_explanation'] = explanation
score[f'{g_name}_plausibility'] = plausibility
score[f'{g_name}_confidence'] = confidence
score[f'{g_name}_explanation'] = explanation
# Compute graph edit distance (ged) similarity
max_nodes = max(gt_graph.number_of_nodes(), graph.number_of_nodes())
ged = nx.graph_edit_distance(gt_graph, graph)
norm_ged = ged / max_nodes
score['graph_edit_distance'] = ged
score['normalized_graph_edit_distance'] = norm_ged
# Compute intersection over union similarity
intersection = nx.intersection(gt_graph, graph)
union = nx.compose(gt_graph, graph)
iou_ged = nx.graph_edit_distance(intersection, union)
norm_iou_ged = iou_ged / max_nodes
score['intersection_over_union_graph_edit_distance'] = iou_ged
score['normalized_intersection_over_union_graph_edit_distance'] = norm_iou_ged
# Compute label-free ged
if nx.is_directed_acyclic_graph(gt_graph) and nx.is_directed_acyclic_graph(graph):
gt_graph_topo = build_topological_graph(gt_graph)
graph_topo = build_topological_graph(graph)
ged_topo = nx.graph_edit_distance(gt_graph_topo, graph_topo)
norm_ged_topo = ged_topo / max_nodes
# Compute label-free iou_ged
intersection_topo = nx.intersection(gt_graph_topo, graph_topo)
union_topo = nx.compose(gt_graph_topo, graph_topo)
iou_ged_topo = nx.graph_edit_distance(intersection_topo, union_topo)
norm_iou_ged_topo = iou_ged_topo / max_nodes
else:
ged_topo = None
norm_ged_topo = None
iou_ged_topo = None
norm_iou_ged_topo = None
score['graph_edit_distance_topological'] = ged_topo
score['normalized_graph_edit_distance_topological'] = norm_ged_topo
score['intersection_over_union_graph_edit_distance_topological'] = iou_ged_topo
score['normalized_intersection_over_union_graph_edit_distance_topological'] = norm_iou_ged_topo
return score, gt_graph, graph, computation_graph
def main(data_path : Union[str,List[str]],
dataset_class : str,
dataset_config : Dict[str,Any],
evaluator_class : str,
evaluator_config : Dict[str,Any],
oracle_class : str,
oracle_config : Dict[str,Any],
interpreter_class : str,
interpreter_config : Dict[str,Any],
builder_class : str,
builder_config : Dict[str,Any],
merger_class : Optional[str] = None,
merger_config : Optional[Dict[str,Any]] = None,
graph_traversal_cutoff : Optional[int] = None,
no_build_graph : bool = False,
save_graphs : bool = False,
save_logs : bool = True,
nb_queries : Optional[int] = None, # for debugging purposes
output_path : Optional[str] = None):
# Load text data
print(f"Using dataset {dataset_class}.")
data = EVALUATION_DATASETS[dataset_class](data_path, **dataset_config)
if nb_queries is not None:
random.seed(42)
indices = random.sample(range(len(data)), nb_queries)
data = data[indices]
# Load graph builder: build causal graphs from text data
print(f"Using builder {builder_class}.")
builder = BUILDERS[builder_class](**builder_config)
# Load oracle: answer counterfactual queries
print(f"Using oracle {oracle_class}.")
oracle = INFERENCE_ORACLES[oracle_class](**oracle_config)
# Load interpreter: interpret a node to build alternative/counterfactual instantiations
print(f"Using interpreter {interpreter_class}.")
interpreter = QUERY_INTERPRETERS[interpreter_class](**interpreter_config)
# Load evaluator: evaluate the plausibility of counterfactual graphs
print(f"Using evaluator {evaluator_class}.")
evaluator = EVALUATORS[evaluator_class](**evaluator_config)
if output_path is None:
evaluated_model = ''
if 'model_type' in builder_config:
evaluated_model += f"{builder_config['model_type']}_"
if 'model' in builder_config:
evaluated_model += f"{builder_config['model']}_"
output_path = os.path.join(DEFAULT_EVALUATION_GRAPHS_SAVE_FOLDER, f'logs_evaluation_{dataset_class}_{evaluated_model}_{time.strftime("%Y%m%d-%H%M%S")}')
os.makedirs(output_path, exist_ok=True)
# Iteratively build causal graphs
scores = []
for item in tqdm.tqdm(data):
try:
score, gt_graph, graph, computation_graph = process_item(item, builder, oracle, interpreter, evaluator, graph_traversal_cutoff, no_build_graph)
scores.append(score)
if save_logs or save_graphs:
output_path_i = os.path.join(output_path, f"iteration_{item['name']}")
os.makedirs(output_path_i, exist_ok=True)
if save_logs:
# Write logs
with open(os.path.join(output_path_i, 'logs.txt'), 'w') as f:
for key, value in score.items():
f.write(f"{key}: {value}\n")
if save_graphs:
# Write graph
nx.write_gml(gt_graph, os.path.join(output_path_i, f'ground_truth_graph.gml'))
save_graph_as_png(gt_graph, os.path.join(output_path_i, f'ground_truth_graph.png'), node_labels=[], edge_labels=[])
nx.write_gml(graph, os.path.join(output_path_i, f'initial_graph_score={score["estimated_plausibility"]}.gml'))
save_graph_as_png(graph, os.path.join(output_path_i, f'initial_graph_score={score["estimated_plausibility"]}.png'), node_labels=['description', 'type', 'values', 'current_value', 'context'], edge_labels=['description', 'details'])
nx.write_gml(computation_graph, os.path.join(output_path_i, f'counterfactual_graph_score={score["counterfactual_plausibility"]}.gml'))
save_graph_as_png(computation_graph, os.path.join(output_path_i, f'counterfactual_graph_score={score["counterfactual_plausibility"]}.png'), node_labels=['description','current_value','updated_value'])
except Exception as e:
save_errors(output_path, item['name'], str(e))
if __name__ == '__main__':
args = parse_args()
config_path = args.config
with open(config_path, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
config = handle_config_singleton(config)
main(**config)