-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbug_iden_calculate_acc.py
84 lines (64 loc) · 2.67 KB
/
bug_iden_calculate_acc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import json
# Input locations. `refs`/`pres` are read line by line (presumably reference
# answers and model predictions, one example per line -- confirm with the
# producer of these files); `path` is the JSON Lines file with per-example
# metadata.
refs_path = 'refs_zero_shot.txt'
pres_path = 'pres_zero_shot.txt'
path = 'debugevalsuite_task124.jsonl'


def _read_stripped_lines(file_path):
    """Return every line of *file_path* with surrounding whitespace removed.

    The file is opened inside a ``with`` block so the handle is closed
    deterministically instead of being left for the garbage collector.
    """
    with open(file_path, 'r', encoding='utf8') as handle:
        return [line.strip() for line in handle]


refs = _read_stripped_lines(refs_path)
pres = _read_stripped_lines(pres_path)
def read_data_file(path):
    """Load a JSON Lines file, keeping records with a non-empty ``task2_choice``.

    Args:
        path: Path to a UTF-8 text file holding one JSON object per line.

    Returns:
        A list of the decoded objects whose ``task2_choice`` value is not the
        empty string, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a decoded record has no ``task2_choice`` key.
    """
    data = []
    with open(path, 'r', encoding='utf8') as file:
        for line in file:
            # A blank line carries no record, and json.loads would raise on
            # it, so skip it rather than abort the whole load.
            if not line.strip():
                continue
            item = json.loads(line)
            if item['task2_choice'] != '':
                data.append(item)
    return data
# Load the filtered benchmark records once, then project out the two fields
# used later to bucket the per-example scores. Both lists share the record
# order of `data`.
data = read_data_file(path)
langs = [record['language'] for record in data]
errors = [record['major_error_type'] for record in data]
import numpy as np
def is_single_answer(answer, options=('(A)', '(B)', '(C)', '(D)')):
    """Return 1 if *answer* mentions at most one option label, else 0.

    Args:
        answer: Text to inspect.
        options: Option labels to search for by substring match. Defaults to
            the four labels ``(A)`` through ``(D)``.

    Returns:
        ``1`` when zero or one of *options* occurs in *answer*; ``0`` when two
        or more distinct labels occur.
    """
    # Each label counts once, however many times it is repeated in the text.
    mentioned = sum(1 for option in options if option in answer)
    return 1 if mentioned <= 1 else 0
def calc_accuracy(refs, pres):
    """Score one prediction string against one reference string.

    Args:
        refs: Reference text for a single example.
        pres: Prediction text for the same example.

    Returns:
        ``1`` when *refs* is non-empty, occurs as a substring of *pres*, and
        ``is_single_answer(pres)`` is truthy; otherwise ``0``.
    """
    # '' is a substring of every string, so without this guard an empty
    # reference would be scored as a correct prediction.
    if not refs:
        return 0
    if refs in pres and is_single_answer(pres):
        return 1
    return 0
def _mean_or_nan(scores):
    """Return the mean of *scores*, or NaN when the list is empty."""
    # np.mean([]) also yields NaN but emits a RuntimeWarning; short-circuit
    # so a missing bucket prints `nan` without the warning.
    return np.mean(scores) if len(scores) else float('nan')


# Scores grouped as {language: {major error type: [0/1 score, ...]}}.
# Named `scores_by_lang` rather than `dict` so the builtin is not shadowed.
scores_by_lang = {}
all_score = []
for k, ref in enumerate(refs):
    acc = calc_accuracy(ref, pres[k])
    scores_by_lang.setdefault(langs[k], {}).setdefault(errors[k], []).append(acc)
    all_score.append(acc)

print(scores_by_lang)
print(f'all:{_mean_or_nan(all_score)}')

# Languages and error types included in the report, in output order.
# Each error entry is (label used in the printed line, key in the grouping).
_REPORT_LANGS = ('python', 'cpp', 'java')
_REPORT_ERRORS = (
    ('syntax', 'syntax error'),
    ('reference', 'reference error'),
    ('logic', 'logic error'),
    ('multiple', 'multiple error'),
)

for lang in _REPORT_LANGS:
    # .get() keeps the report running when a language or error type is absent
    # from the data; the affected line then prints `nan`.
    per_error = scores_by_lang.get(lang, {})
    combined = []
    for label, error_type in _REPORT_ERRORS:
        bucket = per_error.get(error_type, [])
        print(f'{lang}_{label}:{_mean_or_nan(bucket)}')
        combined += bucket
    # The per-language total covers only the four reported error types.
    print(f'{lang}_all:{_mean_or_nan(combined)}')