-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathAnalyzer.py
101 lines (85 loc) · 5.58 KB
/
Analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import numpy as np
from scipy import stats
from scipy.stats import chi2
from config import project_name
from helper_scripts.results_helper import add_result
class Analyzer:
    """Test whether smelly file pairs and co-changing file pairs are associated.

    A 2x2 contingency table is built from sets of file pairs and evaluated
    with a chi-squared test and Fisher's exact test. Every computed statistic
    is printed and handed to ``add_result`` under a key prefixed with ``name``.
    """

    def analyze_results(self, name, smelling_co_changing_pairs, all_smelly_pairs,
                        co_changed_pairs, all_pairs):
        """Derive the contingency table cells from the pair sets and run both tests.

        Args:
            name: Prefix used for the result keys passed to ``add_result``.
            smelling_co_changing_pairs: Pairs that are both smelly and co-changing.
            all_smelly_pairs: Every smelly pair found in the project.
            co_changed_pairs: Every co-changing pair found in the project.
            all_pairs: All pairs formed from all files changed in the relevant
                time frame.

        The arguments are used through ``intersection``/``difference``, so they
        are expected to be sets (or set-like objects).
        """
        # Only keep the smelly pairs that are part of 'all_pairs'.
        relevant_smelly_pairs = all_smelly_pairs.intersection(all_pairs)

        # Sizes of the four contingency table cells.
        # NOTE(review): 'co_changed_pairs' and 'smelling_co_changing_pairs' are
        # not intersected with 'all_pairs' -- presumably the caller guarantees
        # they are subsets of it; confirm against the caller.
        non_smelling_non_co_changing = len(
            all_pairs.difference(relevant_smelly_pairs).difference(co_changed_pairs))
        non_smelling_co_changing = len(co_changed_pairs.difference(relevant_smelly_pairs))
        smelling_non_co_changing = len(
            relevant_smelly_pairs.difference(smelling_co_changing_pairs))
        smelling_co_changing = len(smelling_co_changing_pairs)

        # Total amount of observations (sum of all four cells).
        n = (non_smelling_non_co_changing + non_smelling_co_changing
             + smelling_non_co_changing + smelling_co_changing)

        print("general information:")
        print("all changed pairs during the history:", len(all_pairs))
        print("smells in project:", len(all_smelly_pairs))
        print("smells contained in all pairs:", len(relevant_smelly_pairs))
        print("all co changes in project:", len(co_changed_pairs))
        print("\n")

        self.perform_chi2_analysis(name, non_smelling_non_co_changing, non_smelling_co_changing,
                                   smelling_non_co_changing, smelling_co_changing, n)
        self.perform_fisher(name, non_smelling_non_co_changing, non_smelling_co_changing,
                            smelling_non_co_changing, smelling_co_changing)

    @staticmethod
    def perform_fisher(name, non_smelling_non_co_changing_pairs, non_smelling_co_changing_pairs,
                       smelling_non_co_changing_pairs, smelling_co_changing_pairs):
        """Execute Fisher's exact test on the passed contingency table values.

        Prints the odds ratio and p-value and passes them to ``add_result``
        under ``name + "_fisher_odds"`` and ``name + "_fisher_pvalue"``.
        """
        table = [
            [non_smelling_non_co_changing_pairs, non_smelling_co_changing_pairs],
            [smelling_non_co_changing_pairs, smelling_co_changing_pairs],
        ]
        oddsratio, pvalue = stats.fisher_exact(table)

        print("Fisher values:")
        print("oddsratio:", oddsratio)
        print("p-value:", pvalue)
        print("\n")

        add_result(project_name, name + "_fisher_odds", oddsratio)
        add_result(project_name, name + "_fisher_pvalue", pvalue)

    def perform_chi2_analysis(self, name, non_smelling_non_co_changing_pairs,
                              non_smelling_co_changing_pairs, smelling_non_co_changing_pairs,
                              smelling_co_changing_pairs, n):
        """Execute the chi-squared test on the passed contingency table values.

        Args:
            name: Prefix used for the result keys passed to ``add_result``.
            non_smelling_non_co_changing_pairs: Cell count (row 0, column 0).
            non_smelling_co_changing_pairs: Cell count (row 0, column 1).
            smelling_non_co_changing_pairs: Cell count (row 1, column 0).
            smelling_co_changing_pairs: Cell count (row 1, column 1).
            n: Total number of observations, used to derive phi.

        Returns:
            None. If any cell is zero the test is skipped: a message is printed
            and nothing is passed to ``add_result``.
        """
        print("chi2 values:")
        print("non_smelling_non_co_changing_pairs:", non_smelling_non_co_changing_pairs)
        print("non_smelling_co_changing_pairs:", non_smelling_co_changing_pairs)
        print("smelling_non_co_changing_pairs:", smelling_non_co_changing_pairs)
        print("smelling_co_changing_pairs:", smelling_co_changing_pairs)

        cells = (non_smelling_non_co_changing_pairs, non_smelling_co_changing_pairs,
                 smelling_non_co_changing_pairs, smelling_co_changing_pairs)
        if 0 in cells:
            print('Cannot calculate chi2 due to zero in table')
            return

        # Calculate chi2; the fourth return value holds the expected frequencies.
        table = [
            [non_smelling_non_co_changing_pairs, non_smelling_co_changing_pairs],
            [smelling_non_co_changing_pairs, smelling_co_changing_pairs],
        ]
        chi2_stat, p_val, dof, expected = stats.chi2_contingency(table)

        # Critical value of the chi-squared distribution at the 5% significance level.
        significance = 0.05
        critical_value = chi2.ppf(1 - significance, dof)

        # Phi value (effect size).
        # NOTE(review): chi2_contingency is called with its default arguments;
        # per the SciPy docs that applies Yates' continuity correction when
        # dof == 1, so phi is derived from the corrected statistic -- confirm
        # this is intended.
        phi = np.sqrt(chi2_stat / n)
        odds = self.odds_ratio(*cells)

        print("===Chi2 Stat vs critical value===")
        print('chi=%.6f, critical value=%.6f\n' % (chi2_stat, critical_value))
        print('oddsratio: %.6f' % odds)
        print("\n")
        print("===Phi value[0.1: small | 0.3: average | 0.5: large]===")
        print(phi)
        print("\n")
        print("===Degrees of Freedom===")
        print(dof)
        print("\n")
        print("===P-Value===")
        print(p_val)
        print("\n")
        # The array printed here is the expected-frequency table computed by
        # chi2_contingency, not the observed table (which is printed above).
        print("===Expected Frequencies===")
        print(expected)

        add_result(project_name, name + "_chi_phi", phi)
        add_result(project_name, name + "_chi_dof", dof)
        add_result(project_name, name + "_chi_pvalue", p_val)
        add_result(project_name, name + "_chi_odds", odds)
        add_result(project_name, name + "_chi_chi", chi2_stat)
        add_result(project_name, name + "_chi_crit", critical_value)

    @staticmethod
    def odds_ratio(non_smelling_non_co_changing_pairs, non_smelling_co_changing_pairs,
                   smelling_non_co_changing_pairs, smelling_co_changing_pairs):
        """Calculate the odds ratio on the passed contingency table values.

        Returns:
            ``(a * d) / (b * c)`` for the table ``[[a, b], [c, d]]``, or ``0``
            when the denominator ``b * c`` is zero.
        """
        denominator = non_smelling_co_changing_pairs * smelling_non_co_changing_pairs
        if denominator == 0:
            # NOTE(review): 0 is a placeholder kept for backward compatibility;
            # the ratio is actually undefined when the denominator is zero.
            return 0
        return (non_smelling_non_co_changing_pairs * smelling_co_changing_pairs) / denominator