-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathevaluation.py
49 lines (38 loc) · 1.32 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def calculate_accuracy(df):
    """Return the fraction of rows whose normalized TITLE equals DENOVO.

    Every ``'I'`` in the ``TITLE`` column is replaced by ``'L'`` and the
    result is stored on *df* as a new ``TITLE_MODIFIED`` column (the
    caller's frame is modified in place). That column is then compared
    row by row against ``DENOVO``.

    Args:
        df: DataFrame providing string columns ``TITLE`` and ``DENOVO``.

    Returns:
        Number of matching rows divided by the total number of rows, as a
        float. ``0.0`` when *df* has no rows.
    """
    # NOTE(review): presumably I and L are merged because the two residues
    # cannot be told apart by the upstream tool -- confirm with the data owner.
    # regex=False keeps this a literal substitution on every pandas version.
    df['TITLE_MODIFIED'] = df['TITLE'].str.replace('I', 'L', regex=False)

    total = len(df)
    if total == 0:
        # An empty prediction table would otherwise raise ZeroDivisionError.
        return 0.0

    matches = df['TITLE_MODIFIED'] == df['DENOVO']
    return int(matches.sum()) / total
def plot_distributions(df):
    """Show side-by-side histograms of the Score and PPM Difference columns.

    A single 12x5 figure is created with two panels in one row: ``Score``
    on the left (100 bins) and ``PPM Difference`` on the right (30 bins),
    each drawn with a KDE overlay. The figure is displayed via
    ``plt.show()``; nothing is returned.

    Args:
        df: DataFrame providing the ``Score`` and ``PPM Difference`` columns.
    """
    # (column to plot, panel title, number of histogram bins)
    panels = (
        ('Score', 'Score Distribution', 100),
        ('PPM Difference', 'PPM Difference Distribution', 30),
    )

    plt.figure(figsize=(12, 5))
    for slot, (column, heading, bin_count) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        sns.histplot(df[column], kde=True, bins=bin_count)
        plt.title(heading)
        plt.xlabel(column)
        plt.ylabel('Frequency')

    plt.tight_layout()
    plt.show()
def main():
    """Command-line entry point.

    Parses the required ``--novorst`` option, loads that tab-separated
    file into a DataFrame, prints the accuracy with two decimal places,
    and then displays the distribution plots.
    """
    cli = argparse.ArgumentParser(
        description='Evaluate predictions and plot distributions.'
    )
    cli.add_argument(
        '--novorst',
        required=True,
        help='Path to the TSV file containing predictions',
    )
    options = cli.parse_args()

    # Load the tab-separated predictions table.
    predictions = pd.read_csv(options.novorst, sep='\t')

    # Report accuracy first, then open the plots.
    print(f'Accuracy: {calculate_accuracy(predictions):.2f}')
    plot_distributions(predictions)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()