-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
executable file
·70 lines (64 loc) · 2.71 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
import numpy as np
import random
from triager.classifier import models, kernels, selectors, tests, utils
from triager.classifier.parsers import Label
from triager import parsers
def main():
    """Train and evaluate an SVM document classifier, printing the results.

    Steps, in order:

    1. Parse documents with the configured parser.
    2. Shuffle them with a fixed seed so the train/CV split is repeatable.
    3. Drop unlabeled documents, then filter by ``min_class_occur`` via
       ``utils.filter_docs``.
    4. Split the remaining documents 70/30 into train and
       cross-validation (CV) sets.
    5. Train the model and print accuracy and macro-average
       precision/recall for both sets.

    Takes no arguments and returns nothing; all output goes to stdout.
    Output uses the single-argument ``print(...)`` form, which emits the
    same text under the Python 2 print statement and the Python 3 print
    function.
    """
    # Parse data into documents.
    # Alternative data sources, kept for quick switching:
    #parser = parsers.MRSParser("data/prop/MRs/", project_match="OPW.*")
    #parser = parsers.BugzillaParser("data/opensource/mozilla_firefox")
    parser = parsers.CSVBugzillaParser("data/opensource/netbeans")
    print("Parsing data by parser: %s" % parser)
    documents = parser.parse()

    # Shuffle documents with a fixed seed so every run sees the same split,
    # then re-seed without an argument so any later use of `random` is not
    # tied to this fixed seed.
    random.seed(4)  # use 3 for MRS
    random.shuffle(documents)
    random.seed()

    # Filter unlabeled documents and documents that are not labeled with
    # a class that occurs at least `min_class_occur` times.
    # Each progress message is printed *before* its step runs, so the
    # output reflects what is currently in progress.
    print("Filtering unlabeled documents...")
    documents = [doc for doc in documents if doc.label]
    min_class_occur = 30
    print("Filtering documents labeled by class that occurs less than "
          + "%s times." % min_class_occur)
    documents = utils.filter_docs(documents, min_class_occur=min_class_occur)

    # Split between train and CV data: the first 70% (rounded up) of the
    # shuffled documents are used for training, the rest for validation.
    n = len(documents)
    split_pct = 7/10.0
    split_x = int(np.ceil(n*split_pct))
    docs_train = documents[:split_x]
    docs_cv = documents[split_x:]

    # Create model
    selector = selectors.TFIDFDecorator(selectors.StopWordsDecorator(
        selectors.BasicSelector()))
    kernel = kernels.GaussianKernel()
    model = models.SVMModel(feature_selector=selector, kernel=kernel, C=240)
    print("Created model: %s." % model)

    # Train model
    print("Training model on %s instances..." % len(docs_train))
    model.train(docs_train)
    print("Number of classes is: %s" % len(model.feature_selector.labels))

    # Test model (accuracy)
    print("Computing accuracy for train set (size=%s)..." % len(docs_train))
    accuracy_train = tests.accuracy(model, docs_train)
    print("Computing accuracy for CV set (size=%s)..." % len(docs_cv))
    accuracy_cv = tests.accuracy(model, docs_cv)
    print("Accuracy of train set is: '%.4f'." % accuracy_train)
    print("Accuracy of CV set is: '%.4f'." % accuracy_cv)

    # Test model (precision and recall)
    # NOTE(review): the two-placeholder format strings below imply that
    # tests.precision_and_recall returns a (precision, recall) 2-tuple;
    # confirm against its implementation.
    print("Computing macro-average precision and recall for train set...")
    pr_train = tests.precision_and_recall(model, docs_train)
    print("Computing macro-average precision and recall for CV set...")
    pr_cv = tests.precision_and_recall(model, docs_cv)
    print("Average precision and recall of train set is: '%.4f' and '%.4f'."
          % pr_train)
    print("Average precision and recall of CV set is: '%.4f' and '%.4f'."
          % pr_cv)
if __name__ == "__main__":
    # Script entry point: run the full train/evaluate pipeline.
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is an accepted way to stop a run early; exit quietly
        # instead of dumping a traceback.
        pass
    finally:
        # Always print the closing separator, whether the run completed,
        # was interrupted, or is propagating another exception.
        print("----------")