-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathbnf
executable file
·114 lines (102 loc) · 7.17 KB
/
bnf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python
from BNfinder.MDL import MDL
from BNfinder.MIT import MIT
from BNfinder.BDE import BDE
from BNfinder.data import dataset
from BNfinder import util
import sys
from optparse import OptionParser

# Names accepted by -s/--score.  Each one is the name of a scoring class
# imported at the top of this script (BDE, MDL, MIT).
_SCORE_NAMES = ("BDE", "MDL", "MIT")


def _build_parser():
    """Return the OptionParser describing the bnf command line."""
    parser = OptionParser()
    # type="choice" rejects unknown scoring functions at parse time, so the
    # value can later be mapped to a class without evaluating user input.
    parser.add_option("-s", "--score", dest="score", default="BDE",
                      type="choice", choices=list(_SCORE_NAMES),
                      help="Scoring function: BDE (default) or MDL or MIT")
    parser.add_option("-d", "--data-factor", dest="df", default=1.0,
                      type="float",
                      help="Factor multiplying the data set")
    parser.add_option("-e", "--expr", dest="expr",
                      help="Expression data file (can be used with other data types as well)")
    parser.add_option("-t", "--txt", dest="txt",
                      help="Output file with the suboptimal parents sets")
    parser.add_option("-n", "--net", dest="net",
                      help="Output file with the network structure")
    parser.add_option("-c", "--cpd", dest="cpd",
                      help="Output file with the conditional probability distributions")
    parser.add_option("-b", "--bif", dest="bif",
                      help="Output file with the optimal network in BIF format")
    parser.add_option("-v", "--verbose", dest="verbose",
                      action="store_true", default=False,
                      help="Print comments")
    parser.add_option("-p", "--prior-pseudocount", dest="prior",
                      help="Total pseudocount number in Dirichlet priors of BDE score (default: 1 pseudocount for each possible value vector)")
    parser.add_option("-i", "--suboptimal", dest="n_min", default=1,
                      type="int",
                      help="Number of computed suboptimal parents sets")
    parser.add_option("-m", "--min-empty", dest="min_empty", default=None,
                      type="float",
                      help="Reported suboptimal parents sets' minimal probability ratio to the empty set")
    parser.add_option("-o", "--min-optimal", dest="min_optim", default=None,
                      type="float",
                      help="Reported suboptimal parents sets' minimal probability ratio to the optimal set")
    parser.add_option("-l", "--limit", dest="limit",
                      help="Limit of the size of parents subsets")
    parser.add_option("-f", "--fraction", dest="frac", type="float",
                      default=None,
                      help="Minimum weight of a parent to be considered in a parent set.")
    parser.add_option("-r", "--fpr", dest="fpr", type="float", default=None,
                      help="Type I error rate (expected proportion of false positive edges; procedure is switched off when not specified)")
    parser.add_option("-x", "--max-permutations", dest="max_tries",
                      type="int", default=None,
                      help="Maximal number of permutations in type I error rate predetermination")
    parser.add_option("-a", "--chi", dest="chi", type="float", default=.9999,
                      help="alpha value for the chi-square distribution used in MIT score (default=.9999)")
    parser.add_option("-g", "--sloops", dest="sloops", action="store_true",
                      default=False,
                      help="Allow self-loops in Dynamic Bayesian Networks (no self-loops by default)")
    parser.add_option("-k", "--cpu", dest="cores", type="int", default=0,
                      help="Number of cores to use for parallelization")
    parser.add_option("-j", "--subset", dest="subset",
                      help="Input file with subset of variables (divided by space character). "
                           "The feature is useful when the inference complexity is too high, "
                           "allowing distributed usage of BNFinder. After all the variables are "
                           "processed (including regulators), put resulting 'genes_*' files into "
                           "one folder and use 'concat' instead of file name. If only particular "
                           "variables are subject for parents set inference it is advisable to "
                           "simply edit input data file instead of using this argument")
    parser.add_option("-q", "--algorithm", dest="alg", default="setwise",
                      help="Parallelization algorithm: 'setwise' (default, uses each provided "
                           "core to compute parents set of one variable before proceeding "
                           "further) or 'hybrid' (evenly distributes cores between variables, "
                           "it is recommended to use when underlying network is supposed to be "
                           "homogeneous with respect to the number of parents per variable, in "
                           "cases when computational complexity is low or small parents set "
                           "limit is used)")
    return parser


def _load_dataset(path, verbose):
    """Load the data set stored at *path*.

    The new file format is tried first; when parsing raises KeyError the
    file is re-read with the deprecated old-format reader.  Each attempt
    opens the file in a ``with`` block so the handle is always closed.
    NOTE(review): assumes both readers consume the file before returning.
    """
    if verbose:
        # No newline here: the following status message continues this line.
        sys.stdout.write("Loading data from: %s ... " % (path,))
    try:
        with open(path) as handle:
            return dataset(path).fromNewFile(handle)
    except KeyError:
        if verbose:
            print("Not a new file format. The old format is deprecated!")
        with open(path) as handle:
            return dataset(path).fromFile(handle)


def main(argv=None):
    """Run the bnf command-line tool.

    Parses the command line, loads the expression data, learns a network
    with the selected scoring function and writes whichever output files
    were requested (-t, -n, -c, -b).

    argv -- list of command-line arguments without the program name;
            None (the default) makes optparse read sys.argv[1:].

    Prints a hint and returns without doing anything else when no
    expression file was given.  An unknown scoring function name is
    rejected by the option parser (SystemExit with status 2).
    """
    parser = _build_parser()
    options, _unused_args = parser.parse_args(argv)

    if options.expr is None:
        print("You must provide an expression file name. Run bnf --help for more options.")
        return

    d = _load_dataset(options.expr, options.verbose)
    # NOTE: a guard rejecting the MIT score for continuous variables used to
    # sit here; it was disabled in the original script and stays disabled.
    if options.verbose:
        print("done\nLearning network  %s  with %s scoring function"
              % (d.name, options.score))

    # Explicit name -> class table instead of eval() on a command-line string.
    score_classes = {"BDE": BDE, "MDL": MDL, "MIT": MIT}
    scorer = score_classes[options.score](data_factor=options.df,
                                          prior=options.prior,
                                          chi_alpha=options.chi,
                                          sloops=options.sloops)
    total_score, g, subpars = d.learn(score=scorer,
                                      data_factor=options.df,
                                      prior=options.prior,
                                      verbose=options.verbose,
                                      n_min=options.n_min,
                                      limit=options.limit,
                                      min_empty=options.min_empty,
                                      min_optim=options.min_optim,
                                      cores=options.cores,
                                      alg=options.alg,
                                      subset=options.subset,
                                      fpr=options.fpr,
                                      max_tries=options.max_tries)
    if options.verbose:
        print("Total score of optimal network: %s" % (total_score,))
        # NOTE(review): the SIF dump is treated as verbose-only output - confirm.
        print(g.to_SIF())

    if options.txt:
        d.write_txt(subpars, options.txt)
        if options.verbose:
            print("Suboptimal parents sets written to: %s" % (options.txt,))

    if options.net:
        # With a single parents set per variable only the optimal network is
        # written; otherwise the suboptimal sets are passed to to_SIF as well.
        if options.n_min == 1:
            net_str = g.to_SIF()
        else:
            net_str = g.to_SIF(subpars)
        with open(options.net, "w") as net_file:
            net_file.write(net_str)
        if options.verbose:
            print("Network graph written to: %s" % (options.net,))
            # NOTE(review): echoing the network is treated as verbose-only - confirm.
            print(net_str)

    if options.cpd:
        # A non-zero -f selects the weighted-edge graph; otherwise the best
        # network is written.
        if options.frac:
            cpd_graph = g.weighted_edges(subpars, options.frac)
        else:
            cpd_graph = g
        with open(options.cpd, "w") as cpd_file:
            util.pprint(cpd_file, d.to_cpd(cpd_graph, options.prior))
        if options.verbose:
            print("Conditional probability distributions written to: %s"
                  % (options.cpd,))

    if options.bif:
        d.write_bif(g, options.bif, options.prior,
                    ["Network graph learned with %s scoring function from expression data file '%s'"
                     % (options.score, options.expr)])
        if options.verbose:
            print("Bayesian network written to: %s" % (options.bif,))


if __name__ == "__main__":
    main()