-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmaineQTL.R
100 lines (74 loc) · 2.76 KB
/
maineQTL.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#First revision of Andy's pseudocode for unimputed BRCA SNP and Expression Data
#2/6/13
#NWK
library(sqldf)
library(plyr)
library(BatchExperiments)
library(MatrixEQTL)
###USAGE maineQTL.R <out.files> <root.dir> <out-dir> <annofile> <snp.expfile> <samples> <fold-validation> <time> <queue> <memory> <CISTRA|CIS>
makeClusterFunctionsLSF("~/lsf-standard.tmpl")
oargs <- commandArgs(trailingOnly=TRUE)
args <- list()
args$OUT.FILES <- oargs[1]
args$ROOT.DIR <- oargs[2]
args$OUT.DIR <- oargs[3]
args$ANNOFILE <- oargs[4]
args$SNP.EXPFILE <- oargs[5]
args$SAMPLES <- as.integer(oargs[6])
args$FOLD.VALIDATION <- as.integer(oargs[7])
args$TIME <- oargs[8]
args$QUEUE <- oargs[9]
args$MEMORY <- oargs[10]
args$CISTRA <- oargs[11]
root.dir <- args$ROOT.DIR
out.dir <- args$OUT.DIR
setwd(root.dir)
annofile <- args$ANNOFILE
snp.expdata <- args$SNP.EXPFILE
###Function for cross validation
mat.train <- function(i,snp.exploc,anno.loc,train.indices,MEQTL.params){
load(snp.exploc)
load(anno.loc)
total.ids <- snp.exp$snps$nCols()
snp.exp$snps$ColumnSubsample(train.indices)
snp.exp$gene$ColumnSubsample(match(colnames(snp.exp$snps),colnames(snp.exp$gene)))
with(MEQTL.params,
Matrix_eQTL_main(
snps=snp.exp$snps,
gene=snp.exp$gene,
output_file_name=paste(output.file.name.tra,i,".txt",sep=""),
output_file_name.cis=paste(output.file.name.cis,i,".txt",sep=""),
useModel=useModel,
verbose=verbose,
pvOutputThreshold=pvOutputThreshold.tra,
pvOutputThreshold.cis=pvOutputThreshold.cis,
snpspos=annolist$snp.anno,
genepos=annolist$exp.anno,
cisDist=cisDist,
pvalue.hist=pvalue.hist
)
)
}
samples <- args$SAMPLES
train.indices <- chunk(rep(1:samples,args$FOLD.VALIDATION),n.chunks=args$FOLD.VALIDATION)
test.indices <- chunk(1:samples,chunk.size=ceiling(samples/args$FOLD.VALIDATION))
train.indices <- mapply(FUN=function(x,y)x[-y],train.indices,test.indices,SIMPLIFY=F)
MEQTL.params <- list(
output.file.name.tra=paste(out.dir,args$OUT.FILES,"_trans",sep=""),
output.file.name.cis=paste(out.dir,args$OUT.FILES,"_cis",sep=""),
useModel=modelANOVA,
verbose=T,
pvOutputThreshold.tra=ifelse(args$CISTRA=="CISTRA",1e-8,0),
pvOutputThreshold.cis=1e-8,
cisDist=1e6,
pvalue.hist=F
)
m.dir <- tempfile(paste("meqtl.res",args$OUT.FILES,sep=""),tmpdir=out.dir)
registry.name <- paste("meqtl_reg_",args$OUT.FILES,sep="")
MEQTL.reg <- makeRegistry(registry.name,file.dir=m.dir,packages="MatrixEQTL")
batchMap(MEQTL.reg,mat.train,train.indices=train.indices,i=1:length(train.indices),more.args=list(
MEQTL.params=MEQTL.params,
snp.exploc=snp.expdata,
anno.loc=annofile))
submitJobs(MEQTL.reg,resources=list(queue=args$QUEUE,memory=args$MEMORY,time=args$TIME,threads=1))
Sys.sleep(35)