diff --git a/galaxy/metaMS/Readme.txt b/galaxy/metaMS/Readme.txt new file mode 100644 index 0000000..45b3433 --- /dev/null +++ b/galaxy/metaMS/Readme.txt @@ -0,0 +1,46 @@ +####----------------------------------------------------------------------#### +#### GC-MS data processing version 0.99.5 #### +####----------------------------------------------------------------------#### + +I. Date: 2015-05-26 + +II. Author and maintainer: + + Yann GUITTON + LINA - équipe Combi (CNRS, Univ-Nantes, EMN, INRIA) + IRISA - équipe Dyliss (CNRS, Univ-Rennes 1, INRIA) + Phone: +33 2 51 12 53 90 + E-mail: yann.guitton@irisa.fr / yann.guitton@gmail.com + +III. Funding + + Developed within IDEALG project (http://www.idealg.ueb.eu/versionAnglaise/) + +IV. Usage restrictions + + Use of this tool is restricted to the service conditions of the MetaboHUB-IFB infrastructures. + For any question regarding the use of these services, please contact: yann.guitton@univ-nantes.fr + +V. Installation + + 4 files are required for installation: + + 1) 'README.txt' + Instructions for installation + + 2) 'idealg_metams_runGC.xml' + Configuration file; to be put into the './galaxy-dist/tools/' directory + + 2.png files for illustration + + 3) 'metams.R' + Wrapper code written in R aimed at launching the runGC function from the metaMS package given the arguments entered by the user through the Galaxy interface + + 4) 'metaMS R package ' + The 'metaMS' package requires dependencies and can be installed with + source("http://bioconductor.org/biocLite.R") + biocLite("metaMS") + + This code is for installation of the Galaxy module on the Workflow4metabolomics.org MetaboHUB-IFB platform only and must not be distributed without the author agreement + + + diff --git a/galaxy/metaMS/idealg_metams_runGC.xml b/galaxy/metaMS/idealg_metams_runGC.xml new file mode 100644 index 0000000..4bf4a5c --- /dev/null +++ b/galaxy/metaMS/idealg_metams_runGC.xml @@ -0,0 +1,340 @@ + + + + Rscript + + + GC-MS data preprocessing using metaMS package + + + metams.r + #if $inputs.input == "lib": + library $__app__.config.user_library_import_dir/$__user_email__/$inputs.library + ##library /usr/local/galaxy01/sharevm/Data_for_Galaxy/$inputs.library + #elif $inputs.input == "zip_file": + zipfile $zip_file + #elif $inputs.input == "xset": + xset $inputs.xset + #end if + + xfunction runGC + #if $settings.setting == "gcdefault": + settings "default" + #elif $settings.setting == "usersettings": + settings "User_defined" + fwhm $settings.fwhm + rtdiff $settings.rtdiff + minfeat $settings.minfeat + simthreshold $settings.simthreshold + minclassfraction $settings.minclassfraction + minclasssize $settings.minclasssize + #end if + #if $settings.options_rtrange.option == "show": + rtrange "c($settings.options_rtrange.rtrange)" + #end if + #if $settings.options_rtrange.option == "hide": + rtrange "NULL" + #end if + #if $settings.options_ri.option == "show": + ri $settings.options_ri.ri_input + #end if + #if $settings.options_ri.option == "hide": + ri "NULL" + #end if + #if $settings.options_db.option == "show": + db $settings.options_db.db_input + #end if + #if $settings.options_db.option == "hide": + db "NULL" + #end if + nSlaves \${GALAXY_SLOTS:-1} + unkn "c($unkn)" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**Author(s)** Ron Wehrens (ron.wehrens@gmail.com), Georg Weingart, Fulvio Mattivi + +.. class:: infomark + +**Galaxy wrapper and scripts developpers** Guitton Yann IDEALG Project. CNRS-IRISA/LINA, Rennes, France, yann.guitton@irisa.fr + +.. class:: infomark + +**Please cites** + + metaMS : Wehrens, R.; Weingart, G.; Mattivi, F. Journal of Chromatography B. + + xcms : Smith, C. A.; Want, E. J.; O’Maille, G.; Abagyan, R.; Siuzdak, G. Anal. Chem. 2006, 78, 779–787. + + CAMERA : Kuhl, C.; Tautenhahn, R.; Böttcher, C.; Larson, T. R.; Neumann, S. Analytical Chemistry 2012, 84, 283–289. + + +--------------------------------------------------- + +==================== +metaMS.runGC +==================== + +----------- +Description +----------- +metaMS.runGC is a function dedicated to GCMS data processing from converted files to the generation of pseudospectra (compounds) table. + +**Process:** +Each of the converted data (cdf, mzML...) is profiled by a combination of xcms and CAMERA functions. Then all the mass spectra of detected peaks are compared and clustered. +For more details see metaMS : Wehrens, R.; Weingart, G.; Mattivi, F. Journal of Chromatography B (10.1016/j.jchromb.2014.02.051) link_ + +.. _link: http://www.sciencedirect.com/science/article/pii/S1570023214001548 + +**Main outputs:** +A PeakTable is generated with one line per "compound" and one column per sample. +A dataMatrix.csv file is generated and can be used for PCA or for further analysis. +A peakspectra.mps file is generated that contains all the spectra of the detected compounds in MSP format. That file can be used for database search online (Golm, MassBank) or locally (NIST MSSEARCH) +for NIST search a tutorial is available here_. + +.. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToUseNIST_V01.pdf + +3 PDF are generated for BPCs, TICs and EICs of detected compounds in all samples + + + +----------------- +Workflow position +----------------- + + +**Upstream tools** + +You can start from here or use result file from : + +========================= ==================== ======= ========== +Name output file format parameter +========================= ==================== ======= ========== +xcms.xcmsSet xset.RData RData RData file +========================= ==================== ======= ========== + + + +**Downstream tools** + ++---------------------------+---------------------------------------+--------+ +| Name | Output file | Format | ++===========================+=======================================+========+ +|Determine Vdk or Lowess | dataMatrix.tsv | Tabular| ++---------------------------+---------------------------------------+--------+ +|Normalization Vdk/Lowess | dataMatrix.tsv | Tabular| ++---------------------------+---------------------------------------+--------+ +|Anova | dataMatrix.tsv | Tabular| ++---------------------------+---------------------------------------+--------+ +|PCA | dataMatrix.tsv | Tabular| ++---------------------------+---------------------------------------+--------+ +|Hierarchical Clustering | dataMatrix.tsv | Tabular| ++---------------------------+---------------------------------------+--------+ +|Golm Metabolome Search | peakspectra.msp | Text | ++---------------------------+---------------------------------------+--------+ + +**General schema of the metabolomic workflow for GCMS** + +.. image:: ./static/images/gcms_workflow.png + +----------- +Input files +----------- + +If you choose to use results from xcms.xcmsSet + ++---------------------------+------------+ +| Parameter : num + label | Format | ++===========================+============+ +| 1 : RData file | RData | ++---------------------------+------------+ + +Or converted GCMS files (mzML, CDF...) in your local libray + + +---------- +Parameters +---------- + +Parameters are described in metaMS R package and mainly correspond to those of xcms.xcmsSet + +---------- +Outputs +---------- + +The output file **dataMatrix.tsv** is an tabular file. You can continue your analysis using it in the statistical tools. +The output file **peakspectra.msp** is a text file. You can continue your analysis using it in the golm search tool. Or you can load it in your +personnal NIST MSsearch program (c:/NISTXX/mssearch/nistms.exe) that tool is in general installed by default on GCMS apparatus. Tutorial available here_. + +.. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToUseNIST_V01.pdf + + +---------------- +Working Exemple +---------------- +.. class:: warningmark + +**Reference Data for testing are taken from:** +Dittami,S.M. et al. (2012) Towards deciphering dynamic changes and evolutionary mechanisms involved in the adaptation to low salinities in Ectocarpus (brown algae): Adaptation to low salinities in Ectocarpus. The Plant Journal + +Input files +----------- + + | **Zip file** -> GCMS_Idealg_FWS_SWS.zip + +Parameters +---------- + + | Settings -> **User_Defined** + | RT range option -> **show** + | RTrange: -> **4.5,45** + | ...all default option values + + +Output files +------------ + + | **1) rungc.RData: RData file** + + | **2) Example of BPCs_raw.pdf (Base Peak Chromatograms) :** + +.. image:: ./static/images/metaMS_BPCs.png + + + + + diff --git a/galaxy/metaMS/lib_metams.r b/galaxy/metaMS/lib_metams.r new file mode 100644 index 0000000..a9ac5b1 --- /dev/null +++ b/galaxy/metaMS/lib_metams.r @@ -0,0 +1,506 @@ +# lib_metams.r version 0.99.5 +# R function for metaMS runGC under W4M +# author Yann GUITTON CNRS IRISA/LINA Idealg project 2014-2015 + + +##ADDITIONS FROM Y. Guitton +getBPC <- function(file,rtcor=NULL, ...) { + object <- xcmsRaw(file) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) + +} + +getBPC2s <- function (files, pdfname="BPCs.pdf", rt = c("raw","corrected"), scanrange=NULL) { +require(xcms) + + + + #create sampleMetadata, get sampleMetadata and class + sampleMetadata<-xcms:::phenoDataFromPaths(files) + class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( sampleMetadata[,1]==class[i]) + } + + N <- dim(sampleMetadata)[1] + + + + TIC <- vector("list",N) + + + for (j in 1:N) { + + cat(files[j],"\n") + TIC[[j]] <- getBPC(files[j]) + #good for raw + # seems strange for corrected + #errors if scanrange used in xcmsSetgeneration + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[j]] else + rtcor <- NULL + TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + } + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in BPCs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + + ##plot start + + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + print(paste(class[k],"vs",class[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(class)==2){ + k=1 + l=2 + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==2 + if (length(class)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 + dev.off() + +# invisible(TIC) +} + +getTIC <- function(file,rtcor=NULL) { + object <- xcmsRaw(file) + cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) +} + +## +## overlay TIC from all files in current folder or from xcmsSet, create pdf +## +getTIC2s <- function(files, pdfname="TICs.pdf", rt=c("raw","corrected")) { + + #create sampleMetadata, get sampleMetadata and class + sampleMetadata<-xcms:::phenoDataFromPaths(files) + class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( sampleMetadata[,1]==class[i]) + } + + N <- dim(sampleMetadata)[1] + TIC <- vector("list",N) + + for (i in 1:N) { + cat(files[i],"\n") + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[i]] else + rtcor <- NULL + TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + } + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in TICs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + + ##plot start + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + print(paste(class[k],"vs",class[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(class)==2){ + + + k=1 + l=2 + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==2 + if (length(class)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 + dev.off() + + + + + + # invisible(TIC) +} + + +##addition for quality control of peak picking +#metaMS EIC and pspectra plotting option +#version 20150512 +#only for Galaxy + +plotUnknowns<-function(resGC, unkn=""){ + + + ##Annotation table each value is a pcgrp associated to the unknown + ##NOTE pcgrp index are different between xcmsSet and resGC due to filtering steps in metaMS + ##R. Wehrens give me some clues on that and we found a correction + + mat<-matrix(ncol=length(resGC$xset), nrow=dim(resGC$PeakTable)[1]) + + for (j in 1: length(resGC$xset)){ + test<-resGC$annotation[[j]] + print(paste("j=",j)) + for (i in 1:dim(test)[1]){ + if (as.numeric(row.names(test)[i])>dim(mat)[1]){ + next + } else{ + mat[as.numeric(row.names(test)[i]),j]<-test[i,1] + } + } + + } + colnames(mat)<-colnames(resGC$PeakTable[,c((which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI")[length(which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI"))]+1):dim(resGC$PeakTable)[2])]) + #debug + + # print(dim(mat)) +# print(mat[1:3,]) +# write.table(mat, file="myannotationtable.tsv", sep="\t", row.names=FALSE) + #correction of annotation matrix due to pcgrp removal by quality check in runGCresult +#matrix of correspondance between an@pspectra and filtered pspectra from runGC +allPCGRPs <- +lapply(1:length(resGC$xset), +function(i) { + an <- resGC$xset[[i]] + huhn <- an@pspectra[which(sapply(an@pspectra, length) >= + metaSetting(resGC$settings, + "DBconstruction.minfeat"))] + matCORR<-cbind(1:length(huhn), match(huhn, an@pspectra)) + +}) + + if (unkn[1]==""){ + #plot EIC and spectra for all unknown for comparative purpose + + + par (mar=c(5, 4, 4, 2) + 0.1) + for (l in 1:dim(resGC$PeakTable)[1]){ #l=2 + #recordPlot + perpage=3 #if change change layout also! + num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page + my.plots <- vector(num.plots, mode='list') + dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf + # par(mfrow=c(perpage,2)) + layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), + widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) + # layout.show(6) + oma.saved <- par("oma") + par(oma = rep.int(0, 4)) + par(oma = oma.saved) + o.par <- par(mar = rep.int(0, 4)) + on.exit(par(o.par)) + stop=0 #initialize + for (i in 1:num.plots) { + start=stop+1 + stop=start+perpage-1 # + for (c in start:stop){ + if (c <=dim(mat)[2]){ + + #get sample name + sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) + + #remove .cdf, .mzXML filepattern + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", + "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), + collapse = "|") + sampname<-gsub(filepattern, "",sampname) + + title1<-paste("unknown", l,"from",sampname, sep=" ") + an<-resGC$xset[[c]] + + par (mar=c(0, 0, 0, 0) + 0.1) + plot.new() + box() + text(0.5, 0.5, title1, cex=2) + if (!is.na(mat[l,c])){ + pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2] + if (pcgrp!=mat[l,c]) print ("pcgrp changed") + par (mar=c(3, 2.5, 3, 1.5) + 0.1) + plotEICs(an, pspec=pcgrp, maxlabel=2) + plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) + }else + { + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + } + } + } + # my.plots[[i]] <- recordPlot() + } + graphics.off() + + # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE) + # for (my.plot in my.plots) { + # replayPlot(my.plot) + # } + # my.plots + # graphics.off() + + }#end for l +}#end if unkn="" +else{ + par (mar=c(5, 4, 4, 2) + 0.1) + l=unkn + if (length(l)==1){ + #recordPlot + perpage=3 #if change change layout also! + num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page + my.plots <- vector(num.plots, mode='list') + + dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf + # par(mfrow=c(perpage,2)) + layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), + widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) + # layout.show(6) + oma.saved <- par("oma") + par(oma = rep.int(0, 4)) + par(oma = oma.saved) + o.par <- par(mar = rep.int(0, 4)) + on.exit(par(o.par)) + stop=0 #initialize + for (i in 1:num.plots) { + start=stop+1 + stop=start+perpage-1 # + for (c in start:stop){ + if (c <=dim(mat)[2]){ + + #get sample name + sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) + + #remove .cdf, .mzXML filepattern + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", + "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), + collapse = "|") + sampname<-gsub(filepattern, "",sampname) + + title1<-paste("unknown", l,"from",sampname, sep=" ") + an<-resGC$xset[[c]] + + par (mar=c(0, 0, 0, 0) + 0.1) + plot.new() + box() + text(0.5, 0.5, title1, cex=2) + if (!is.na(mat[l,c])){ + pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2] + if (pcgrp!=mat[l,c]) print ("pcgrp changed") + par (mar=c(3, 2.5, 3, 1.5) + 0.1) + plotEICs(an, pspec=pcgrp, maxlabel=2) + plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) + }else + { + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + } + } + } + # my.plots[[i]] <- recordPlot() + } + graphics.off() + + # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE) + # for (my.plot in my.plots) { + # replayPlot(my.plot) + # } + # my.plots + # graphics.off() + }else{ + par (mar=c(5, 4, 4, 2) + 0.1) + for (l in 1:length(unkn)){ #l=2 + #recordPlot + perpage=3 #if change change layout also! + num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page + my.plots <- vector(num.plots, mode='list') + dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",unkn[l],".pdf", sep="")) #A4 pdf + # par(mfrow=c(perpage,2)) + layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), + widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) + # layout.show(6) + oma.saved <- par("oma") + par(oma = rep.int(0, 4)) + par(oma = oma.saved) + o.par <- par(mar = rep.int(0, 4)) + on.exit(par(o.par)) + stop=0 #initialize + for (i in 1:num.plots) { + start=stop+1 + stop=start+perpage-1 # + for (c in start:stop){ + if (c <=dim(mat)[2]){ + + #get sample name + sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) + + #remove .cdf, .mzXML filepattern + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", + "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), + collapse = "|") + sampname<-gsub(filepattern, "",sampname) + + title1<-paste("unknown",unkn[l],"from",sampname, sep=" ") + an<-resGC$xset[[c]] + + par (mar=c(0, 0, 0, 0) + 0.1) + plot.new() + box() + text(0.5, 0.5, title1, cex=2) + if (!is.na(mat[unkn[l],c])){ + pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[unkn[l],c]),2] + if (pcgrp!=mat[unkn[l],c]) print ("pcgrp changed") + par (mar=c(3, 2.5, 3, 1.5) + 0.1) + plotEICs(an, pspec=pcgrp, maxlabel=2) + plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) + }else + { + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + } + } + } + # my.plots[[i]] <- recordPlot() + } + graphics.off() + + # pdf(file=paste("Unknown_",unkn[l],".pdf", sep=""), onefile=TRUE) + # for (my.plot in my.plots) { + # replayPlot(my.plot) + # } + # my.plots + # graphics.off() + + }#end for unkn[l] + + } + +} +} #end function + + + diff --git a/galaxy/metaMS/metams.r b/galaxy/metaMS/metams.r new file mode 100644 index 0000000..1ca218d --- /dev/null +++ b/galaxy/metaMS/metams.r @@ -0,0 +1,370 @@ +#!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file +# metams.r version="0.99.8" +#created by Yann GUITTON + + +#Redirect all stdout to the log file +log_file=file("metams.log", open = "wt") +sink(log_file) +sink(log_file, type = "out") + + +library(batch) #necessary for parseCommandArgs function + +source_local <- function(fname){ + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep="/")) +} +print("step1") + + +listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects +print("new version 8") + + +print(listArguments) + + if (listArguments[["ri"]]!="NULL"){ + RIarg=read.table(listArguments[["ri"]], h=T) + colnames(RIarg)<-c("rt","RI") + # print(RIarg) + } else { + RIarg = NULL + # cat("Ri= ",RIarg) + } + + +DBarg=listArguments[["db"]] +# if (listArguments[["use_db"]]!="NULL"){ +if (DBarg!="NULL"){ + DBarg=listArguments[["db"]] + cat("Db= ",DBarg) + + } else { + DBarg = NULL + cat("NO Db : ",DBarg) + } + + + + +#for unknown EIC printing + +if(listArguments[["unkn"]]==0){ + unknarg<-"" +}else{ + unknarg<-listArguments[["unkn"]] +} + +print(paste("Unkn:",unknarg)) + +#remove unused arguments +listArguments[["unkn"]]<-NULL +listArguments[["db"]] <- NULL +listArguments[["ri"]] <- NULL + +print(" step2") + + +#saving the name of the function in a variable thefunction +thefunction = listArguments[["xfunction"]] +listArguments[["xfunction"]]=NULL #delete from the list of arguments + + + +if (thefunction == "runGC") { + #runGC accept either a list of files a zip folder or an xset object from xcms.xcmsSet tool + #CASE 1 From Library + if(!is.null(listArguments[["library"]])){ + print("CASE 1 from library") + libraryparam=listArguments[["library"]]#only runGC use library + dir(libraryparam) + if(!file.exists(listArguments[["library"]])){ + error_message=paste("Cannot access the directory :",listArguments[["library"]],".Please verify if the directory exists or not.") + print (error_message) + stop(error_message) + } + listArguments[["library"]]=NULL #delete from the list of arguments + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + samples<-list.files(path=libraryparam, pattern=filepattern, all.files=FALSE,recursive=TRUE,full.names=TRUE,ignore.case=FALSE) + + #create sampleMetadata, get sampleMetadata and class + sampleMetadata<-xcms:::phenoDataFromPaths(samples) + sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata) + row.names(sampleMetadata)<-NULL + + }else{ + libraryparam<-NULL + } + + #CASE 2 from zip file + #necessary to unzip .zip file uploaded to Galaxy + #thanks to .zip file it's possible to upload many file as the same time conserving the tree hierarchy of directories + + if (!is.null(listArguments[["zipfile"]])){ + print("CASE 2 from zip file") + directory=unzip(listArguments[["zipfile"]]) + if (thefunction == "runGC") { + listArguments=append(list(directory), listArguments) + } + metams_zip_file= listArguments[["zipfile"]] + listArguments[["zipfile"]]=NULL + + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + #samples<-list.files(path=directory, pattern=filepattern, all.files=FALSE,recursive=TRUE,full.names=TRUE,ignore.case=FALSE) + samples<-directory[grep(filepattern, directory)] + # cat(samples) #debugg + #create sampleMetadata, get sampleMetadata and class + sampleMetadata<-xcms:::phenoDataFromPaths(samples) + sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata) + row.names(sampleMetadata)<-NULL + } + else{ + metams_zip_file="" + } + + #CASE 3 from xset is an .RData file necessary to use the xcmsSet object generated by xcms.xcmsSet given by previous tools + if (!is.null(listArguments[["xset"]])){ + print("CASE 3 from xset") + require(CAMERA, quietly = TRUE) + load(listArguments[["xset"]]) + cat(class(xset)) + xsetparam=listArguments[["xset"]] + listArguments[["xset"]]=NULL #remove xset from arguments + + #xset from xcms.xcmsSet is not well formatted for metaMS this function do the formatting + if (class(xset)=="xcmsSet"){ + + #treat case where Rdata came from xcmsSet with zip file entry + if(exists("zip_file")){ + if (zip_file!=""){ + directory=unzip(zip_file) + print("CASE 3 from xset and with ZIP input") + + }else{ + print("CASE 3 from xset and with LIBRARY input") + } + } + #end zip file case + if (length(xset@rt$raw)>1){ + #create an exceptable list of xset for metaMS + xset.l<-vector("list",length(xset@rt$raw)) + for (i in 1:length(xset@rt$raw)){ + xset.l[[i]]<-new("xcmsSet") + xset.l[[i]]@peaks<-xset@peaks[which(xset@peaks[,"sample"]==i),] + df<-data.frame(class=xset@phenoData[i,]) + rownames(df)<-rownames(xset@phenoData)[i] + xset.l[[i]]@phenoData<-df + xset.l[[i]]@rt$raw<-xset@rt$raw[[i]] + xset.l[[i]]@rt$corrected<-xset@rt$corrected[[i]] + xset.l[[i]]@filepaths<-xset@filepaths[i] + xset.l[[i]]@profinfo<-xset@profinfo + } + }else{ + xset.l<-xset + } + + } + #create sampleMetadata, get sampleMetadata and class + sampleMetadata<-xset@phenoData + sampleMetadata<-cbind(sampleMetadata=rownames(sampleMetadata),sampleMetadata) + row.names(sampleMetadata)<-NULL + samples<-xset@filepaths + } + else{ + xsetparam<-NULL + } + + + + #Import the different functions + source_local("lib_metams.r") + + #load function for ploting EICs and pspectra + # source_local("plotUnknown.r") + + #settings process + if(listArguments[["settings"]]=="default"){ + require(metaMS, quietly = TRUE) + data(FEMsettings) + if (listArguments[["rtrange"]][1]!="NULL"){ + rtrange=listArguments[["rtrange"]] + } else { + rtrange=NULL + } + + if (!is.null(DBarg)){ + manual <- read.msp(DBarg) + DBarg <- createSTDdbGC(stdInfo = NULL, settings = TSQXLS.GC, manualDB = manual) + } + nSlaves=listArguments[["nSlaves"]] + if(!is.null(libraryparam)|| !metams_zip_file==""){ + resGC<-runGC(files=samples,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al + } + if(!is.null(xsetparam)){ + settingslist=TSQXLS.GC + if (class(xset.l[[1]])!="xsAnnotate"){ + cat("Process xsAnnotate") + xset<-lapply(xset.l, + function(x) { + y <- xsAnnotate(x, sample = 1) + capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm), + file = NULL) + z}) + + } + + resGC<-runGC(xset=xset,settings=TSQXLS.GC, rtrange=rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) #default settings for GC from Wehrens et al + } + } + + if(listArguments[["settings"]]=="User_defined"){ + require(metaMS, quietly=TRUE) + listArguments[["settings"]]=NULL #delete from the list of arguments + fwhmparam=listArguments[["fwhm"]] + rtdiffparam=listArguments[["rtdiff"]] + #RIdiffparam=listArguments[["RIdiff"]] #only if timeComparison = "RI" + minfeatparam=listArguments[["minfeat"]] + simthreshparam=listArguments[["simthreshold"]] + minclassfractionparam=listArguments[["minclassfraction"]] + minclasssizeparam=listArguments[["minclasssize"]] + if (listArguments[["rtrange"]]!="NULL"){ + rtrange=listArguments[["rtrange"]] + cat("rtrange= ",rtrange) + } else { + rtrange=NULL + cat("rtrange= ",rtrange) + } + nSlaves=listArguments[["nSlaves"]] + + GALAXY.GC <- metaMSsettings("protocolName" = "GALAXY.GC", + "chrom" = "GC", + PeakPicking = list( + method = "matchedFilter", + step = 0.5, + steps = 2, + mzdiff = .5, + fwhm = fwhmparam, + snthresh = 2, + max = 500), + CAMERA = list(perfwhm = 1)) + + metaSetting(GALAXY.GC, "DBconstruction") <- list( + minintens = 0.0, + rttol = rtdiffparam, + intensityMeasure = "maxo", + DBthreshold = .80, + minfeat = minfeatparam) + metaSetting(GALAXY.GC, "match2DB") <- list( + simthresh = simthreshparam, + timeComparison = "rt", + rtdiff = rtdiffparam, + RIdiff = 5, + minfeat = minfeatparam) + #to be used when DB option will be available + # metaSetting(GALAXY.GC, "matchIrrelevants") <- list( + # irrelevantClasses = c("Bleeding", "Plasticizers"), + # timeComparison = "rt", + # RIdiff = 2, + # rtdiff = rtdiffparam, + # simthresh = simthreshparam) + metaSetting(GALAXY.GC, "betweenSamples") <- list( + min.class.fraction = minclassfractionparam, + min.class.size = minclasssizeparam, + timeComparison = "rt", + rtdiff = rtdiffparam, + RIdiff = 2, + simthresh = simthreshparam) + + library(metaMS) + # files, xset, settings, rtrange = NULL, DB = NULL, + # removeArtefacts = TRUE, findUnknowns = nexp > 1, + # returnXset = FALSE, RIstandards = NULL, nSlaves = 0 + if (!is.null(DBarg)){ + manual <- read.msp(DBarg) + DBarg <- createSTDdbGC(stdInfo = NULL, settings = GALAXY.GC, manualDB = manual) + } + if(!is.null(libraryparam)|| !metams_zip_file==""){ + resGC<-runGC(files=samples,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg , removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) + + } + if(!is.null(xsetparam)){ + settingslist=GALAXY.GC + if (class(xset.l[[1]])!="xsAnnotate"){ + print("Process xsAnnotate") + xset<-lapply(xset.l, + function(x) { + y <- xsAnnotate(x, sample = 1) + capture.output(z <- groupFWHM(y, perfwhm = settingslist@CAMERA$perfwhm), + file = NULL) + z}) + + } + resGC<-runGC(xset=xset,settings=GALAXY.GC,rtrange = rtrange, DB= DBarg, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) + } + } + + #peakTable ordered by rt + peaktable<-resGC$PeakTable<-resGC$PeakTable[order(resGC$PeakTable[,"rt"]),] + write.table(peaktable, file="peaktable.tsv", sep="\t", row.names=FALSE) + + #peakTable for PCA + #dataMatrix + dataMatrix<-cbind(Name=resGC$PeakTable[,"Name"],resGC$PeakTable[,(colnames(resGC$PeakTable) %in% sampleMetadata[,1])]) + rownames(dataMatrix)<-NULL + write.table(dataMatrix, file="dataMatrix.tsv", sep="\t", row.names=FALSE, quote=FALSE) + + #variableMetadata + variableMetadata<-resGC$PeakTable[,!(colnames(resGC$PeakTable) %in% sampleMetadata[,1])] + rownames(variableMetadata)<-NULL + write.table(variableMetadata, file="variableMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE) + + #sampleMetadata + write.table(sampleMetadata, file="sampleMetadata.tsv", sep="\t", row.names=FALSE, quote=FALSE) + + #peak spectrum as MSP for DB search + write.msp(resGC$PseudoSpectra, file="peakspectra.msp", newFile = TRUE) + + #TIC/BPC picture generation + # use files as entry not xset that do not exist + + print("TICs") + #Use getTIC2s and getBPC2s because getTICs and getBPCs can exists due to transfert of function in Rdata + b<-getTIC2s(files = samples, pdfname="TICs_raw.pdf", rt="raw") + +print("BPCs") + c<-getBPC2s(files=samples, rt="raw", pdfname="BPCs_raw.pdf") + +print("Step QC plot") + +#Quality controls plots but only working in R (don't know why) +a<-plotUnknowns(resGC=resGC, unkn=unknarg) #use unknparam value + +# create a mergpdf + +#test +system(paste('gs -o TICsBPCs_merged.pdf -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress *Cs_raw.pdf')) +system(paste('gs -o GCMS_EIC.pdf -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress Unknown_*')) + + +############################################TEST FUNCTION START################################################################# + +############################################TEST FUNCTION END################################################################# + + + + + #zip files of all runGC outputs + + system(paste('ls . | grep -e "tsv$" -e "msp$" -e "GCMS_" | zip -r -@ "rungc.zip"')) +} + +#delete the parameters to avoid the passage to the next tool in .RData image +rm(listArguments) + +#saving R data in .Rdata file to save the variables used in the present tool +save.image(paste(thefunction,"RData",sep=".")) + diff --git a/galaxy/metaMS/static/images/gcms_workflow.png b/galaxy/metaMS/static/images/gcms_workflow.png new file mode 100644 index 0000000..5554f9d Binary files /dev/null and b/galaxy/metaMS/static/images/gcms_workflow.png differ diff --git a/galaxy/metaMS/static/images/metaMS_BPCs.png b/galaxy/metaMS/static/images/metaMS_BPCs.png new file mode 100644 index 0000000..5b6a774 Binary files /dev/null and b/galaxy/metaMS/static/images/metaMS_BPCs.png differ diff --git a/galaxy/metaMS/test-data/W4M_GCMS_Dataset.zip b/galaxy/metaMS/test-data/W4M_GCMS_Dataset.zip new file mode 100644 index 0000000..40e6c76 Binary files /dev/null and b/galaxy/metaMS/test-data/W4M_GCMS_Dataset.zip differ diff --git a/galaxy/metaMS/test.txt b/galaxy/metaMS/test.txt deleted file mode 100644 index 9f4e8d7..0000000 --- a/galaxy/metaMS/test.txt +++ /dev/null @@ -1 +0,0 @@ -#test