Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
idrblab authored Oct 14, 2021
1 parent b53fcd6 commit 7136b34
Show file tree
Hide file tree
Showing 98 changed files with 24,374 additions and 0 deletions.
23 changes: 23 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Package: NOREVA
Type: Package
Title: R Package for Systematic Optimization of Metabolomic Data
Processing
Version: 2.1.1
Author: Jianbo Fu
Maintainer: Jianbo Fu <[email protected]>
Description: The NOREVA package not only enables the pre-processing and assessment of multi-class/time-series metabolomic data but also realizes the high-throughput discovery of well-performing pre-processing workflows.
Depends: R (>= 3.5.0), rJava (>= 0.5-0)
License: GPL
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
Imports: DiffCorr, affy, vsn, DT, ropls, e1071, AUC, impute, eulerr,
MetNorm, ggsci, timecourse, multiROC, dummies, ggplot2, ggord,
limma, ggfortify, usethis, ggrepel, ggpubr, sampling, crmn,
ProteoMM, dplyr, statTarget, NormalizeMets, RcmdrMisc,
reshape2, foreach, data.table, parallel, doSNOW, tidyverse,
iterators
Suggests: knitr, rmarkdown, testthat
VignetteBuilder: knitr
NeedsCompilation: yes
Packaged: 2021-10-04 15:23:19 UTC; moumj
110 changes: 110 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Generated by roxygen2: do not edit by hand

export(EIGENMS)
export(PrepareInuputFiles)
export(loplot)
export(normulticlassisall)
export(normulticlassisallgs)
export(normulticlassispart)
export(normulticlassmatrix)
export(normulticlassnoall)
export(normulticlassnoallgs)
export(normulticlassnopart)
export(normulticlassqcall)
export(normulticlassqcallgs)
export(normulticlassqcpart)
export(nortimecourseisall)
export(nortimecourseisallgs)
export(nortimecourseispart)
export(nortimecoursematrix)
export(nortimecoursenoall)
export(nortimecoursenoallgs)
export(nortimecoursenopart)
export(nortimecourseqcall)
export(nortimecourseqcallgs)
export(nortimecourseqcpart)
export(norvisualization)
export(shiftCor)
import(AUC)
import(DT)
import(DiffCorr)
import(MetNorm)
import(affy)
import(crmn)
import(doSNOW)
import(dplyr)
import(dummies)
import(e1071)
import(foreach)
import(ggfortify)
import(ggord)
import(ggplot2)
import(ggpubr)
import(ggrepel)
import(ggsci)
import(impute)
import(iterators)
import(limma, except=.__C__LargeDataObject)
import(multiROC)
import(parallel)
import(rJava)
import(ropls, except=plot)
import(sampling)
import(statTarget)
import(timecourse)
import(usethis)
import(vsn)
importFrom(NormalizeMets,LinearModelFit)
importFrom(ProteoMM,eig_norm1)
importFrom(ProteoMM,eig_norm2)
importFrom(RcmdrMisc,numSummary)
importFrom(data.table,data.table)
importFrom(grDevices,colorRampPalette)
importFrom(grDevices,dev.off)
importFrom(grDevices,pdf)
importFrom(grDevices,png)
importFrom(grDevices,rainbow)
importFrom(grDevices,rgb)
importFrom(graphics,abline)
importFrom(graphics,close.screen)
importFrom(graphics,legend)
importFrom(graphics,lines)
importFrom(graphics,mtext)
importFrom(graphics,par)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(graphics,screen)
importFrom(graphics,split.screen)
importFrom(graphics,symbols)
importFrom(graphics,text)
importFrom(graphics,title)
importFrom(reshape2,melt)
importFrom(ropls,opls)
importFrom(stats,anova)
importFrom(stats,as.formula)
importFrom(stats,cor)
importFrom(stats,dnorm)
importFrom(stats,kmeans)
importFrom(stats,lm)
importFrom(stats,loess)
importFrom(stats,loess.control)
importFrom(stats,mad)
importFrom(stats,median)
importFrom(stats,model.matrix)
importFrom(stats,na.omit)
importFrom(stats,pf)
importFrom(stats,pnorm)
importFrom(stats,qnorm)
importFrom(stats,qt)
importFrom(stats,quantile)
importFrom(stats,resid)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,var)
importFrom(utils,combn)
importFrom(utils,read.csv)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
importFrom(utils,write.csv)
importFrom(utils,write.table)
19 changes: 19 additions & 0 deletions R/CS_multi_class.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#' @importFrom ropls opls
options(warn = -1)
# Rank the variables of `mat` by their VIP score from a ropls::opls() model.
#
# Args:
#   mat: data matrix with samples in rows and variables in columns.
#   lab: class label of each sample; converted to a factor before fitting.
#
# Returns:
#   The compound names (taken from the names of the model's `vipVn` slot),
#   ordered from the highest VIP score to the lowest.
OPLSDA_C <- function(mat, lab) {
  class_factor <- as.factor(lab)

  # Two predictive components, standard scaling, 2-fold cross-validation and
  # 100 permutations; `fig.pdfC = FALSE` is passed instead of the older
  # `plot = FALSE` argument.
  model <- ropls::opls(mat, class_factor, permI = 100, predI = 2, scaleC = "standard", crossvalI = 2, fig.pdfC = FALSE)

  # The fitted model is an S4 object, so the VIP vector is read from a slot.
  vip_scores <- model@vipVn
  vip_table <- data.frame(CompoundName = names(vip_scores), VIP = vip_scores)

  ranking <- order(as.numeric(vip_table[, 2]), decreasing = TRUE)
  vip_table[ranking, 1]
}
##########################################################
53 changes: 53 additions & 0 deletions R/EIGENMS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#' @title EigenMS normalization
#' @description EigenMS is an adaptation of surrogate variable analysis, which identifies trends
#' attributable to bias by utilizing singular value decomposition on model residuals.
#' See the details at the following References.
#' @param data Input matrix of data
#' @param label Input the label of data
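#' @return The matrix of normalized abundances, i.e. the \code{norm_m} element of the \code{eig_norm2()} result. The quantities involved in producing it are: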
#' \describe{
#' \item{m_logInts}{number of metabolites x number of samples
#' matrix of expression data with no missing values}
#' \item{grps}{either a single factor indicating the treatment group of
#' each sample i.e. [1 1 1 1 2 2 2 2...]
#' or a data frame of factors}
#' \item{m_ints_eig1}{First portion of EigenMS: Identify eigentrends attributable to bias}
#' \item{m_ints_norm1}{Eliminate the effects of systematic bias identified in eig_norm1()}
#' \item{mm_eigenMS}{matrix of normalized abundances, no extra columns}
#'}
#' @importFrom ProteoMM eig_norm1
#' @importFrom ProteoMM eig_norm2
#' @usage EIGENMS(data,label)
#' @examples
#' \donttest{data(mm_metabolites)}
#' \donttest{head(mm_metabolites)}
#' # different from parameter names as R uses outer name spaces
#' # if variable is undefined
#' \donttest{intsCols = 8:13}
#' \donttest{metaCols = 1:7} # reusing this variable
#' \donttest{m_logInts = make_intencities(mm_metabolites, intsCols)} # will reuse the name
#' \donttest{m_logInts = convert_log2(m_logInts)}
#' # 3 samples for CG and 3 for mCG
#' \donttest{grps = as.factor(c('CG','CG','CG', 'mCG','mCG','mCG'))}
#' \donttest{mm_eigenMS = EIGENMS(m_logInts,grps)}
#' @references 1. Metabolomics data normalization with EigenMS.
#' Karpievitch YK, Nikolic SB, Wilson R, Sharman JE, Edwards LM. 2014, PLoS ONE.
#' @references 2. Normalization of peak intensities in bottom-up MS-based proteomics
#' using singular value decomposition.
#' Karpievitch YV, Taverner T et al. 2009, Bioinformatics.

#' @export

EIGENMS <- function(data, label) {
  # Treatment groups must be a factor for eig_norm1().
  treatment_groups <- as.factor(label)

  # eig_norm1() is given a two-column info matrix; both columns are simply the
  # row names of the input data.
  feature_info <- cbind(rownames(data), rownames(data))

  # Step 1: run eig_norm1() on the data; warnings and messages are silenced.
  bias_trends <- suppressWarnings(suppressMessages(
    eig_norm1(m = data, treatment = treatment_groups, prot.info = feature_info)
  ))

  # Step 2: feed the step-1 result to eig_norm2() and keep only the
  # normalized matrix from what it returns.
  normalized <- suppressWarnings(suppressMessages(eig_norm2(rv = bias_trends)))
  normalized$norm_m
}
173 changes: 173 additions & 0 deletions R/Evaluation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@

###==========================================================================###
### Other evaluation indexes, such as PEV, PMAD, PCV, and so on.
###==========================================================================###

#calculate PEV
# Pooled estimate of variance (PEV) within each sample group.
#
# Args:
#   data0: matrix or data frame with one row per sample. The first column is
#          the group label; every remaining column is one feature.
#
# Returns:
#   A numeric vector with one element per group, named by the group label in
#   factor-level order. Each element is
#     sum_f((n_f - 1) * var_f) / sum_f(n_f - 1)
#   where, for feature f within the group, n_f is the number of non-missing
#   values and var_f their variance. A group made of a single fully observed
#   sample yields NaN (0 / 0).
#
# Groups are formed by splitting the rows on the label factor rather than by
# scanning for label changes, so a group with a single sample (including the
# last one) is handled on its own instead of being merged into its neighbour.
# Rows whose label is NA belong to no group and are ignored.
PEV <- function(data0) {
  data0 <- as.matrix(data0)
  label <- as.factor(data0[, 1])

  # drop = FALSE keeps a matrix even with a single feature column.
  values <- data0[, -1, drop = FALSE]
  # as.matrix() produces character data when the label column is not numeric;
  # coerce explicitly, as PMAD() and PCV() do.
  storage.mode(values) <- "double"

  rows_by_group <- split(seq_along(label), label)

  vapply(rows_by_group, function(rows) {
    block <- values[rows, , drop = FALSE]
    # Degrees of freedom per feature: non-missing observations minus one.
    deg_free <- colSums(!is.na(block)) - 1
    weighted_var <- deg_free * apply(block, 2, var, na.rm = TRUE)
    sum(weighted_var, na.rm = TRUE) / sum(deg_free, na.rm = TRUE)
  }, numeric(1))
}
#calculate PMAD
# Pooled median absolute deviation (PMAD) within each sample group.
#
# Args:
#   data0: matrix or data frame with one row per sample. The first column is
#          the group label; every remaining column is one feature.
#
# Returns:
#   A numeric vector with one element per group, named by the group label in
#   factor-level order. Each element is the mean, over features, of the MAD
#   (stats::mad with its default constant) of that feature's values within
#   the group; missing values are removed at both steps.
#
# Groups are formed by splitting the rows on the label factor rather than by
# scanning for label changes. The scan attached a trailing single-sample
# group to the preceding group, which distorted that group's MAD and left the
# single-sample group as NaN. Rows whose label is NA belong to no group and
# are ignored.
PMAD <- function(data0) {
  data0 <- as.matrix(data0)
  label <- as.factor(data0[, 1])

  # drop = FALSE keeps a matrix even with a single feature column.
  values <- data0[, -1, drop = FALSE]
  # as.matrix() produces character data when the label column is not numeric.
  storage.mode(values) <- "double"

  rows_by_group <- split(seq_along(label), label)

  vapply(rows_by_group, function(rows) {
    feature_mad <- apply(values[rows, , drop = FALSE], 2, mad, na.rm = TRUE)
    mean(feature_mad, na.rm = TRUE)
  }, numeric(1))
}

#' @importFrom RcmdrMisc numSummary
#calculate PCV
# Pooled coefficient of variation (PCV) within each sample group, in percent.
#
# Args:
#   data0: matrix or data frame with one row per sample. The first column is
#          the group label; every remaining column is one feature.
#
# Returns:
#   A numeric vector with one element per group, named by the group label in
#   factor-level order. Each element is 100 times the mean, over features, of
#   the per-group "cv" statistic reported by numSummary() for that feature
#   (NA values are skipped when averaging).
PCV <- function(data0) {
  data0 <- as.matrix(data0)
  # drop = FALSE keeps a matrix even when there is a single sample or a
  # single feature, so the column indexing below stays valid.
  data0 <- data0[order(data0[, 1]), , drop = FALSE]
  label <- as.factor(data0[, 1])

  # Features in rows, samples in columns, stored as doubles.
  features <- t(data0[, -1, drop = FALSE])
  storage.mode(features) <- "double"

  cv_by_feature <- matrix(NA_real_, nrow = nrow(features), ncol = nlevels(label))
  # seq_len() makes the loop a no-op when there are no feature columns.
  for (i in seq_len(nrow(features))) {
    feature_cv <- numSummary(features[i, ], statistics = c("cv"), groups = label)
    cv_by_feature[i, ] <- feature_cv$table
  }

  avg_cv <- apply(cv_by_feature, 2, mean, na.rm = TRUE) * 100
  names(avg_cv) <- levels(label)
  avg_cv
}



# Clustering purity: the fraction of samples that fall in the majority
# reference class of their cluster.
#
# Args:
#   result: cluster assignment of each sample.
#   label:  reference class of each sample, in the same order as `result`.
#
# Returns:
#   sum over clusters of (size of the largest reference class inside the
#   cluster) divided by the number of samples. Returns NaN for empty input.
#
# Raises:
#   An error when `result` and `label` differ in length or contain NA.
purity <- function(result, label) {
  if (length(result) != length(label)) {
    stop("'result' and 'label' must have the same length", call. = FALSE)
  }
  if (anyNA(result) || anyNA(label)) {
    stop("'result' and 'label' must not contain missing values", call. = FALSE)
  }

  total_num <- length(label)
  if (total_num == 0L) {
    return(NaN)
  }

  # Integer codes via match() keep exact-equality grouping for any atomic
  # type; a single contingency table replaces the element-by-element count
  # for every (cluster, class) pair.
  cluster_id <- match(result, unique(result))
  class_id <- match(label, unique(label))
  contingency <- table(cluster_id, class_id)

  sum(apply(contingency, 1L, max)) / total_num
}



# Consistency score of the top-n entries across several ranked lists.
# NOTE(review): the formula and the name `CWrel` suggest this is the relative
# weighted consistency index -- confirm against the method reference.
#
# Args:
#   all_genelist: list of ranked vectors (one per run). It has no default:
#                 the previous default `all_genelist = all_genelist` referred
#                 to itself and always failed when it was relied upon.
#   Y:   modulus used for D and multiplier in the formula below; the removed
#        comment `Y <- ncol(data)` suggests it is the total number of
#        features -- TODO confirm with callers.
#   n:   number of top entries taken from each list.
#   num: modulus used for H and multiplier in the denominator. Defaults to 3,
#        the value that used to be hard-coded; presumably the number of
#        lists -- TODO confirm.
#
# Returns:
#   (Y * (N - D + F) - N^2 + D^2) / (Y * (H^2 + num * (N - H) - D) - N^2 + D^2)
#   where N is the total number of selected entries, F is the sum over
#   distinct entries of count * (count - 1), D = N %% Y and H = N %% num.
CWvalue <- function(all_genelist, Y, n, num = 3) {
  # seq_len(n) selects nothing for n = 0, unlike 1:n. Lists shorter than n
  # contribute NA padding, which table() drops.
  top_entries <- lapply(all_genelist, function(ranked) ranked[seq_len(n)])

  # How often each distinct entry occurs across the truncated lists.
  counts <- as.numeric(table(unlist(top_entries)))

  Ff_sum <- sum(counts * (counts - 1))
  N <- sum(counts)
  D <- N %% Y
  H <- N %% num

  (Y * (N - D + Ff_sum) - N^2 + D^2) / (Y * (H^2 + num * (N - H) - D) - N^2 + D^2)
}

### End
### ------------------------------------------------------------------------ ###

Loading

0 comments on commit 7136b34

Please sign in to comment.