1.53 - Struwen Day Edition
- man page for dlkjcorr
- added rlkjcorr function (Ben's code)
- ensemble() can now optionally skip the link or sim step (new do_link and
do_sim arguments)
- map() now catches another corner case in linear model definitions
- map2stan plot() method for traceplots now has window and n_cols
arguments
- added divergent(), which counts the number of divergent iterations in a
map2stan fit. Automatically warns the user (usage sketch after this list).
- map2stan better handles missing values in outcome variables. Working
towards an automatic prediction interface through this mechanism.
- some file rearrangement for sanity
- precis() checks and warns about divergent iterations
- minor updates to README
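
A quick usage sketch of the new divergence check; fit is assumed to be an
existing map2stan fit and is a placeholder here:

library(rethinking)
divergent( fit )                 # number of divergent iterations after warmup
divergent( fit , warmup=TRUE )   # include warmup iterations in the count
precis( fit )                    # precis() now warns automatically when divergent iterations are found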
Richard McElreath committed Apr 3, 2015
1 parent 1d5b3c4 commit 9704c7d
Showing 15 changed files with 303 additions and 161 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,8 +1,8 @@
Package: rethinking
Type: Package
Title: Statistical Rethinking book package
Version: 1.52
Date: 2015-03-24
Version: 1.53
Date: 2015-04-03
Author: Richard McElreath
Maintainer: Richard McElreath <[email protected]>
Imports: coda, MASS, mvtnorm
43 changes: 42 additions & 1 deletion R/distributions.r
@@ -145,9 +145,50 @@ rlaplace <- function(n,location=0,lambda=1) {

# onion method correlation matrix
dlkjcorr <- function( x , eta=1 , log=TRUE ) {
det(x)^(eta-1)
ll <- det(x)^(eta-1)
if ( log==FALSE ) ll <- exp(ll)
return(ll)
}

# Ben's rLKJ function
# K is dimension of matrix
rlkjcorr <- function ( n , K , eta = 1 ) {

stopifnot(is.numeric(K), K >= 2, K == as.integer(K))
stopifnot(eta > 0)
#if (K == 1) return(matrix(1, 1, 1))

f <- function() {
alpha <- eta + (K - 2)/2
r12 <- 2 * rbeta(1, alpha, alpha) - 1
R <- matrix(0, K, K) # upper triangular Cholesky factor until return()
R[1,1] <- 1
R[1,2] <- r12
R[2,2] <- sqrt(1 - r12^2)
if(K > 2) for (m in 2:(K - 1)) {
alpha <- alpha - 0.5
y <- rbeta(1, m / 2, alpha)

# Draw uniformly on a hypersphere
z <- rnorm(m, 0, 1)
z <- z / sqrt(crossprod(z)[1])

R[1:m,m+1] <- sqrt(y) * z
R[m+1,m+1] <- sqrt(1 - y)
}
return(crossprod(R))
}
R <- replicate( n , f() )
if ( dim(R)[3]==1 ) {
R <- R[,,1]
} else {
# need to move 3rd dimension to front, so conforms to array structure that Stan uses
R <- aperm(R,c(3,1,2))
}
return(R)
}

# wishart
rinvwishart <- function(s,df,Sigma,Prec) {
#s-by-s Inverse Wishart matrix, df degree of freedom, precision matrix
#Prec. Distribution of W^{-1} for Wishart W with nu=df+s-1 degree of
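A short sketch of how the new rlkjcorr() can be used; dens() is the package's
kernel density helper and the parameter values here are arbitrary:

R1 <- rlkjcorr( 1 , K=3 , eta=2 )      # a single 3x3 correlation matrix
R  <- rlkjcorr( 1e4 , K=3 , eta=2 )    # 1e4 matrices, returned as a 1e4 x 3 x 3 array
dens( R[,1,2] , xlab="correlation" )   # implied marginal distribution of one off-diagonal element
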
22 changes: 15 additions & 7 deletions R/ensemble.R
@@ -1,5 +1,5 @@
# build ensemble of samples using DIC/WAIC weights
ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 , replace=list() ) {
ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 , replace=list() , do_link=TRUE , do_sim=TRUE ) {
# retrieve list of models
L <- list(...)
if ( is.list(L[[1]]) && length(L)==1 )
@@ -21,12 +21,18 @@ ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 , replace=list(

# generate simulated predictions for each model
# then compose ensemble using weights
link.list <- list("empty")
sim.list <- list("empty")
if ( missing(data) ) {
link.list <- lapply( L , function(m) link(m , n=n , refresh=refresh , replace=replace ) )
sim.list <- lapply( L , function(m) sim(m , n=n , refresh=refresh , replace=replace ) )
if ( do_link==TRUE )
link.list <- lapply( L , function(m) link(m , n=n , refresh=refresh , replace=replace ) )
if ( do_sim==TRUE )
sim.list <- lapply( L , function(m) sim(m , n=n , refresh=refresh , replace=replace ) )
} else {
link.list <- lapply( L , function(m) link(m , data=data , n=n , refresh=refresh , replace=replace ) )
sim.list <- lapply( L , function(m) sim(m , data=data , n=n , refresh=refresh , replace=replace ) )
if ( do_link==TRUE )
link.list <- lapply( L , function(m) link(m , data=data , n=n , refresh=refresh , replace=replace ) )
if ( do_sim==TRUE )
sim.list <- lapply( L , function(m) sim(m , data=data , n=n , refresh=refresh , replace=replace ) )
}
#print(str(sim.list))

@@ -48,8 +54,10 @@ ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 , replace=list(
for ( i in 1:length(idx) ) {
if ( idx[i]>0 ) {
idxrange <- idx_start[i]:idx_end[i]
link_out[idxrange,] <- link.list[[i]][idxrange,]
sim_out[idxrange,] <- sim.list[[i]][idxrange,]
if ( do_link==TRUE )
link_out[idxrange,] <- link.list[[i]][idxrange,]
if ( do_sim==TRUE )
sim_out[idxrange,] <- sim.list[[i]][idxrange,]
}
}
result <- list( link=link_out , sim=sim_out )
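A hedged sketch of the new do_link and do_sim switches; m1, m2, and d.pred are
placeholders for fitted models and prediction data:

ens <- ensemble( m1 , m2 , data=d.pred )                    # WAIC-weighted link and sim output
mu.mean <- apply( ens$link , 2 , mean )                     # ensemble posterior means
ens2 <- ensemble( m1 , m2 , data=d.pred , do_sim=FALSE )    # skip the sim step when only link output is needed
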
2 changes: 1 addition & 1 deletion R/map.r
@@ -136,7 +136,7 @@ map <- function( flist , data , start , method="BFGS" , hessian=TRUE , debug=FAL
} else {
fname <- as.character( RHS[[1]] )
}
if ( fname=="+" | fname=="*" | fname=="-" | fname=="/" | fname=="%*%" | fname %in% invlink.names | flag_monad_linear_model==TRUE ) {
if ( fname=="[" | fname=="+" | fname=="*" | fname=="-" | fname=="/" | fname=="%*%" | fname %in% invlink.names | flag_monad_linear_model==TRUE ) {
# linear model formula with no density (but maybe invlink) function
# return a list with parameter name in [[1]] and text of RHS in [[2]]
thetext <- list( as.character(LHS) , paste( deparse(RHS) , collapse=" " ) )
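The added "[" case lets map() recognize a right-hand side that is nothing but
an indexed parameter as a linear model definition. A hypothetical formula of
that shape (the names y, gid, and a are placeholders):

flist <- alist(
    y ~ dnorm( mu , sigma ) ,
    mu <- a[gid] ,               # RHS whose top-level call is "[" -- the corner case now caught
    a[gid] ~ dnorm( 0 , 10 ) ,
    sigma ~ dcauchy( 0 , 2 )
)
# m <- map( flist , data=d )     # d assumed to contain y and an integer index gid
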
22 changes: 15 additions & 7 deletions R/map2stan-class.r
@@ -272,7 +272,7 @@ setMethod("pairs" , "map2stan" , function(x, n=500 , alpha=0.7 , cex=0.7 , pch=1
# my trace plot function
#rethink_palette <- c("#5BBCD6","#F98400","#F2AD00","#00A08A","#FF0000")
rethink_palette <- c("#8080FF","#F98400","#F2AD00","#00A08A","#FF0000")
tracerplot <- function( object , col=rethink_palette , alpha=1 , bg=gray(0.6,0.5) , ask=TRUE , ... ) {
tracerplot <- function( object , col=rethink_palette , alpha=1 , bg=gray(0.7,0.5) , ask=TRUE , window , n_cols=2 , ... ) {
chain.cols <- col

if ( class(object)!="map2stan" ) stop( "requires map2stan fit" )
@@ -292,7 +292,6 @@ tracerplot <- function( object , col=rethink_palette , alpha=1 , bg=gray(0.6,0.5

# figure out grid and paging
n_pars <- length( pars )
n_cols=2
n_rows=ceiling(n_pars/n_cols)
n_rows_per_page <- n_rows
paging <- FALSE
@@ -304,16 +303,23 @@ tracerplot <- function( object , col=rethink_palette , alpha=1 , bg=gray(0.6,0.5
}
n_iter <- object@stanfit@sim$iter
n_warm <- object@stanfit@sim$warmup
wstart <- 1
wend <- n_iter
if ( !missing(window) ) {
wstart <- window[1]
wend <- window[2]
}

# worker
plot_make <- function( main , par , neff , ... ) {
ylim <- c( min(post[,,par]) , max(post[,,par]) )
plot( NULL , xlab="sample" , ylab="position" , col=chain.cols[1] , type="l" , main=main , xlim=c(1,n_iter) , ylim=ylim , ... )
ylim <- c( min(post[wstart:wend,,par]) , max(post[wstart:wend,,par]) )
plot( NULL , xlab="" , ylab="" , type="l" , xlim=c(wstart,wend) , ylim=ylim , ... )
# add polygon here for warmup region?
diff <- abs(ylim[1]-ylim[2])
ylim <- ylim + c( -diff/2 , diff/2 )
polygon( n_warm*c(-1,1,1,-1) , ylim[c(1,1,2,2)] , col=bg , border=NA )
mtext( paste("n_eff =",round(neff,0)) , 3 , adj=1 , cex=0.8 )
mtext( paste("n_eff =",round(neff,0)) , 3 , adj=1 , cex=0.9 )
mtext( main , 3 , adj=0 , cex=1 )
}
plot_chain <- function( x , nc , ... ) {
lines( 1:n_iter , x , col=col.alpha(chain.cols[nc],alpha) , lwd=0.5 )
@@ -323,7 +329,9 @@ tracerplot <- function( object , col=rethink_palette , alpha=1 , bg=gray(0.6,0.5
n_eff <- summary(object@stanfit)$summary[,'n_eff']

# make window
set_nice_margins()
#set_nice_margins()
par(mgp = c(0.5, 0.5, 0), mar = c(1.5, 1.5, 1.5, 1) + 0.1,
tck = -0.02)
par(mfrow=c(n_rows_per_page,n_cols))

# draw traces
@@ -348,4 +356,4 @@ tracerplot <- function( object , col=rethink_palette , alpha=1 , bg=gray(0.6,0.5

}#k

}
}
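
A sketch of the updated trace plot interface; plot() on a map2stan fit
dispatches to tracerplot(), and fit is assumed to be an existing map2stan fit:

plot( fit )                                   # all samples, default 2-column grid of panels
plot( fit , window=c(100,1000) , n_cols=3 )   # restrict to iterations 100-1000, use 3 columns
# tracerplot( fit , window=c(100,1000) )      # the underlying function can also be called directly
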
13 changes: 13 additions & 0 deletions R/map2stan-divergent.r
@@ -0,0 +1,13 @@
# extracts n_divergent from stan fit
divergent <- function( fit , warmup=FALSE ) {
if ( class(fit)=="map2stan" ) fit <- fit@stanfit
x <- rstan::get_sampler_params(fit)
if ( warmup==FALSE ) {
nwarmup <- fit@stan_args[[1]]$warmup
niter <- fit@stan_args[[1]]$iter
n <- sapply( x , function(ch) sum(ch[(nwarmup+1):niter,5]) )
} else {
n <- sapply( x , function(ch) sum(ch[,5]) )
}
sum(n)
}
31 changes: 23 additions & 8 deletions R/map2stan.r
@@ -1002,7 +1002,7 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
outcome <- concat( outcome , "[i]" )

# check for linear model names as parameters and add [i] to each
if ( length(fp[['lm']])>0 ) {
if ( length(fp[['lm']])>1 ) {
lm_names <- c()
for ( j in 1:length(fp[['lm']]) ) {
lm_names <- c( lm_names , fp[['lm']][[j]]$parameter )
@@ -1046,7 +1046,7 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
# build code in transformed parameters that constructs x_merge
m_tpars1 <- concat( m_tpars1 , indent , "real " , outcome , "[N];\n" )
m_tpars2 <- concat( m_tpars2 , indent , outcome , " <- " , lik$outcome , ";\n" )
m_tpars2 <- concat( m_tpars2 , indent , "for ( u in 1:N_" , lik$outcome , "_missing ) " , outcome , "[" , lik$outcome , "_missingness[u]] <- " , lik$outcome , "_impute[u]" , ";\n" )
m_tpars2 <- concat( m_tpars2 , indent , "for ( u in 1:" , lik$N_name , "_missing ) " , outcome , "[" , lik$outcome , "_missingness[u]] <- " , lik$outcome , "_impute[u]" , ";\n" )
# add x_impute to start list
imputer_name <- concat(lik$outcome,"_impute")
start[[ imputer_name ]] <- rep( impute_bank[[lik$outcome]]$init , impute_bank[[lik$outcome]]$N_miss )
@@ -1063,12 +1063,19 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l

}

# add N variable to data block, if more than one likelihood in model
# add N variable to data block
the_N_name <- lik$N_name
if ( i > 1 ) {
if ( lik$outcome %in% names(impute_bank) )
if ( i > 0 ) {
if ( lik$outcome %in% names(impute_bank) ) {
the_N_name <- concat( the_N_name , "_missing" )
m_data <- concat( m_data , indent , "int<lower=1> " , the_N_name , ";\n" )
if ( i==1 ) {
warning(concat("Missing values in outcome variable ",lik$outcome," being imputed (predicted) through model. The model should fit fine. But helper functions like WAIC may not work."))
}
}
#m_data <- concat( m_data , indent , "int<lower=1> " , the_N_name , ";\n" )
fp[['used_predictors']][[the_N_name]] <- list( var=the_N_name , type="index" )
# warn about imputation on top-level outcome

}

# add number of cases to data list
@@ -1089,7 +1096,8 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
m_gq <- concat( m_model_declare , indent , "real dev;\n" , indent , "dev <- 0;\n" , m_gq )

# general data length data declare
m_data <- concat( m_data , indent , "int<lower=1> " , "N" , ";\n" )
#m_data <- concat( m_data , indent , "int<lower=1> " , "N" , ";\n" )
fp[['used_predictors']][['N']] <- list( var="N" , type="index" )
# data from used_predictors list
n <- length( fp[['used_predictors']] )
if ( n > 0 ) {
@@ -1245,7 +1253,8 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l

# single core
# so just run the model
fit <- stan( model_code=model_code , model_name=modname , data=d , init=initlist , iter=iter , chains=chains , pars=pars , ... )
if ( missing(rng_seed) ) rng_seed <- sample( 1:1e5 , 1 )
fit <- stan( model_code=model_code , model_name=modname , data=d , init=initlist , iter=iter , warmup=warmup , chains=chains , pars=pars , seed=rng_seed , ... )

} else {

@@ -1379,6 +1388,12 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
attr(result,"WAIC") = waic
}

# check divergent iterations
nd <- divergent(fit)
if ( nd > 0 ) {
warning( concat("There were ",nd," divergent iterations during sampling.\nCheck the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.") )
}

} else {
# just return list
result <- list(
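Two user-visible consequences of the map2stan() changes above, in a hedged
sketch (the tiny model and data are placeholders): the random seed is now
forwarded to stan(), and NA values in an outcome variable are imputed with a
warning.

d <- list( y=c( 1.1 , 0.4 , NA , 2.3 , -0.7 ) )   # NA in the outcome triggers the new imputation path
fit <- map2stan(
    alist(
        y ~ dnorm( mu , sigma ) ,
        mu <- a ,
        a ~ dnorm( 0 , 10 ) ,
        sigma ~ dcauchy( 0 , 2 )
    ) ,
    data=d , rng_seed=1959 )    # seed passed through to stan() for reproducible chains
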
107 changes: 107 additions & 0 deletions R/postcheck.r
@@ -0,0 +1,107 @@
# graphical posterior validation checks for map and map2stan

postcheck <- function( fit , prob=0.89 , window=20 , n=1000 , col=rangi2 , ... ) {

undot <- function( astring ) {
astring <- gsub( "." , "_" , astring , fixed=TRUE )
astring
}

pred <- link(fit,n=n)
sims <- sim(fit,n=n)

if ( class(pred)=="list" )
if ( length(pred)>1 ) pred <- pred[[1]]

# get outcome variable
lik <- flist_untag(fit@formula)[[1]]
dname <- as.character(lik[[3]][[1]])
outcome <- as.character(lik[[2]])
if ( class(fit)=="map2stan" ) outcome <- undot(outcome)
y <- fit@data[[ outcome ]]

# compute posterior predictions for each case

mu <- apply( pred , 2 , mean )
mu.PI <- apply( pred , 2 , PI , prob=prob )
y.PI <- apply( sims , 2 , PI , prob=prob )

# figure out paging
if ( length(window)==1 ) {
window <- min(window,length(mu))
num_pages <- ceiling( length(mu)/window )
cases_per_page <- window
}

# display
ny <- length(fit@data[[ outcome ]])
ymin <- min(c(as.numeric(y.PI),mu,y))
ymax <- max(c(as.numeric(y.PI),mu,y))

# check for aggregated binomial context
mumax <- max(c(as.numeric(mu.PI)))
if ( ymax > 1 & mumax <= 1 & dname %in% c("dbinom","dbetabinom") ) {
# probably aggregated binomial
size_var <- as.character(lik[[3]][[2]])
size_var <- fit@data[[ size_var ]]
for ( i in 1:ny ) {
y.PI[,i] <- y.PI[,i]/size_var[i]
y[i] <- y[i]/size_var[i]
}
ymin <- 0
ymax <- 1
}
if ( dname=="dordlogit" ) {
# put mu and mu.PI on outcome scale
ymin <- 1
nlevels <- dim(pred)[3]
mu <- sapply( 1:dim(pred)[2] ,
function(i) {
temp <- t(pred[,i,])*1:7
mean(apply(temp,2,sum))
} )
mu.PI <- sapply( 1:dim(pred)[2] ,
function(i) {
temp <- t(pred[,i,])*1:7
PI(apply(temp,2,sum),prob)
} )
}

start <- 1
end <- cases_per_page

for ( i in 1:num_pages ) {

end <- min( ny , end )
window <- start:end

set_nice_margins()
plot( y[window] , xlab="case" , ylab=outcome , col=col , pch=16 , ylim=c( ymin , ymax ) , xaxt="n" , ... )
axis( 1 , at=1:length(window) , labels=window )

points( 1:length(window) , mu[window] )
for ( x in 1:length(window) ) {
lines( c(x,x) , mu.PI[,window[x]] )
points( c(x,x) , y.PI[,window[x]] , cex=0.7 , pch=3 )
}
mtext( paste("Posterior validation check") )

if ( num_pages > 1 ) {
ask_old <- devAskNewPage(ask = TRUE)
on.exit(devAskNewPage(ask = ask_old), add = TRUE)
}

start <- end + 1
end <- end + cases_per_page

}

# invisible result
result <- list(
mean=mu,
PI=mu.PI,
outPI=y.PI
)
invisible( result )
}
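
A usage sketch for the new postcheck() function, assuming fit is a fitted map
or map2stan model:

postcheck( fit )                          # 89% intervals, 20 cases per page (defaults)
postcheck( fit , window=50 , prob=0.95 )  # 50 cases per page, 95% intervals
chk <- postcheck( fit )                   # invisibly returns list( mean , PI , outPI )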

6 changes: 6 additions & 0 deletions R/precis.r
@@ -135,6 +135,12 @@ precis <- function( model , depth=1 , pars , ci=TRUE , prob=0.89 , corr=FALSE ,
Rhat <- Rhat[ -which(names(Rhat)=="dev") ]
}
result <- cbind( result , n_eff , Rhat )

# check divergent iterations
nd <- divergent(model)
if ( nd > 0 & warn==TRUE ) {
warning( concat("There were ",nd," divergent iterations during sampling.\nCheck the chains (trace plots, n_eff, Rhat) carefully to ensure they are valid.") )
}
}
if ( corr==TRUE ) {
result <- cbind( result , Rho )