v1.51 - features and maintenance
- fixed some non-ASCII chars in Fieldgoals.csv
- updates to various man pages
- added some new man pages (e.g. map2stan-class, rethinking)
- removed glmm() and bglmm()
- NAMESPACE gardening continues; now importing more explicitly from dependencies
- ensemble() now understands a replace list, like link()
- some source file renaming for sanity
- fixed resample() so it uses all samples from all chains to compute the new WAIC
- map2stan(): improved handling of variable names with dots ('.') in them; now warns
- map2stan(): notifies user when imputing from NAs
- various plotting functions that call set_nice_margins() now respect mfrow/mfcol settings better
- sim() now understands a replace list, like link()
- removed various lingering require() calls to dependencies that are now in NAMESPACE
Richard McElreath committed Mar 14, 2015
1 parent 3893c91 commit ec401c7
Showing 18 changed files with 280 additions and 201 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,11 +1,11 @@
Package: rethinking
Type: Package
Title: Statistical Rethinking book package
Version: 1.50
Date: 2015-03-07
Version: 1.51
Date: 2015-03-14
Author: Richard McElreath
Maintainer: Richard McElreath <[email protected]>
Imports: coda, MASS
Extends: rstan
Depends: rstan, parallel
Description: Utilities for fitting and comparing models
License: GPL (>= 3)
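For context (not shown in the diff): because rstan and parallel are listed under Depends rather than Imports, they are attached to the search path whenever rethinking itself is attached. A minimal, illustrative check in an interactive session, assuming the package and its dependencies are installed:

# Depends packages are attached along with the package itself.
library(rethinking)
"package:rstan" %in% search()      # TRUE: rstan is attached too
"package:parallel" %in% search()   # TRUE: and so is parallel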
8 changes: 7 additions & 1 deletion NAMESPACE
@@ -2,4 +2,10 @@ exportPattern("^[^x]")
importFrom(coda, HPDinterval)
importFrom(coda, as.mcmc)
importFrom(MASS, mvrnorm)
importClassesFrom(rstan)
importFrom(rstan, stan)
importFrom(rstan, extract)
importFrom(rstan, sflist2stanfit)
importFrom(rstan, summary)
importClassesFrom(rstan , stanfit)
importFrom(parallel, mclapply)
importFrom(parallel, parLapply)
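These explicit importFrom() directives are what make the runtime require() calls removed elsewhere in this commit unnecessary: package code can now call stan(), mclapply(), and the rest directly through the namespace. The user-level equivalent, shown here only as an illustration (not package code), is the :: operator:

# Explicit analogue of importFrom(parallel, mclapply); mc.cores > 1 needs a unix-alike.
res <- parallel::mclapply( 1:4 , function(i) i^2 , mc.cores=1 )
str(res)   # list of four squared values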
13 changes: 7 additions & 6 deletions R/ensemble.R
@@ -1,5 +1,5 @@
# build ensemble of samples using DIC/WAIC weights
ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 ) {
ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 , replace=list() ) {
# retrieve list of models
L <- list(...)
if ( is.list(L[[1]]) && length(L)==1 )
@@ -22,11 +22,11 @@ ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 ) {
# generate simulated predictions for each model
# then compose ensemble using weights
if ( missing(data) ) {
link.list <- lapply( L , function(m) link(m , n=n , refresh=refresh ) )
sim.list <- lapply( L , function(m) sim(m , n=n , refresh=refresh ) )
link.list <- lapply( L , function(m) link(m , n=n , refresh=refresh , replace=replace ) )
sim.list <- lapply( L , function(m) sim(m , n=n , refresh=refresh , replace=replace ) )
} else {
link.list <- lapply( L , function(m) link(m , data=data , n=n , refresh=refresh ) )
sim.list <- lapply( L , function(m) sim(m , data=data , n=n , refresh=refresh ) )
link.list <- lapply( L , function(m) link(m , data=data , n=n , refresh=refresh , replace=replace ) )
sim.list <- lapply( L , function(m) sim(m , data=data , n=n , refresh=refresh , replace=replace ) )
}
#print(str(sim.list))

@@ -37,7 +37,8 @@ ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 ) {
if ( length(L)>1 )
for ( i in 2:length(idx) ) {
idx_start[i] <- min( idx_start[i-1] + idx[i-1] , n )
idx_end[i] <- min( idx_end[i-1] + idx[i] , n )
idx_end[i] <- min( idx_start[i] + idx[i] - 1 , n )
if ( i==length(idx) ) idx_end[i] <- n
}
#print(idx)
#print(idx_start)
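A hedged usage sketch of the updated ensemble(); the models, priors, and prediction grid below are illustrative and not taken from the package:

library(rethinking)
data(cars)   # base R: stopping distance vs speed
m1 <- map(
    alist(
        dist ~ dnorm( mu , sigma ) ,
        mu <- a ,
        a ~ dnorm( 40 , 20 ) ,
        sigma ~ dunif( 0 , 50 )
    ) , data=cars )
m2 <- map(
    alist(
        dist ~ dnorm( mu , sigma ) ,
        mu <- a + b*speed ,
        a ~ dnorm( 0 , 50 ) ,
        b ~ dnorm( 0 , 10 ) ,
        sigma ~ dunif( 0 , 50 )
    ) , data=cars )
d_pred <- data.frame( speed=10:20 )
pred <- ensemble( m1 , m2 , data=d_pred , n=1000 )   # WAIC-weighted mix of link() and sim() output
str(pred)
# The new replace argument is simply forwarded to link() and sim(); a hypothetical
# call would be ensemble( m1 , m2 , data=d_pred , replace=list( a=rep(0,1000) ) ),
# overriding the posterior samples for 'a'.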
File renamed without changes.
4 changes: 2 additions & 2 deletions R/map2stan_predict.r → R/link-map2stan.r
@@ -186,7 +186,7 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR
if ( lm_lik[k] == "ordered_logistic" ) {
# need vector of probabilities as result
cuts <- init[['cutpoints']]
v <- predict.ordlogit( r , cuts )
v <- predict_ordlogit( r , cuts )
#r <- t(v)
r <- v
}
@@ -215,7 +215,7 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR
}
)

predict.ordlogit <- function( phi , a ) {
predict_ordlogit <- function( phi , a ) {
K <- 1:(length(a)+1)
a <- c( as.numeric(a) , Inf )
p <- sapply( K , function(k) logistic(a[k]-phi) )
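The renamed helper turns a linear-model value and a set of cutpoints into ordered-logit category probabilities. A standalone sketch of the same arithmetic, with made-up values (logistic is the inverse logit, as in the package):

logistic <- function(x) 1 / (1 + exp(-x))        # inverse logit
cutpoints <- c( -1 , 0 , 1.5 )                   # hypothetical cutpoints: K = 4 categories
phi <- 0.3                                       # hypothetical linear-model value
cum_p <- logistic( c( cutpoints , Inf ) - phi )  # cumulative Pr(y <= k); last entry is 1
p <- c( cum_p[1] , diff(cum_p) )                 # per-category probabilities
round( p , 3 ) ; sum(p)                          # the probabilities sum to 1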
6 changes: 3 additions & 3 deletions R/map2stan-class.r
@@ -150,7 +150,7 @@ resample <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC
fit <- stan( fit=object@stanfit , data=data , init=init , pars=object@pars , iter=iter , warmup=warmup , chains=chains , ... )
} else {
init[[1]] <- object@start
require(parallel)
#require(parallel)
sys <- .Platform$OS.type
if ( missing(rng_seed) ) rng_seed <- sample( 1:1e5 , 1 )
if ( sys=='unix' ) {
@@ -170,7 +170,7 @@ resample <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC
env0 <- list( fit=fit, data=data, pars=pars, rng_seed=rng_seed, iter=iter, warmup=warmup )
clusterExport(cl = CL, c("iter","warmup","data", "fit", "pars", "rng_seed"), as.environment(env0))
sflist <- parLapply(CL, 1:chains, fun = function(cid) {
require(rstan)
#require(rstan)
stan(fit = fit, data = data, pars = pars, chains = 1,
iter = iter, warmup = warmup, seed = rng_seed,
chain_id = cid)
@@ -218,7 +218,7 @@ resample <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC
#WAIC
if ( WAIC==TRUE ) {
attr(result,"WAIC") <- NULL
waic_calc <- try(WAIC(result))
waic_calc <- try(WAIC(result,n=0))
attr(result,"WAIC") <- waic_calc
} else {
# clear out any old WAIC calculation
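A hedged usage sketch of resample() after this change; the model is the package's chimpanzees example and is illustrative, not part of the diff. The WAIC attached to the result is now computed from the pooled samples of all chains:

library(rethinking)
data(chimpanzees)
d <- list( pulled_left = chimpanzees$pulled_left ,
           prosoc_left = chimpanzees$prosoc_left )
m <- map2stan(
    alist(
        pulled_left ~ dbinom( 1 , p ) ,
        logit(p) <- a + bp*prosoc_left ,
        a ~ dnorm( 0 , 10 ) ,
        bp ~ dnorm( 0 , 10 )
    ) ,
    data=d , iter=1000 , chains=1 )
m4 <- resample( m , chains=4 , iter=2000 , warmup=1000 )   # reuses the compiled model
WAIC(m4)   # recomputed from all samples of all 4 chains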
20 changes: 17 additions & 3 deletions R/map2stan.r
@@ -6,14 +6,16 @@
# templates map R density functions onto Stan functions

# to-do:
# (*) different chains get different random start values
# (*) support cores argument -> compile -> resample
# (*) need to do p*theta and (1-p)*theta multiplication outside likelihood in betabinomial (similar story for gammapoisson) --- or is there an operator for pairwise vector multiplication?
# (-) handle improper input more gracefully
# (-) add "as is" formula type, with quoted text on RHS to dump into Stan code

##################
# map2stan itself

map2stan <- function( flist , data , start , pars , constraints=list() , types=list() , sample=TRUE , iter=2000 , chains=1 , debug=FALSE , verbose=FALSE , WAIC=TRUE , ... ) {
map2stan <- function( flist , data , start , pars , constraints=list() , types=list() , sample=TRUE , iter=2000 , chains=1 , debug=FALSE , verbose=FALSE , WAIC=TRUE , cores=1 , ... ) {

########################################
# empty Stan code
@@ -397,6 +399,15 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
impute_bank <- list()
d <- as.list(data)

# undot all the variable names
for ( i in 1:length(d) ) {
raw_name <- names(d)[i]
new_name <- undot(raw_name)
if ( raw_name != new_name )
warning( concat("Renaming variable '",raw_name,"' to '",new_name,"' internally.\nYou should rename the variable to remove all dots '.'") )
names(d)[i] <- new_name
}

##################################
# check for previous fit object
# if found, build again to get data right, but use compiled model later
@@ -490,7 +501,7 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l

# check for missing values
# if found, indicates predictor variable for imputation
if ( any(is.na(d[[lik$outcome]])) ) {
if ( any(is.na(d[[undot(lik$outcome)]])) ) {
# overwrite with top 'N' name
fp[['used_predictors']][[undot(lik$outcome)]] <- list( var=undot(lik$outcome) , N='N' , type=lik$out_type )

@@ -510,7 +521,10 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
# flag as used
fp[['used_predictors']][[missingness_name]] <- list( var=missingness_name , N=N_missing_name , type="int" )
# replace variable with missing values
d[[lik$outcome]] <- var_temp
d[[undot(lik$outcome)]] <- var_temp

# message to user
message(concat("Imputing ",length(var_missingness)," missing values (NA) in variable '",undot(lik$outcome),"'."))
}

# check for binomial size variable and mark used
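Two user-visible behaviors are added in this hunk: variable names containing dots are renamed internally (with a warning suggesting you rename them yourself), and a message reports how many NAs in a variable are being imputed. A small stand-in sketch of what the renaming amounts to; undot() is internal to the package, and the underscore replacement here is an assumption, since the diff does not show its body:

undot_sketch <- function(name) gsub( "." , "_" , name , fixed=TRUE )   # hypothetical stand-in
undot_sketch( "kcal.per.g" )   # "kcal_per_g" -- better to rename the variable yourself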
9 changes: 6 additions & 3 deletions R/plotting.r
@@ -1,9 +1,12 @@
# plotting

set_nice_margins <- function() {
mfrow_old <- par("mfrow")
on.exit(par(mfrow = mfrow_old))
par(mgp = c(1.5, 0.5, 0), mar = c(2.5, 2.5, 2, 1) + 0.1, tck = -0.02)
par_mf <- par("mfrow","mfcol")
if ( all(unlist(par_mf)==1) ) {
#par_old <- par(no.readonly = TRUE)
#on.exit(par(par_old))
par(mgp = c(1.5, 0.5, 0), mar = c(2.5, 2.5, 2, 1) + 0.1, tck = -0.02)
}
}

dens <- function( x , adj=0.5 , norm.comp=FALSE , main="" , show.HPDI=FALSE , show.zero=FALSE , rm.na=TRUE , add=FALSE , ...) {
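With this guard, set_nice_margins() only adjusts margins while the device is still a single-panel layout, so a layout the user has already set is left alone. A quick usage sketch with simulated data:

library(rethinking)
par( mfrow=c(1,2) )                              # user layout is now respected
dens( rnorm(1e4) , main="standard normal" )      # panel 1
dens( rexp(1e4) , adj=1 , main="exponential" )   # panel 2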
11 changes: 6 additions & 5 deletions R/sim.r
@@ -7,7 +7,7 @@ function( fit , data , n=1000 , ... ) {
)

setMethod("sim", "map",
function( fit , data , n=1000 , post , ll=FALSE , refresh=0.1 , ... ) {
function( fit , data , n=1000 , post , ll=FALSE , refresh=0.1 , replace=list() , ... ) {
# when ll=FALSE, simulates sampling, one sample for each sample in posterior
# when ll=TRUE, computes loglik of each observation, for each sample in posterior

@@ -32,7 +32,7 @@ function( fit , data , n=1000 , post , ll=FALSE , refresh=0.1 , ... ) {
# get linear model values from link
# use our posterior samples, so later parameters have right correlation structure
# don't flatten result, so we end up with a named list, even if only one element
pred <- link( fit , data=data , n=n , post=post , flatten=FALSE , refresh=refresh , ... )
pred <- link( fit , data=data , n=n , post=post , flatten=FALSE , refresh=refresh , replace=replace , ... )

# extract likelihood, assuming it is first element of formula
lik <- flist_untag(fit@formula)[[1]]
@@ -159,7 +159,7 @@ function( fit , data , n=1000 , post , ll=FALSE , refresh=0.1 , ... ) {
)

setMethod("sim", "map2stan",
function( fit , data , n=0 , post , ... ) {
function( fit , data , n , post , refresh=0.1 , replace=list() , ... ) {

########################################
# check arguments
@@ -174,12 +174,13 @@ function( fit , data , n=0 , post , ... ) {
if ( missing(post) )
post <- extract.samples(fit,n=n)

n <- dim(post[[1]])[1] # in case n=0 was passed
if ( missing(n) )
n <- dim(post[[1]])[1] # in case n=0 was passed

# get linear model values from link
# use our posterior samples, so later parameters have right correlation structure
# don't flatten result, so we end up with a named list, even if only one element
pred <- link( fit , data=data , n=n , post=post , flatten=FALSE , ... )
pred <- link( fit , data=data , n=n , post=post , flatten=FALSE , refresh=refresh , replace=replace , ... )

# extract likelihood, assuming it is first element of formula
lik <- flist_untag(fit@formula)[[1]]
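A hedged usage sketch of sim() for a map fit with the forwarded arguments; the model and prediction grid are illustrative:

library(rethinking)
data(cars)
m <- map(
    alist(
        dist ~ dnorm( mu , sigma ) ,
        mu <- a + b*speed ,
        a ~ dnorm( 0 , 50 ) ,
        b ~ dnorm( 0 , 10 ) ,
        sigma ~ dunif( 0 , 50 )
    ) , data=cars )
d_pred <- data.frame( speed=10:20 )
s <- sim( m , data=d_pred , n=1000 )   # 1000 simulated stopping distances per speed value
apply( s , 2 , mean )                  # posterior-predictive means
# As with ensemble(), replace is forwarded to link(); a hypothetical call would be
# sim( m , data=d_pred , replace=list( b=rep(0,1000) ) ) to pin the slope samples at zero.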
32 changes: 16 additions & 16 deletions R/utilities.r
@@ -59,21 +59,21 @@ progbar <- function( current , min=0 , max=100 , starttime , update.interval=100
}

# convenience interface to glmer (lme4) that always uses REML=FALSE
glmm <- function( ... , family , REML=FALSE ) {
require(lme4)
if ( missing(family) ) {
result <- lmer( ... , REML=REML )
} else {
result <- glmer( ... , family=family )
}
result
}

bglmm <- function( ... , REML=FALSE ) {
require(lme4)
require(blme)
blmer( ... , REML=REML )
}
#glmm <- function( ... , family , REML=FALSE ) {
# require(lme4)
# if ( missing(family) ) {
# result <- lmer( ... , REML=REML )
# } else {
# result <- glmer( ... , family=family )
# }
# result
#}

#bglmm <- function( ... , REML=FALSE ) {
# require(lme4)
# require(blme)
# blmer( ... , REML=REML )
#}

covmat <- function( m , digits=4 ) {
# upper diag is covariances
@@ -171,7 +171,7 @@ replicate2 <- function (n, expr, interval=0.1, simplify = "array") {

# multi-core replicate
mcreplicate <- function (n, expr, refresh = 0.1, mc.cores=2 ) {
require(parallel)
#require(parallel)
show_progress <- function(i) {
intervaln <- floor(n * refresh)
if (floor(i/intervaln) == i/intervaln) {
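The require(parallel) inside mcreplicate() is now redundant because mclapply comes in through NAMESPACE. A short usage sketch; the simulation itself is illustrative:

library(rethinking)
means <- mcreplicate( 100 , mean( rnorm(50) ) , mc.cores=2 )   # forks on unix-alikes; use mc.cores=1 on Windows
str(means)   # 100 replicated sample means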