Skip to content

Commit

Permalink
Merge pull request #118 from PLN-team/zipln
Browse files Browse the repository at this point in the history
Continuing development of ZIPLN models
  • Loading branch information
jchiquet authored Mar 5, 2024
2 parents afad556 + 25d2444 commit 9c0b27f
Show file tree
Hide file tree
Showing 47 changed files with 2,050 additions and 857 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@ jobs:
matrix:
config:
- {os: macOS-latest, r: 'release'}
- {os: macOS-latest, r: 'oldrel-1'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}
- {os: windows-latest, r: 'oldrel-1'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ BugReports: https://github.com/pln-team/PLNmodels/issues
License: GPL (>= 3)
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Depends: R (>= 3.4)
LazyData: true
biocViews:
Expand Down Expand Up @@ -90,6 +90,7 @@ Collate:
'ZIPLNfit-class.R'
'ZIPLN.R'
'ZIPLNfit-S3methods.R'
'ZIPLNnetwork.R'
'barents.R'
'import_utils.R'
'mollusk.R'
Expand Down
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,17 @@ S3method(coef,ZIPLNfit)
S3method(fitted,PLNfit)
S3method(fitted,PLNmixturefit)
S3method(fitted,ZIPLNfit)
S3method(getBestModel,Networkfamily)
S3method(getBestModel,PLNPCAfamily)
S3method(getBestModel,PLNmixturefamily)
S3method(getBestModel,PLNnetworkfamily)
S3method(getBestModel,ZIPLNnetworkfamily)
S3method(getModel,Networkfamily)
S3method(getModel,PLNPCAfamily)
S3method(getModel,PLNmixturefamily)
S3method(getModel,PLNnetworkfamily)
S3method(getModel,ZIPLNnetworkfamily)
S3method(plot,Networkfamily)
S3method(plot,PLNLDAfit)
S3method(plot,PLNPCAfamily)
S3method(plot,PLNPCAfit)
Expand All @@ -21,6 +26,8 @@ S3method(plot,PLNmixturefamily)
S3method(plot,PLNmixturefit)
S3method(plot,PLNnetworkfamily)
S3method(plot,PLNnetworkfit)
S3method(plot,ZIPLNfit_sparse)
S3method(plot,ZIPLNnetworkfamily)
S3method(predict,PLNLDAfit)
S3method(predict,PLNfit)
S3method(predict,PLNmixturefit)
Expand Down Expand Up @@ -48,6 +55,8 @@ export(PLNnetwork)
export(PLNnetwork_param)
export(ZIPLN)
export(ZIPLN_param)
export(ZIPLNnetwork)
export(ZIPLNnetwork_param)
export(coefficient_path)
export(compute_PLN_starting_point)
export(compute_offset)
Expand Down Expand Up @@ -77,6 +86,7 @@ importFrom(corrplot,corrplot)
importFrom(future.apply,future_lapply)
importFrom(future.apply,future_sapply)
importFrom(glassoFast,glassoFast)
importFrom(grDevices,rgb)
importFrom(grid,nullGrob)
importFrom(grid,textGrob)
importFrom(gridExtra,arrangeGrob)
Expand Down
8 changes: 6 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Current (2024-01-23)
# PLNmodels 1.2.0 (2024-02-24)

* Addition of ZIPLN() and ZIPLNfit-class to allow for zero-inflation in the (for now) standard PLN model (merge PR #116)
* new feature: ZIPLN (PLN with zero inflation)
* ZIPLN() and ZIPLNfit-class to allow for zero-inflation in the standard PLN model (merge PR #116)
* ZIPLNnetwork() and ZIPLNfit_sparse-class to allow for zero-inflation in the PLNnetwork model (merge PR #118)
* Code factorization between PLNnetwork and ZIPLNnetwork (and associated classes)
* fix inconsistency between fitted and predict (merge PR #115)

# PLNmodels 1.1.0 (2024-01-08)

Expand Down
19 changes: 10 additions & 9 deletions R/PLNnetwork.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
#' Poisson lognormal model towards sparse network inference
#' Sparse Poisson lognormal model for network inference
#'
#' Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm. Use the (g)lm syntax for model specification (covariates, offsets).
#' Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model
#' using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values.
#' Use the (g)lm syntax to specify the model (including covariates and offsets).
#'
#' @param formula an object of class "formula": a symbolic description of the model to be fitted.
#' @param data an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which lm is called.
#' @param subset an optional vector specifying a subset of observations to be used in the fitting process.
#' @param weights an optional vector of observation weights to be used in the fitting process.
#' @param penalties an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See \code{PLNnetwork_param()} for additional tuning of the penalty.
#' @param penalties an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See `PLNnetwork_param()` for additional tuning of the penalty.
#' @param control a list-like structure for controlling the optimization, with default generated by [PLNnetwork_param()]. See the corresponding documentation for details;
#'
#' @return an R6 object with class [`PLNnetworkfamily`], which contains
#' a collection of models with class [`PLNnetworkfit`]
#'
#' @rdname PLNnetwork
#' @examples
#' data(trichoptera)
#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate)
Expand All @@ -28,16 +29,16 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control
replace 'list(my_arg = xx)' by PLN_param(my_arg = xx) and see the documentation of PLNnetwork_param().")

## extract the data matrices and weights
args <- extract_model(match.call(expand.dots = FALSE), parent.frame())
data_ <- extract_model(match.call(expand.dots = FALSE), parent.frame())

## Instantiate the collection of models
if (control$trace > 0) cat("\n Initialization...")
myPLN <- PLNnetworkfamily$new(penalties, args$Y, args$X, args$O, args$w, args$formula, control)
myPLN <- PLNnetworkfamily$new(penalties, data_, control)

## Optimization
if (control$trace > 0) cat("\n Adjusting", length(myPLN$penalties), "PLN with sparse inverse covariance estimation\n")
if (control$trace) cat("\tJoint optimization alternating gradient descent and graphical-lasso\n")
myPLN$optimize(control$config_optim)
myPLN$optimize(data_, control$config_optim)

## Post-treatments
if (control$trace > 0) cat("\n Post-treatments")
Expand All @@ -59,15 +60,15 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control
#' @param n_penalties an integer that specifies the number of values for the penalty grid when internally generated. Ignored when penalties is non `NULL`
#' @param min_ratio the penalty grid ranges from the minimal value that produces a sparse to this value multiplied by `min_ratio`. Default is 0.1.
#' @param penalize_diagonal boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE}
#' @param penalty_weights either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.
#' @param penalty_weights either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.
#' @param inception Set up the parameters initialization: by default, the model is initialized with a multivariate linear model applied on
#' log-transformed data, and with the same formula as the one provided by the user. However, the user can provide a PLNfit (typically obtained from a previous fit),
#' which sometimes speeds up the inference.
#'
#' @return list of parameters configuring the fit.
#' @inherit PLN_param details
#' @details See [PLN_param()] for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer:
#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6
#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than ftol multiplied by the absolute value of the parameter. Default is 1e-6
#' * "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50
#'
#' @seealso [PLN_param()]
Expand Down
79 changes: 49 additions & 30 deletions R/PLNnetworkfamily-S3methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,39 @@


## Auxiliary functions to check the given class of an objet
isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamily")}
isNetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")}

#' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`])
#'
#' @name plot.PLNnetworkfamily
#' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (either [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`])
#'
#' @inheritParams plot.PLNfamily
#' @inherit plot.PLNfamily return details
#'
#' @param x an R6 object with class [`PLNnetworkfamily`]
#' @param x an R6 object with class [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`]
#' @param type a character, either "criteria", "stability" or "diagnostic" for the type of plot.
#' @param criteria vector of characters. The criteria to plot in c("loglik", "BIC", "ICL", "R_squared", "EBIC", "pen_loglik").
#' Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only relevant when `type = "criteria"`.
#' @param criteria Vector of criteria to plot, to be selected among "loglik" (log-likelihood),
#' "BIC", "ICL", "R_squared", "EBIC" and "pen_loglik" (penalized log-likelihood).
#' Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only used when `type = "criteria"`.
#' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`.
#' @param stability scalar: the targeted level of stability in stability plot. Default is .9.
#'
#'
#' @examples
#' data(trichoptera)
#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate)
#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera)
#' \dontrun{
#' plot(fits)
#' }
#' @return Produces either a diagnostic plot (with \code{type = 'diagnostic'}), a stability plot
#' (with \code{type = 'stability'}) or the evolution of the criteria of the different models considered
#' (with \code{type = 'criteria'}, the default).
#' @return Produces either a diagnostic plot (with `type = 'diagnostic'`), a stability plot
#' (with `type = 'stability'`) or the evolution of the criteria of the different models considered
#' (with `type = 'criteria'`, the default).
#' @export
plot.PLNnetworkfamily <-
function(x,
plot.Networkfamily <- function(x,
type = c("criteria", "stability", "diagnostic"),
criteria = c("loglik", "pen_loglik", "BIC", "EBIC"),
reverse = FALSE,
log.x = TRUE,
stability = 0.9, ...) {
stopifnot(isPLNnetworkfamily(x))
stopifnot(isNetworkfamily(x))
type <- match.arg(type)
if (type == "criteria")
p <- x$plot(criteria, reverse)
Expand All @@ -53,27 +50,50 @@ plot.PLNnetworkfamily <-
p
}

#' @describeIn getModel Model extraction for [`PLNnetworkfamily`]
#' @describeIn plot.Networkfamily Display various outputs associated with a collection of network fits
#' @export
getModel.PLNnetworkfamily <- function(Robject, var, index = NULL) {
stopifnot(isPLNnetworkfamily(Robject))
plot.PLNnetworkfamily <- plot.Networkfamily

#' @describeIn plot.Networkfamily Display various outputs associated with a collection of network fits
#' @export
plot.ZIPLNnetworkfamily <- plot.Networkfamily

#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`]
#' @export
getModel.Networkfamily <- function(Robject, var, index = NULL) {
stopifnot(isNetworkfamily(Robject))
Robject$getModel(var, index)
}

#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`]
#' @describeIn getModel Model extraction for [`PLNnetworkfamily`]
#' @export
getModel.PLNnetworkfamily <- getModel.Networkfamily

#' @describeIn getModel Model extraction for [`ZIPLNnetworkfamily`]
#' @export
getBestModel.PLNnetworkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) {
stopifnot(isPLNnetworkfamily(Robject))
getModel.ZIPLNnetworkfamily <- getModel.Networkfamily

#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`]
#' @export
getBestModel.Networkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) {
stopifnot(isNetworkfamily(Robject))
stability <- list(...)[["stability"]]
if (is.null(stability)) stability <- 0.9
Robject$getBestModel(match.arg(crit), stability)
}

#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`]
#' @export
getBestModel.PLNnetworkfamily <- getBestModel.Networkfamily

#' @describeIn getBestModel Model extraction for [`ZIPLNnetworkfamily`]
#' @export
getBestModel.ZIPLNnetworkfamily <- getBestModel.Networkfamily

#' Extract the regularization path of a PLNnetwork fit
#'
#' @name coefficient_path
#' @param Robject an object with class [`PLNnetworkfamily`], i.e. an output from [PLNnetwork()]
#' @param Robject an object with class [`Networkfamily`], i.e. an output from [PLNnetwork()]
#' @param precision a logical, should the coefficients of the precision matrix Omega or the covariance matrix Sigma be sent back. Default is `TRUE`.
#' @param corr a logical, should the correlation (partial in case `precision = TRUE`) be sent back. Default is `TRUE`.
#'
Expand All @@ -85,7 +105,7 @@ getBestModel.PLNnetworkfamily <- function(Robject, crit = c("BIC", "EBIC", "StAR
#' head(coefficient_path(fits))
#' @export
coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) {
stopifnot(isPLNnetworkfamily(Robject))
stopifnot(isNetworkfamily(Robject))
Robject$coefficient_path(precision, corr)
}

Expand All @@ -95,12 +115,12 @@ coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) {
#'
#' @description This function computes the StARS stability criteria over a path of penalties. If a path has already been computed, the functions stops with a message unless `force = TRUE` has been specified.
#'
#' @param Robject an object with class [`PLNnetworkfamily`], i.e. an output from [PLNnetwork()]
#' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations.
#' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details.
#' @param Robject an object with class [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`], i.e. an output from [PLNnetwork()] or [ZIPLNnetwork()]
#' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations.
#' @param control a list controlling the main optimization process in each call to [PLNnetwork()] or [ZIPLNnetwork()]. See [PLN_param()] or [ZIPLN_param()] for details.
#' @param force force computation of the stability path, even if a previous one has been detected.
#'
#' @return the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields `stability_path` of the initial Robject with class [`PLNnetworkfamily`]
#' @return the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields `stability_path` of the initial Robject with class [`Networkfamily`]
#' @examples
#' data(trichoptera)
#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate)
Expand All @@ -112,16 +132,15 @@ coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) {
#' }
#' @export
stability_selection <- function(Robject, subsamples = NULL, control = PLNnetwork_param(), force = FALSE) {
stopifnot(isPLNnetworkfamily(Robject))
stopifnot(isNetworkfamily(Robject))
if (inherits(Robject, "ZIPLNnetworkfamily")) control <- ZIPLNnetwork_param()
if (force || anyNA(Robject$stability)) {
Robject$stability_selection(subsamples, control)
} else {
message("Previous stability selection detected. Use \"force = TRUE\" to recompute it.")
}
}



#' Extract edge selection frequency in bootstrap subsamples
#'
#' @description Extracts edge selection frequency in networks reconstructed from bootstrap subsamples
Expand Down Expand Up @@ -162,7 +181,7 @@ extract_probs <- function(Robject, penalty = NULL, index = NULL,
crit = c("StARS", "BIC", "EBIC"),
format = c("matrix", "vector"),
tol = 1e-5) {
stopifnot(isPLNnetworkfamily(Robject))
stopifnot(isNetworkfamily(Robject))
## Check if stability selection has been performed
stab_path <- Robject$stability_path
if (is.null(stab_path)) {
Expand Down
Loading

0 comments on commit 9c0b27f

Please sign in to comment.