% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/boost_tree.R
\name{xgb_train}
\alias{xgb_train}
\alias{xgb_predict}
\title{Boosted trees via xgboost}
\usage{
xgb_train(
x,
y,
weights = NULL,
max_depth = 6,
nrounds = 15,
eta = 0.3,
colsample_bynode = NULL,
colsample_bytree = NULL,
min_child_weight = 1,
gamma = 0,
subsample = 1,
validation = 0,
early_stop = NULL,
counts = TRUE,
event_level = c("first", "second"),
...
)
xgb_predict(object, new_data, ...)
}
\arguments{
\item{x}{A data frame or matrix of predictors.}
\item{y}{A vector (factor or numeric) or matrix (numeric) of outcome data.}
\item{weights}{An optional numeric vector of case weights with one value per
row of \code{x}.}
\item{max_depth}{An integer for the maximum depth of the tree.}
\item{nrounds}{An integer for the number of boosting iterations.}
\item{eta}{A numeric value between zero and one to control the learning rate.}
\item{colsample_bynode}{Subsampling proportion of columns for each node
within each tree. See the \code{counts} argument below. The default uses all
columns.}
\item{colsample_bytree}{Subsampling proportion of columns for each tree.
See the \code{counts} argument below. The default uses all columns.}
\item{min_child_weight}{A numeric value for the minimum sum of instance
weights needed in a child to continue to split.}
\item{gamma}{A number for the minimum loss reduction required to make a
further partition on a leaf node of the tree.}
\item{subsample}{Subsampling proportion of rows. By default, all of the
training data are used.}
\item{validation}{The \emph{proportion} of the data that are used for performance
assessment and potential early stopping.}
\item{early_stop}{An integer or \code{NULL}. If not \code{NULL}, it is the number of
training iterations without improvement before stopping. If \code{validation} is
used, performance is based on the validation set; otherwise, the training set
is used.}
\item{counts}{A logical. If \code{FALSE}, \code{colsample_bynode} and
\code{colsample_bytree} are both assumed to be \emph{proportions} of the number
of columns (instead of counts of columns).}
\item{event_level}{For binary classification, this is a single string of either
\code{"first"} or \code{"second"}, specifying which level of the outcome should
be considered the "event".}
\item{...}{Other options to pass to \code{xgb.train()} or xgboost's method for \code{predict()}.}
\item{new_data}{A rectangular data object, such as a data frame.}
}
\value{
A fitted \code{xgboost} object.
}
\description{
\code{xgb_train()} and \code{xgb_predict()} are wrappers for \code{xgboost} tree-based
models where all of the model arguments are in the main function.
}
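\examples{
# A minimal sketch of calling these internal wrappers directly; the
# supported user-facing interface is parsnip::boost_tree() with the
# "xgboost" engine. Uses the mtcars data from base R, with column 1
# (mpg) as a numeric outcome.
\dontrun{
pred_mat <- as.matrix(mtcars[, -1])

fit <- xgb_train(
  x = pred_mat,
  y = mtcars$mpg,
  nrounds = 15,
  eta = 0.3
)

# With counts = FALSE, colsample_bytree is read as a proportion of
# columns rather than a count.
fit2 <- xgb_train(
  x = pred_mat,
  y = mtcars$mpg,
  nrounds = 15,
  colsample_bytree = 0.8,
  counts = FALSE
)

preds <- xgb_predict(fit, new_data = pred_mat)
}
}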
\keyword{internal}