% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/boost_tree.R
\name{xgb_train}
\alias{xgb_train}
\alias{xgb_predict}
\title{Boosted trees via xgboost}
\usage{
xgb_train(
x,
y,
weights = NULL,
max_depth = 6,
nrounds = 15,
eta = 0.3,
colsample_bynode = NULL,
colsample_bytree = NULL,
min_child_weight = 1,
gamma = 0,
subsample = 1,
validation = 0,
early_stop = NULL,
counts = TRUE,
event_level = c("first", "second"),
...
)
xgb_predict(object, new_data, ...)
}
\arguments{
\item{x}{A data frame or matrix of predictors.}
\item{y}{A vector (factor or numeric) or matrix (numeric) of outcome data.}
\item{weights}{An optional numeric vector of case weights with one value per
row of \code{x}.}
\item{max_depth}{An integer for the maximum depth of the tree.}
\item{nrounds}{An integer for the number of boosting iterations.}
\item{eta}{A numeric value between zero and one to control the learning rate.}
\item{colsample_bynode}{Subsampling proportion of columns for each node
within each tree. See the \code{counts} argument below. The default uses all
columns.}
\item{colsample_bytree}{Subsampling proportion of columns for each tree.
See the \code{counts} argument below. The default uses all columns.}
\item{min_child_weight}{A numeric value for the minimum sum of instance
weights needed in a child to continue to split.}
\item{gamma}{A number for the minimum loss reduction required to make a
further partition on a leaf node of the tree.}
\item{subsample}{Subsampling proportion of rows. By default, all of the
training data are used.}
\item{validation}{The \emph{proportion} of the data that are used for performance
assessment and potential early stopping.}
\item{early_stop}{An integer or \code{NULL}. If not \code{NULL}, it is the number of
training iterations without improvement before stopping. If \code{validation} is
used, performance is based on the validation set; otherwise, the training set
is used.}
\item{counts}{A logical. If \code{FALSE}, \code{colsample_bynode} and
\code{colsample_bytree} are both assumed to be \emph{proportions} of the number
of columns (instead of counts of columns).}
\item{event_level}{For binary classification, this is a single string of either
\code{"first"} or \code{"second"}, specifying which level of the outcome should
be considered the "event".}
\item{...}{Other options to pass to \code{xgb.train()} or xgboost's method for \code{predict()}.}
\item{new_data}{A rectangular data object, such as a data frame.}
}
\value{
A fitted \code{xgboost} object.
}
\description{
\code{xgb_train()} and \code{xgb_predict()} are wrappers for \code{xgboost} tree-based
models where all of the model arguments are in the main function.
}
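\examples{
# A minimal sketch of calling these internal wrappers directly; the
# supported user-facing interface is parsnip::boost_tree() with the
# "xgboost" engine. Uses the mtcars data from base R, with column 1
# (mpg) as a numeric outcome.
\dontrun{
pred_mat <- as.matrix(mtcars[, -1])

fit <- xgb_train(
  x = pred_mat,
  y = mtcars$mpg,
  nrounds = 15,
  eta = 0.3
)

# With counts = FALSE, colsample_bytree is read as a proportion of
# columns rather than a count.
fit2 <- xgb_train(
  x = pred_mat,
  y = mtcars$mpg,
  nrounds = 15,
  colsample_bytree = 0.8,
  counts = FALSE
)

preds <- xgb_predict(fit, new_data = pred_mat)
}
}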
\keyword{internal}