% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidy_xgboost.R
\name{tidy_xgboost}
\alias{tidy_xgboost}
\title{tidy xgboost}
\usage{
tidy_xgboost(
  .data,
  formula,
  ...,
  mtry = 0.75,
  trees = 500L,
  min_n = 2L,
  tree_depth = 7L,
  learn_rate = 0.05,
  loss_reduction = 1,
  sample_size = 0.75,
  stop_iter = 15L,
  counts = FALSE,
  tree_method = c("auto", "exact", "approx", "hist", "gpu_hist"),
  monotone_constraints = 0L,
  num_parallel_tree = 1L,
  lambda = 0.5,
  alpha = 0.1,
  scale_pos_weight = 1,
  verbosity = 0L,
  validate = TRUE,
  booster = c("gbtree", "gblinear")
)
}
\arguments{
\item{.data}{dataframe}

\item{formula}{formula}

\item{...}{additional parameters to be passed to  \code{\link[parsnip]{set_engine}}}

\item{mtry}{# Randomly Selected Predictors; defaults to .75; (xgboost: colsample_bynode) (type: numeric, range 0 - 1) (or type: integer if \code{counts = TRUE})}

\item{trees}{# Trees (xgboost: nrounds) (type: integer, default: 500L)}

\item{min_n}{Minimal Node Size (xgboost: min_child_weight) (type: integer, default: 2L); [typical range: 2-10] Keep a small value for highly imbalanced class data, where leaf nodes can contain smaller groups. Otherwise increase the value to prevent overfitting to outliers.}

\item{tree_depth}{Tree Depth (xgboost: max_depth) (type: integer, default: 7L); Typical values: 3-10}

\item{learn_rate}{Learning Rate (xgboost: eta) (type: double, default: 0.05); Typical values: 0.01-0.3}

\item{loss_reduction}{Minimum Loss Reduction (xgboost: gamma) (type: double, default: 1.0);  range: 0 to Inf; typical value: 0 - 20 assuming low-mid tree depth}

\item{sample_size}{Proportion Observations Sampled (xgboost: subsample) (type: double, default: .75); Typical values: 0.5 - 1}

\item{stop_iter}{# Iterations Before Stopping (xgboost: early_stop) (type: integer, default: 15L) only enabled if validation set is provided}

\item{counts}{if \code{TRUE} specify \code{mtry} as an integer number of cols. Default \code{FALSE} to specify \code{mtry} as fraction of cols from 0 to 1}

\item{tree_method}{xgboost tree_method. default is \code{auto}. reference: \href{https://xgboost.readthedocs.io/en/stable/treemethod.html}{tree method docs}}

\item{monotone_constraints}{an integer vector with length of the predictor cols, of \code{-1, 1, 0} corresponding to decreasing, increasing, and no constraint respectively for the index of the predictor col. reference: \href{https://xgboost.readthedocs.io/en/stable/tutorials/monotonic.html}{monotonicity docs}.}

\item{num_parallel_tree}{should be set to the size of the forest being trained. default 1L}

\item{lambda}{[default=.5] L2 regularization term on weights. Increasing this value will make model more conservative.}

\item{alpha}{[default=.1] L1 regularization term on weights. Increasing this value will make model more conservative.}

\item{scale_pos_weight}{[default=1] Control the balance of positive and negative weights, useful for unbalanced classes. If set to \code{TRUE}, it is calculated as sum(negative instances) / sum(positive instances). If the first level is the majority class, use values < 1; otherwise values > 1 are normally used to balance the class distribution.}

\item{verbosity}{[default=0] Verbosity of printing messages. Valid values are 0 (silent), 1 (warning), 2 (info), 3 (debug).}

\item{validate}{default TRUE. report accuracy metrics on a validation set.}

\item{booster}{defaults to 'gbtree' for tree boosting but can be set to 'gblinear'}
}
\value{
xgb.Booster model
}
\description{
Accepts a formula to run an xgboost model. Automatically determines whether the formula is
for classification or regression. Returns the xgboost model.
}
\details{
In binary classification the target variable must be a factor with the first level set to the event of interest.
A higher probability will predict the first level.

reference for parameters: \href{https://xgboost.readthedocs.io/en/stable/parameter.html}{xgboost docs}
}
\examples{

options(rlang_trace_top_env = rlang::current_env())


# regression on numeric variable

iris \%>\%
 framecleaner::create_dummies(Species) -> iris_dummy

iris_dummy \%>\%
 tidy_formula(target= Petal.Length) -> petal_form

iris_dummy \%>\%
 tidy_xgboost(
   petal_form,
   trees = 20,
   mtry = .5
 )  -> xg1


xg1 \%>\%
 tidy_predict(newdata = iris_dummy, form = petal_form) -> iris_preds

iris_preds \%>\%
 eval_preds()




}
