% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/variable_binning.R
\name{get_breaks_all}
\alias{get_breaks_all}
\alias{get_breaks}
\title{Generates Best Breaks for Binning}
\usage{
get_breaks_all(dat, target = NULL, x_list = NULL, ex_cols = NULL,
  pos_flag = NULL, occur_time = NULL, oot_pct = 0.7, best = TRUE,
  equal_bins = FALSE, g = 10, sp_values = NULL,
  tree_control = list(p = 0.05, cp = 0.000001, xval = 5, maxdepth = 10),
  bins_control = list(bins_num = 10, bins_pct = 0.05, b_chi = 0.05,
  b_odds = 0.1, b_psi = 0.05, b_or = 0.15, mono = 0.3, odds_psi = 0.2, kc =
  1), parallel = FALSE, note = FALSE, save_data = FALSE,
  file_name = NULL, dir_path = tempdir(), ...)

get_breaks(dat, x, target = NULL, pos_flag = NULL, best = TRUE,
  equal_bins = FALSE, g = 10, sp_values = NULL, occur_time = NULL,
  oot_pct = 0.7, tree_control = NULL, bins_control = NULL,
  note = FALSE, ...)
}
\arguments{
\item{dat}{A data frame with x and target.}

\item{target}{The name of target variable.}

\item{x_list}{A list of x variables.}

\item{ex_cols}{A list of excluded variables. Default is NULL.}

\item{pos_flag}{The value of positive class of target variable, default: "1".}

\item{occur_time}{The name of the variable that represents the time at which each observation takes place.}

\item{oot_pct}{Percentage of observations retained for overtime test (especially to calculate PSI). Default is 0.7.}

\item{best}{Logical, if TRUE, merge initial breaks to get optimal breaks for binning.}

\item{equal_bins}{Logical, if TRUE, initial breaks are generated with equal sample sizes. If FALSE, initial breaks are generated using a decision tree.}

\item{g}{Integer, number of initial bins for equal_bins.}

\item{sp_values}{A list of missing values.}

\item{tree_control}{the list of tree parameters.
\itemize{
  \item \code{p} the minimum percent of observations in any terminal (leaf) node. 0 < p < 1; 0.01 to 0.1 usually work.
  \item \code{cp} complexity parameter. The larger, the more conservative the algorithm will be. 0 < cp < 1; 0.0000001 to 0.0001 usually work.
  \item \code{xval} number of cross-validations. Default: 5
  \item \code{maxdepth} maximum depth of a tree. Default: 10
}}

\item{bins_control}{the list of parameters.
\itemize{
  \item \code{bins_num} The maximum number of bins. 5 to 10 usually work. Default: 10
  \item \code{bins_pct} The minimum percent of observations in any bins. 0 < bins_pct < 1; 0.01 to 0.1 usually work. Default: 0.05
  \item \code{b_chi} The minimum threshold of chi-square merge. 0 < b_chi < 1; 0.01 to 0.1 usually work. Default: 0.05
  \item \code{b_odds} The minimum threshold of  odds merge. 0 < b_odds < 1; 0.05 to 0.2 usually work. Default: 0.1
  \item \code{b_psi} The maximum threshold of PSI in any bins. 0 < b_psi < 1 ; 0 to 0.1 usually work. Default: 0.05
  \item \code{b_or} The maximum threshold of G/B index in any bins.  0 < b_or < 1 ; 0.05 to 0.3 usually work. Default: 0.15
  \item \code{odds_psi} The maximum threshold of Training and Testing G/B index PSI in any bins. 0 < odds_psi < 1; 0.01 to 0.3 usually work. Default: 0.2
  \item \code{mono} Monotonicity of all bins, the larger, the more nonmonotonic the bins will be. 0 < mono < 0.5; 0.2 to 0.4 usually work. Default: 0.3
  \item \code{kc} number of cross-validations. 1 to 5 usually work. Default: 1
}}

\item{parallel}{Logical, parallel computing or not. Default is FALSE.}

\item{note}{Logical, whether to output info. Default is FALSE.}

\item{save_data}{Logical, save results in locally specified folder. Default is FALSE.}

\item{file_name}{File name that save results in locally specified folder. Default is "breaks_list".}

\item{dir_path}{Path to save results. Default is \code{tempdir()}.}

\item{...}{Additional parameters.}

\item{x}{The Name of an independent variable.}
}
\value{
A table containing a list of splitting points for each independent variable.
}
\description{
\code{get_breaks} is for generating optimal binning for numerical and nominal variables.
\code{get_breaks_all} is a simpler wrapper for \code{get_breaks}.
}
\examples{
# Control lists passed to the calls below as tree_control and bins_control.
tree_control = list(p = 0.02, cp = 0.000001, xval = 5, maxdepth = 10)
# b_or is documented as 0 < b_or < 1 (default 0.15); the previous value 15
# was outside that range.
bins_control = list(bins_num = 10, bins_pct = 0.02, b_chi = 0.02, b_odds = 0.1,
                   b_psi = 0.05, b_or = 0.15, mono = 0.2, odds_psi = 0.1, kc = 5)
# get category variable breaks
b <-  get_breaks(dat = UCICreditCard[1:1000,], x = "MARRIAGE",
                target = "default.payment.next.month",
                occur_time = "apply_date",
                sp_values = list(-1, "Missing"),
                tree_control = tree_control, bins_control = bins_control)
# get numeric variable breaks
b2 <-  get_breaks(dat = UCICreditCard[1:1000,], x = "PAY_2",
                 target = "default.payment.next.month",
                 occur_time = "apply_date",
                 sp_values = list(-1, "Missing"),
                 tree_control = tree_control, bins_control = bins_control)
# get breaks of all predictive variables
b3 <-  get_breaks_all(dat = UCICreditCard[1:1000,], target = "default.payment.next.month",
                     x_list = c("MARRIAGE","PAY_2"),
                     occur_time = "apply_date", ex_cols = "ID",
                     sp_values = list(-1, "Missing"),
                     tree_control = tree_control, bins_control = bins_control,
                     save_data = FALSE)

}
\seealso{
\code{\link{get_tree_breaks}}, \code{\link{cut_equal}}, \code{\link{select_best_class}}, \code{\link{select_best_breaks}}
}
