% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pensem_regression.R
\name{pensem_cv}
\alias{pensem_cv}
\alias{pensem_cv.default}
\alias{pensem_cv.pense_cvfit}
\title{Compute Penalized Elastic Net M-Estimates from PENSE}
\usage{
pensem_cv(x, ...)

\method{pensem_cv}{default}(
  x,
  y,
  alpha = 0.5,
  nlambda = 50,
  lambda_min_ratio,
  lambda_m,
  lambda_s,
  standardize = TRUE,
  penalty_loadings,
  intercept = TRUE,
  bdp = 0.25,
  ncores = 1,
  sparse = FALSE,
  eps = 1e-06,
  cc = 4.7,
  cv_k = 5,
  cv_repl = 1,
  cl = NULL,
  cv_metric = c("tau_size", "mape", "rmspe"),
  add_zero_based = TRUE,
  explore_solutions = 10,
  explore_tol = 0.1,
  max_solutions = 10,
  fit_all = TRUE,
  comparison_tol = sqrt(eps),
  algorithm_opts = mm_algorithm_options(),
  mscale_opts = mscale_algorithm_options(),
  nlambda_enpy = 10,
  enpy_opts = enpy_options(),
  ...
)

\method{pensem_cv}{pense_cvfit}(
  x,
  scale,
  alpha,
  nlambda = 50,
  lambda_min_ratio,
  lambda_m,
  standardize = TRUE,
  penalty_loadings,
  intercept = TRUE,
  bdp = 0.25,
  ncores = 1,
  sparse = FALSE,
  eps = 1e-06,
  cc = 4.7,
  cv_k = 5,
  cv_repl = 1,
  cl = NULL,
  cv_metric = c("tau_size", "mape", "rmspe"),
  add_zero_based = TRUE,
  explore_solutions = 10,
  explore_tol = 0.1,
  max_solutions = 10,
  fit_all = TRUE,
  comparison_tol = sqrt(eps),
  algorithm_opts = mm_algorithm_options(),
  mscale_opts = mscale_algorithm_options(),
  x_train,
  y_train,
  ...
)
}
\arguments{
\item{x}{either a numeric matrix of predictor values, or a cross-validated PENSE fit from \code{\link[=pense_cv]{pense_cv()}}.}

\item{...}{ignored. See the section on deprecated parameters below.}

\item{y}{vector of response values of length \code{n}.
For binary classification, \code{y} should be a factor with 2 levels.}

\item{alpha}{elastic net penalty mixing parameter with \eqn{0 \le \alpha \le 1}.
\code{alpha = 1} is the LASSO penalty, and \code{alpha = 0} the Ridge penalty.
Can be a vector of several values, but \code{alpha = 0} cannot be mixed with other values.}

\item{nlambda}{number of penalization levels.}

\item{lambda_min_ratio}{Smallest value of the penalization level as a fraction of the largest
level (i.e., the smallest value for which all coefficients are zero). The default depends on
the sample size relative to the number of variables and \code{alpha}. If more observations than
variables are available, the default is \code{1e-3 * alpha}, otherwise \code{1e-2 * alpha}.}

\item{lambda_m, lambda_s}{optional user-supplied sequences of penalization levels for the M- and S-estimates, respectively.
If given and not \code{NULL}, \code{nlambda} and \code{lambda_min_ratio} are ignored for the respective estimate (S and/or M).}

\item{standardize}{logical flag to standardize the \code{x} variables prior to fitting the PENSE
estimates. Coefficients are always returned on the original scale. This can fail for
variables with a large proportion of a single value (e.g., zero-inflated data).
In this case, either compute with \code{standardize = FALSE} or standardize the data manually.}

\item{penalty_loadings}{a vector of positive penalty loadings (a.k.a. weights) for different
penalization of each coefficient. Only allowed for \code{alpha} > 0.}

\item{intercept}{include an intercept in the model.}

\item{bdp}{desired breakdown point of the estimator, between 0 and 0.5. The actual
breakdown point may be slightly larger/smaller to avoid instabilities of the S-loss.}

\item{ncores}{number of CPU cores to use in parallel. By default, only one CPU core is used.
Not supported on all platforms, in which case a warning is given.}

\item{sparse}{use sparse coefficient vectors.}

\item{eps}{numerical tolerance.}

\item{cc}{cutoff constant for Tukey's bisquare \eqn{\rho} function in the M-estimation objective function.}

\item{cv_k}{number of folds per cross-validation.}

\item{cv_repl}{number of cross-validation replications.}

\item{cl}{a \link[parallel:makeCluster]{parallel} cluster. Can only be used in combination with
\code{ncores = 1}.}

\item{cv_metric}{either a string specifying the performance metric to use, or a function to
evaluate prediction errors in a single CV replication.
If a function, the number of arguments defines the data the function receives.
If the function takes a single argument, it is called with a single numeric vector of
prediction errors.
If the function takes two or more arguments, it is called with the predicted values as
first argument and the true values as second argument.
The function must always return a single numeric value quantifying the prediction performance.
The order of the given values corresponds to the order in the input data.}

\item{add_zero_based}{also consider the 0-based regularization path. See details for a
description.}

\item{explore_solutions}{number of solutions to compute up to the desired precision \code{eps}.}

\item{explore_tol}{numerical tolerance and maximum number of iterations for
exploring possible solutions. The tolerance should be (much) looser than \code{eps} to be useful,
and the number of iterations should also be much smaller than the maximum number of
iterations given via \code{algorithm_opts}.}

\item{max_solutions}{only retain up to \code{max_solutions} unique solutions per penalization level.}

\item{fit_all}{If \code{TRUE}, fit the model for all penalization levels.
Can also be any combination of \code{"min"} and \code{"{x}-se"}, in which case only models at the
penalization level with smallest average CV prediction error, or within \code{{x}} standard errors,
respectively, are fitted.
Setting \code{fit_all} to \code{FALSE} is equivalent to \code{"min"}.
Applies to all \code{alpha} values.}

\item{comparison_tol}{numeric tolerance to determine if two solutions are equal.
The comparison is first done on the absolute difference in the value of the objective
function at the solution. If this is less than \code{comparison_tol}, two solutions are deemed
equal if the squared difference of the intercepts is less than \code{comparison_tol} and the
squared \eqn{L_2} norm of the difference vector is less than \code{comparison_tol}.}

\item{algorithm_opts}{options for the MM algorithm to compute the estimates.
See \code{\link[=mm_algorithm_options]{mm_algorithm_options()}} for details.}

\item{mscale_opts}{options for the M-scale estimation. See \code{\link[=mscale_algorithm_options]{mscale_algorithm_options()}}
for details.}

\item{nlambda_enpy}{number of penalization levels where the EN-PY initial estimate is computed.}

\item{enpy_opts}{options for the ENPY initial estimates, created with the
\code{\link[=enpy_options]{enpy_options()}} function. See \code{\link[=enpy_initial_estimates]{enpy_initial_estimates()}} for details.}

\item{scale}{initial scale estimate to use in the M-estimation. By default the S-scale from the PENSE fit is used.}

\item{x_train, y_train}{override arguments \code{x} and \code{y} as provided in the call to \code{pense_cv()}. This is useful if
the arguments in the \code{pense_cv()} call are not available in the current environment.}
}
\value{
an object of cross-validated regularized M-estimates as returned from \code{\link[=regmest_cv]{regmest_cv()}}.
}
\description{
This is a convenience wrapper around \code{\link[=pense_cv]{pense_cv()}} and \code{\link[=regmest_cv]{regmest_cv()}}, for the common use-case of computing
a highly-robust S-estimate followed by a more efficient M-estimate using the scale of the residuals from the
S-estimate.
}
\details{
The built-in CV metrics are
\describe{
\item{\code{"tau_size"}}{\eqn{\tau}-size of the prediction error, computed by
\code{\link[=tau_size]{tau_size()}} (default).}
\item{\code{"mape"}}{Median absolute prediction error.}
\item{\code{"rmspe"}}{Root mean squared prediction error.}
\item{\code{"auroc"}}{Area under the receiver operating characteristic curve (actually 1 - AUROC).
Only sensible for binary responses.}
}
}
\seealso{
\code{\link[=pense_cv]{pense_cv()}} to compute the starting S-estimate.

Other functions to compute robust estimates with CV: 
\code{\link{pense_cv}()},
\code{\link{regmest_cv}()}
}
\concept{functions to compute robust estimates with CV}
