% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot.R
\name{diagnosticPlot}
\alias{diagnosticPlot}
\alias{diagnosticPlot.rlars}
\alias{diagnosticPlot.grplars}
\alias{diagnosticPlot.tslarsP}
\alias{diagnosticPlot.seqModel}
\alias{diagnosticPlot.perrySeqModel}
\alias{diagnosticPlot.tslars}
\alias{diagnosticPlot.sparseLTS}
\alias{diagnosticPlot.perrySparseLTS}
\alias{diagnosticPlot.default}
\title{Diagnostic plots for a sequence of regression models}
\usage{
diagnosticPlot(x, ...)

\method{diagnosticPlot}{seqModel}(x, s = NA, covArgs = list(), ...)

\method{diagnosticPlot}{perrySeqModel}(x, ...)

\method{diagnosticPlot}{tslars}(x, p, ...)

\method{diagnosticPlot}{sparseLTS}(x, s = NA, fit = c("reweighted",
  "raw", "both"), covArgs = list(), ...)

\method{diagnosticPlot}{perrySparseLTS}(x, ...)

\method{diagnosticPlot}{default}(x, which = c("all", "rqq", "rindex",
  "rfit", "rdiag"), ask = (which == "all"), facets = attr(x, "facets"),
  size = c(2, 4), id.n = NULL, ...)
}
\arguments{
\item{x}{the model fit for which to produce diagnostic plots, or a data
frame containing all necessary information for plotting (as generated by the
corresponding \code{\link[=fortify.seqModel]{fortify}} method).}

\item{\dots}{for the generic function, additional arguments to be passed
down to methods.  For the \code{"tslars"} method, additional arguments to be
passed down to the \code{"seqModel"} method.  For the \code{"perrySeqModel"}
and \code{"perrySparseLTS"} methods, additional arguments to be passed down
to the \code{"seqModel"} and \code{"sparseLTS"} methods, respectively.  For
the \code{"seqModel"} and \code{"sparseLTS"} methods, additional arguments
to be passed down to the default method.  For the default method, additional
arguments to be passed down to \code{\link[ggplot2]{geom_point}}.}

\item{s}{for the \code{"seqModel"} method, an integer vector giving
the steps of the submodels for which to produce diagnostic plots (the
default is to use the optimal submodel).  For the \code{"sparseLTS"} method,
an integer vector giving the indices of the models for which to produce
diagnostic plots (the default is to use the optimal model for each of the
requested fits).}

\item{covArgs}{a list of arguments to be passed to
\code{\link[robustbase]{covMcd}} for the regression diagnostic plot (see
\dQuote{Details}).}

\item{p}{an integer giving the lag length for which to produce the plot
(the default is to use the optimal lag length).}

\item{fit}{a character string specifying for which fit to produce
diagnostic plots.  Possible values are \code{"reweighted"} (the default) for
diagnostic plots for the reweighted fit, \code{"raw"} for diagnostic plots
for the raw fit, or \code{"both"} for diagnostic plots for both fits.}

\item{which}{a character string indicating which plot to show.  Possible
values are \code{"all"} (the default) for all of the following, \code{"rqq"}
for a normal Q-Q plot of the standardized residuals, \code{"rindex"} for a
plot of the standardized residuals versus their index, \code{"rfit"} for a
plot of the standardized residuals versus the fitted values, or
\code{"rdiag"} for a regression diagnostic plot (standardized residuals
versus robust Mahalanobis distances of the predictor variables).}

\item{ask}{a logical indicating whether the user should be asked before
each plot (see \code{\link[grDevices]{devAskNewPage}}). The default is to
ask if all plots are requested and not ask otherwise.}

\item{facets}{a faceting formula to override the default behavior.  If
supplied, \code{\link[ggplot2]{facet_wrap}} or
\code{\link[ggplot2]{facet_grid}} is called depending on whether the formula
is one-sided or two-sided.}

\item{size}{a numeric vector of length two giving the point and label size,
respectively.}

\item{id.n}{an integer giving the number of the most extreme observations
to be identified by a label.  The default is to use the number of identified
outliers, which can be different for the different plots.  See
\dQuote{Details} for more information.}
}
\value{
If only one plot is requested, an object of class \code{"ggplot"} (see
\code{\link[ggplot2]{ggplot}}), otherwise a list of such objects.
}
\description{
Produce diagnostic plots for a sequence of regression models, such as
submodels along a robust least angle regression sequence, or sparse least
trimmed squares regression models for a grid of values for the penalty
parameter.  Four plots are currently implemented.
}
\details{
In the normal Q-Q plot of the standardized residuals, a reference line is
drawn through the first and third quartiles.  The \code{id.n} observations
with the largest distances from that line are identified by a label (the
observation number).  The default for \code{id.n} is the number of
regression outliers, i.e., the number of observations whose absolute
residuals are too large (cf. \code{\link[=weights.sparseLTS]{weights}}).

In the plots of the standardized residuals versus their index or the fitted
values, horizontal reference lines are drawn at 0 and +/-2.5.  The
\code{id.n} observations with the largest absolute values of the
standardized residuals are identified by a label (the observation
number).  The default for \code{id.n} is the number of regression outliers,
i.e., the number of observations whose absolute residuals are too large (cf.
\code{\link[=weights.sparseLTS]{weights}}).

For the regression diagnostic plot, the robust Mahalanobis distances of the
predictor variables are computed via the MCD based on only those predictors
with non-zero coefficients (see
\code{\link[robustbase]{covMcd}}).  Horizontal reference lines are drawn at
+/-2.5 and a vertical reference line is drawn at the 97.5\% quantile
of the \eqn{\chi^{2}}{chi-squared} distribution with \eqn{p} degrees of
freedom, where \eqn{p} denotes the number of predictors with non-zero
coefficients.  The \code{id.n} observations with the largest absolute values
of the standardized residuals and/or largest robust Mahalanobis distances
are identified by a label (the observation number).  The default for
\code{id.n} is the number of all outliers: regression outliers (i.e.,
observations whose absolute residuals are too large, cf.
\code{\link[=weights.sparseLTS]{weights}}) and leverage points (i.e.,
observations with robust Mahalanobis distance larger than the 97.5\%
quantile of the \eqn{\chi^{2}}{chi-squared} distribution with \eqn{p}
degrees of freedom).
}
\examples{
## generate data
# example is not high-dimensional to keep computation time low
library("mvtnorm")
set.seed(1234)  # for reproducibility
n <- 100  # number of observations
p <- 25   # number of variables
beta <- rep.int(c(1, 0), c(5, p-5))  # coefficients
sigma <- 0.5      # controls signal-to-noise ratio
epsilon <- 0.1    # contamination level
Sigma <- 0.5^t(sapply(1:p, function(i, j) abs(i-j), 1:p))
x <- rmvnorm(n, sigma=Sigma)    # predictor matrix
e <- rnorm(n)                   # error terms
i <- 1:ceiling(epsilon*n)       # observations to be contaminated
e[i] <- e[i] + 5                # vertical outliers
y <- c(x \%*\% beta + sigma * e)  # response
x[i,] <- x[i,] + 5              # bad leverage points


## robust LARS
# fit model
fitRlars <- rlars(x, y, sMax = 10)
# create plot
diagnosticPlot(fitRlars)


## sparse LTS
# fit model
fitSparseLTS <- sparseLTS(x, y, lambda = 0.05, mode = "fraction")
# create plot
diagnosticPlot(fitSparseLTS)
diagnosticPlot(fitSparseLTS, fit = "both")
}
\seealso{
\code{\link[ggplot2]{ggplot}}, \code{\link{rlars}},
\code{\link{grplars}}, \code{\link{rgrplars}}, \code{\link{tslarsP}},
\code{\link{rtslarsP}}, \code{\link{tslars}}, \code{\link{rtslars}},
\code{\link{sparseLTS}}, \code{\link[robustbase:ltsPlot]{plot.lts}}
}
\author{
Andreas Alfons
}
\keyword{hplot}
