% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{pred-projection}
\alias{pred-projection}
\alias{proj_linpred}
\alias{proj_predict}
\title{Predictions from a submodel (after projection)}
\usage{
proj_linpred(
  object,
  newdata = NULL,
  offsetnew = NULL,
  weightsnew = NULL,
  filter_nterms = NULL,
  transform = FALSE,
  integrated = FALSE,
  .seed = sample.int(.Machine$integer.max, 1),
  ...
)

proj_predict(
  object,
  newdata = NULL,
  offsetnew = NULL,
  weightsnew = NULL,
  filter_nterms = NULL,
  nresample_clusters = 1000,
  .seed = sample.int(.Machine$integer.max, 1),
  resp_oscale = TRUE,
  ...
)
}
\arguments{
\item{object}{An object returned by \code{\link[=project]{project()}} or an object that can be
passed to argument \code{object} of \code{\link[=project]{project()}}.}

\item{newdata}{Passed to argument \code{newdata} of the reference model's
\code{extract_model_data} function (see \code{\link[=init_refmodel]{init_refmodel()}}). Provides the
predictor (and possibly also the response) data for the new (or old)
observations. May also be \code{NULL} (see argument \code{extract_model_data} of
\code{\link[=init_refmodel]{init_refmodel()}}). If not \code{NULL}, any \code{NA}s will trigger an error.}

\item{offsetnew}{Passed to argument \code{orhs} of the reference model's
\code{extract_model_data} function (see \code{\link[=init_refmodel]{init_refmodel()}}). Used to get the
offsets for the new (or old) observations.}

\item{weightsnew}{Passed to argument \code{wrhs} of the reference model's
\code{extract_model_data} function (see \code{\link[=init_refmodel]{init_refmodel()}}). Used to get the
weights for the new (or old) observations.}

\item{filter_nterms}{Only applies if \code{object} is an object returned by
\code{\link[=project]{project()}}. In that case, \code{filter_nterms} can be used to filter \code{object}
for only those elements (submodels) with a number of solution terms in
\code{filter_nterms}. Therefore, \code{filter_nterms} needs to be a numeric
vector or \code{NULL}. If \code{NULL}, all submodels are used.}

\item{transform}{For \code{\link[=proj_linpred]{proj_linpred()}} only. A single logical value indicating
whether the linear predictor should be transformed to response scale using
the inverse-link function (\code{TRUE}) or not (\code{FALSE}). In case of the latent
projection, argument \code{transform} is similar in spirit to argument
\code{resp_oscale} from other functions and affects the scale of both output
elements \code{pred} and \code{lpd} (see sections "Details" and "Value" below).}

\item{integrated}{For \code{\link[=proj_linpred]{proj_linpred()}} only. A single logical value
indicating whether the output should be averaged across the projected
posterior draws (\code{TRUE}) or not (\code{FALSE}).}

\item{.seed}{Pseudorandom number generation (PRNG) seed by which the same
results can be obtained again if needed. Passed to argument \code{seed} of
\code{\link[=set.seed]{set.seed()}}, but can also be \code{NA} to not call \code{\link[=set.seed]{set.seed()}} at all. Here,
this seed is used for drawing new group-level effects in case of a
multilevel submodel (however, not yet in case of a GAMM) and for drawing
from the predictive distributions of the submodel(s) in case of
\code{\link[=proj_predict]{proj_predict()}}. If a clustered projection was performed, then in
\code{\link[=proj_predict]{proj_predict()}}, \code{.seed} is also used for drawing from the set of
projected clusters of posterior draws (see argument \code{nresample_clusters}).}

\item{...}{Arguments passed to \code{\link[=project]{project()}} if \code{object} is not already an
object returned by \code{\link[=project]{project()}}.}

\item{nresample_clusters}{For \code{\link[=proj_predict]{proj_predict()}} with clustered projection
only. Number of draws to return from the predictive distributions of the
submodel(s). Not to be confused with argument \code{nclusters} of \code{\link[=project]{project()}}:
\code{nresample_clusters} gives the number of draws (\emph{with} replacement) from
the set of clustered posterior draws after projection (with this set being
determined by argument \code{nclusters} of \code{\link[=project]{project()}}).}

\item{resp_oscale}{Only relevant for the latent projection. A single logical
value indicating whether to draw from the posterior-projection predictive
distributions on the original response scale (\code{TRUE}) or on the latent
scale (\code{FALSE}).}
}
\value{
In the following, \eqn{S_{\mathrm{prj}}}{S_prj}, \eqn{N},
\eqn{C_{\mathrm{cat}}}{C_cat}, and \eqn{C_{\mathrm{lat}}}{C_lat} from help
topic \link{refmodel-init-get} are used. (For \code{\link[=proj_linpred]{proj_linpred()}} with \code{integrated = TRUE}, we have \eqn{S_{\mathrm{prj}} = 1}{S_prj = 1}.) Furthermore, let
\eqn{C} denote either \eqn{C_{\mathrm{cat}}}{C_cat} (if \code{transform = TRUE})
or \eqn{C_{\mathrm{lat}}}{C_lat} (if \code{transform = FALSE}). Then, if the
prediction is done for one submodel only (i.e., \code{length(nterms) == 1 || !is.null(solution_terms)} in the call to \code{\link[=project]{project()}}):
\itemize{
\item \code{\link[=proj_linpred]{proj_linpred()}} returns a \code{list} with the following elements:
\itemize{
\item Element \code{pred} contains the actual predictions, i.e., the linear
predictors, possibly transformed to response scale (depending on
argument \code{transform}).
\item Element \code{lpd} is non-\code{NULL} only if \code{newdata} is \code{NULL} or if
\code{newdata} contains response values in the corresponding column. In that
case, it contains the log predictive density values (conditional on
each of the projected parameter draws if \code{integrated = FALSE} and
averaged across the projected parameter draws if \code{integrated = TRUE}).
}

In case of (i) the traditional projection, (ii) the latent projection
with \code{transform = FALSE}, or (iii) the latent projection with
\code{transform = TRUE} and \verb{<refmodel>$family$cats} (where \verb{<refmodel>} is
an object resulting from \code{\link[=init_refmodel]{init_refmodel()}}; see also
\code{\link[=extend_family]{extend_family()}}'s argument \code{latent_y_unqs}) being \code{NULL}, both
elements are \eqn{S_{\mathrm{prj}} \times N}{S_prj x N} matrices. In
case of (i) the augmented-data projection or (ii) the latent projection
with \code{transform = TRUE} and \verb{<refmodel>$family$cats} being not \code{NULL},
\code{pred} is an \eqn{S_{\mathrm{prj}} \times N \times C}{S_prj x N x C}
array and \code{lpd} is an \eqn{S_{\mathrm{prj}} \times N}{S_prj x N}
matrix.
\item \code{\link[=proj_predict]{proj_predict()}} returns an \eqn{S_{\mathrm{prj}} \times N}{S_prj x N}
matrix of predictions where \eqn{S_{\mathrm{prj}}}{S_prj} denotes
\code{nresample_clusters} in case of clustered projection. In case of (i) the
augmented-data projection or (ii) the latent projection with \code{resp_oscale = TRUE} and \verb{<refmodel>$family$cats} being not \code{NULL}, this matrix has an
attribute called \code{cats} (the character vector of response categories) and
the values of the matrix are the predicted indices of the response
categories (these indices refer to the order of the response categories
from attribute \code{cats}).
}

If the prediction is done for more than one submodel, the output from above
is returned for each submodel, giving a named \code{list} with one element for
each submodel (the names of this \code{list} being the numbers of solution terms
of the submodels when counting the intercept, too).
}
\description{
After the projection of the reference model onto a submodel, the linear
predictors (for the original or a new dataset) based on that submodel can be
calculated by \code{\link[=proj_linpred]{proj_linpred()}}. These linear predictors can also be
transformed to response scale and averaged across the projected parameter
draws. Furthermore, \code{\link[=proj_linpred]{proj_linpred()}} returns the corresponding log predictive
density values if the (original or new) dataset contains response values. The
\code{\link[=proj_predict]{proj_predict()}} function draws from the predictive distributions (there is
one such distribution for each observation from the original or new dataset)
of the submodel that the reference model has been projected onto. If the
projection has not been performed yet, both functions call \code{\link[=project]{project()}}
internally to perform the projection. Both functions can also handle multiple
submodels at once (for \code{object}s of class \code{vsel} or \code{object}s returned by
applying \code{\link[=project]{project()}} to an object of class \code{vsel}; see \code{\link[=project]{project()}}).
}
\details{
Currently, \code{\link[=proj_predict]{proj_predict()}} ignores observation weights that are not
equal to \code{1}. A corresponding warning is thrown if this is the case.

In case of the latent projection and \code{transform = FALSE}:
\itemize{
\item Output element \code{pred} contains the linear predictors without any
modifications that may be due to the original response distribution (e.g.,
for a \code{\link[brms:brmsfamily]{brms::cumulative()}} model, the ordered thresholds are not taken into
account).
\item Output element \code{lpd} contains the \emph{latent} log predictive density values,
i.e., those corresponding to the latent Gaussian distribution. If \code{newdata}
is not \code{NULL}, this requires the latent response values to be supplied in a
column called \verb{.<response_name>} of \code{newdata} where \verb{<response_name>} needs
to be replaced by the name of the original response variable (if
\verb{<response_name>} contained parentheses, these have been stripped off by
\code{\link[=init_refmodel]{init_refmodel()}}; see the left-hand side of \verb{formula(<refmodel>)}). For
technical reasons, the existence of column \verb{<response_name>} in \code{newdata}
is another requirement (even though \verb{.<response_name>} is actually used).
}
}
\examples{
if (requireNamespace("rstanarm", quietly = TRUE)) {
  # Data:
  dat_gauss <- data.frame(y = df_gaussian$y, df_gaussian$x)

  # The "stanreg" fit which will be used as the reference model (with small
  # values for `chains` and `iter`, but only for technical reasons in this
  # example; this is not recommended in general):
  fit <- rstanarm::stan_glm(
    y ~ X1 + X2 + X3 + X4 + X5, family = gaussian(), data = dat_gauss,
    QR = TRUE, chains = 2, iter = 500, refresh = 0, seed = 9876
  )

  # Projection onto an arbitrary combination of predictor terms (with a small
  # value for `nclusters`, but only for the sake of speed in this example;
  # this is not recommended in general):
  prj <- project(fit, solution_terms = c("X1", "X3", "X5"), nclusters = 10,
                 seed = 9182)

  # Predictions (at the training points) from the submodel onto which the
  # reference model was projected:
  prjl <- proj_linpred(prj)
  prjp <- proj_predict(prj, .seed = 7364)
}

}
