% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/formula-helpers.R
\name{gp}
\alias{gp}
\title{Set up Gaussian process terms in \pkg{brms}}
\usage{
gp(..., by = NA, k = NA, cov = "exp_quad", iso = TRUE,
  gr = FALSE, cmc = TRUE, scale = TRUE, c = NULL)
}
\arguments{
\item{...}{One or more predictors for the GP.}

\item{by}{A numeric or factor variable of the same length as 
each predictor. In the numeric vector case, the elements multiply 
the values returned by the GP. In the factor variable 
case, a separate GP is fitted for each factor level.}

\item{k}{Optional number of basis functions for computing approximate
GPs. If \code{NA} (the default), exact GPs are computed.}

\item{cov}{Name of the covariance kernel. By default, 
the exponentiated-quadratic kernel \code{"exp_quad"} is used.}

\item{iso}{A flag to indicate whether an isotropic (\code{TRUE}; the 
default) or a non-isotropic GP should be used. 
In the former case, the same amount of smoothing is applied to all
predictors. In the latter case, predictors may have different smoothing.
Ignored if only a single predictor is supplied.}

\item{gr}{Logical; Indicates if auto-grouping should be used (defaults 
to \code{FALSE}). If enabled, observations sharing the same 
predictor values will be represented by the same latent variable
in the GP. This will improve sampling efficiency
drastically if the number of unique predictor combinations is small
relative to the number of observations.}

\item{cmc}{Logical; Only relevant if \code{by} is a factor. If \code{TRUE}
(the default), cell-mean coding is used for the \code{by}-factor, that is,
one GP per level is estimated. If \code{FALSE}, contrast GPs are estimated
according to the contrasts set for the \code{by}-factor.}

\item{scale}{Logical; If \code{TRUE} (the default), predictors are
scaled so that the maximum Euclidean distance between two points
is 1. This often improves sampling speed and convergence.}

\item{c}{Numeric value only used in approximate GPs. Defines the 
multiplicative constant of the predictors' range over which
predictions should be computed. A good default could be \code{c = 5/4} 
but we are still working on providing better recommendations.}
}
\value{
An object of class \code{'gpterm'}, which is a list 
  of arguments to be interpreted by the formula 
  parsing functions of \pkg{brms}.
}
\description{
Set up a Gaussian process (GP) term in \pkg{brms}. The function does not
evaluate its arguments -- it exists purely to help set up a model with
GP terms.
}
\details{
A GP is a stochastic process, which
 describes the relation between one or more predictors 
 \eqn{x = (x_1, ..., x_d)} and a response \eqn{f(x)}, where 
 \eqn{d} is the number of predictors. A GP is the
 generalization of the multivariate normal distribution
 to an infinite number of dimensions. Thus, it can be
 interpreted as a prior over functions. Any finite sample 
 realized from this stochastic process is jointly multivariate 
 normal, with a covariance matrix defined by the covariance
 kernel \eqn{k_p(x)}, where \eqn{p} is the vector of parameters
 of the GP:
 \deqn{f(x) \sim MVN(0, k_p(x))}{f(x) ~ MVN(0, k_p(x))}
 The smoothness and general behavior of the function \eqn{f} 
 depend only on the choice of covariance kernel. 
 For a more detailed introduction to Gaussian processes,
 see \url{https://en.wikipedia.org/wiki/Gaussian_process}.
 
 Below, we describe the currently supported covariance kernels:
 \describe{
   \item{"exp_quad": }{The exponentiated-quadratic kernel is defined as
   \eqn{k(x_i, x_j) = sdgp^2 exp(- || x_i - x_j ||^2 / (2 lscale^2))},
   where \eqn{|| . ||} is the Euclidean norm, \eqn{sdgp} is a 
   standard deviation parameter, and \eqn{lscale} is a characteristic 
   length-scale parameter. The latter practically measures how close two 
   points \eqn{x_i} and \eqn{x_j} have to be to influence each other 
   substantially.}
 }

 In the current implementation, \code{"exp_quad"} is the only supported 
 covariance kernel. More options will follow in the future.
}
\examples{
\dontrun{
# simulate data using the mgcv package
dat <- mgcv::gamSim(1, n = 30, scale = 2)

# fit a simple GP model
fit1 <- brm(y ~ gp(x2), dat, chains = 2)
summary(fit1)
me1 <- marginal_effects(fit1, nsamples = 200, spaghetti = TRUE)
plot(me1, ask = FALSE, points = TRUE)

# fit a more complicated GP model
fit2 <- brm(y ~ gp(x0) + x1 + gp(x2) + x3, dat, chains = 2)
summary(fit2)
me2 <- marginal_effects(fit2, nsamples = 200, spaghetti = TRUE)
plot(me2, ask = FALSE, points = TRUE)

# fit a multivariate GP model
fit3 <- brm(y ~ gp(x1, x2), dat, chains = 2)
summary(fit3)
me3 <- marginal_effects(fit3, nsamples = 200, spaghetti = TRUE)
plot(me3, ask = FALSE, points = TRUE)

# compare model fit
LOO(fit1, fit2, fit3)

# simulate data with a factor covariate
dat2 <- mgcv::gamSim(4, n = 90, scale = 2)

# fit separate gaussian processes for different levels of 'fac'
fit4 <- brm(y ~ gp(x2, by = fac), dat2, chains = 2)
summary(fit4)
plot(marginal_effects(fit4), points = TRUE)
}

}
\seealso{
\code{\link{brmsformula}}
}
