% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gpspike.R
\name{gpSpike}
\alias{gpSpike}
\title{Bayesian single-index regression with Gaussian process link and spike-and-slab prior}
\usage{
gpSpike(
  x,
  y,
  prior = list(index = list(r1 = 1, r2 = 1, sigma_theta = 0.25), link =
    list(inv_lambda_shape = 1, inv_lambda_rate = 0.1), sigma2 = list(shape = 0.001, rate
    = 0.001)),
  init = list(index = list(pi = 0.5, nu = NULL, index = NULL), link = list(inv_lambda =
    NULL), sigma2 = NULL),
  sampling = TRUE,
  fitted = TRUE,
  monitors2 = NULL,
  niter = 10000,
  nburnin = 1000,
  thin = 1,
  thin2 = NULL,
  nchain = 1,
  setSeed = FALSE
)
}
\arguments{
\item{x}{Numeric data.frame/matrix of predictors. Each row is an observation.}

\item{y}{Numeric response vector/matrix. Other types are not available.}

\item{prior}{Optional named list of prior settings with sublists:
\describe{
\item{\code{index}}{Spike and slab prior hyperparameters: Beta-binomial for variable selection (default \code{r1 = 1, r2 = 1}),
and normal distribution for selected variables (default: \eqn{N(0, \sigma_{\theta}^{2})} with \code{sigma_theta = 0.25})}
\item{\code{link}}{Gaussian process prior hyperparameters \code{lambda}: Inverse-Gamma prior is assigned for \eqn{\lambda^{-1}}
(default \code{inv_lambda_shape = 1, inv_lambda_rate = 0.1})}
\item{\code{sigma2}}{Error-variance prior hyperparameters. An Inverse-Gamma prior is assigned to \eqn{\sigma^2},
where \code{shape} is the shape parameter and \code{rate} is the rate parameter of the inverse-gamma distribution
(default \code{shape = 0.001, rate = 0.001}).}
}}

\item{init}{Optional named list of initial values. If the values are not assigned, they are randomly sampled from prior.
\describe{
\item{\code{index}}{
\enumerate{
\item{\code{pi}: Initial variable-inclusion probability. (default: \code{0.5})}
\item{\code{nu}: Initial vector of inclusion indicators. By default, each \code{nu} is randomly drawn from \eqn{Bernoulli(1/2)}.}
\item{\code{index}: Initial index vector. By default, each element of the index vector selected by \code{nu} is drawn from a normal distribution.}
}}
\item{\code{link}}{Initial scalar of lambda (\code{inv_lambda}) for covariance of Gaussian process.}
\item{\code{sigma2}}{Initial scalar error variance. (default: \code{0.01})}
}}

\item{sampling}{Logical. If \code{TRUE} (default), run MCMC; otherwise return prepared nimble model objects without sampling.}

\item{fitted}{Logical. If \code{fitted = FALSE}, fitted values are not drawn and only \code{c("nu", "indexstar", "sigma2")} are monitored.
If \code{fitted = TRUE} (default), fitted values drawn from the posterior distribution are included in the output and \code{c("Xlin", "invlambda")} are additionally monitored for prediction.}

\item{monitors2}{Optional character vector of additional monitor nodes. To check the names of the nodes, set \code{fit <- gpSpike(x, y, sampling = FALSE)} and then inspect the variable names stored in the model object using \code{fit$model$getVarNames()}.}

\item{niter}{Integer. Total MCMC iterations (default \code{10000}).}

\item{nburnin}{Integer. Burn-in iterations (default \code{1000}).}

\item{thin}{Integer. Thinning for monitors1 (default \code{1}).}

\item{thin2}{Integer. Optional thinning for \code{monitors2} (default \code{1}).}

\item{nchain}{Integer. Number of MCMC chains (default \code{1}).}

\item{setSeed}{Logical or numeric argument. Further details are provided in the \code{setSeed} argument of \code{\link[nimble]{runMCMC}}.}
}
\value{
A \code{list} typically containing:
\describe{
\item{\code{model}}{Nimble model}
\item{\code{sampler}}{Nimble sampler}
\item{\code{sampling}}{Posterior draws of \eqn{\nu}, \eqn{\theta^*}, \eqn{\sigma^2}, and nodes for fitted values by default. Variables specified in \code{monitors2} will be added if provided.}
\item{\code{fitted}}{If \code{fitted = TRUE}, in-sample fitted values.}
\item{\code{input}}{List of input values for prior, initial values and execution time without compiling.}
}
}
\description{
Fits a single-index model \eqn{Y_i \sim \mathcal{N}(f(X_i'\theta), \sigma^2), i = 1,\ldots,n},
where the index vector \eqn{\theta} has a spike-and-slab prior and
the link \eqn{f(\cdot)} is represented by a Gaussian process.
}
\details{
\strong{Model} The single-index model is specified as \eqn{Y_i = f(X_i' \theta) + \epsilon_i},
where \eqn{\theta} is a p-dimensional index vector subject to a spike-and-slab
prior for variable selection. The link function \eqn{f(\cdot)} is modeled
using a Gaussian process prior with zero mean and squared exponential covariance
kernel \eqn{K(x_1, x_2) = \exp\{-\rho [(x_1 - x_2)^{T}\theta]^2\}},
where \eqn{\rho} determines the smoothness of \eqn{f}.
The covariance kernel is re-parameterized to \eqn{\exp\{-[(x_1 - x_2)^{T}\theta^{*}]^2\}} where
\eqn{\rho = ||\theta^{*}||^2} and
\eqn{\theta = ||\theta^{*}||^{-1}\theta^{*}}.
Therefore, \eqn{\theta^{*}} is sampled in MCMC.

\strong{Priors}
\itemize{
\item Inclusion indicators \eqn{\nu_l}: Bernoulli(\eqn{\pi}).
\item Inclusion probability \eqn{\pi}: Beta(\eqn{r_1, r_2}).
\item Slab coefficients \eqn{\theta_l^*}: Gaussian \eqn{N(0, \sigma_\theta^2)}.
\item GP precision \eqn{\lambda^{-1}}: Gamma(\eqn{a_\lambda, b_\lambda}).
\item Error precision \eqn{(\sigma^2)^{-1}}: Gamma(\eqn{a_\sigma, b_\sigma}).
}

\strong{Sampling} A random walk Metropolis algorithm is used to sample \eqn{\lambda^{-1}}
and a Metropolis-Hastings algorithm is used for the main parameters \eqn{(\theta^{*}, \nu)}.
The variance \eqn{\sigma^2} is directly sampled from posterior distribution.
\eqn{f} is not directly sampled by MCMC.
}
\examples{
\donttest{
# Simulate data from a single-index model: y = f(X theta) + Gaussian noise
set.seed(123)
n <- 200; d <- 4
# True index vector, rescaled to unit Euclidean norm
theta <- c(2, 1, 1, 1); theta <- theta / sqrt(sum(theta^2))
f <- function(u) u^2 * exp(u)
sigma <- 0.5
X <- matrix(runif(n * d, -1, 1), nrow = n)
index_vals <- as.vector(X \%*\% theta)
y <- f(index_vals) + rnorm(n, 0, sigma)

# One-call version: build the model and run MCMC (sampling = TRUE by default)
fit <- gpSpike(X, as.vector(y))

# Split version: build the model objects without sampling, compile, then run MCMC
models <- gpSpike(X, as.vector(y), sampling = FALSE)
Ccompile <- compileModelAndMCMC(models)
# Initial values are passed through runMCMC's full argument name, inits,
# rather than relying on partial matching of an abbreviated name
mcmc.out <- runMCMC(Ccompile$mcmc, niter = 5000, nburnin = 1000, thin = 1,
                   nchains = 1, setSeed = TRUE, inits = models$input$init,
                   summary = TRUE, samplesAsCodaMCMC = TRUE)
}

}
\references{
McGee, G., Wilson, A., Webster, T. F., & Coull, B. A. (2023).
Bayesian multiple index models for environmental mixtures.
\emph{Biometrics}, 79(1), 462-474.
}
