% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fit.R
\name{fit_two_layer}
\alias{fit_two_layer}
\title{MCMC sampling for two layer deep GP}
\usage{
fit_two_layer(
  x,
  y,
  D = ifelse(is.matrix(x), ncol(x), 1),
  nmcmc = 10000,
  trace = TRUE,
  w_0 = suppressWarnings(matrix(x, nrow = length(y), ncol = D)),
  g_0 = 0.01,
  theta_y_0 = 0.1,
  theta_w_0 = 0.1,
  true_g = NULL,
  settings = list(l = 1, u = 2, alpha = list(g = 1.5, theta_w = 1.5, theta_y = 1.5),
    beta = list(g = 3.9, theta_w = 3.9/4, theta_y = 3.9/6))
)
}
\arguments{
\item{x}{vector or matrix of input locations}

\item{y}{vector of response values}

\item{D}{integer designating dimension of hidden layer, defaults to 
dimension of \code{x}}

\item{nmcmc}{number of MCMC iterations}

\item{trace}{logical indicating whether to print iteration progress}

\item{w_0}{initial value for hidden layer \code{w}, defaults to identity 
mapping (must be matrix of dimension \code{nrow(x)} by \code{D} or 
dimension \code{nrow(x) - 1} by \code{D})}

\item{g_0}{initial value for \code{g}}

\item{theta_y_0}{initial value for \code{theta_y} (length scale of outer 
layer)}

\item{theta_w_0}{initial value for \code{theta_w} (length scale of inner 
layer), may be single value or vector of length \code{D}}

\item{true_g}{if true nugget is known it may be specified here (set to a 
small value to make fit deterministic).  Note - values that are too 
small may cause numerical issues in matrix inversions.}

\item{settings}{hyperparameters for proposals and priors on \code{g}, 
\code{theta_y}, and \code{theta_w}}
}
\value{
a list of the S3 class "\code{dgp2}" with elements:
\itemize{
  \item \code{x}: copy of input matrix
  \item \code{y}: copy of response vector
  \item \code{nmcmc}: number of MCMC iterations
  \item \code{settings}: copy of proposal/prior settings
  \item \code{g}: vector of MCMC samples for \code{g}
  \item \code{theta_y}: vector of MCMC samples for \code{theta_y} (length
        scale of outer layer)
  \item \code{theta_w}: matrix of MCMC samples for \code{theta_w} (length 
        scale of inner layer)
  \item \code{w}: list of MCMC samples for hidden layer \code{w}
  \item \code{time}: computation time in seconds
}
}
\description{
Conducts MCMC sampling of hyperparameters and hidden layer 
    "\code{w}" for a two layer deep GP.  Covariance structure is based on 
    inverse exponentiated squared Euclidean distance.  Separate length scale 
    parameters "\code{theta_w}" and "\code{theta_y}" govern the correlation 
    strength of the hidden layer and outer layer respectively.  Nugget 
    parameter "\code{g}" governs noise on the outer layer.
}
\details{
Maps inputs "\code{x}" through hidden layer "\code{w}" to outputs 
    "\code{y}".  Conducts sampling of the hidden layer using Elliptical 
    Slice sampling.  Utilizes Metropolis Hastings sampling of the length 
    scale and nugget parameters with proposals and priors controlled by 
    \code{settings}.  Proposals for \code{g}, \code{theta_y}, and 
    \code{theta_w} follow a uniform sliding window scheme, e.g.
    
    \code{g_star <- runif(1, l * g_t / u, u * g_t / l)}, 
    
    with defaults \code{l = 1} and \code{u = 2} provided in \code{settings}.   
    Priors on \code{g}, \code{theta_y}, and \code{theta_w} follow Gamma distributions with 
    shape parameter (\code{alpha}) and rate parameter (\code{beta}) provided 
    in \code{settings}.  These priors are designed for "\code{x}" scaled to 
    [0,1] and "\code{y}" scaled to have mean 0 and variance 1.  
    
    The output object of class 
    "\code{dgp2}" is designed for use with \code{continue}, \code{trim}, 
    and \code{predict}. If \code{w_0} is of dimension \code{nrow(x) - 1} by 
    \code{D}, the final row is predicted using kriging.  This is helpful in 
    sequential design when adding a new input location and starting the MCMC 
    at the place where the previous MCMC left off.
}
\examples{
# Toy example (runs in less than 5 seconds) --------------------------------
# This example uses a small number of MCMC iterations in order to run quickly
# More iterations are required to get appropriate fits
# Function defaults are recommended (see additional example below)

# Piecewise test function for a single (scalar) input x:
#   x <= 0.4       ->  -1
#   x >= 0.6       ->   1
#   0.4 < x < 0.6  ->  linear ramp 10 * (x - 0.5), passing through 0 at x = 0.5
# Written as one if / else chain so every scalar input reaches exactly one
# branch; apply over a vector with sapply() or vapply().
f <- function(x) {
  if (x <= 0.4) {
    -1
  } else if (x >= 0.6) {
    1
  } else {
    10 * (x - 0.5)
  }
}
# Seven training inputs, their responses under f, and a grid of 100
# prediction locations on [0, 1]
x <- seq(0.05, 0.95, length.out = 7)
y <- vapply(x, f, numeric(1))  # f handles one value at a time
x_new <- seq(0, 1, length.out = 100)

# Fit model and calculate ALC
fit <- fit_two_layer(x, y, nmcmc = 500)
fit <- trim(fit, 400)
fit <- predict(fit, x_new)
alc <- ALC(fit)

\donttest{
# Two Layer and ALC --------------------------------------------------------

# Exponentially damped oscillation used as the true response surface.
# Vectorized: accepts a numeric vector x and returns a vector of equal length.
f <- function(x) {
  phase <- 10 * pi * x - 1
  decay <- exp(-10 * x)
  decay * (cos(phase) + sin(phase)) * 5 - 0.2
}

# Training data: 30 equally spaced inputs with noisy responses
# (Gaussian noise, standard deviation 0.05)
x <- seq(0, 1, length.out = 30)
y <- f(x) + rnorm(length(x), mean = 0, sd = 0.05)

# Testing data: 100 equally spaced inputs with noise-free responses
xx <- seq(0, 1, length.out = 100)
yy <- f(xx)

# Conduct MCMC
# NOTE(review): continue() presumably appends 1000 further iterations to the
# initial 9000, and trim() presumably applies burn-in and thinning; confirm
# against the help pages for continue and trim.
fit <- fit_two_layer(x, y, D = 1, nmcmc = 9000)
fit <- continue(fit, 1000)
plot(fit) # investigate trace plots
fit <- trim(fit, 8000, 2)

# Option 1 - calculate ALC from MCMC iterations
alc <- ALC(fit, xx)

# Option 2 - calculate ALC after predictions
fit <- predict(fit, xx)
alc <- ALC(fit)

# Visualize fit
plot(fit)
par(new = TRUE) # overlay ALC
plot(xx, alc$value, type = 'l', lty = 2, axes = FALSE, xlab = '', ylab = '')

# Select next design point: the candidate location with the largest ALC value
x_new <- xx[which.max(alc$value)]

# Evaluate fit
rmse(yy, fit$mean) # lower is better
}

}
\references{
Sauer, A, RB Gramacy, and D Higdon. 2020. "Active Learning for Deep Gaussian 
    Process Surrogates." arXiv:2012.08015. \cr\cr
Murray, I, RP Adams, and D MacKay. 2010. "Elliptical slice sampling." 
    \emph{Journal of Machine Learning Research 9}, 541-548.
}
