% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/spatialGEV_fit.R, R/spatialGEV_model.R
\name{spatialGEV_fit}
\alias{spatialGEV_fit}
\alias{spatialGEV_model}
\title{Fit a GEV-GP model.}
\usage{
spatialGEV_fit(
  data,
  locs,
  random = c("a", "ab", "abs"),
  method = c("laplace", "maxsmooth"),
  init_param,
  reparam_s,
  kernel = c("spde", "matern", "exp"),
  X_a = NULL,
  X_b = NULL,
  X_s = NULL,
  nu = 1,
  s_prior = NULL,
  beta_prior = NULL,
  matern_pc_prior = NULL,
  return_levels = 0,
  get_return_levels_cov = T,
  sp_thres = -1,
  adfun_only = FALSE,
  ignore_random = FALSE,
  silent = FALSE,
  mesh_extra_init = list(a = 0, log_b = -1, s = 0.001),
  get_hessian = TRUE,
  ...
)

spatialGEV_model(
  data,
  locs,
  random = c("a", "ab", "abs"),
  method = c("laplace", "maxsmooth"),
  init_param,
  reparam_s,
  kernel = c("spde", "matern", "exp"),
  X_a = NULL,
  X_b = NULL,
  X_s = NULL,
  nu = 1,
  s_prior = NULL,
  beta_prior = NULL,
  matern_pc_prior = NULL,
  sp_thres = -1,
  ignore_random = FALSE,
  mesh_extra_init = list(a = 0, log_b = -1, s = 0.001),
  ...
)
}
\arguments{
\item{data}{If \code{method == "laplace"}, a list of length \code{n_loc} where each
element contains the GEV observations at the given spatial location.
If \code{method == "maxsmooth"}, a list with two elements: \code{est},
an \verb{n_loc x 3} matrix of parameter estimates at each location,
and \code{var}, a \verb{3 x 3 x n_loc} array of corresponding variance estimates.}

\item{locs}{An \verb{n_loc x 2} matrix of longitude and latitude of the corresponding response values.}

\item{random}{Either "a", "ab", or "abs", where \code{a} indicates the location parameter,
\code{b} indicates the scale parameter, \code{s} indicates the shape parameter.  This tells the model
which GEV parameters are considered as random effects.}

\item{method}{Either "laplace" or "maxsmooth". Default is "laplace". See details.}

\item{init_param}{A list of initial parameters. See details.}

\item{reparam_s}{A flag indicating whether the shape parameter is "zero", "unconstrained",
constrained to be "negative", or constrained to be "positive". If model "abs" is used,
\code{reparam_s} cannot be "zero". See details.}

\item{kernel}{Kernel function for spatial random effects covariance matrix. Can be "exp"
(exponential kernel), "matern" (Matern kernel), or "spde" (Matern kernel with SPDE
approximation described in Lindgren et al. 2011). To use the SPDE approximation,
the user must first install the INLA R package.}

\item{X_a}{\verb{n_loc x r_a} design matrix for a, where \code{r_a-1} is the number of covariates. If not
provided, an \verb{n_loc x 1} column matrix of 1s is used.}

\item{X_b}{\verb{n_loc x r_b} design matrix for log(b). Does not need to be provided if b is fixed.}

\item{X_s}{\verb{n_loc x r_s} design matrix for g(s), where g() is a transformation function of \code{s}.
Does not need to be provided if s is fixed.}

\item{nu}{Hyperparameter of the Matern kernel. Default is 1.}

\item{s_prior}{Optional. A length 2 vector where the first element is the mean of the normal
prior on s or log(s) and the second is the standard deviation. Default is NULL, meaning a
uniform prior is put on s if s is fixed, or a GP prior is applied if s is a random effect.}

\item{beta_prior}{Optional named list that specifies normal priors on the GP mean function
coefficients \code{beta}s. Each element of the list should be a named length 2 vector in which
the first element is mean and second element is sd.
E.g. \code{beta_prior=list(beta_a=c(0,100), beta_b=c(0,10), beta_s=c(-2,5))}.
Default is NULL, which means imposing a noninformative uniform flat prior.}

\item{matern_pc_prior}{Optional named list that specifies Penalized complexity
priors on the GP Matern covariance hyperparameters \code{sig} and \code{rho}, where \code{sig = sqrt(sigma)} and \code{rho = sqrt(8*nu)/kappa}. Names must be \code{matern_a}, \code{matern_b},
or \code{matern_s}.
E.g. \code{matern_pc_prior=list(matern_s=matern_pc_prior(100, 0.9, 2, 0.1))}.
Default is NULL, which means a flat prior. See \code{?matern_pc_prior} for more details.}

\item{return_levels}{Optional vector of return-level probabilities.
If provided, the posterior mean and standard deviation of the upper-tail GEV quantile at each
spatial location for each of these probabilities will be included in the summary output.
See \code{?summary.spatialGEVfit} for details.}

\item{get_return_levels_cov}{Default is TRUE if \code{return_levels} is specified. Can be turned off
for when the number of locations is large so that the high-dimensional covariance matrix for
the return levels is not stored.}

\item{sp_thres}{Optional. Thresholding value to create sparse covariance matrix. Any distance
value greater than or equal to \code{sp_thres} will be set to 0. Default is -1, which means not
using sparse matrix. Caution: hard thresholding the covariance matrix often results in bad
convergence.}

\item{adfun_only}{Only output the ADfun constructed using TMB? If TRUE, model fitting is not
performed and only a TMB template \code{adfun} is returned (along with the created mesh if kernel is
"spde").
This can be used when the user would like to use a different optimizer other than the default
\code{nlminb}. E.g., call \code{optim(adfun$par, adfun$fn, adfun$gr)} for optimization.}

\item{ignore_random}{Ignore random effect? If TRUE, spatial random effects are not integrated
out in the model. This can be helpful for checking the marginal likelihood.}

\item{silent}{Do not show tracing information?}

\item{mesh_extra_init}{A named list of scalars. Used when the SPDE kernel is used. The list
provides the initial values for a, log(b), and s on the extra triangles created in the mesh.
The default is \code{list(a=0, log_b=-1, s=0.001)}.}

\item{get_hessian}{Defaults to TRUE so that \code{spatialGEV_sample()} can be used for sampling
from the Normal approximated posterior with the inverse Hessian as the Normal covariance.}

\item{...}{Arguments to pass to \code{INLA::inla.mesh.2d()}. See details \code{?inla.mesh.2d()} and
Section 2.1 of Lindgren & Rue (2015) JSS paper.
This is used specifically for when \code{kernel="spde"}, in which case a mesh needs to be
constructed on the spatial domain. When no arguments are passed to \code{inla.mesh.2d()}, a
default argument is \code{max.edge=2}, which simply specifies the largest allowed triangle edge
length. It is strongly suggested that the user should specify these arguments if they would
like to use the SPDE kernel. Please make sure INLA package is installed before
using the SPDE approximation.}
}
\value{
If \code{adfun_only=TRUE}, this function outputs a list returned by \code{TMB::MakeADFun()}.
This list contains components \verb{par, fn, gr} and can be passed to an R optimizer.
If \code{adfun_only=FALSE}, this function outputs an object of class \code{spatialGEVfit}, a list containing:
\itemize{
\item An adfun object
\item A fit object given by calling \code{nlminb()} on the adfun
\item An object of class \code{sdreport} from TMB which contains the point estimates, standard error,
and precision matrix for the fixed and random effects
\item Other helpful information about the model: kernel, data coordinates matrix, and optionally
the created mesh if \code{kernel="spde"} (see details).
}

\code{spatialGEV_model()} is used internally by \code{spatialGEV_fit()} to parse its inputs.  It returns a list with elements \code{data}, \code{parameters}, \code{random}, and \code{map} to be passed to \code{\link[TMB:MakeADFun]{TMB::MakeADFun()}}.  If \code{kernel == "spde"}, the list also contains an element \code{mesh}.
}
\description{
Fit a GEV-GP model.
}
\details{
This function uses the Laplace approximation, implemented via a TMB model, to integrate out the random effects.

Specifying \code{method="laplace"} means integrating out the random effects \eqn{u} in the joint likelihood
via the Laplace approximation: \eqn{p_{\mathrm{LA}}(y \mid \theta) \approx \int p(y, u \mid \theta) \ \mathrm{d}u}.
Then the random effects posterior is constructed via a Normal approximation centered at the Laplace-approximated
marginal likelihood mode with the covariance being the quadrature of it.
If \code{method="maxsmooth"}, the inference is carried out in two steps. First, the user provides the MLEs
and variance estimates of \code{a}, \code{b} and \code{s} at each location to \code{data}, which is known as the max step.
The max-step estimates are denoted as \eqn{\hat{u}}, and the likelihood function at each location is approximated
by a Normal distribution at \eqn{\mathcal{N}(\hat{u}, \widehat{Var}(u))}.
Second, the Laplace approximation is used to integrate out the random effects in the joint likelihood
\eqn{p_{\mathrm{LA}}(\hat{u} \mid \theta) \approx \int p(\hat{u},u \mid \theta) \ \mathrm{d}u}, followed by a Normal
approximation at mode and quadrature of the approximated marginal likelihood \eqn{p_{\mathrm{LA}}(\hat{u} \mid \theta)}.
This is known as the smooth step.

The random effects are assumed to follow Gaussian processes with mean 0 and covariance matrix
defined by the chosen kernel function. E.g., using the exponential kernel function:

\if{html}{\out{<div class="sourceCode">}}\preformatted{cov(i,j) = sigma*exp(-|x_i - x_j|/ell)
}\if{html}{\out{</div>}}

When specifying the initial parameters to be passed to \code{init_param}, care must be taken to
count the number of parameters. Described below is how to specify \code{init_param} under different
settings of \code{random} and \code{kernel}. Note that the order of the parameters must match the
descriptions below (initial values specified below such as 0 and 1 are only examples).
\itemize{
\item random = "a", kernel = "exp":
\code{a} should be a vector and the rest are scalars. \code{log_sigma_a} and \code{log_ell_a} are
hyperparameters in the exponential kernel for the Gaussian process describing the spatial
variation of \code{a}.
}

\if{html}{\out{<div class="sourceCode">}}\preformatted{init_param = list(a = rep(1,n_locations), log_b = 0, s = 1,
                  beta_a = rep(0, n_covariates),
                  log_sigma_a = 0, log_ell_a = 0)
}\if{html}{\out{</div>}}

Note that even if \code{reparam_s=="zero"}, an initial value for \code{s} still must be provided, even
though in this case the value does not matter anymore.
\itemize{
\item random = "ab", kernel = "exp":
When \code{b} is considered a random effect, its corresponding GP hyperparameters \code{log_sigma_b}
and \code{log_ell_b} need to be specified.
}

\if{html}{\out{<div class="sourceCode">}}\preformatted{init_param = list(a = rep(1,n_locations),
                  log_b = rep(0,n_locations), s=1,
                  beta_a = rep(0, n_covariates), beta_b = rep(0, n_covariates),
                  log_sigma_a = 0,log_ell_a = 0,
                  log_sigma_b = 0,log_ell_b = 0)
}\if{html}{\out{</div>}}
\itemize{
\item random = "abs", kernel = "exp":
}

\if{html}{\out{<div class="sourceCode">}}\preformatted{init_param = list(a = rep(1,n_locations),
                  log_b = rep(0,n_locations),
                  s = rep(0,n_locations),
                  beta_a = rep(0, n_covariates),
                  beta_b = rep(0, n_covariates),
                  beta_s = rep(0, n_covariates),
                  log_sigma_a = 0,log_ell_a = 0,
                  log_sigma_b = 0,log_ell_b = 0,
                  log_sigma_s = 0,log_ell_s = 0)
}\if{html}{\out{</div>}}
\itemize{
\item random = "abs", kernel = "matern" or "spde":
When the Matern or SPDE kernel is used, hyperparameters for the GP kernel are \code{log_sigma_a/b/s}
and \code{log_kappa_a/b/s} for each spatial random effect.
}

\if{html}{\out{<div class="sourceCode">}}\preformatted{init_param = list(a = rep(1,n_locations),
                  log_b = rep(0,n_locations),
                  s = rep(0,n_locations),
                  beta_a = rep(0, n_covariates),
                  beta_b = rep(0, n_covariates),
                  beta_s = rep(0, n_covariates),
                  log_sigma_a = 0,log_kappa_a = 0,
                  log_sigma_b = 0,log_kappa_b = 0,
                  log_sigma_s = 0,log_kappa_s = 0)
}\if{html}{\out{</div>}}

\code{reparam_s} allows the user to reparametrize the GEV shape parameter \code{s}. For example,
\itemize{
\item if the data is believed to be right-skewed and lower bounded, this means \code{s>0} and one should
use \code{reparam_s = "positive"};
\item if the data is believed to be left-skewed and upper bounded, this means \code{s<0} and one should
use \code{reparam_s="negative"}.
\item When \code{reparam_s = "zero"}, the data likelihood is a Gumbel distribution. In this case the data
has neither an upper nor a lower bound. Finally, specify \code{reparam_s = "unconstrained"} if no sign
constraint should be imposed on \code{s}.
}

Note that when \code{reparam_s = "negative"} or \code{"positive"}, the initial value of \code{s} in \code{init_param}
should be that of log(|s|).

When the SPDE kernel is used, a mesh on the spatial domain is created using
\code{INLA::inla.mesh.2d()}, which extends the spatial domain by adding additional triangles in the
mesh to avoid boundary effects in estimation. As a result, the number of \code{a} and \code{b} values will be
greater than the number of locations due to these additional triangles: each of them also has
their own \code{a} and \code{b} values. Therefore, the fit function will return a vector \code{meshidxloc} to
indicate the positions of the observed coordinates in the random effects vector.
}
\examples{
\donttest{
library(SpatialGEV)
n_loc <- 20
a <- simulatedData$a[1:n_loc]
logb <- simulatedData$logb[1:n_loc]
logs <- simulatedData$logs[1:n_loc]
y <- simulatedData$y[1:n_loc]
locs <- simulatedData$locs[1:n_loc,]
# No covariates are included, only intercept is included.
fit <- spatialGEV_fit(
  data = y,
  locs = locs,
  random = "ab",
  init_param = list(
    a = rep(0, n_loc),
    log_b = rep(0, n_loc),
    s = 0,
    beta_a = 0,
    beta_b = 0,
    log_sigma_a = 0,
    log_kappa_a = 0,
    log_sigma_b = 0,
    log_kappa_b = 0
  ),
  reparam_s = "positive",
  kernel = "matern",
  X_a = matrix(1, nrow=n_loc, ncol=1),
  X_b = matrix(1, nrow=n_loc, ncol=1),
  silent = TRUE
)
print(fit)

# To use a different optimizer other than the default `nlminb()`, create
# an object ready to be passed to optimizer functions using `adfun_only=TRUE`
obj <- spatialGEV_fit(
  data = y,
  locs = locs, random = "ab",
  init_param = list(
    a = rep(0, n_loc),
    log_b = rep(0, n_loc),
    s = 0,
    beta_a = 0,
    beta_b = 0,
    log_sigma_a = 0,
    log_kappa_a = 0,
    log_sigma_b = 0,
    log_kappa_b = 0
  ),
  reparam_s = "positive",
  kernel = "matern",
  X_a = matrix(1, nrow=n_loc, ncol=1),
  X_b = matrix(1, nrow=n_loc, ncol=1),
  adfun_only = TRUE
)
fit <- optim(obj$par, obj$fn, obj$gr)
}

# Using the SPDE kernel (SPDE approximation to the Matern kernel)
# Make sure the INLA package is installed before using `kernel="spde"`
\dontrun{
library(INLA)
n_loc <- 20
y <- simulatedData2$y[1:n_loc]
locs <- simulatedData2$locs[1:n_loc,]
fit_spde <- spatialGEV_fit(
  data = y,
  locs = locs,
  random = "abs",
  init_param = list(
    a = rep(0, n_loc),
    log_b = rep(0, n_loc),
    s = rep(-2, n_loc),
    beta_a = 0,
    beta_b = 0,
    beta_s = -2,
    log_sigma_a = 0,
    log_kappa_a = 0,
    log_sigma_b = 0,
    log_kappa_b = 0,
    log_sigma_s = 0,
    log_kappa_s = 0
  ),
  reparam_s = "positive",
  kernel = "spde",
  beta_prior = list(
    beta_a=c(0,100),
    beta_b=c(0,10),
    beta_s=c(0,10)
  ),
  matern_pc_prior = list(
    matern_a=matern_pc_prior(1e5,0.95,5,0.1),
    matern_b=matern_pc_prior(1e5,0.95,3,0.1),
    matern_s=matern_pc_prior(1e2,0.95,1,0.1)
  )
)
plot(fit_spde$mesh) # Plot the mesh
points(locs[,1], locs[,2], col="red", pch=16) # Plot the locations
}
}
