% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/all_functions.R
\name{bgsmtr}
\alias{bgsmtr}
\title{Bayesian Group Sparse Multi-Task Regression for Imaging Genetics}
\usage{
bgsmtr(X, Y, group, tuning = "CV.mode", lam_1_fixed = NULL,
  lam_2_fixed = NULL, iter_num = 10000, burn_in = 5001)
}
\arguments{
\item{X}{A d-by-n matrix; d is the number of SNPs and n is the number of subjects. Each row of X should correspond to a particular SNP
and each column should correspond to a particular subject. Each element of X should give the number of minor alleles for the corresponding
SNP and subject. The function will center each row of X to have mean zero prior to running the Gibbs sampling algorithm.}

\item{Y}{A c-by-n matrix; c is the number of phenotypes (brain imaging measures) and n is the number of subjects. Each row of
Y should correspond to a particular phenotype and each column should correspond to a particular subject. Each element of Y should give
the measured value for the corresponding phentoype and subject. The function will center and scale each row of Y to have mean zero and unit
variance prior to running the Gibbs sampling algorithm.}

\item{group}{A vector of length d; d is the number of SNPs. Each element of this vector is a string representing a gene or group
label associated with each SNP. The SNPs represented by this vector should be ordered according to the rows of X.}

\item{tuning}{A string, either 'WAIC' or 'CV.mode'. If 'WAIC', the Gibbs sampler is run with fixed values of the tuning
parameters specified by the arguments \emph{lam_1_fixed} and  \emph{lam_2_fixed} and the WAIC is computed based on the sampling output. This
can then be used to choose optimal values for \emph{lam_1_fixed} and \emph{lam_2_fixed} based on multiple runs with each run using different
values of \emph{lam_1_fixed} and \emph{lam_2_fixed}. This option is best suited for either comparing a small set of tuning parameter values or
for computation on a high performance computing cluster where different nodes can be used to run the function with different
values of \emph{lam_1_fixed} and \emph{lam_2_fixed}. Posterior inference is then based on the run that produces the lowest value for the WAIC.
The option 'CV.mode', which is the default, is best suited for computation using just a single processor. In this case the
tuning parameters are chosen based on five-fold cross-validation over a grid of possible values with out-of-sample prediction based on an
approximate posterior mode. The Gibbs sampler is then run using the chosen values of the tuning parameters. When tuning = 'CV.mode' the values
for the arguments \emph{lam_1_fixed} and \emph{lam_2_fixed} are not required.}

\item{lam_1_fixed}{Only required if tuning = 'WAIC'. A positive number giving the value for the gene-specific tuning parameter. Larger values lead to a larger
degree of shrinkage to zero of estimated regression coefficients at the gene level (across all SNPs and phenotypes).}

\item{lam_2_fixed}{Only required if tuning = 'WAIC'. A positive number giving the value for the SNP-specific tuning parameter. Larger values lead to a larger
degree of shrinkage to zero of estimated regression coefficients at the SNP level (across all phenotypes).}

\item{iter_num}{Positive integer representing the total number of iterations to run the Gibbs sampler. Defaults to 10,000.}

\item{burn_in}{Nonnegative integer representing the number of MCMC samples to discard as burn-in. Defaults to 5001.}
}
\value{
A list with the elements
\item{WAIC}{If tuning = 'WAIC' this is the value of the WAIC computed from the MCMC output. If tuning = 'CV.mode' this component is excluded.}
\item{Gibbs_setup}{A list providing values for the input parameters of the function.}
\item{Gibbs_W_summaries}{A list with five components, each component being a d-by-c matrix giving some posterior summary of the regression parameter
matrix W, where the ij-th element of W represents the association between the i-th SNP and j-th phenotype.

-Gibbs_W_summaries$W_post_mean is a d-by-c matrix giving the posterior mean of W.



-Gibbs_W_summaries$W_post_mode is a d-by-c matrix giving the posterior mode of W.


-Gibbs_W_summaries$W_post_sd is a d-by-c matrix giving the posterior standard deviation for each element of W.



-Gibbs_W_summaries$W_2.5_quantile is a d-by-c matrix giving the posterior 2.5 percent quantile for each element of W.



-Gibbs_W_summaries$W_97.5_quantile is a d-by-c matrix giving the posterior 97.5 percent quantile for each element of W.'}
}
\description{
Runs the the Gibbs sampling algorithm to fit a Bayesian group sparse multi-task regression model.
Tuning parameters can be chosen using either the MCMC samples and the WAIC (multiple runs) or using an approximation to
the posterior mode and five-fold cross-validation (single run).
}
\examples{
data(bgsmtr_example_data)
names(bgsmtr_example_data)
\dontshow{
## Toy example with a small subset of the data for routine CRAN testing
## reduce the number of SNPs to 5, subjects to 5, phenotypes to 5
fit = bgsmtr(X = bgsmtr_example_data$SNP_data[1:5,1:5], Y = bgsmtr_example_data$BrainMeasures[1:5,1:5],
group = bgsmtr_example_data$SNP_groups[1:5], tuning = 'WAIC', lam_1_fixed = 2, lam_2_fixed = 2,
iter_num = 5, burn_in = 1)
}

\dontrun{
## test run the sampler for 100 iterations with fixed tunning parameters and compute WAIC
## we recomend at least 5,000 iterations for actual use
fit = bgsmtr(X = bgsmtr_example_data$SNP_data, Y = bgsmtr_example_data$BrainMeasures,
group = bgsmtr_example_data$SNP_groups, tuning = 'WAIC', lam_1_fixed = 2, lam_2_fixed = 2,
iter_num = 100, burn_in = 50)
## posterior mean for regression parameter relating 100th SNP to 14th phenotype
fit$Gibbs_W_summaries$W_post_mean[100,14]
## posterior mode for regression parameter relating 100th SNP to 14th phenotype
fit$Gibbs_W_summaries$W_post_mode[100,14]
## posterior standard deviation for regression parameter relating 100th SNP to 14th phenotype
fit$Gibbs_W_summaries$W_post_sd[100,14]
## 95\% equal-tail credible interval for regression parameter relating 100th SNP to 14th phenotype
c(fit$Gibbs_W_summaries$W_2.5_quantile[100,14],fit$Gibbs_W_summaries$W_97.5_quantile[100,14])
}

\dontrun{
## run the sampler for 10,000 iterations with tuning parameters set using cross-validation
## On a standard computer with a small numer of cores this is the recomended option
fit = bgsmtr(X = bgsmtr_example_data$SNP_data, Y = bgsmtr_example_data$BrainMeasures,
group = bgsmtr_example_data$SNP_groups, tuning = 'CV.mode',iter_num = 10000, burn_in = 5000)
}

}
\references{
Greenlaw, Keelin, Elena Szefer, Jinko Graham, Mary Lesperance, and Farouk S. Nathoo. "A Bayesian Group Sparse Multi-Task Regression Model for Imaging Genetics." arXiv preprint arXiv:1605.02234 (2016).

Nathoo, Farouk S., Keelin Greenlaw, and Mary Lesperance. "Regularization Parameter Selection for a Bayesian Multi-Level Group Lasso Regression Model with Application to Imaging Genomics." arXiv preprint arXiv:1603.08163 (2016).
}
\author{
Farouk S. Nathoo, \email{nathoo@uvic.ca}

Keelin Greenlaw  \email{keelingreenlaw@gmail.com}

Mary Lesperance  \email{mlespera@uvic.ca}
}
