% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/simReg.R
\name{sim.reg}
\alias{sim.reg}
\title{Similarity regression}
\usage{
sim.reg(hpo.terms, y, x = NULL, g = rep(0, length(y)), its = 10000,
  thin = 1, record_sims = FALSE,
  information.content = get.term.info.content(hpo.terms, hpo.phenotypes = x),
  ttsm = get.term.term.matrix(hpo.terms, information.content, TRUE),
  row_is_column_anc = get.term.descendancy.matrix(hpo.terms,
  names(information.content)), case_ids = unlist(mapply(SIMPLIFY = FALSE, FUN
  = rep, 0:(length(x) - 1), sapply(x, length))),
  term_ids = as.integer(match(unlist(x), colnames(row_is_column_anc))) - 1,
  return_tuning_runs = FALSE, tuning_its = 10000, tuning_burn = 1000,
  burn = 2000, gamma = (runif(1) < gamma_prior_prob), alpha_star = rnorm(n
  = 1, mean = alpha_star_mean, sd = alpha_star_sd), alpha = rnorm(n = 1, mean
  = alpha_mean, sd = alpha_sd), log_beta = rnorm(n = 1, mean = log_beta_mean,
  sd = log_beta_sd), phi = sample.int(n = ncol(row_is_column_anc), size = 3,
  replace = TRUE) - 1, logit_mean_f = rnorm(n = 1, mean = logit_mean_f_mean,
  sd = logit_mean_f_sd), log_alpha_plus_beta_f = rnorm(n = 1, mean =
  log_alpha_plus_beta_f_mean, sd = log_alpha_plus_beta_f_sd),
  logit_mean_g = rnorm(n = 1, mean = logit_mean_g_mean, sd = logit_mean_g_sd),
  log_alpha_plus_beta_g = rnorm(n = 1, mean = log_alpha_plus_beta_g_mean, sd =
  log_alpha_plus_beta_g_sd), gamma_prior_prob = 0.05, alpha_star_mean = 0,
  alpha_mean = 0, alpha_star_sd = 5, alpha_sd = 5, log_beta_mean = 2,
  log_beta_sd = 1, logit_mean_f_mean = 1, logit_mean_f_sd = 1,
  log_alpha_plus_beta_f_mean = 2, log_alpha_plus_beta_f_sd = 1,
  logit_mean_g_mean = 0, logit_mean_g_sd = 1.5,
  log_alpha_plus_beta_g_mean = 2, log_alpha_plus_beta_g_sd = 1,
  alpha_star_proposal_sd = 2, alpha_proposal_sd = 2,
  log_beta_proposal_sd = 2, logit_mean_f_proposal_sd = 2,
  log_alpha_plus_beta_f_proposal_sd = 2, logit_mean_g_proposal_sd = 2,
  log_alpha_plus_beta_g_proposal_sd = 2,
  phi_jumps = c(0:(ncol(row_is_column_anc) - 1), rep(match(unlist(lapply(x[y],
  get.ancestors, hpo.terms = hpo.terms)), colnames(row_is_column_anc)) - 1,
  50)), pseudo_phi_marginal_prior = c(0:(ncol(row_is_column_anc) - 1),
  rep(match(unlist(lapply(x[y], get.ancestors, hpo.terms = hpo.terms)),
  colnames(row_is_column_anc)) - 1, 50)), phi_num_leaves_geometric_rate = 1,
  lit_sims = setNames(rep(1, ncol(row_is_column_anc)), colnames(ttsm)))
}
\arguments{
\item{hpo.terms}{R-Object representation of HPO}

\item{y}{Logical vector of genotypes, one element per case (typically \code{TRUE} for the rare genotype, \code{FALSE} for the common genotype)}

\item{x}{List of character vectors of HPO phenotypes of cases}

\item{g}{Genotype log odds offsets per individual}

\item{its}{Number of update cycles to perform}

\item{thin}{Factor by which to thin resultant chains of parameter samples}

\item{record_sims}{Logical indicating whether to record trace of similarities}

\item{information.content}{Numeric vector, named by HPO IDs, containing the information content of corresponding terms}

\item{ttsm}{The `term-term' similarity matrix, a numeric matrix whose dimensions are named by the terms so that cell i,j contains the similarity of term i to term j}

\item{row_is_column_anc}{Logical matrix, whose dimensions are named by HPO term IDs so that cell i,j is TRUE if i is an ancestor term of j}

\item{case_ids}{IDs for the N cases from 0 to N-1, indicating which case terms in \code{term_ids} belong to (automatically determined given x)}

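\item{term_ids}{Vector of HPO terms belonging to cases, given as 0-based indices into the columns of \code{row_is_column_anc}, aligned with \code{case_ids} (automatically determined given x)}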

\item{return_tuning_runs}{Logical indicating whether to return the MCMC output of the tuning phase of the inference procedure}

\item{tuning_its}{Number of update cycles to perform in the tuning phase of the inference procedure}

\item{tuning_burn}{Number of update cycles to discard in tuning phase}

\item{burn}{Number of update cycles to discard}

\item{gamma}{Initial value of model selection indicator gamma.}

\item{alpha_star}{Initial value of alpha_star, the rate of observing the rare genotype y = 1 under gamma = 0, i.e. the no association model}

\item{alpha}{Initial value of alpha, the background rate of observing the rare genotype under gamma = 1}

\item{log_beta}{Initial value of log_beta, the log of the effect size of ontological similarity}

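\item{phi}{Initial value of phi, the characteristic phenotype, given as a vector of HPO terms encoded as 0-based indices into the columns of \code{row_is_column_anc} (by default, three terms drawn uniformly at random with replacement)}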

\item{logit_mean_f}{Initial value of logit_mean_f}

\item{log_alpha_plus_beta_f}{Initial value of log_alpha_plus_beta_f}

\item{logit_mean_g}{Initial value of logit_mean_g}

\item{log_alpha_plus_beta_g}{Initial value of log_alpha_plus_beta_g}

\item{gamma_prior_prob}{Prior probability of gamma = 1}

\item{alpha_star_mean}{Prior mean of alpha_star given gamma = 0}

\item{alpha_mean}{Prior mean of alpha given gamma = 1}

\item{alpha_star_sd}{Prior sd of alpha_star given gamma = 0}

\item{alpha_sd}{Prior sd of alpha given gamma = 1}

\item{log_beta_mean}{Prior mean of log_beta given gamma = 1}

\item{log_beta_sd}{Prior sd of log_beta given gamma = 1}

\item{logit_mean_f_mean}{Prior mean of logit_mean_f given gamma = 1}

\item{logit_mean_f_sd}{Prior sd of logit_mean_f given gamma = 1}

\item{log_alpha_plus_beta_f_mean}{Prior mean of log_alpha_plus_beta_f given gamma = 1}

\item{log_alpha_plus_beta_f_sd}{Prior sd of log_alpha_plus_beta_f given gamma = 1}

\item{logit_mean_g_mean}{Prior mean of logit_mean_g given gamma = 1}

\item{logit_mean_g_sd}{Prior sd of logit_mean_g given gamma = 1}

\item{log_alpha_plus_beta_g_mean}{Prior mean of log_alpha_plus_beta_g given gamma = 1}

\item{log_alpha_plus_beta_g_sd}{Prior sd of log_alpha_plus_beta_g given gamma = 1}

\item{alpha_star_proposal_sd}{Proposal sd of local jumps in MH updates of alpha_star used during inference}

\item{alpha_proposal_sd}{Proposal sd of local jumps in MH updates of alpha used during inference}

\item{log_beta_proposal_sd}{Proposal sd of local jumps in MH updates of log_beta used during inference}

\item{logit_mean_f_proposal_sd}{Proposal sd of local jumps in MH updates of logit_mean_f used during inference}

\item{log_alpha_plus_beta_f_proposal_sd}{Proposal sd of local jumps in MH updates of log_alpha_plus_beta_f used during inference}

\item{logit_mean_g_proposal_sd}{Proposal sd of local jumps in MH updates of logit_mean_g used during inference}

\item{log_alpha_plus_beta_g_proposal_sd}{Proposal sd of local jumps in MH updates of log_alpha_plus_beta_g used during inference}

\item{phi_jumps}{Vector of HPO term IDs to be used as jumping distribution for proposal replacements of terms in phi during inference given gamma = 1}

\item{pseudo_phi_marginal_prior}{Vector of HPO term IDs to be used as prior distribution on marginal probability of single term in phi given gamma = 0}

\item{phi_num_leaves_geometric_rate}{Geometric parameter for truncated geometric distribution on number of leaf terms in phi}

\item{lit_sims}{Numeric vector of similarities (greater than 0) of literature phenotype to individual terms (named by term ID)}
}
\value{
List (by parameter) of vectors of consecutive parameter samples from MCMC inference.
}
\description{
Performs Bayesian `similarity regression' on given binary genotype \code{y} (logical vector) against HPO encoded phenotype \code{x} (list of character vectors of HPO term IDs). It returns a list of traces for the various estimated parameters. Of particular interest are the estimated posterior mean of \code{gamma} (the model selection indicator, thus giving an estimate of the probability of an association under the model assumptions - obtained with \code{mean(result$gamma)}) and the posterior distribution of the characteristic phenotype (which can be visualised by the functions \code{\link{hpo.plot.marginal.freqs}}, \code{\link{two.term.marginals.plot}}, and \code{\link{single.term.marginals.plot}}).
}
\examples{
\dontrun{
# Fix the random seed so that the simulated data below are reproducible.
set.seed(0)
# Load the hpo.terms data set used throughout the example.
data(hpo.terms)
# Three HPO term IDs that play the role of the simulated disease phenotype.
disease.terms <- c("HP:0005537", "HP:0000729", "HP:0001873")
# Pool of terms to sample phenotypes from: the result of get.ancestors applied
# to the disease terms together with 50 randomly drawn term IDs, keeping only
# those terms that have HP:0000001 among their ancestors.
all.terms <- Filter(x=get.ancestors(hpo.terms,
	c(disease.terms, sample(hpo.terms$id, size=50))),
	f=function(tm) "HP:0000001" \%in\% hpo.terms$ancestors[[tm]])
# Genotypes for 99 cases: the first 96 are FALSE and the last 3 are TRUE.
y <- c(rep(FALSE, 96), rep(TRUE, 3))
# One phenotype per case. Cases with y FALSE get 3 terms sampled from the pool.
# Cases with y TRUE get 1 sampled term plus the disease terms, each of which is
# kept independently with probability 0.8. Every term set is passed through
# clean.terms (presumably to drop redundant terms - see its help page).
x <- lapply(y, function(.y) clean.terms(
	hpo.terms, if (!.y) sample(all.terms, size=3) else
		c(sample(all.terms, size=1), disease.terms[runif(n=3) < 0.8])))
# Run the similarity regression with all other arguments left at their defaults.
sim.reg.out <- sim.reg(hpo.terms, x=x, y=y)
# Mean of the sampled gamma values, i.e. the estimated probability of association.
mean(sim.reg.out$gamma)
# Plot the marginal frequencies of terms in phi, using only the rows of the phi
# trace for which the sampled gamma is TRUE.
hpo.plot.marginal.freqs(hpo.terms,
	get.term.descendancy.matrix(hpo.terms, all.terms),
	sim.reg.out$phi[sim.reg.out$gamma,])
}
}

