% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/multidog.R
\name{multidog}
\alias{multidog}
\title{Fit \code{\link{flexdog}} to multiple SNPs}
\usage{
multidog(
  refmat,
  sizemat,
  ploidy,
  model = c("norm", "hw", "bb", "ash", "s1", "s1pp", "f1", "f1pp", "flex", "uniform",
    "custom"),
  nc = 1,
  p1_id = NULL,
  p2_id = NULL,
  bias_init = exp(c(-1, -0.5, 0, 0.5, 1)),
  outliers = FALSE,
  prior_vec = NULL,
  ...
)
}
\arguments{
\item{refmat}{A matrix of reference read counts. The columns index
the individuals and the rows index the markers (SNPs). This matrix must have
rownames (for the names of the markers) and column names (for the names
of the individuals). These names must match the names in \code{sizemat}.}

\item{sizemat}{A matrix of total read counts. The columns index
the individuals and the rows index the markers (SNPs). This matrix must have
rownames (for the names of the markers) and column names (for the names
of the individuals). These names must match the names in \code{refmat}.}

\item{ploidy}{The ploidy of the species. Assumed to be the same for each
individual.}

\item{model}{What form should the prior (genotype distribution) take?
See Details for possible values.}

\item{nc}{The number of computing cores to use. This should never be
more than the number of cores available in your computing environment.
You can determine the maximum number of available cores by running
\code{parallel::detectCores()} in R.}

\item{p1_id}{The ID of the first parent. This should be a character of
length 1. This should correspond to a single column name in \code{refmat}
and \code{sizemat}.}

\item{p2_id}{The ID of the second parent. This should be a character of
length 1. This should correspond to a single column name in \code{refmat}
and \code{sizemat}.}

\item{bias_init}{A vector of initial values for the bias parameter
over the multiple runs of \code{flexdog_full}.}

\item{outliers}{A logical. Should we allow for the inclusion of outliers
(\code{TRUE}) or not (\code{FALSE})? Only supported when
\code{model = "f1"} or \code{model = "s1"}. Allowing outliers is not
recommended for any other model.}

\item{prior_vec}{The pre-specified genotype distribution. Only used if
\code{model = "custom"} and must otherwise be \code{NULL}. If specified,
then it should be a vector of length \code{ploidy + 1} with
non-negative elements that sum to 1.}

\item{...}{Additional parameters to pass to \code{\link{flexdog_full}}.}
}
\value{
A list-like object of two data frames.
\describe{
\item{\code{snpdf}}{A data frame containing properties of the SNPs (markers).
    The rows index the SNPs. The variables include:
    \describe{
    \item{\code{snp}}{The name of the SNP (marker).}
    \item{\code{bias}}{The estimated allele bias of the SNP.}
    \item{\code{seq}}{The estimated sequencing error rate of the SNP.}
    \item{\code{od}}{The estimated overdispersion parameter of the SNP.}
    \item{\code{prop_mis}}{The estimated proportion of individuals
        misclassified in the SNP.}
    \item{\code{num_iter}}{The number of iterations performed during
        the EM algorithm for that SNP.}
    \item{\code{llike}}{The maximum marginal log-likelihood of the SNP.}
    \item{\code{ploidy}}{The provided ploidy of the species.}
    \item{\code{model}}{The provided model for the prior genotype
        distribution.}
    \item{\code{Pr_k}}{The estimated frequency of individuals with genotype
        k, where k can be any integer between 0 and the ploidy level.}
    \item{Model specific parameter estimates}{See the return value of
        \code{par} in the help page of \code{\link{flexdog}}.}
    }}
\item{\code{inddf}}{A data frame containing the properties of the
    individuals at each SNP. The variables include:
    \describe{
    \item{\code{snp}}{The name of the SNP (marker).}
    \item{\code{ind}}{The name of the individual.}
    \item{\code{ref}}{The provided reference counts for that individual at
         that SNP.}
    \item{\code{size}}{The provided total counts for that individual at
         that SNP.}
    \item{\code{geno}}{The posterior mode genotype for that individual at
         that SNP. This is the estimated reference allele dosage for a
         given individual at a given SNP.}
    \item{\code{postmean}}{The posterior mean genotype for that individual
         at that SNP. This is a continuous genotype estimate of the
         reference allele dosage for a given individual at a given SNP.}
    \item{\code{maxpostprob}}{The maximum posterior probability. This
         is the posterior probability that the individual was genotyped
         correctly.}
    \item{\code{Pr_k}}{The posterior probability that a given individual
         at a given SNP has genotype k, where k can vary from 0 to the
         ploidy level of the species.}
    }}
}
}
\description{
This is a convenience function that will run \code{\link{flexdog}} over many SNPs.
Support is provided for parallel computing through the doParallel package.
This function has not been extensively tested. Please report any bugs to
\url{https://github.com/dcgerard/updog/issues}.
}
\details{
You should format your reference counts and total read counts in two
separate matrices. The rows should index the markers (SNPs) and the
columns should index the individuals. Row names are how we ID the SNPs
and column names are how we ID the individuals, and so they are required
attributes.

See the details of \code{\link{flexdog}} for the possible values of
\code{model}.

If \code{model = "f1"}, \code{model = "f1pp"}, \code{model = "s1"},
or \code{model = "s1pp"}, then the user may provide the individual ID(s)
of the parent(s) via the \code{p1_id} and \code{p2_id} arguments.

The output is a list containing two data frames. The first data frame,
called \code{snpdf}, contains information on each SNP, such as the allele bias
and the sequencing error rate. The second data frame, called \code{inddf},
contains information on each individual at each SNP, such as the estimated
genotype and the posterior probability of being classified correctly.

Using an \code{nc} value greater than \code{1} will allow you to
run \code{\link{flexdog}} in parallel. Only set \code{nc} greater than
\code{1} if you are sure you have access to the proper number of cores.
The upper bound on the value of \code{nc} you should try can be determined
by running \code{parallel::detectCores()} in R.

SNPs that contain 0 reads (or all missing data) are entirely removed.
}
\examples{
\dontrun{
data("uitdewilligen")
mout <- multidog(refmat = t(uitdewilligen$refmat),
                 sizemat = t(uitdewilligen$sizemat),
                 ploidy = uitdewilligen$ploidy,
                 nc = 2)
mout$inddf
mout$snpdf
}

}
\author{
David Gerard
}
