% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/VCF_to_catalog_functions.R
\name{SplitStrelkaSBSVCF}
\alias{SplitStrelkaSBSVCF}
\title{Split an in-memory Strelka VCF into SBS, DBS, and variants involving
> 2 consecutive bases}
\usage{
SplitStrelkaSBSVCF(
  vcf.df,
  max.vaf.diff = 0.02,
  name.of.VCF = NULL,
  always.merge.SBS = FALSE
)
}
\arguments{
\item{vcf.df}{An in-memory data frame containing the contents of a Strelka VCF file.}

\item{max.vaf.diff}{The maximum difference of VAF; the default value is 0.02.
If the absolute difference of VAFs for adjacent SBSs is bigger than
\code{max.vaf.diff}, then these adjacent SBSs are likely to be "merely"
asynchronous single base mutations, as opposed to a simultaneous doublet
mutation or variants involving more than two consecutive bases. Use a negative
value (e.g. -1) to suppress merging adjacent SBSs into DBSs.}

\item{name.of.VCF}{Name of the VCF file.}

\item{always.merge.SBS}{If \code{TRUE}, merge adjacent SBSs as DBSs
regardless of VAFs and regardless of the value of \code{max.vaf.diff}.}
}
\value{
A list of in-memory objects with the elements:

\enumerate{
   \item \code{SBS.vcf}: Data frame of pure SBS mutations -- no DBS or 3+BS
   mutations.

   \item \code{DBS.vcf}: Data frame of pure DBS mutations -- no SBS or 3+BS
   mutations.

   \item \code{discarded.variants}: \strong{Non-NULL only if} there are
   variants that were excluded from the analysis. See the added extra column
   \code{discarded.reason} for more details.

   }
}
\description{
SBSs are single base substitutions,
e.g. C>T, A>G, ...  DBSs are double base substitutions,
e.g. CC>TT, AT>GG, ...  Variants involving > 2 consecutive
bases are rare, so this function just records them. These
would be variants such as ATG>CCT, AGAT>TCTA, ...
}
\keyword{internal}
