% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/extractor.R
\name{extractor}
\alias{extractor}
\alias{extract_taxids}
\alias{extract_kraken_output}
\alias{extract_kraken_reads}
\title{Extract reads and output from Kraken}
\usage{
extract_taxids(
  kraken_report,
  taxon = c("d__Bacteria", "d__Fungi", "d__Viruses")
)

extract_kraken_output(
  kraken_out,
  taxids,
  odir,
  ofile = "kraken_microbiome_output.txt",
  ...
)

extract_kraken_reads(
  kraken_out,
  reads,
  ofile = NULL,
  odir = getwd(),
  threads = NULL,
  ...,
  envpath = NULL,
  seqkit = NULL
)
}
\arguments{
\item{kraken_report}{The path to kraken report file.}

\item{taxon}{An atomic character vector specifying the names of the taxa
wanted. Each name should follow the kraken style: the rank code, two
underscores, and the scientific name of the taxon (e.g., "d__Viruses").}

\item{kraken_out}{The path to kraken output file.}

\item{taxids}{A character vector specifying the NCBI taxonomy identifiers to extract.}

\item{odir}{A string giving the directory in which to save \code{ofile}.}

\item{ofile}{A string naming the file in which to save the kraken output for the specified \code{taxids}.}

\item{...}{\itemize{
\item \code{extract_kraken_output}: Additional arguments passed to
\code{\link[polars:IO_sink_csv]{sink_csv()}}.
\item \code{extract_kraken_reads}: Additional arguments passed to
\code{\link[blit:cmd_run]{cmd_run()}} method.
}}

\item{reads}{The original fastq files (input in \code{kraken2}). You can pass
two paired-end files directly.}

\item{threads}{Number of threads to use, see
\code{blit::cmd_help(blit::seqkit("grep"))}.}

\item{envpath}{A string of path to be added to the environment variable
\code{PATH}.}

\item{seqkit}{A string of path to \code{seqkit} command.}
}
\value{
\itemize{
\item \code{extract_taxids}: An atomic character vector of taxon identifiers.
}

\itemize{
\item \code{extract_kraken_output}: A polars \link[polars:DataFrame_class]{DataFrame}.
}

\itemize{
\item \code{extract_kraken_reads}: The exit status, invisibly.
}
}
\description{
Extract reads and output from Kraken
}
\examples{
\dontrun{
# For 10x Genomics data, `fq1` only contains the barcode and UMI, but the
# official documentation does not give any guidance for this case. Here, I
# prefer using `umi-tools` to transform the `umi` into fq2 and then running
# `rsahmi` with only fq2.
blit::kraken2(
    fq1 = fq1,
    fq2 = fq2,
    classified_out = "classified.fq",
    # Number of threads to use
    blit::arg("--threads", 10L, format = "\%d"),
    # the kraken database
    blit::arg("--db", kraken_db),
    "--use-names", "--report-minimizer-data",
) |> blit::cmd_run()

# `kraken_report` should be the output of `blit::kraken2()`
taxids <- extract_taxids(kraken_report = "kraken_report.txt")

# 1. `kraken_out` should be the output of `blit::kraken2()`
# 2. `taxids` should be the output of `extract_taxids()`
# 3. `odir`: the output directory
extract_kraken_output(
    kraken_out = "kraken_output.txt",
    taxids = taxids,
    odir = # specify the output directory
)

# 1. `kraken_out` should be the output of `extract_kraken_output()`
# 2. `fq1` and `fq2` should be the same as those passed to `blit::kraken2()`
extract_kraken_reads(
    kraken_out = "kraken_microbiome_output.txt",
    reads = c(fq1, fq2),
    threads = 10L, # Number of threads to use
    # set the `seqkit` argument to the path of your seqkit command. If `NULL`,
    # seqkit will be looked up in your `PATH` environment variable
    seqkit = NULL
)
}
}
\seealso{
\url{https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown}
}
