% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read.R, R/read_feats.R, R/read_seqs.R
\name{read_tracks}
\alias{read_tracks}
\alias{read_feats}
\alias{read_subfeats}
\alias{read_links}
\alias{read_sublinks}
\alias{read_seqs}
\title{Read files in various standard formats (FASTA, GFF3, GBK, BED, BLAST, ...) into track tables}
\usage{
read_feats(files, .id = "file_id", format = NULL, parser = NULL, ...)

read_subfeats(files, .id = "file_id", format = NULL, parser = NULL, ...)

read_links(files, .id = "file_id", format = NULL, parser = NULL, ...)

read_sublinks(files, .id = "file_id", format = NULL, parser = NULL, ...)

read_seqs(
  files,
  .id = "file_id",
  format = NULL,
  parser = NULL,
  parse_desc = TRUE,
  ...
)
}
\arguments{
\item{files}{files to read. Should all be of the same format. In many cases,
compressed files (\code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip}) are supported.
Similarly, automatic download of remote files starting with \verb{http(s)://} or
\verb{ftp(s)://} works in most cases.}

\item{.id}{the column with the name of the file a record was read from.
Defaults to "file_id". Set to "bin_id" if every file represents a different
bin.}

\item{format}{specify a format known to gggenomes, such as \code{gff3}, \code{gbk}, ...
to override automatic determination based on the file extension (see
\code{\link[=def_formats]{def_formats()}} for the full list).}

\item{parser}{specify the name of an R function to override automatic
determination based on format, e.g. \code{parser="read_tsv"}.}

\item{...}{additional arguments passed on to the format-specific read
function called down the line.}

\item{parse_desc}{turn \verb{key=some value} pairs from \code{seq_desc} into \code{key}-named
columns and remove them from \code{seq_desc}.}
}
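\value{
A gggenomes-compatible sequence, feature or link tibble:
\itemize{
\item \code{read_feats()}: tibble with features

\item \code{read_subfeats()}: tibble with features

\item \code{read_links()}: tibble with links

\item \code{read_sublinks()}: tibble with links

\item \code{read_seqs()}: tibble with sequence information
}
}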
\description{
Convenience functions to read sequences, features or links from various
bioinformatics file formats, such as FASTA, GFF3, GenBank, BLAST tabular
output, etc. See \code{\link[=def_formats]{def_formats()}} for the full list. File formats and the
corresponding read-functions are automatically determined based on file
extensions. All these functions can read multiple files in the same format at
once, and combine them into a single table - useful, for example, to read a
folder of gff-files with each file containing genes of a different genome.
}
\section{Functions}{
\itemize{
\item \code{read_feats()}: read files as features mapping onto
sequences.

\item \code{read_subfeats()}: read files as subfeatures mapping onto other features.

\item \code{read_links()}: read files as links connecting sequences.

\item \code{read_sublinks()}: read files as sublinks connecting features.

\item \code{read_seqs()}: read sequence ID, description and length.

}}
\examples{
# read genes/features from a gff file
read_feats(ex("eden-utr.gff"))


# read all gff files from a directory
read_feats(list.files(ex("emales/"), "*.gff$", full.names = TRUE))


# read remote files
\donttest{
gbk_phages <- c(
  PSSP7 = paste0(
    "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/",
    "000/858/745/GCF_000858745.1_ViralProj15134/",
    "GCF_000858745.1_ViralProj15134_genomic.gff.gz"
  ),
  PSSP3 = paste0(
    "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/",
    "000/904/555/GCF_000904555.1_ViralProj195517/",
    "GCF_000904555.1_ViralProj195517_genomic.gff.gz"
  )
)
read_feats(gbk_phages)
}

# read sequences from a fasta file.
read_seqs(ex("emales/emales.fna"), parse_desc = FALSE)

# read sequence info from a fasta file with `parse_desc=TRUE` (default). `key=value`
# pairs are removed from `seq_desc` and parsed into columns with `key` as name
read_seqs(ex("emales/emales.fna"))

# read sequence info from samtools/seqkit style index
read_seqs(ex("emales/emales.fna.seqkit.fai"))

# read sequence info from multiple gff files
read_seqs(c(ex("emales/emales.gff"), ex("emales/emales-tirs.gff")))

}
