% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/construct_ngrams.R
\name{construct_ngrams}
\alias{construct_ngrams}
\title{Construct and filter n-grams}
\usage{
construct_ngrams(
  target,
  seq,
  u,
  n_max,
  conf_level = 0.95,
  gap = TRUE,
  use_heuristics = TRUE
)
}
\arguments{
\item{target}{\code{integer} vector with target information (e.g. class labels).}

\item{seq}{a vector or matrix describing sequence(s).}

\item{u}{\code{integer}, \code{numeric} or \code{character} vector of all
possible unigrams.}

\item{n_max}{size of constructed n-grams.}

\item{conf_level}{confidence level.}

\item{gap}{\code{logical}, if \code{TRUE} gaps are used. See Details.}

\item{use_heuristics}{\code{logical}, if \code{FALSE} then all n-grams are tested. This may
slow down computations significantly.}
}
\value{
a vector of n-grams.
}
\description{
Builds and selects important n-grams stepwise.
}
\details{
\code{construct_ngrams} starts by
extracting unigrams from the sequences, pasting them together in all combinations and
choosing significant features from among them (with p-value below \code{conf_level}). The
chosen n-grams are then extended to the size specified by \code{n_max} by pasting
unigrams at both ends.

The \code{gap} parameter determines whether \code{construct_ngrams} performs the
feature selection on exact n-grams (\code{gap} equal to \code{FALSE}) or on all features
within Hamming distance 1 of the n-gram (\code{gap} equal to \code{TRUE}).
}
\examples{
# to make the example faster, we run construct_ngrams() on the 
# subset of data
deg_seqs <- degenerate(human_cleave[c(1L:100, 801L:900), 1L:9],
list(`1` = c(1, 6, 8, 10, 11, 18),
     `2` = c(2, 13, 14, 16, 17),
     `3` = c(5, 19, 20),
     `4` = c(7, 9, 12, 15),
     '5' = c(3, 4)))
bigrams <- construct_ngrams(human_cleave[c(1L:100, 801L:900), "tar"], deg_seqs, 1L:5, 2)
}
\seealso{
Feature filtering method: \code{\link{test_features}}.
}
