% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/TermDocumentMatrix.R
\docType{methods}
\name{as.TermDocumentMatrix}
\alias{as.TermDocumentMatrix}
\alias{as.DocumentTermMatrix}
\alias{as.TermDocumentMatrix,character-method}
\alias{as.DocumentTermMatrix,character-method}
\alias{as.TermDocumentMatrix,bundle-method}
\alias{as.DocumentTermMatrix,bundle-method}
\alias{as.TermDocumentMatrix,partition_bundle-method}
\alias{as.DocumentTermMatrix,partition_bundle-method}
\alias{as.DocumentTermMatrix,context-method}
\alias{as.TermDocumentMatrix,context-method}
\title{Generate TermDocumentMatrix / DocumentTermMatrix.}
\usage{
as.TermDocumentMatrix(x, ...)

\S4method{as.TermDocumentMatrix}{character}(x, p_attribute, s_attribute,
  verbose = TRUE, ...)

\S4method{as.DocumentTermMatrix}{character}(x, p_attribute, s_attribute,
  verbose = TRUE, ...)

\S4method{as.TermDocumentMatrix}{bundle}(x, col, p_attribute = NULL,
  verbose = TRUE, ...)

\S4method{as.DocumentTermMatrix}{bundle}(x, col, p_attribute = NULL,
  verbose = TRUE, ...)

\S4method{as.TermDocumentMatrix}{partition_bundle}(x, p_attribute = NULL,
  col = NULL, verbose = TRUE, ...)

\S4method{as.DocumentTermMatrix}{partition_bundle}(x, p_attribute = NULL,
  col = NULL, verbose = TRUE, ...)

\S4method{as.DocumentTermMatrix}{context}(x, p_attribute, verbose = TRUE, ...)

\S4method{as.TermDocumentMatrix}{context}(x, p_attribute, verbose = TRUE, ...)
}
\arguments{
\item{x}{a \code{character} vector indicating a corpus, or an object of class
\code{bundle}, or inheriting from class \code{bundle} (e.g. \code{partition_bundle})}

\item{...}{s-attribute definitions used for subsetting the corpus; compare the \code{partition}-method}

\item{p_attribute}{p-attribute that counting is to be based on}

\item{s_attribute}{s-attribute that defines content of columns, or rows}

\item{verbose}{logical, whether to output progress messages}

\item{col}{the column of \code{data.table} in slot \code{stat} (if \code{x}
is a \code{bundle}) to use for assembling the matrix}
}
\value{
a \code{TermDocumentMatrix} or a \code{DocumentTermMatrix}, depending on the method called
}
\description{
Methods to generate the classes \code{TermDocumentMatrix} or 
\code{DocumentTermMatrix} as defined in the \code{tm} package. These classes
inherit from the \code{simple_triplet_matrix}-class defined in the 
\code{slam}-package. There are many text mining applications for 
document-term matrices. A \code{DocumentTermMatrix} is required as input by
the \code{topicmodels} package, for instance.
}
\details{
The method can be applied to objects of the class
\code{character}, \code{bundle}, or classes inheriting from the
\code{bundle} class.

If \code{x} refers to a corpus (i.e. is a length 1 character vector), a
\code{TermDocumentMatrix}, or \code{DocumentTermMatrix} will be generated for
subsets of the corpus based on the \code{s_attribute} provided. Counts are
performed for the \code{p_attribute}. Further parameters provided (passed in
as \code{...}) are interpreted as s-attributes that define a subset of the
corpus for splitting it according to \code{s_attribute}. If struc values for
\code{s_attribute} are not unique, the necessary aggregation is performed, which
slows things down somewhat.

If \code{x} is a \code{bundle} or a class inheriting from it, the counts or
whatever measure is present in the \code{stat} slots (in the column
indicated by \code{col}) will be turned into the values of the sparse
matrix that is generated. A special case is the generation of the sparse
matrix based on a \code{partition_bundle} that does not yet include counts.
In this case, a \code{p_attribute} needs to be provided. Then counting will
be performed, too.
}
\examples{
use("polmineR")
 
# do-it-yourself 
p <- partition("GERMAPARLMINI", date = ".*", regex = TRUE)
pB <- partition_bundle(p, s_attribute = "date")
pB <- enrich(pB, p_attribute="word")
tdm <- as.TermDocumentMatrix(pB, col = "count")
   
 # leave the counting to the as.TermDocumentMatrix-method
pB2 <- partition_bundle(p, s_attribute = "date")
tdm <- as.TermDocumentMatrix(pB2, p_attribute = "word", verbose = TRUE)
   
# direttissima (the most direct route): pass the corpus ID directly
tdm <- as.TermDocumentMatrix("GERMAPARLMINI", p_attribute = "word", s_attribute = "date")
}
\author{
Andreas Blaette
}
