% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/textstat_keyness.R
\name{textstat_keyness}
\alias{textstat_keyness}
\title{Calculate keyness statistics}
\usage{
textstat_keyness(x, target = 1L, measure = c("chi2", "exact", "lr"),
  sort = TRUE)
}
\arguments{
\item{x}{a \link{dfm} containing the features to be examined for keyness}

\item{target}{the document index (numeric, character or logical) identifying the 
document forming the "target" for computing keyness; all other documents' 
feature frequencies will be combined for use as a reference}

\item{measure}{(signed) association measure to be used for computing keyness.
Currently available: \code{"chi2"} (\eqn{\chi^2}{chi^2} with Yates correction); 
\code{"exact"} (Fisher's exact test); \code{"lr"} for the likelihood ratio
\eqn{G} statistic with Yates correction.}

\item{sort}{logical; if \code{TRUE} sort features scored in descending order 
of the measure, otherwise leave in original feature order}
}
\value{
a data.frame of computed statistics and associated p-values, where the features 
scored name each row.  
  For \code{measure = "chi2"} this is the chi-squared value, signed 
  positively if the observed value in the target exceeds its expected value; 
  for \code{measure = "exact"} this is the estimate of the odds ratio; for 
  \code{measure = "lr"} this is the likelihood ratio \eqn{G} statistic.
}
\description{
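Calculate "keyness", a signed association score for features that occur
differentially between a "target" document and a reference formed by combining
the feature frequencies of all other documents in the \link{dfm}.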
}
\examples{
# compare pre- v. post-war terms using grouping:
# label each inaugural address as pre-war (Year < 1945) or post-war,
# then build a dfm grouped by that label
period <- ifelse(docvars(data_corpus_inaugural, "Year") < 1945, "pre-war", "post-war")
mydfm <- dfm(data_corpus_inaugural, groups = period)
head(mydfm) # make sure 'post-war' is in the first row
# target defaults to the first document, so the first row is scored against
# the rest; with the default sort = TRUE, head() shows the highest-scoring
# features and tail() the lowest-scoring ones
head(result <- textstat_keyness(mydfm), 10)
tail(result, 10)

# compare pre- v. post-war terms using logical vector
# (the logical vector is passed as the target index; TRUE marks Year >= 1945)
mydfm2 <- dfm(data_corpus_inaugural)
textstat_keyness(mydfm2, docvars(data_corpus_inaugural, "Year") >= 1945)

# compare Trump 2017 to other post-war presidents
# (the target is selected here by document name)
pwdfm <- dfm(corpus_subset(data_corpus_inaugural, period == "post-war"))
head(textstat_keyness(pwdfm, target = "2017-Trump"), 10)
# using the likelihood ratio method
# NOTE(review): dfm_smooth() is presumably applied so that no count is zero
# when the G statistic is computed -- confirm
head(textstat_keyness(dfm_smooth(pwdfm), measure = "lr", target = "2017-Trump"), 10)
}
\references{
Bondi, Marina, and Mike Scott, eds. 2010.  \emph{Keyness in 
  Texts}. Amsterdam, Philadelphia: John Benjamins.
  
  Stubbs, Michael. 2010.  "Three Concepts of Keywords". In \emph{Keyness in 
  Texts}, Marina Bondi and Mike Scott, eds. pp. 21--42. Amsterdam, Philadelphia:
  John Benjamins.
  
  Scott, M. & Tribble, C. 2006.  \emph{Textual Patterns: keyword and corpus 
  analysis in language education}.  Amsterdam: Benjamins, p. 55.
  
  Dunning, Ted. 1993. "Accurate Methods for the Statistics of Surprise and Coincidence", 
  \emph{Computational Linguistics}, Vol 19, No. 1, pp. 61--74.
}
\keyword{textstat}

