% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/two-sample-stats.R
\name{two-sample-stats}
\alias{two-sample-stats}
\alias{stat_welch}
\alias{stat_student}
\alias{stat_t}
\alias{stat_fisher}
\alias{stat_f}
\alias{stat_mean}
\alias{stat_hotelling}
\alias{stat_bs}
\alias{stat_student_ip}
\alias{stat_t_ip}
\alias{stat_fisher_ip}
\alias{stat_f_ip}
\alias{stat_bg_ip}
\alias{stat_energy_ip}
\alias{stat_cq_ip}
\alias{stat_mod_ip}
\alias{stat_dom_ip}
\title{Test Statistics for the Two-Sample Problem}
\usage{
stat_welch(data, indices1, ...)

stat_student(data, indices1, ...)

stat_t(data, indices1, ...)

stat_fisher(data, indices1, ...)

stat_f(data, indices1, ...)

stat_mean(data, indices1, ...)

stat_hotelling(data, indices1, ...)

stat_bs(data, indices1, ...)

stat_student_ip(data, indices1, ...)

stat_t_ip(data, indices1, ...)

stat_fisher_ip(data, indices1, ...)

stat_f_ip(data, indices1, ...)

stat_bg_ip(data, indices1, ...)

stat_energy_ip(data, indices1, alpha = 1L, ...)

stat_cq_ip(data, indices1, ...)

stat_mod_ip(data, indices1, ...)

stat_dom_ip(data, indices1, standardize = TRUE, ...)
}
\arguments{
\item{data}{Either a list of the \code{n1 + n2} concatenated observations, with
the original \code{n1} observations from the first sample on top and the
original \code{n2} observations from the second sample below, or a
dissimilarity matrix stored as a \code{\link[stats]{dist}} object for the
inter-point statistics, whose function names end with \verb{_ip()}.}

\item{indices1}{An integer vector specifying the indices in \code{data} that are
considered to belong to the first sample.}

\item{...}{Extra parameters specific to some statistics.}

\item{alpha}{A scalar value specifying the power to which the dissimilarities
should be raised in the computation of the inter-point energy statistic.
Default is \code{1L}.}

\item{standardize}{A boolean specifying whether the distance between medoids
in the \code{\link{stat_dom_ip}} function should be normalized by the
pooled corresponding variances. Default is \code{TRUE}.}
}
\value{
A real scalar giving the value of the test statistic for the permutation
specified by the integer vector \code{indices1}.
}
\description{
This is a collection of functions that provide test statistics to be used
within the permutation scheme for performing two-sample testing. These test
statistics can be divided into two categories: traditional statistics that
use empirical moments and inter-point statistics that only rely on pairwise
dissimilarities between data points.
}
\section{Traditional Test Statistics}{

\itemize{
\item \code{\link{stat_hotelling}} implements Hotelling's \eqn{T^2} statistic for
multivariate data with \eqn{p < n}.
\item \code{\link{stat_student}} or \code{\link{stat_t}} implements Student's
statistic (originally assuming equal variances and thus using the pooled
empirical variance estimator). See \code{\link[stats]{t.test}} for details.
\item \code{\link{stat_welch}} implements the Student-Welch statistic, which
is essentially a modification of Student's statistic accounting for unequal
variances. See \code{\link[stats]{t.test}} for details.
\item \code{\link{stat_fisher}} or \code{\link{stat_f}} implements Fisher's
variance ratio statistic. See \code{\link[stats]{var.test}} for details.
\item \code{\link{stat_mean}} implements a statistic that computes the difference
between the means.
\item \code{\link{stat_bs}} implements the statistic proposed by Bai & Saranadasa
(1996) for high-dimensional multivariate data.
}
}

\section{Inter-Point Test Statistics}{

\itemize{
\item \code{\link{stat_student_ip}} or \code{\link{stat_t_ip}} implements a
Student-like test statistic based on inter-point distances only as described
in Lovato et al. (2020).
\item \code{\link{stat_fisher_ip}} or \code{\link{stat_f_ip}} implements a
Fisher-like test statistic based on inter-point distances only as described
in Lovato et al. (2020).
\item \code{\link{stat_bg_ip}} implements the statistic proposed by Biswas &
Ghosh (2014).
\item \code{\link{stat_energy_ip}} implements the class of energy-based
statistics as described in Székely & Rizzo (2013).
\item \code{\link{stat_cq_ip}} implements the statistic proposed by Chen & Qin
(2010).
\item \code{\link{stat_mod_ip}} implements a statistic that computes the mean of
inter-point distances.
\item \code{\link{stat_dom_ip}} implements a statistic that computes the distance
between the medoids of the two samples, possibly standardized by the pooled
corresponding variances.
}
}

\examples{
n <- 10L
mx <- 0
sigma <- 1
delta <- 10
my <- mx + delta
x <- rnorm(n = n, mean = mx, sd = sigma)
y <- rnorm(n = n, mean = my, sd = sigma)
D <- dist(c(x, y))

x <- as.list(x)
y <- as.list(y)

stat_welch(c(x, y), 1:n)
stat_t(c(x, y), 1:n)
stat_f(c(x, y), 1:n)
stat_mean(c(x, y), 1:n)
stat_hotelling(c(x, y), 1:n)
stat_bs(c(x, y), 1:n)

stat_t_ip(D, 1:n)
stat_f_ip(D, 1:n)
stat_bg_ip(D, 1:n)
stat_energy_ip(D, 1:n)
stat_cq_ip(D, 1:n)
stat_mod_ip(D, 1:n)
stat_dom_ip(D, 1:n)
}
\references{
Bai, Z., & Saranadasa, H. (1996). Effect of high dimension: by an example of
a two sample problem. Statistica Sinica, 311-329.

Lovato, I., Pini, A., Stamm, A., & Vantini, S. (2020). Model-free two-sample
test for network-valued data. Computational Statistics & Data Analysis, 144,
106896.

Biswas, M., & Ghosh, A. K. (2014). A nonparametric two-sample test applicable
to high dimensional data. Journal of Multivariate Analysis, 123, 160-171.

Székely, G. J., & Rizzo, M. L. (2013). Energy statistics: A class of
statistics based on distances. Journal of Statistical Planning and Inference,
143(8), 1249-1272.

Chen, S. X., & Qin, Y. L. (2010). A two-sample test for high-dimensional data
with applications to gene-set testing. The Annals of Statistics, 38(2),
808-835.
}
