% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sjPlotClusterAnalysis.R
\name{sjc.kgap}
\alias{sjc.kgap}
\title{Compute gap statistics for k-means-cluster}
\usage{
sjc.kgap(x, max = 10, B = 100, SE.factor = 1, method = "Tibs2001SEmax",
  plotResults = TRUE)
}
\arguments{
\item{x}{matrix, where rows are observations and columns are individual dimensions, 
to compute and plot the gap statistic (according to a uniform reference distribution).}

\item{max}{maximum number of clusters to consider, must be at least two. Default
is 10.}

\item{B}{integer, number of Monte Carlo ("bootstrap") samples. Default is 100.}

\item{SE.factor}{[When \code{method} contains "SE"] Determining the optimal 
number of clusters, Tibshirani et al. proposed the "1 S.E."-rule. 
Using an SE.factor f, the "f S.E."-rule is used, more generally.}

\item{method}{character string indicating how the "optimal" number of clusters, 
k^, is computed from the gap statistics (and their standard deviations), 
or more generally how the location k^ of the maximum of f[k] should be 
determined. Default is \code{"Tibs2001SEmax"}. Possible values are:
\describe{
  \item{\code{"globalmax"}}{simply corresponds to the global maximum, i.e., is which.max(f).}
  \item{\code{"firstmax"}}{gives the location of the first local maximum.}
  \item{\code{"Tibs2001SEmax"}}{uses the criterion Tibshirani et al. (2001) proposed: "the smallest k such that f(k) >= f(k+1) - s_{k+1}". Note that this chooses k = 1 when all standard deviations are larger than the differences f(k+1) - f(k).}
  \item{\code{"firstSEmax"}}{is the location of the first f() value which is not smaller than the first local maximum minus SE.factor * SE.f[], i.e., within an "f S.E." range of that maximum (see also SE.factor).}
  \item{\code{"globalSEmax"}}{(used in Dudoit and Fridlyand (2002), supposedly following Tibshirani's proposition) is the location of the first f() value which is not smaller than the global maximum minus SE.factor * SE.f[], i.e., within an "f S.E." range of that maximum (see also SE.factor).}
  }}

\item{plotResults}{logical, if \code{TRUE} (default), a graph visualizing the gap statistic will
be plotted. Use \code{FALSE} to omit the plot.}
}
\value{
An object containing the data frame used for plotting, the ggplot object
          and the number of clusters found.
}
\description{
An implementation of the gap statistic algorithm from Tibshirani, Walther, and Hastie's
               "Estimating the number of clusters in a data set via the gap statistic".
               This function calls the \code{\link[cluster]{clusGap}}-function of the
               \pkg{cluster}-package to calculate the data for the plot.
}
\examples{
\dontrun{
# plot gap statistic and determine best number of clusters
# in mtcars dataset
sjc.kgap(mtcars)

# and in iris dataset
sjc.kgap(iris[,1:4])}

}
\references{
\itemize{
             \item Tibshirani R, Walther G, Hastie T (2001) Estimating the number of clusters in a data set via the gap statistic. J. R. Statist. Soc. B, 63, Part 2, pp. 411-423
             \item Maechler, M., Rousseeuw, P., Struyf, A., Hubert, M., Hornik, K. (2013). cluster: Cluster Analysis Basics and Extensions. R package version 1.14.4. (\href{https://cran.r-project.org/package=cluster}{web})
            }
}
\seealso{
\code{\link{sjc.elbow}}
}
