% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Cluster.R
\name{Cluster}
\alias{Cluster}
\title{K-means Clustering}
\usage{
Cluster(var, weights, nclusters = NULL, index = "sdindex", posdates = 1)
}
\arguments{
\item{var}{An array with any number of dimensions, one of them (the 
'posdates'th) corresponding to time with either area-averages over a 
series of domains or the grid points for any spatial grid structure (x), 
(y), (z), (x,y), (x,y,z), (y,z), ...}

\item{weights}{A vector/array of multiplicative weights based on the areas 
covering each domain/region or grid-cell of var; the dimensions of weights 
vector must be equal to the dimensions of 'var' without the 
'posdates'th dimension.}

\item{nclusters}{A positive integer K that must be bigger than 1. 
K is the number of clusters to be computed, or K initial cluster centers 
to be used in the method. Default is NULL, in which case the user has to 
specify which index from NbClust and the associated criteria for selecting 
the optimal number of clusters will be used for K-means clustering of var.}

\item{index}{A validity index from NbClust package that can be used to 
determine optimal K if K is not specified as positive integer bigger than 
1 or initial/seed cluster centers in nclusters. 'sdindex' is the default 
(Halkidi et al. 2001, JIIS). Other indices also available in NbClust are 
"kl", "ch", "hartigan", "ccc", "scott", "marriot", "trcovw", "tracew", 
"friedman", "rubin", "cindex", "db", "silhouette", "duda", "pseudot2", 
"beale", "ratkowsky", "ball", "ptbiserial", "gap", "frey", "mcclain", 
"gamma", "gplus", "tau", "dunn", "hubert", "sdindex", and "sdbw". 
One can also use all of them with the option 'alllong' or almost all indices 
except gap, gamma, gplus and tau with 'all', when the optimal number of 
clusters K is determined by the majority rule (the maximum of histogram of 
the results of all indices with finite solutions). Use of some indices on 
a big and/or unstructured dataset can be computationally intense and/or 
could lead to numerical singularity.}

\item{posdates}{The index of the dimension that corresponds to time in the 
provided array in the parameter 'var', the first by default.}
}
\value{
\item{cluster}{
 A vector (time series) of integers indicating the occurrence 
 of a cluster, i.e., when a certain data member in time is allocated to a 
 specific cluster (e.g., 2 1 3 1 1 1 ..).
}
\item{centers}{
 A matrix of cluster centres or centroids (e.g. 
 [1:K, 1:spatial degrees of freedom]).
}
\item{totss}{
 The total sum of squares.
}
\item{withinss}{
 A vector of within-cluster sum of squares, one component 
 per cluster.
}
\item{tot.withinss}{
 Total within-cluster sum of squares, 
 i.e., sum(withinss).
}
\item{betweenss}{
 The between-cluster sum of squares, i.e. totss-tot.withinss.
}
\item{size}{
The number of points in each cluster.
}
}
\description{
This function computes cluster centers and their time series of occurrences, 
with the K-means clustering method using Euclidean distance, of an array of 
input data with any number of dimensions, one of them (the 'posdates'th) 
corresponding to time. By default the first dimension is expected to 
correspond to time. Specifically, it partitions the array along the time axis into 
K groups or clusters in which each space vector/array belongs to (i.e., is a 
member of) the cluster with the nearest center or centroid. This function 
relies on the NbClust package (Charrad et al., 2014 JSS).
}
\examples{
# Generating synthetic data
a1 <- array(dim = c(200, 4))
mean1 <- 0
sd1 <- 0.3 

c0 <- seq(1, 200)
c1 <- sort(sample(x = 1:200, size = sample(x = 50:150, size = 1), replace = FALSE))
x1 <- c(1, 1, 1, 1)
for (i1 in c1) {
 a1[i1, ] <- x1 + rnorm(4, mean = mean1, sd = sd1)
}

c1p5 <- c0[!(c0 \%in\% c1)]
c2 <- c1p5[seq(1, length(c1p5), 2)] 
x2 <- c(2, 2, 4, 4)
for (i2 in c2) {
 a1[i2, ] <- x2 + rnorm(4, mean = mean1, sd = sd1)
}

c3 <- c1p5[seq(2, length(c1p5), 2)]
x3 <- c(3, 3, 1, 1)
for (i3 in c3) {
 a1[i3, ] <- x3 + rnorm(4, mean = mean1, sd = sd1)
}

# Computing the clusters
res1 <- Cluster(var = a1, weights = array(1, dim = dim(a1)[2]), nclusters = 3)
print(res1$cluster)
print(res1$centers)

res2 <- Cluster(var = a1, weights = array(1, dim = dim(a1)[2]))
print(res2$cluster)
print(res2$centers)
}
\author{
History:\cr
 1.0 # 2014-10 (N.S. Fuckar, \email{neven.fuckar@bsc.es}) - Original code
}
\references{
Wilks, 2011, Statistical Methods in the Atmospheric Sciences, 3rd ed., Elsevier, pp 676.
}
\keyword{datagen}

