% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DSD_Gaussians.R
\name{DSD_Gaussians}
\alias{DSD_Gaussians}
\title{Mixture of Gaussians Data Stream Generator}
\usage{
DSD_Gaussians(
  k = 3,
  d = 2,
  p,
  mu,
  sigma,
  variance_limit = c(0.001, 0.002),
  separation = 6,
  space_limit = c(0, 1),
  noise = 0,
  noise_limit = space_limit,
  noise_separation = 3,
  separation_type = c("Euclidean", "Mahalanobis"),
  verbose = FALSE
)
}
\arguments{
\item{k}{Determines the number of clusters.}

\item{d}{Determines the number of dimensions.}

\item{p}{A vector of probabilities that determines the likelihood of
generating a data point from a particular cluster.}

\item{mu}{A matrix of means for each dimension of each cluster.}

\item{sigma}{A list of length \code{k} of covariance matrices.}

\item{variance_limit}{Lower and upper limit for the randomly generated variance when
creating cluster covariance matrices.}

\item{separation}{Minimum separation distance between clusters
(measured in standard deviations according to \code{separation_type}).}

\item{space_limit}{Defines the space bounds. All constructs are generated
inside these bounds. For clusters this means that their centroids must be
within these space bounds.}

\item{noise}{Noise probability between 0 and 1. Noise is uniformly
distributed within the range given by \code{noise_limit} (see below).}

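\item{noise_limit}{The range for noise points. Either a vector with the
minimum and maximum value used for all dimensions (defaults to
\code{space_limit}), or a matrix with d rows and 2 columns, where the first
column contains the minimum values and the second column contains the maximum
values for noise.}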

\item{noise_separation}{Minimum separation distance between cluster centers and noise
points (measured in standard deviations according to \code{separation_type}). \code{0} means separation is ignored.}

\item{separation_type}{The type of the separation distance calculation. It
can be either Euclidean distance or Mahalanobis distance.}

\item{verbose}{Report cluster and outlier generation process.}
}
\value{
Returns an object of class \code{DSD_Gaussians} (subclass of \link{DSD_R}, \link{DSD}).
}
\description{
A data stream generator that produces a data stream with a mixture of static
Gaussians.
}
\details{
\code{DSD_Gaussians} creates a mixture of \code{k} static clusters in a \code{d}-dimensional
space. The cluster
centers \code{mu} and the covariance matrices \code{sigma} can be supplied
or will be randomly generated. The probability vector \code{p} defines for
each cluster the probability that the next data point will be chosen from it
(defaults to equal probability). Separation between generated clusters (and outliers; see below)
can be imposed by using
Euclidean or Mahalanobis distance, which is controlled by the
\code{separation_type} parameter. The separation value is then supplied in the
\code{separation} parameter.
The generation method is similar to the one
suggested by Jain and Dubes (1988).

Noise points which are uniformly chosen from \code{noise_limit} can be added.

Outlier points can be added. The outlier spatial positions
\code{predefined_outlier_space_positions} and the outlier stream positions
\code{predefined_outlier_stream_positions} can be supplied or will be
randomly generated. Cluster and outlier separation distance is determined by
the \code{separation} and \code{outlier_virtual_variance} parameters. The
outlier virtual variance defines an empty space around outliers, which
separates them from their surroundings. Unlike noise, outliers are data
points of interest for end-users, and the goal of outlier detectors is to
find them in data streams. For more details, read the "Introduction to
\pkg{stream}" vignette.
}
\examples{
# Example 1: create data stream with three clusters in 3-dimensional data space
#            using the default separation of 6 standard deviations.
set.seed(1)
stream1 <- DSD_Gaussians(k = 3, d = 3)
stream1

get_points(stream1, n = 5)
plot(stream1, xlim = c(0, 1), ylim = c(0, 1))


# Example 2: create data stream with specified cluster positions,
# 5\% noise in a given bounding box and
# with different densities (1 to 9 between the two clusters)
stream2 <- DSD_Gaussians(k = 2, d = 2,
    mu = rbind(c(-.5, -.5), c(.5, .5)),
    p = c(.1, .9),
    variance_limit = c(0.02, 0.04),
    noise = 0.05,
    noise_limit = rbind(c(-1, 1), c(-1, 1)))

get_points(stream2, n = 5)
plot(stream2, xlim = c(-1, 1), ylim = c(-1, 1))


# Example 3: create 4 clusters and noise separated by a Mahalanobis
# distance. Distance to noise is increased to 6 standard deviations to make
# the noise points easier to detect as outliers.
stream3 <- DSD_Gaussians(k = 4, d = 2,
  separation_type = "Mahalanobis",
  space_limit = c(5, 20),
  variance_limit = c(1, 2),
  noise = 0.05,
  noise_limit = c(0, 25),
  noise_separation = 6
  )
plot(stream3)
}
\references{
Jain and Dubes (1988) Algorithms for clustering data,
Prentice-Hall, Inc., Upper Saddle River, NJ, USA.
}
\seealso{
Other DSD: 
\code{\link{DSD}()},
\code{\link{DSD_BarsAndGaussians}()},
\code{\link{DSD_Benchmark}()},
\code{\link{DSD_Cubes}()},
\code{\link{DSD_MG}()},
\code{\link{DSD_Memory}()},
\code{\link{DSD_Mixture}()},
\code{\link{DSD_NULL}()},
\code{\link{DSD_ReadDB}()},
\code{\link{DSD_ReadStream}()},
\code{\link{DSD_Target}()},
\code{\link{DSD_UniformNoise}()},
\code{\link{DSD_mlbenchData}()},
\code{\link{DSD_mlbenchGenerator}()},
\code{\link{DSF}()},
\code{\link{animate_data}()},
\code{\link{close_stream}()},
\code{\link{get_points}()},
\code{\link{plot.DSD}()},
\code{\link{reset_stream}()}
}
\author{
Michael Hahsler
}
\concept{DSD}
