% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/greedy_ensemble.R
\name{greedy_ensemble}
\alias{greedy_ensemble}
\title{Computes an ensemble score using the greedy algorithm proposed by Schubert et al. (2012)}
\usage{
greedy_ensemble(X, kk = 5)
}
\arguments{
\item{X}{The input data containing the outlier scores, as a data frame, matrix or tibble. Rows contain observations and columns contain outlier detection methods.}

\item{kk}{The number of estimated outliers.}
}
\value{
A list with the components:
\item{\code{scores}}{The ensemble scores.}
\item{\code{methods}}{The methods that are chosen for the ensemble.}
\item{\code{chosen}}{The chosen subset of original anomaly scores.}
}
\description{
This function computes an ensemble score using the greedy algorithm from the paper \emph{On Evaluation of Outlier Rankings and Outlier Scores} by Schubert et al. (2012) <doi:10.1137/1.9781611972825.90>. The greedy ensemble is detailed in Section 4.3 of that paper.
}
\examples{
# Fix the RNG state so the simulated data are reproducible.
set.seed(123)
# All base detectors below come from the dbscan package; skip if it is absent.
if (requireNamespace("dbscan", quietly = TRUE)) {
  # 200 bivariate standard-normal points; the last two rows are overwritten
  # with points far from the bulk of the data.
  X <- data.frame(x1 = rnorm(200), x2 = rnorm(200))
  X[199, ] <- c(4, 4)
  X[200, ] <- c(-3, 5)

  # Local outlier factor scores for two neighbourhood sizes.
  y1 <- dbscan::lof(X, minPts = 10)
  y2 <- dbscan::lof(X, minPts = 20)

  # Distances to the 10th and 20th nearest neighbours used as anomaly scores.
  knn_dist <- dbscan::kNN(X, k = 20)$dist
  y3 <- knn_dist[, 10]
  y4 <- knn_dist[, 20]

  # Dense points are less anomalous, so the density is flipped via 1 - density.
  y5 <- 1 - dbscan::pointdensity(X, eps = 0.8, type = "gaussian")
  y6 <- 1 - dbscan::pointdensity(X, eps = 0.5, type = "gaussian")

  # One column per detector, one row per observation.
  Y <- data.frame(y1, y2, y3, y4, y5, y6)

  # Combine the six score vectors and show the ensemble scores.
  ens <- greedy_ensemble(Y, kk = 5)
  ens$scores
}

}
