% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/treat_outliers.R
\name{treat_outliers}
\alias{treat_outliers}
\title{Treats Outliers in Numerical Data}
\usage{
treat_outliers(x, percentile = 0.01, truncate = FALSE, by = NULL,
  ...)
}
\arguments{
\item{x}{Data that is coercible into a numeric vector or matrix.
If it is a data frame then all numerical variables
of the data frame are coerced into a matrix.}

\item{percentile}{A numeric scalar.
  The percentile below which observations
  are considered to be outliers. It is treated symmetrically, so that
\code{c(percentile, 1-percentile)} are used as boundaries.
  Defaults to 0.01 and needs to be > 0 and < 0.5.}

\item{truncate}{A logical scalar. If TRUE then data are truncated
(i.e., set to NA if out of bounds). Defaults to FALSE.}

\item{by}{NULL or either a factor vector or a character string
identifying a factor variable in the data frame provided by x.
The factor indicated by \code{by} is used to identify groups
by which the outlier treatment is applied. Defaults to NULL (no grouping).
If provided, the resulting vector must not contain NAs and its length must
equal \code{nrow(as.matrix(x))}.}

\item{...}{Additional parameters forwarded to \code{\link[stats]{quantile}} (notably, \code{type}).}
}
\value{
A numeric vector or matrix containing the outlier-treated \code{x}.
  If a data frame was provided in \code{x}, a data frame is returned with its numeric variables
  replaced by their outlier-treated values.
}
\description{
Treats numerical outliers either by winsorizing or by truncating.
}
\details{
All members of the numerical matrix are checked for finiteness and are
  set to NA if they are not finite. NAs are removed when calculating the outlier cut-offs.
}
\examples{
treat_outliers(seq(1:100), 0.05)
treat_outliers(seq(1:100), truncate = TRUE, 0.05)

# If you prefer percentiles calculated like Stata's summarize or pctile:
treat_outliers(seq(1:100), 0.05, type = 2)

df <- data.frame(a = seq(1:1000), b = rnorm(1000), c = sample(LETTERS[1:5], 1000, replace=TRUE))
winsorized_df <- treat_outliers(df)
summary(df)
summary(winsorized_df)

winsorized_df <- treat_outliers(df, 0.05, by="c")
by(df, df$c, summary)
by(winsorized_df, df$c, summary)

hist(treat_outliers(rnorm(1000)), breaks=100)
}
