% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CatSplitEnc.R
\name{catSplitEncoding}
\alias{catSplitEncoding}
\title{Encode categorical variables using split information of CART}
\usage{
catSplitEncoding(
  targetVariable,
  trainData,
  testData,
  problemType,
  datasetName,
  catVariables
)
}
\arguments{
\item{targetVariable}{target variable that we want to predict.}

\item{trainData}{training data.}

\item{testData}{testing data.}

\item{problemType}{Type of prediction problem, either classification or regression, given as a string (e.g. "classification").}

\item{datasetName}{Name of the dataset, could be any string name.}

\item{catVariables}{Character vector with the names of the categorical variables in the dataset.}
}
\value{
A list of two data frames holding the encoding of the categorical variables: the first for the training data, the second for the testing data.
}
\description{
Encode categorical variables using split information of CART
}
\examples{
\donttest{
library("OpenML")
library("farff")
library("stringr")
library("stats")
library("data.table")
library("rpart")
library("catSplit")

# An example dataset from OpenML
datInfo <- getOMLDataSet(data.id = 41283, verbosity = 0)
targetVariable <- datInfo$target.features
dat <- datInfo$data
datasetName <- datInfo$desc$name

# Names of all factor columns, with the target variable removed
catVariables <- names(Filter(is.factor, dat))
catVariables <- catVariables[!(catVariables \%in\% targetVariable)]
problemType <- "classification"

# Randomly split dat into train (0.75 of the rows) and test (the rest) sets
smp_size <- floor(0.75 * nrow(dat))
train_ind <- sample(seq_len(nrow(dat)), size = smp_size)
train <- as.data.frame.matrix(dat[train_ind, ])
test <- as.data.frame.matrix(dat[-train_ind, ])

# Outputs a list containing 2 encoding frames:
# the first for the train data, the second for the test data
train_and_test_cat <- catSplitEncoding(
  targetVariable = targetVariable,
  trainData = train,
  testData = test,
  problemType = problemType,
  datasetName = datasetName,
  catVariables = catVariables
)

# Get the transformed train and test sets from the output list.
# `[[` extracts the element itself; `[` would return a one-element list.
trainCat <- train_and_test_cat[[1]]
testCat <- train_and_test_cat[[2]]

# Drop categorical variables from the original train and test data
trainData <- train[!names(train) \%in\% catVariables]
testData <- test[!names(test) \%in\% catVariables]

# Merge encoding frame and original data
train <- cbind(trainCat, trainData)
test <- cbind(testCat, testData)
}

}
