% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataset_methods.R
\name{dataset_prepare}
\alias{dataset_prepare}
\title{Prepare a dataset for analysis}
\usage{
dataset_prepare(
  dataset,
  x,
  y = NULL,
  named = TRUE,
  named_features = FALSE,
  parallel_records = NULL,
  batch_size = NULL,
  num_parallel_batches = NULL,
  drop_remainder = FALSE
)
}
\arguments{
\item{dataset}{A dataset}

\item{x}{Features to include. When \code{named_features} is \code{FALSE}, all features
will be stacked into a single tensor, so they must have an identical data type.}

\item{y}{(Optional) Response variable.}

\item{named}{\code{TRUE} to name the dataset elements "x" and "y", \code{FALSE} to
not name the dataset elements.}

\item{named_features}{\code{TRUE} to yield features as a named list; \code{FALSE} to
stack features into a single array. Note that in the case of \code{FALSE} (the
default), all features will be stacked into a single 2D tensor, so they need to
have the same underlying data type.}

\item{parallel_records}{(Optional) An integer, representing the number of
records to decode in parallel. If not specified, records will be
processed sequentially.}

\item{batch_size}{(Optional) Batch size, if you would like to fuse the
\code{dataset_prepare()} operation together with a \code{dataset_batch()}
operation (fusing generally improves overall training performance).}

\item{num_parallel_batches}{(Optional) An integer, representing the number of batches
to create in parallel. On one hand, higher values can help mitigate the effect of
stragglers. On the other hand, higher values can increase contention if CPU is
scarce.}

\item{drop_remainder}{(Optional) A boolean, representing whether the last
batch should be dropped in the case that it has fewer than \code{batch_size}
elements; the default behavior is not to drop the smaller batch.}
}
\value{
A dataset. The dataset will have a structure of either:
\itemize{
\item When \code{named_features} is \code{TRUE}: \code{list(x = list(feature_name = feature_values, ...), y = response_values)}
\item When \code{named_features} is \code{FALSE}: \code{list(x = features_array, y = response_values)},
where \code{features_array} is a rank 2 array of shape \verb{(batch_size, num_features)}.
}

Note that the \code{y} element will be omitted when \code{y} is \code{NULL}.
}
\description{
Transform a dataset with named columns into a list with features (\code{x}) and
response (\code{y}) elements.
}
\seealso{
\link[=input_fn.tf_dataset]{input_fn()} for use with \pkg{tfestimators}.
}
