% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/etl_cleanup.R, R/etl_create.R, R/etl_extract.R, R/etl_load.R, R/etl_transform.R
\name{etl_cleanup}
\alias{etl_cleanup}
\alias{etl_cleanup.default}
\alias{etl_create}
\alias{etl_create.default}
\alias{etl_extract}
\alias{etl_extract.default}
\alias{etl_extract.etl_mtcars}
\alias{etl_load}
\alias{etl_load.default}
\alias{etl_load.etl_mtcars}
\alias{etl_transform}
\alias{etl_transform.default}
\alias{etl_transform.etl_mtcars}
\alias{etl_update}
\alias{etl_update.default}
\title{ETL functions for working with medium-sized data}
\usage{
etl_cleanup(obj, ...)

\method{etl_cleanup}{default}(obj, delete_raw = FALSE, delete_load = FALSE,
  pattern = "\\\\.(csv|zip)$", ...)

etl_create(obj, ...)

\method{etl_create}{default}(obj, ...)

etl_update(obj, ...)

\method{etl_update}{default}(obj, ...)

etl_extract(obj, ...)

\method{etl_extract}{default}(obj, ...)

\method{etl_extract}{etl_mtcars}(obj, ...)

etl_load(obj, ...)

\method{etl_load}{default}(obj, ...)

\method{etl_load}{etl_mtcars}(obj, ...)

etl_transform(obj, ...)

\method{etl_transform}{default}(obj, ...)

\method{etl_transform}{etl_mtcars}(obj, ...)
}
\arguments{
\item{obj}{an \code{\link{etl}} object}

\item{...}{arguments passed to methods}

\item{delete_raw}{should files be deleted from the \code{raw_dir}?}

\item{delete_load}{should files be deleted from the \code{load_dir}?}

\item{pattern}{regular expression matching file names to be deleted. By default,
this matches filenames ending in \code{.csv} or \code{.zip}.}
}
\value{
Each one of these functions returns an \code{\link{etl}} object, invisibly.
}
\description{
These generic functions provide a systematic approach
for performing ETL (extract-transform-load) operations on medium-sized
data.
}
\details{
The purpose of these functions is to download data from a
particular data source on the Internet, process it, and load it
into a SQL database server.

There are five primary functions:
\describe{
 \item{\code{\link{etl_init}}}{Initialize the database schema.}
 \item{etl_extract}{Download data from the Internet and store it locally in
 its raw form.}
 \item{etl_transform}{Manipulate the raw data such that it can be loaded
 into a database table. Usually, this means converting the raw data to
 (a series of) CSV files, which are also stored locally.}
 \item{etl_load}{Load the transformed data into the database.}
 \item{etl_cleanup}{Perform housekeeping, such as deleting unnecessary
 raw data files.}
}

Additionally, two convenience functions chain these operations together:
\describe{
 \item{etl_create}{Run all five functions in succession.
 This is useful when you want
 to create the database from scratch.}
 \item{etl_update}{Run the \code{etl_extract}, \code{etl_transform}, and
 \code{etl_load} functions in succession.
 This is useful when the database already exists, but you want to
 insert some new data.}
}
}
\examples{

\dontrun{
if (require(RPostgreSQL)) {
  db <- src_postgres(dbname = "mtcars", user = "postgres", host = "localhost")
  cars <- etl("mtcars", db)
}
if (require(RMySQL)) {
  db <- src_mysql(dbname = "mtcars", user = "r-user", host = "localhost", password = "mypass")
  cars <- etl("mtcars", db)
}
}
cars <- etl("mtcars")
cars \%>\%
 etl_extract() \%>\%
 etl_transform() \%>\%
 etl_load() \%>\%
 etl_cleanup()
cars

cars \%>\%
 tbl(from = "mtcars") \%>\%
 group_by(cyl) \%>\%
 summarise(N = n(), mean_mpg = mean(mpg))

 # do it all in one step, and peek at the SQL creation script
 cars \%>\%
   etl_create(echo = TRUE)
 # specify a directory for the data
 \dontrun{
 cars <- etl("mtcars", dir = "~/dumps/mtcars/")
 str(cars)
 }
cars <- etl("mtcars")
# Do it step-by-step
cars \%>\%
  etl_extract() \%>\%
  etl_transform() \%>\%
  etl_load()

# Note the somewhat imprecise data types for the columns. These are the default.
tbl(cars, "mtcars")

# But you can also specify your own schema if you want
schema <- system.file("sql", "init.sqlite", package = "etl")
etl_load(cars, schema)
}
\seealso{
\code{\link{etl}}, \code{\link{etl_init}}
}

