% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/univariateTable.R
\name{univariateTable}
\alias{univariateTable}
\alias{utable}
\title{Univariate table}
\usage{
univariateTable(
  formula,
  data = parent.frame(),
  summary.format = "mean(x) (sd(x))",
  Q.format = "median(x) [iqr(x)]",
  freq.format = "count(x) (percent(x))",
  column.percent = TRUE,
  digits = c(1, 1, 3),
  big.mark = ",",
  short.groupnames,
  compare.groups = TRUE,
  show.totals = TRUE,
  n = "inNames",
  outcome = NULL,
  ...
)
}
\arguments{
\item{formula}{Formula specifying the grouping variable (strata)
on the left hand side (can be omitted) and on the right hand side
the variables for which to obtain (descriptive) statistics.}

\item{data}{Data set in which formula is evaluated}

\item{summary.format}{Format for the numeric (non-factor)
variables. Default is mean (SD).  If different formats are
desired, either the special function \code{Q()} can be used in the formula
or the function can be called multiple times and the results combined
with \code{rbind}. See examples.}

\item{Q.format}{Format for quantile summary of numerical
variables: Default is median (inter quartile range).}

\item{freq.format}{Format for categorical variables. Default is
count (percentage).}

\item{column.percent}{Logical, if \code{TRUE} and the default
freq.format is used then column percentages are given instead of
row percentages for categorical variables (factors).}

\item{digits}{Number of digits}

\item{big.mark}{Character used to format large numbers (i.e., greater than 1,000). Use \code{""} to turn this off.}

\item{short.groupnames}{If \code{TRUE} group names are abbreviated.}

\item{compare.groups}{Method used to compare groups. If
\code{"logistic"} and there are exactly two groups logistic
regression is used instead of t-tests and Wilcoxon rank tests to
compare numeric variables across groups.}

\item{show.totals}{If \code{TRUE} show a column with totals.}

\item{n}{If \code{TRUE} show the number of subjects as a separate
row.  If equal to \code{"inNames"}, show the numbers in
parentheses in the column names. If \code{FALSE} do not show
number of subjects.}

\item{outcome}{Outcome data used to calculate p-values when
compare groups method is \code{'logistic'} or \code{'cox'}.}

\item{...}{saved as part of the result to be passed on to
\code{labelUnits}}
}
\value{
List with one summary table element for each variable on the right hand side of formula.
The summary tables can be combined with \code{rbind}. The function \code{summary.univariateTable}
combines the tables, and shows p-values in custom format.
}
\description{
Categorical variables are summarized using counts and frequencies and compared between groups (by default with chi-square tests).
}
\details{
This function can generate the baseline demographic characteristics
that form Table 1 in many publications. It is also useful for generating
other tables of univariate statistics.

The result of the function is an object (list) which contains the various data
generated. In most applications the \code{summary} function should be applied, which generates
a data.frame with a (nearly) publication-ready table. Standard manipulation can be
used to modify, add or remove columns/rows, and for users not accustomed to R the table
generated can be exported to a text file which can be read by other software, e.g., via
\code{write.csv(table,file="path/to/results/table.csv")}

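By default, continuous variables are summarized by means and standard deviations
and compared with t-tests. When continuous variables are summarized by medians
and interquartile ranges (variables marked with \code{Q()} in the formula), the
groups are compared with rank tests (Kruskal-Wallis test, which with two groups
is equivalent to the Wilcoxon rank sum test).
Deviations from the above defaults are obtained when the
arguments \code{summary.format} and \code{freq.format} are combined with suitable
summary functions.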
}
\examples{
data(Diabetes)
library(data.table)

## Tables without a grouping variable: one variable, then several
univariateTable(~ age, data = Diabetes)
univariateTable(~ gender, data = Diabetes)
univariateTable(~ age + gender + height + weight, data = Diabetes)
## utable() is an alias that gives the same result with less typing
utable(~ age + gender + height + weight, data = Diabetes)

## Stratify the summary by location
univariateTable(location ~ Q(age) + gender + height + weight,
                data = Diabetes)
## Defaults used for summarizing and comparing groups:
##  * continuous variables marked with Q(): median (IQR) and kruskal.test
##    (with two groups equivalent to wilcox.test)
##  * continuous variables not marked with Q(): mean (sd) and
##    anova.glm(...,test="Chisq"); with only two groups this p-value is
##    similar but not exactly equal to that of a t.test
##  * categorical variables: count (percent) and chi-square tests (chisq.test)
## With compare.groups="logistic" the p-values are calculated with
## anova(glm(...,family=binomial,test="Chisq")).

## Export the result to csv
exportTable <- summary(
  univariateTable(location ~ age + gender + height + weight, data = Diabetes),
  show.pvalues = FALSE
)
# write.csv(exportTable, file = "~/table1.csv", row.names = FALSE)

## Replace variable names and factor levels by custom labels
utable(
  location ~ age + gender + height + weight,
  data = Diabetes,
  age = "Age (years)",
  gender = "Sex",
  gender.female = "Female",
  gender.male = "Male",
  height = "Body height (inches)",
  weight = "Body weight (pounds)"
)

## Quantiles and rank tests for some variables,
## mean and standard deviation for the others
univariateTable(gender ~ Q(age) + location + Q(BMI) + height + weight,
                data = Diabetes)

## A factor with more than 2 levels
Diabetes$AgeGroups <- cut(Diabetes$age,
                          c(19, 29, 39, 49, 59, 69, 92),
                          include.lowest = TRUE)
univariateTable(location ~ AgeGroups + gender + height + weight,
                data = Diabetes)

## Row percentages instead of column percentages
univariateTable(location ~ gender + age + AgeGroups,
                data = Diabetes,
                column.percent = FALSE)

## A different frequency format
univariateTable(location ~ gender + age + AgeGroups,
                data = Diabetes,
                column.percent = FALSE,
                freq.format = "percent(x) (n=count(x))")

## Change labels when the table is summarized
labelledTable <- univariateTable(
  location ~ gender + AgeGroups + height + weight,
  data = Diabetes,
  column.percent = TRUE,
  freq.format = "count(x) (percent(x))"
)
summary(labelledTable,
        AgeGroups = "Age (years)",
        height = "Height (inches)")

## Grouping variable with more than two groups
Diabetes$frame <- factor(Diabetes$frame,
                         levels = c("small", "medium", "large"))
univariateTable(frame ~ gender + BMI + age, data = Diabetes)

Diabetes$sex <- as.numeric(Diabetes$gender)
univariateTable(frame ~ sex + gender + BMI + age,
                data = Diabetes,
                freq.format = "count(x) (percent(x))")

## Several summary formats in one table:
## suppose we want for some reason mean (range) for age
## and median (range) for BMI.
## Method 1: mark age with Q() and set both formats in one call
univariateTable(frame ~ Q(age) + BMI,
                data = Diabetes,
                Q.format = "mean(x) (range(x))",
                summary.format = "median(x) (range(x))")
## Method 2: one call per format, then combine the summaries with rbind
ageSummary <- summary(
  univariateTable(frame ~ age,
                  data = na.omit(Diabetes),
                  summary.format = "mean(x) (range(x))")
)
bmiSummary <- summary(
  univariateTable(frame ~ BMI,
                  data = na.omit(Diabetes),
                  summary.format = "median(x) (range(x))")
)
publish(rbind(ageSummary, bmiSummary), digits = 2)

## Format of large numbers (big.mark)
Diabetes$AGE <- 1000 * Diabetes$age
bigMarkSummary <- summary(
  univariateTable(frame ~ AGE, data = Diabetes, big.mark = "'")
)



}
\seealso{
summary.univariateTable, publish.univariateTable
}
\author{
Thomas A. Gerds
}
