% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ff.R
\name{ff.formula}
\alias{ff.formula}
\title{Fuzzy forests algorithm}
\usage{
\method{ff}{formula}(formula, data = NULL, module_membership, ...)
}
\arguments{
\item{formula}{Formula object.}

\item{data}{Data used in the analysis.}

\item{module_membership}{A character vector giving the module membership
of each feature.}

\item{...}{Additional arguments}
}
\value{
An object of type \code{\link[fuzzyforest]{fuzzy_forest}}.  This
object is a list containing useful output of fuzzy forests.
In particular, it contains a data.frame listing the selected features.
It also includes the random forest fit using the selected features.
}
\description{
Implements formula interface for \code{\link[fuzzyforest]{ff}}.
}
\note{
See \code{\link[fuzzyforest]{ff}} for additional arguments.
Note that the matrix, \code{Z}, of features that do not go through
the screening step must be specified separately from the formula.
\code{test_features} and \code{test_y} are not supported in the formula
interface.  As in the \code{randomForest} package, for large data sets
the formula interface may be substantially slower.

This work was partially funded by NSF IIS 1251151 and AMFAR 8721SC.
}
\examples{
#ff requires that the partition of the covariates be previously determined.
#ff is also handy if the user wants to test out multiple settings of WGCNA
#prior to running fuzzy forests.
library(mvtnorm)
gen_mod <- function(n, p, corr) {
  #simulate one module of p correlated features for n observations
  #p by p covariance: corr in every cell, then ones on the diagonal
  cov_mat <- matrix(corr, p, p)
  diag(cov_mat) <- 1
  #the multivariate normal draws are the return value
  rmvnorm(n, sigma = cov_mat)
}

gen_X <- function(n, mod_sizes, corr){
  #simulate a feature matrix built from separately generated modules
  #n: number of observations passed to gen_mod for every module
  #mod_sizes: number of features in each module
  #corr: correlation passed to gen_mod, one entry per module
  #returns the module matrices bound together column-wise
  #seq_along gives an empty index for empty mod_sizes, unlike 1:m
  blocks <- lapply(seq_along(mod_sizes), function(i) {
    gen_mod(n, mod_sizes[i], corr[i])
  })
  if (length(blocks) == 0) {
    #no modules requested: n rows and zero columns
    return(matrix(numeric(0), nrow = n, ncol = 0))
  }
  do.call("cbind", blocks)
}

#simulation settings
#err_sd is the standard deviation of the noise added to the response
err_sd <- .5
#n is the number of simulated observations
n <- 500
#four modules of 25 features each, 100 features in total
mod_sizes <- rep(25, 4)
#correlation value handed to gen_mod for each module
corr <- rep(.8, 4)
X <- gen_X(n, mod_sizes, corr)
#only features 1-4 and 76-79 have nonzero coefficients
beta <- rep(0, 100)
beta[c(1:4, 76:79)] <- 5
#linear response plus normal noise
y <- X\%*\%beta + rnorm(n, sd=err_sd)
X <- as.data.frame(X)
#response and features in one data frame for the formula interface
dat <- as.data.frame(cbind(y, X))

#test set generated the same way as the training data
Xtest <- gen_X(n, mod_sizes, corr)
ytest <- Xtest\%*\%beta + rnorm(n, sd=err_sd)
Xtest <- as.data.frame(Xtest)

#partition the features: hierarchical clustering on 1 - correlation,
#cut into 4 groups that are later passed as module_membership
cdist <- as.dist(1 - cor(X))
hclust_fit <- hclust(cdist, method="ward.D")
groups <- cutree(hclust_fit, k=4)

#control object later passed to ff as screen_params
screen_c <- screen_control(keep_fraction = .25,
                           ntree_factor = 1,
                           min_ntree = 250)
#control object later passed to ff as select_params
select_c <- select_control(number_selected = 10,
                           ntree_factor = 1,
                           min_ntree = 250)
\donttest{
#fit fuzzy forests through the formula interface; groups supplies the
#module membership and the control objects are passed on to ff
ff_fit <- ff(y ~ ., data=dat,
             module_membership = groups,
             screen_params = screen_c,
             select_params = select_c,
             final_ntree = 250)
#extract variable importance rankings
vims <- ff_fit$feature_list

#plot results
modplot(ff_fit)

#obtain predicted values for a new test set
preds <- predict(ff_fit, new_data=Xtest)

#estimate test set error as the root mean squared prediction error
test_err <- sqrt(sum((ytest - preds)^2)/n)
}
}
\references{
Leo Breiman (2001). Random Forests. Machine Learning, 45(1), 5-32.

Daniel Conn, Tuck Ngun, Christina M. Ramirez (2015). Fuzzy Forests: a New
WGCNA Based Random Forest Algorithm for Correlated, High-Dimensional Data,
Journal of Statistical Software, Manuscript in progress.

Bin Zhang and Steve Horvath (2005) "A General Framework for Weighted Gene
Co-Expression Network Analysis", Statistical Applications in Genetics and
Molecular Biology: Vol. 4: No. 1, Article 17
}
\seealso{
\code{\link[fuzzyforest]{ff}},
         \code{\link[fuzzyforest]{print.fuzzy_forest}},
         \code{\link[fuzzyforest]{predict.fuzzy_forest}},
         \code{\link[fuzzyforest]{modplot}}
}

