% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/main.R
\name{select_genes}
\alias{select_genes}
\title{Selects genes using the EMMIXgene algorithm}
\usage{
select_genes(
  dat,
  filename,
  random_starts = 4,
  max_it = 100,
  ll_thresh = 8,
  min_clust_size = 8,
  tol = 1e-04,
  start_method = "both",
  three = FALSE
)
}
\arguments{
\item{dat}{A matrix or dataframe containing gene expression data.
Rows are genes and columns are samples. Must supply one of filename and dat.}

\item{filename}{Name of file containing gene data. Can be either .csv 
or space-separated .dat. Rows are genes and columns are samples. 
Must supply one of filename and dat.}

\item{random_starts}{The number of random initializations used per gene when
fitting mixtures of t-distributions. Initialization uses k-means by default.}

\item{max_it}{The maximum number of iterations per mixture fit. 
Default value is 100.}

\item{ll_thresh}{The difference in -2 log lambda used as a threshold 
for selecting between g=1 and g=2 for each gene. Default value is 8,
which was chosen arbitrarily in the original paper.}

\item{min_clust_size}{The minimum number of observations per cluster
used when fitting mixtures of t-distributions for each gene.
Default value is 8.}

\item{tol}{Tolerance value used for detecting convergence of EMMIX fits.}

\item{start_method}{Method used to generate starting values for the
mixture fits. Default value is "both". 
Can also choose "random" for purely random starts.}

\item{three}{Also test g=2 vs g=3 where appropriate. Defaults to FALSE.}
}
\value{
An EMMIXgene object containing:
\item{stat}{The difference in log-likelihood for g=1 
and g=2 for each gene (or for g=2 and g=3 where relevant).}
\item{g}{The selected number of components for each gene.}
\item{it}{The number of iterations for each gene's selected fit.} 
\item{selected}{An indicator for each gene's selected status.}
\item{ranks}{Selected gene ids ranked by stat.}
\item{genes}{A dataframe of selected genes.}
\item{all_genes}{Returns dat or contents of filename.}
}
\description{
Follows the gene selection methodology of 
G. J. McLachlan, R. W. Bean, D. Peel; A mixture model-based approach to the 
clustering of microarray expression data, Bioinformatics, Volume 18, 
Issue 3, 1 March 2002, Pages 413–422,
\url{https://doi.org/10.1093/bioinformatics/18.3.413}
}
\examples{
# Only run on the first 100 genes for speed
alon_sel <- select_genes(alon_data[seq_len(100), ]) 

}
