% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/biclustermd.R, R/globalvars.R
\docType{package}
\name{biclustermd}
\alias{biclustermd}
\alias{biclustermd-package}
\title{Bicluster data with non-random missing values}
\usage{
biclustermd(data, row_clusters = floor(sqrt(nrow(data))),
  col_clusters = floor(sqrt(ncol(data))), miss_val = mean(data, na.rm =
  TRUE), miss_val_sd = 1, similarity = "Rand",
  row_min_num = floor(nrow(data)/row_clusters),
  col_min_num = floor(ncol(data)/col_clusters), row_num_to_move = 1,
  col_num_to_move = 1, row_shuffles = 1, col_shuffles = 1,
  max.iter = 100, verbose = FALSE)
}
\arguments{
\item{data}{Dataset to bicluster. Must be a data matrix containing only numbers
and missing values. It should have row names and column names.}

\item{row_clusters}{The number of clusters to partition the rows into. The
default is \code{floor(sqrt(nrow(data)))}.}

\item{col_clusters}{The number of clusters to partition the columns into. The
default is \code{floor(sqrt(ncol(data)))}.}

\item{miss_val}{Value or function to put in empty cells of the prototype matrix.
If a value, a random normal variable with sd = \code{miss_val_sd} is used each
iteration. By default, this equals the mean of \code{data}.}

\item{miss_val_sd}{Standard deviation of the normal distribution \code{miss_val} follows
if \code{miss_val} is a number. By default this equals 1.}

\item{similarity}{The metric used to compare two successive clusterings. Can be
\code{"Rand"} (default), \code{"HA"} for the Hubert and Arabie adjusted Rand index, or \code{"Jaccard"}.
See \code{\link[phyclust]{RRand}} and \code{\link[clusteval]{cluster_similarity}} for details.}

\item{row_min_num}{Minimum row prototype size in order to be eligible to be
chosen when filling an empty row prototype. Default is \code{floor(nrow(data) / row_clusters)}.}

\item{col_min_num}{Minimum column prototype size in order to be eligible to be
chosen when filling an empty column prototype. Default is \code{floor(ncol(data) / col_clusters)}.}

\item{row_num_to_move}{Number of rows to remove from the sampled prototype to
put in the empty row prototype. Default is 1.}

\item{col_num_to_move}{Number of columns to remove from the sampled prototype to
put in the empty column prototype. Default is 1.}

\item{row_shuffles}{Number of times to shuffle rows in each iteration. Default is 1.}

\item{col_shuffles}{Number of times to shuffle columns in each iteration. Default is 1.}

\item{max.iter}{Maximum number of iterations to let the algorithm run for. Default is 100.}

\item{verbose}{Logical. If \code{TRUE}, will report progress. Default is \code{FALSE}.}
}
\value{
A list of class \code{biclustermd}:
\item{params }{a list of all arguments passed to the function, including defaults.}
\item{data }{the input two-way table of data.}
\item{P0 }{the initial column partition matrix.}
\item{Q0 }{the initial row partition matrix.}
\item{InitialSSE }{the SSE of the original partitioning.}
\item{P }{the final column partition matrix.}
\item{Q }{the final row partition matrix.}
\item{SSE }{a matrix of class biclustermd_sse detailing the SSE recorded at the end of each iteration.}
\item{Similarities }{a data frame of class biclustermd_sim detailing the
value of row and column similarity measures recorded at the end of each
iteration. Contains information for all three similarity measures.
This carries an attribute \code{"used"} which provides the similarity
measure used as the stopping condition for the algorithm.}
\item{iteration }{the number of iterations the algorithm ran for, whether \code{max.iter} was reached or convergence was achieved.}
\item{A }{the final prototype matrix which gives the average of each bicluster.}
}
\description{
Bicluster data with non-random missing values

Biclustering with Missing Data
}
\examples{
data("synthetic")
# default parameters
bc <- biclustermd(synthetic)
bc
autoplot(bc)

# providing the true number of row and column clusters
bc <- biclustermd(synthetic, col_clusters = 3, row_clusters = 2)
bc
autoplot(bc)

# an example with the nycflights13::flights dataset
library(nycflights13)
data("flights")

library(dplyr)
flights_bcd <- flights \%>\%
  select(month, dest, arr_delay)

flights_bcd <- flights_bcd \%>\%
  group_by(month, dest) \%>\%
  summarise(mean_arr_delay = mean(arr_delay, na.rm = TRUE)) \%>\%
  spread(dest, mean_arr_delay) \%>\%
  as.data.frame()

rownames(flights_bcd) <- flights_bcd$month
flights_bcd <- as.matrix(flights_bcd[, -1])

flights_bc <- biclustermd(data = flights_bcd, col_clusters = 6, row_clusters = 4,
                  row_min_num = 3, col_min_num = 5,
                  max.iter = 20, verbose = TRUE)
flights_bc

}
\references{
Li, J., Reisner, J., Pham, H., Olafsson, S., and Vardeman, S. (2020) Biclustering with Missing Data. \emph{Information Sciences}, 510, 304–316.
}
\seealso{
\code{\link{rep_biclustermd}}, \code{\link{tune_biclustermd}}
}
