% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/build_lcm.R
\name{build_lcm}
\alias{build_lcm}
\title{Build a Latent Class Model}
\usage{
build_lcm(observations, n_clusters, emission_probs, formula, data,
  coefficients, cluster_names = NULL, channel_names = NULL)
}
\arguments{
\item{observations}{An \code{stslist} object (see \code{\link[TraMineR]{seqdef}}) containing
the sequences, or a list of such objects (one for each channel).}

\item{n_clusters}{A scalar giving the number of clusters/submodels 
(not used if starting values for model parameters are given with \code{emission_probs}).}

\item{emission_probs}{A matrix containing emission probabilities for each class by rows, 
or in case of multichannel data a list of such matrices. 
Note that the matrices must have dimensions k x s where k is the number of 
latent classes and s is the number of unique symbols (observed states) in the 
data. Emission probabilities should follow the ordering of the alphabet of 
observations (\code{alphabet(observations)}, returned as \code{symbol_names}).}

\item{formula}{Covariates as an object of class \code{\link{formula}}, 
left side omitted.}

\item{data}{An optional data frame, list or environment containing the variables 
in the model. If not found in data, the variables are taken from 
\code{environment(formula)}.}

\item{coefficients}{An optional \eqn{k x l} matrix of regression coefficients for 
time-constant covariates for mixture probabilities, where \eqn{l} is the number 
of clusters and \eqn{k} is the number of covariates. A logit-link is used for
mixture probabilities. The first column is set to zero.}

\item{cluster_names}{A vector of optional names for the classes/clusters.}

\item{channel_names}{A vector of optional names for the channels.}
}
\value{
Object of class \code{mhmm} with the following elements:
\describe{
   \item{\code{observations}}{State sequence object or a list of such containing the data.}
   \item{\code{transition_probs}}{A matrix of transition probabilities.}
   \item{\code{emission_probs}}{A matrix or a list of matrices of emission probabilities.}
   \item{\code{initial_probs}}{A vector of initial probabilities.}
   \item{\code{coefficients}}{A matrix of parameter coefficients for covariates (covariates in rows, clusters in columns).}
   \item{\code{X}}{Covariate values for each subject.}
   \item{\code{cluster_names}}{Names for clusters.}
   \item{\code{state_names}}{Names for hidden states.}
   \item{\code{symbol_names}}{Names for observed states.}
   \item{\code{channel_names}}{Names for channels of sequence data}
   \item{\code{length_of_sequences}}{(Maximum) length of sequences.}
   \item{\code{n_sequences}}{Number of sequences.}
   \item{\code{n_symbols}}{Number of observed states (in each channel).}
   \item{\code{n_states}}{Number of hidden states.}
   \item{\code{n_channels}}{Number of channels.}
   \item{\code{n_covariates}}{Number of covariates.}
   \item{\code{n_clusters}}{Number of clusters.}
}
}
\description{
Function \code{build_lcm} is a shortcut for constructing a latent class model 
as a restricted case of an \code{mhmm} object.
}
\examples{
# Simulate observations from two classes
set.seed(123)
obs <- seqdef(rbind(
  matrix(sample(letters[1:3], 5000, TRUE, prob = c(0.1, 0.6, 0.3)), 500, 10),
  matrix(sample(letters[1:3], 2000, TRUE, prob = c(0.4, 0.4, 0.2)), 200, 10)))

# Initialize the model
set.seed(9087)
model <- build_lcm(obs, n_clusters = 2)

# Estimate model parameters
fit <- fit_model(model)

# How many of the observations were correctly classified:
sum(summary(fit$model)$most_probable_cluster == rep(c("Class 2", "Class 1"), times = c(500, 200)))

############################################################
\dontrun{
# LCM for longitudinal data

# Define sequence data
data("mvad", package = "TraMineR")
mvad_alphabet <- c("employment", "FE", "HE", "joblessness", "school",
  "training")
mvad_labels <- c("employment", "further education", "higher education",
  "joblessness", "school", "training")
mvad_scodes <- c("EM", "FE", "HE", "JL", "SC", "TR")
mvad_seq <- seqdef(mvad, 17:86, alphabet = mvad_alphabet, states = mvad_scodes,
  labels = mvad_labels, xtstep = 6)
  
# Initialize the LCM with random starting values
set.seed(7654)
init_lcm_mvad1 <- build_lcm(observations = mvad_seq,
  n_clusters = 2, formula = ~male, data = mvad)

# Own starting values for emission probabilities
emiss <- rbind(rep(1/6, 6), rep(1/6, 6))

# LCM with own starting values
init_lcm_mvad2 <- build_lcm(observations = mvad_seq,
  emission_probs = emiss, formula = ~male, data = mvad)

# Estimate model parameters (EM algorithm with random restarts)
lcm_mvad <- fit_model(init_lcm_mvad1, 
  control_em = list(restart = list(times = 5)))$model

# Plot the LCM
plot(lcm_mvad, interactive = FALSE, ncol = 2)

###################################################################

# Binomial regression (comparison to glm)

require("MASS")
data("birthwt")

model <- build_lcm(
  observations = seqdef(birthwt$low), emission_probs = diag(2), 
  formula = ~age + lwt + smoke + ht, data = birthwt)
fit <- fit_model(model)
summary(fit$model)
summary(glm(low ~ age + lwt + smoke + ht, binomial, data = birthwt))


# Multinomial regression (comparison to multinom)

require("nnet")

set.seed(123)
n <- 100
X <- cbind(1, x1 = runif(n, 0, 1), x2 =  runif(n, 0, 1))
coefs <- cbind(0,c(-2, 5, -2), c(0, -2, 2))
pr <- exp(X \%*\% coefs)  + rnorm(n*3)
pr <- pr/rowSums(pr)
y <- apply(pr, 1, which.max)
table(y)

model <- build_lcm(
  observations = seqdef(y), emission_probs = diag(3), 
  formula = ~x1 + x2,  data = data.frame(X[, -1]))
fit <- fit_model(model)
summary(fit$model)
summary(multinom(y ~ x1 + x2, data = data.frame(X[,-1])))
}
}
\seealso{
\code{\link{fit_model}} for estimating model parameters;
\code{\link{summary.mhmm}} for a summary of a mixture model; 
\code{\link{separate_mhmm}} for organizing an \code{mhmm} object into a list of 
separate \code{hmm} objects; and \code{\link{plot.mhmm}} for plotting 
mixture models.
}
