\name{submodels}

\alias{submodels}

\title{Compute All Correctness-Preserving Submodels of a QCA Reference Model}

\description{
This evaluation function computes all correctness-preserving submodels of a QCA reference 
model. It was initially programmed for \href{https://www.researchgate.net/publication/280555425_Often_Trusted_But_Never_Properly_Tested_Evaluating_Qualitative_Comparative_Analysis}{Baumgartner and Thiem (2015)} to test 
the correctness of QCA's three search strategies (conservative/complex, intermediate, 
parsimonious).
}

\usage{
submodels(expression, noflevels = c(), test = TRUE)
}

\arguments{
  \item{expression}{A string representing a csQCA or an fsQCA model, or a csQCA 
                    or fsQCA solution object of class 'qca' (created by the 
                    \code{\link{eQMC}} function).}
  \item{noflevels}{A numeric vector specifying the number of levels for each
                   factor (experimental, can be ignored).}
  \item{test}{Logical, test whether \code{expression} is a causal structure.}
}

\details{
This function was initially programmed for \href{https://www.researchgate.net/publication/280555425_Often_Trusted_But_Never_Properly_Tested_Evaluating_Qualitative_Comparative_Analysis}{Baumgartner and Thiem (2015)} to 
test the correctness of QCA's three solution types (conservative/complex, 
intermediate, parsimonious). It computes all submodels of a csQCA or an fsQCA 
reference model that do not violate the criterion of correctness (mvQCA models 
are not yet supported). The following expression structures can be used: 
\code{"A*B + C*D <=> Y"} or \code{"AB + CD <=> Y"}. Empty spaces and the type 
of conditional operator (\code{<->}/\code{<=>}/\code{->}/\code{=>}) are irrelevant,
but only single letters are allowed for exogenous factors. The full model need not 
be provided; the antecedent also suffices (e.g., \code{"AB + CD"}). 

Objects of class 'qca', which are returned by the \code{\link{eQMC}} function, 
are also accepted, provided that all exogenous factors have a single-letter label 
(set the argument \code{use.letters} to \code{TRUE} in the function call to 
\code{\link{eQMC}} if original factor labels are not single letters). 

The argument \code{noflevels} expects a numeric vector of the number of factor
levels with a \code{names} attribute. Currently, this argument is experimental
and can be ignored.

The argument \code{test} specifies whether \code{expression} should be pre-tested 
for its causal interpretability before forming submodels. The value of this argument 
does not affect whether basic tests for likely typos in expressions such as 
\code{"abb <-> C"} or \code{"abB <-> C"} are performed. If \code{expression} is 
an object of class 'qca', \code{test} will be set to \code{FALSE} because QCA 
models generated by the \code{eQMC} function at default argument settings are 
always causally interpretable.

Note that for highly complex models containing many conjuncts within many 
disjuncts, computing times tend to increase considerably.
}

\value{A list with the following four main components:\cr
  \item{model}{The reference model.}
  \item{noflevels}{The number of levels for each factor in the factor frame 
        of the model.}
  \item{outcome}{The outcome specified as part of the expression or a pseudo
                 outcome if only an antecedent was specified.}
  \item{submodels}{A character vector of all correctness-preserving submodels.}
}

\author{
Alrik Thiem (\href{http://www.alrik-thiem.net}{Personal Website}; \href{https://www.researchgate.net/profile/Alrik_Thiem}{ResearchGate Website})
}

\section{Contributors (alphabetical)}{\tabular{ll}{
Baumgartner, Michael\tab: development, testing\cr
Thiem, Alrik        \tab: development, documentation, programming, testing
}}

\references{
Baumgartner, Michael, and Alrik Thiem. 2015. \emph{Often Trusted but Never 
(Properly) Tested: Evaluating Qualitative Comparative Analysis}. Paper presented 
at the 12th Conference of the European Sociological Association, 25-28 August, 
Czech Technical University, Prague (Czech Republic). \href{https://www.researchgate.net/publication/280555425_Often_Trusted_But_Never_Properly_Tested_Evaluating_Qualitative_Comparative_Analysis}{Link}.
}

\seealso{\code{\link{eQMC}}, \code{\link{limitedDiversity}}}

\examples{
\dontrun{
# provide a) a full model as an equivalence and inspect its submodels
models1 <- submodels("a*B + B*c + D <-> Z") 
models1$submodels

# ... b) a full model with a negated outcome (lower-case e) and count its
# submodels
models2 <- submodels("AcD + BCD + abcd <=> e")
length(models2$submodels)

# ... c) or only an antecedent
models3 <- submodels("aB + Bc + D")
models3$submodels

# directly provide an object of class 'qca' generated by the 'eQMC' function,
# even when the solution comprises multiple models; specify 
# 'use.letters = TRUE' when the original exogenous factors have multi-letter 
# labels; for example:
data(d.represent)
sol1 <- eQMC(d.represent, outcome = "WNP", neg.out = TRUE, use.letters = TRUE)
sol1
# M1: ae + cde + (bdE) <=> wnp 
# M2: ae + cde + (bcd) <=> wnp 
# M3: ae + cde + (Abc) <=> wnp
# M1 has 138 submodels, M2 has 129, and M3 has 139 submodels
models4 <- submodels(sol1)
sapply(models4, "[")

# when original labels of exogenous factors already consist of single 
# letters only, 'use.letters = TRUE' need not be specified
data(d.napoleon)
sol2 <- eQMC(d.napoleon, outcome = "O")
sol2
models5 <- submodels(sol2)
sapply(models5, "[")

# prior testing is recommended because non-causal models can sometimes only
# be identified computationally ('test' is left at its default, TRUE)
submodels("aB + Ac + Ad + bc + bd + CD")

# can a + AbC => Y be an acceptable QCA solution as Schneider and Wagemann 
# (2012, p. 108) argue? No, because in Boolean algebra, it holds that
# F + fG = (F + f) * (F + G) = 1*(F + G) = F + G by the laws of distribution,
# complementarity, and identity
submodels("a + AbC => Y", test = TRUE)

# proof that the conservative/complex solution type of QCA is incorrect, 
# using model 3 from above (see Baumgartner and Thiem (2015) for more details)

# 1. build saturated truth table on the basis of model 3: aB + Bc + D
# (start from all 32 combinations of five bivalent columns, then keep only
# the rows in which the value of OUT equals the value of aB + Bc + D)
tt <- data.frame(mintermMatrix(rep(2, 5)))
dimnames(tt) <- list(as.character(1:32), c(LETTERS[1:4], "OUT"))
tt <- tt[pmax(pmin(1 - tt$A, tt$B), pmin(tt$B, 1 - tt$C), tt$D) == tt$OUT, ]

# 2. use function 'limitedDiversity' to generate all conservative/complex
# solutions for all 16 + 120 scenarios of one/two dropped minterm/s
# (the preallocated list is replaced by the result of 'lapply'; element x
# holds the result for n.drop = x)
sollist.cs <- vector("list", 2)
sollist.cs <- lapply(1:2, function (x) {
  limitedDiversity(tt, outcome = "OUT", sol.type = "cs", n.drop = x)
  }
)

# 3. compute in how many scenarios a correctness-preserving submodel of 
# model 3 was part of the solution (43.75\% for one dropped minterm and 
# 16.67\% for two dropped minterms)
# (for each x, count the entries of the second component of the
# 'limitedDiversity' result that contain at least one submodel of model 3,
# divide by the number of scenarios, choose(16, x), and express the share as
# a percentage rounded to two decimals; the second component presumably holds
# one entry per scenario - confirm against the 'limitedDiversity' help page;
# the preallocated vector is replaced by the result of 'sapply')
cs.correct <- numeric(2)
cs.correct <- sapply(1:2, function (x) {round((sum(unlist(lapply(
  sollist.cs[[x]][[2]], function (y) {any(models3$submodels \%in\% y)}
  ))) / choose(16, x))*100, 2)}
)
cs.correct
}
}

\keyword{functions}
