% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/popsize.R
\name{popsize}
\alias{popsize}
\title{Estimate total population size and capture probability using user provided set of models or user provided nuisance estimates.}
\usage{
popsize(
  data,
  K = 2,
  j,
  k,
  margin = 0.005,
  filterrows = FALSE,
  nfolds = 5,
  funcname = c("rangerlogit"),
  sl.lib = c("SL.gam", "SL.glm", "SL.glm.interaction", "SL.ranger", "SL.glmnet"),
  getnuis,
  q1mat,
  q2mat,
  q12mat,
  idfold,
  TMLE = TRUE,
  PLUGIN = TRUE,
  Nmin = 100,
  ...
)
}
\arguments{
\item{data}{The data frame in capture-recapture format with \code{K} lists for which total population is to be estimated.
The first K columns are the capture history indicators for the \code{K} lists. The remaining columns are covariates in numeric format.}

\item{K}{The number of lists that are present in the data.}

\item{j}{The first list to be used for estimation.}

\item{k}{The second list to be used in the estimation.}

\item{margin}{The minimum value the estimates can attain to bound them away from zero.}

\item{filterrows}{A logical value denoting whether to remove all rows with only zeroes.}

\item{nfolds}{The number of folds to be used for cross fitting.}

\item{funcname}{The vector of estimation function names to obtain the population size.}

\item{sl.lib}{Algorithm library for \code{\link[=qhat_sl]{qhat_sl()}}. See \code{\link[SuperLearner:listWrappers]{SuperLearner::listWrappers()}}. Default library includes "gam", "glm", "glmnet", "glm.interaction", "ranger".}

\item{getnuis}{A list object with the nuisance function estimates and the fold assignment of the rows for cross-fitting or a data.frame with the nuisance estimates.}

\item{q1mat}{A dataframe with capture probabilities for the first list.}

\item{q2mat}{A dataframe with capture probabilities for the second list.}

\item{q12mat}{A dataframe with capture probabilities for both the lists simultaneously.}

\item{idfold}{The fold assignment of each row during estimation.}

\item{TMLE}{The logical value to indicate whether TMLE has to be computed.}

\item{PLUGIN}{The logical value to indicate whether the plug-in estimates are returned.}

\item{Nmin}{The cutoff for minimum sample size to perform doubly robust estimation. Otherwise, the Petersen estimator is returned.}

\item{...}{Any extra arguments passed into the function. See \code{\link[=qhat_rangerlogit]{qhat_rangerlogit()}}, \code{\link[=qhat_sl]{qhat_sl()}}, \code{\link[=tmle]{tmle()}}.}
}
\value{
A list of estimates containing the following components for each list-pair, model and method (PI = plug-in, DR = doubly-robust, TMLE = targeted maximum likelihood estimate):
\item{result}{  A dataframe of the below estimated quantities.
\itemize{
\item{psi}{  The estimated capture probability.}
\item{sigma}{  The efficiency bound.}
\item{n}{  The estimated population size n.}
\item{sigman}{  The estimated standard deviation of the population size.}
\item{cin.l}{  The estimated lower bound of a 95\% confidence interval of \code{n}.}
\item{cin.u}{  The estimated upper bound of a 95\% confidence interval of \code{n}.}}}
\item{N}{  The number of data points used in the estimation after removing rows with missing data.}
\item{ifvals}{  The estimated influence function values for the observed data.}
\item{nuis}{  The estimated nuisance functions (q12, q1, q2) for each element in funcname.}
\item{nuistmle}{  The estimated nuisance functions (q12, q1, q2) from tmle for each element in funcname.}
\item{idfold}{  The division of the rows into sets (folds) for cross-fitting.}
}
\description{
Estimate total population size and capture probability using user provided set of models or user provided nuisance estimates.
}
\examples{
\donttest{
data = simuldata(1000, l = 3)$data
qhat = popsize(data = data, funcname = c("logit", "gam"), nfolds = 2, margin = 0.005)
psin_estimate = popsize(data = data, getnuis = qhat$nuis, idfold = qhat$idfold)

data = simuldata(n = 6000, l = 3)$data
psin_estimate = popsize(data = data[,1:2])
#this returns the basic plug-in estimate since covariates are absent.

psin_estimate = popsize(data = data, funcname = c("gam", "rangerlogit"))
}
}
\references{
Bickel, P. J., Klaassen, C. A. J., Ritov, Y., and Wellner, J. A. (1993). Efficient and adaptive estimation for semiparametric models, volume 4. \emph{Johns Hopkins University Press, Baltimore}.

van der Vaart, A. (2002a). Part III: Semiparametric statistics. Lectures on Probability Theory and Statistics, pages 331-457.

van der Laan, M. J. and Robins, J. M. (2003). Unified methods for censored longitudinal data and causality. \emph{Springer Science & Business Media}

Tsiatis, A. (2006). Semiparametric theory and missing data. \emph{Springer, New York}.

Kennedy, E. H. (2016). Semiparametric theory and empirical processes in causal inference. \emph{Statistical causal inferences and their applications in public health research}, pages 141-167. \emph{Springer}

Das, M., Kennedy, E. H., & Jewell, N.P. (2021). Doubly robust capture-recapture methods for estimating population size. \emph{arXiv preprint} \emph{arXiv:2104.14091}.
}
