% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mclapply.R
\docType{data}
\name{mclapply}
\alias{mclapply}
\alias{crash_dumps}
\title{parallel::mclapply Wrapper for Better Performance, Error Handling, Seeding
and UX}
\format{
\code{crash_dumps} is an initially empty environment used to store
  the return values of \code{mclapply} (see below) including
  \link[=etry]{crash dumps} in case of non-fatal errors and if
  \code{mc.dump.frames != "no" & mc.allow.error == FALSE}.
}
\usage{
mclapply(
  X,
  FUN,
  ...,
  mc.preschedule = TRUE,
  mc.set.seed = NA,
  mc.silent = FALSE,
  mc.cores = getOption("mc.cores", 2L),
  mc.cleanup = TRUE,
  mc.allow.recursive = TRUE,
  affinity.list = NULL,
  mc.use.names = TRUE,
  mc.allow.fatal = FALSE,
  mc.allow.error = FALSE,
  mc.retry = 0L,
  mc.retry.silent = FALSE,
  mc.retry.fixed.seed = FALSE,
  mc.fail.early = isFALSE(mc.allow.error) && mc.retry == 0L,
  mc.dump.frames = c("partial", "full", "full_global", "no"),
  mc.dumpto = ifelse(interactive(), "last.dump", "file://last.dump.rds"),
  mc.stdout = c("capture", "output", "ignore"),
  mc.warnings = c("m_signal", "signal", "m_output", "output", "m_ignore", "ignore",
    "stop"),
  mc.messages = c("m_signal", "signal", "m_output", "output", "m_ignore", "ignore"),
  mc.conditions = c("signal", "ignore"),
  mc.system.time = FALSE,
  mc.compress.chars = TRUE,
  mc.compress.altreps = c("if_allocated", "yes", "no"),
  mc.share.vectors = getOption("bettermc.use_shm", TRUE),
  mc.share.altreps = c("no", "yes", "if_allocated"),
  mc.share.copy = TRUE,
  mc.shm.ipc = getOption("bettermc.use_shm", TRUE),
  mc.force.fork = FALSE,
  mc.progress = interactive()
)

crash_dumps  # environment with crash dumps created by mclapply (cf. mc.dumpto)
}
\arguments{
\item{X}{a vector (atomic or list) or an expressions vector.  Other
    objects (including classed objects) will be coerced by
    \code{\link{as.list}}.}

\item{FUN}{the function to be applied to (\code{mclapply}) each
    element of \code{X} or (\code{mcmapply}) in parallel to \code{\dots}.}

\item{...}{For \code{mclapply}, optional arguments to \code{FUN}.
    For \code{mcmapply} and \code{mcMap}, vector or list inputs: see
    \code{\link{mapply}}.}

\item{mc.preschedule}{if set to \code{TRUE} then the computation is
    first divided into (at most) as many jobs as there are cores and then
    the jobs are started, each job possibly covering more than one
    value.  If set to \code{FALSE} then one job is forked for each value
    of \code{X}.  The former is better for short computations or large
    number of values in \code{X}, the latter is better for jobs that
    have high variance of completion time and not too many values of
    \code{X} compared to \code{mc.cores}.}

\item{mc.set.seed}{\code{TRUE} or \code{FALSE} are directly handled by
  \code{\link[parallel:mclapply]{parallel::mclapply}}. \code{bettermc} also
  supports two additional values: \code{NA} (the default) - seed every
  invocation of \code{FUN} differently but in a reproducible way based on the
  current state of the random number generator in the parent process.
  An integerish value - call \code{set.seed(mc.set.seed)} in the parent and then
  continue as if \code{mc.set.seed} was \code{NA}.

  In both (\code{NA}- and integerish-) cases, the state of the random number
  generator, i.e. the object \code{.Random.seed} in the global environment,
  is restored at the end of the function to what it was when \code{mclapply}
  was called. If the random number generator is not yet initialized in the
  current session, it is initialized internally (by calling \code{runif(1)})
  and the resulting state is what gets restored later. In particular, this
  means that the seed supplied as \code{mc.set.seed} does \emph{not} seed the
  code following the call to \code{mclapply}. All this ensures that arguments
  like \code{mc.cores}, \code{mc.force.fork} etc. can be adjusted without
  affecting the state of the RNG outside of \code{mclapply}.}

\item{mc.silent}{if set to \code{TRUE} then all output on
    \file{stdout} will be suppressed for all parallel processes forked
    (\file{stderr} is not affected).}

\item{mc.cores}{The number of cores to use, i.e.\ifelse{latex}{\out{~}}{ }{}at most how many
    child processes will be run simultaneously.   The option is
    initialized from environment variable \env{MC_CORES} if set.  Must
    be at least one, and parallelization requires at least two cores.}

\item{mc.cleanup}{if set to \code{TRUE} then all children that have
    been forked by this function will be killed (by sending
    \code{SIGTERM}) before this function returns.  Under normal
    circumstances \code{mclapply} waits for the children to deliver
    results, so this option usually only has an effect when \code{mclapply}
    is interrupted. If set to \code{FALSE} then child processes are
    collected, but not forcefully terminated.  As a special case this
    argument can be set to the number of the signal that should be used
    to kill the children instead of \code{SIGTERM}.}

\item{mc.allow.recursive}{Unless true, calling \code{mclapply} in a
    child process will use the child and not fork again.}

\item{affinity.list}{a vector (atomic or list) containing the CPU
    affinity mask for each element of \code{X}.  The CPU affinity mask
    describes on which CPU (core or hyperthread unit) a given item is
    allowed to run, see \code{\link[parallel]{mcaffinity}}.  To use this parameter
    prescheduling has to be deactivated (\code{mc.preschedule = FALSE}).}

\item{mc.use.names}{if \code{TRUE} and if \code{X} is character, use \code{X}
as names for the result unless it had names already.}

\item{mc.allow.fatal}{should fatal errors in child processes make
  \code{mclapply} fail (\code{FALSE}, default) or merely trigger a warning
  (\code{TRUE})?

  \code{TRUE} returns objects of classes \code{c("fatal-error", "try-error")}
  for failed invocations. Hence, in contrast to
  \code{\link[parallel:mclapply]{parallel::mclapply}}, it is OK for
  \code{FUN} to return \code{NULL}.

  \code{NA} returns the same as \code{TRUE}, but without a warning.

  \code{mc.allow.fatal} can also be \code{NULL}. In this case \code{NULL} is
  returned (and a warning is signaled), which corresponds to the behavior of
  \code{\link[parallel:mclapply]{parallel::mclapply}}.}

\item{mc.allow.error}{should non-fatal errors in \code{FUN} make
\code{mclapply} fail (\code{FALSE}, default) or merely trigger a warning
(\code{TRUE})? In the latter case, errors are stored as class
\code{c("etry-error", "try-error")} objects, which contain full tracebacks
and potentially crash dumps (cf. \code{mc.dump.frames} and
\code{\link{etry}}). \code{NA} returns the same as \code{TRUE}, but without
a warning.}

\item{mc.retry}{\code{abs(mc.retry)} is the maximum number of retries of
  failed applications of \code{FUN} in case of both fatal and non-fatal
  errors. This is useful if we expect \code{FUN} to fail either randomly
  (e.g. non-convergence of a model) or temporarily (e.g. database
  connections). Additionally, if \code{mc.retry <= -1}, the value of
  \code{mc.cores} is gradually decreased with each retry to a minimum of 1 (2
  if \code{mc.force.fork = TRUE}). This is useful if we expect failures due
  to too many parallel processes, e.g. the Linux Out Of Memory Killer
  sacrificing some of the child processes.

  The environment variable "BMC_RETRY" indicates the current retry. A value
  of "0" means first try, a value of "1" first \emph{re}try, etc.}

\item{mc.retry.silent}{should the messages indicating both fatal and
non-fatal failures during all but the last retry be suppressed
(\code{TRUE}) or not (\code{FALSE}, default)?}

\item{mc.retry.fixed.seed}{should \code{FUN} for a particular element of
\code{X} always be invoked with the same fixed seed (\code{TRUE}) or should
a different seed be used on each try (\code{FALSE}, default)? Only
effective if \code{mc.set.seed} is \code{NA} or a number.}

\item{mc.fail.early}{should we try to fail fast after encountering the first
(non-fatal) error in \code{FUN}? Such errors will be recorded as objects of
classes \code{c("fail-early-error", "try-error")}.}

\item{mc.dump.frames}{should we \code{\link[utils]{dump.frames}} on non-fatal
errors in \code{FUN}? The default "partial" omits the frames (roughly) up
to the call of \code{FUN}. See \code{\link{etry}} for the other options.}

\item{mc.dumpto}{where to save the result including the dumped frames if
\code{mc.dump.frames != "no" & mc.allow.error == FALSE}? Either the name of
the variable to create in the environment \code{bettermc::crash_dumps} or a
path (prefixed with "file://") where to save the object.}

\item{mc.stdout}{how should standard output from \code{FUN} be handled?
"capture" captures the output (in the child processes) and prints it in the
parent process after \emph{all} calls of \code{FUN} of the current try (cf.
\code{mc.retry}), such that it can be captured, sinked etc. there. "output"
\emph{immediately} forwards the output to stdout of the parent; it cannot
be captured, sinked etc. there. "ignore" means that the output is not
forwarded in any way to the parent process. For consistency, all of this
also applies if \code{FUN} is called directly from the main process, e.g.
because \code{mc.cores = 1}.}

\item{mc.warnings, mc.messages, mc.conditions}{how should warnings, messages
and other conditions signaled by \code{FUN} be handled? "signal" records
all warnings/messages/conditions (in the child processes) and signals them
in the master process after \emph{all} calls of \code{FUN} of the current
try (cf. \code{mc.retry}). "stop" converts warnings (only) into non-fatal
errors in the child processes directly. "output" \emph{immediately}
forwards the messages to stderr of the parent; no condition is signaled in
the parent process nor is the output capturable/sinkable. "ignore" means
that the conditions are not forwarded in any way to the parent process.
Options prefixed with "m" additionally try to invoke the
"muffleWarning"/"muffleMessage" restart in the child process. Note that, if
\code{FUN} is called directly from the main process, conditions might be
signaled twice in the main process, depending on these arguments.}

\item{mc.system.time}{should \code{\link{system.time}} be used to measure
CPU (and other) times used by the invocations of \code{FUN}? If
\code{TRUE}, the list returned will have an attribute "system_times", which
itself is a list of the same length as \code{X} containing the time
measurements.}

\item{mc.compress.chars}{should character vectors be compressed using
\code{\link{char_map}} before returning them from the child process? Can
also be the minimum length of character vectors for which to enable
compression. This generally increases performance because (de)serialization
of character vectors is particularly expensive.}

\item{mc.compress.altreps}{should a character vector be compressed if it is
an ALTREP? The default "if_allocated" only does so if the regular
representation was already created. This was chosen as the default because
in this case it is the regular representation which would be serialized.}

\item{mc.share.vectors}{should non-character \code{\link[base]{atomic}}
vectors, S3 objects based thereon and factors be returned from the child
processes using POSIX shared memory (cf. \code{\link{copy2shm}})? Can also
be the minimum length of vectors for which to use shared memory. This
generally increases performance because shared memory is a much faster form
of inter-process communication compared to pipes and we do not need to
serialize the vectors.}

\item{mc.share.altreps}{should a non-character vector be returned from the
child process using POSIX shared memory if it is an ALTREP?}

\item{mc.share.copy}{should the parent process use a vector placed in shared
memory due to \code{mc.share.vectors} directly (\code{FALSE}) or rather a
copy of it (\code{TRUE})? See \code{\link{copy2shm}} for the implications.}

\item{mc.shm.ipc}{should the results be returned from the child processes
using POSIX shared memory (cf. \code{\link{copy2shm}})?}

\item{mc.force.fork}{should it be ensured that \code{FUN} is always called in
a forked child process, even if \code{length(X) == 1}? This is useful if we
use forking to protect the main R process from fatal errors, memory
corruption, memory leaks etc. occurring in \code{FUN}. This feature
requires that \code{mc.cores >= 2} and also ensures that the effective
value for \code{mc.cores} never drops to less than 2 as a result of
\code{mc.retry} being negative.}

\item{mc.progress}{should a progress bar be printed to stderr of the parent
process (package \code{progress} must be installed)?}
}
\value{
\code{mclapply} returns a list of the same length as \code{X} and named by
  \code{X}. In case of fatal/non-fatal errors and depending on
  \code{mc.allow.fatal}/\code{mc.allow.error}/\code{mc.fail.early}, some of
  the elements might inherit from
  "fatal-error"/\link[=etry]{"etry-error"}/"fail-early-error" and "try-error"
  or be \code{NULL}.
}
\description{
This wrapper for \code{\link[parallel:mclapply]{parallel::mclapply}} adds the
following features: \itemize{ \item reliably detect if a child process failed
with a fatal error or if it was killed. \item get tracebacks after non-fatal
errors in child processes. \item retry on fatal and non-fatal errors. \item
fail early after non-fatal errors in child processes. \item get crash dumps
from failed child processes. \item capture output from child processes. \item
track warnings, messages and other conditions signaled in the child
processes. \item return results from child processes using POSIX shared
memory to improve performance. \item compress character vectors in results to
improve performance. \item reproducibly seed all function calls. \item
display a progress bar.}
}
\section{POSIX Shared Memory}{
 The shared memory objects created by
  \code{mclapply} are named as follows (this may be subject to change):
  \code{/bmc_ppid_timestamp_idx_cntr} (e.g.
  \code{/bmc_21479_1601366973201_16_10}), with \describe{\item{ppid}{the
  process id of the parent process.}\item{timestamp}{the time at which
  \code{mclapply} was invoked (in milliseconds since epoch; on macOS: seconds
  since epoch, due to its 31-character limit w.r.t. POSIX
  names).}\item{idx}{the index of the current element of \code{X}
  (1-based).}\item{cntr}{an internal counter (1-based) referring to all the
  objects created due to \code{mc.share.vectors} for the current value of
  \code{X}; a value of \code{0} is used for the object created due to
  \code{mc.shm.ipc}.}}

  \code{bettermc::mclapply} does not err if copying data to shared memory
  fails. It will rather only print a message and return results the usual
  way.

  POSIX shared memory has (at least) kernel persistence, i.e. it is not
  automatically freed due to process termination, except if the object is/was
  unlinked. \code{bettermc} tries hard to not leave any byte behind, but it
  could happen that unlinking is incomplete if the parent process is
  terminated while \code{bettermc::mclapply} is running.

  On Linux you can generally inspect the (not-unlinked) objects currently
  stored in shared memory by listing the files under \emph{/dev/shm}.
}

\section{(Linux) Size of POSIX Shared Memory}{
 On Linux, POSIX shared memory
  is implemented using a
  \emph{\href{https://man7.org/linux/man-pages/man5/tmpfs.5.html}{tmpfs}}
  typically mounted under \code{/dev/shm}. If not changed by the
  distribution, the default size of it is 50\% of physical RAM. It can be
  changed (temporarily) by remounting it with a different value for the
  \emph{size} option, e.g. \code{mount -o "remount,size=90\%" /dev/shm}.
}

\section{(Linux) POSIX Shared Memory and Transparent Hugepage Support}{
 When
  allocating a shared memory object of at least
  \code{getOption("bettermc.hugepage_limit", 104857600)} bytes of size
  (default is 100 MiB), we use
  \href{https://man7.org/linux/man-pages/man2/madvise.2.html}{\code{madvise}}\code{(...,
   MADV_HUGEPAGE)} to request the allocation of
  \href{https://www.kernel.org/doc/Documentation/vm/transhuge.txt}{(transparent)
   huge pages}. For this to have any effect, the
  \emph{\href{https://man7.org/linux/man-pages/man5/tmpfs.5.html}{tmpfs}}
  used to implement POSIX shared memory on Linux (typically mounted under
  \code{/dev/shm}) must be (re)mounted with option \emph{huge=advise}, i.e.
  \code{mount -o remount,huge=advise /dev/shm}. (The default is
  \code{huge=never}, but this might be distribution-specific.)
}

\section{Windows Support}{
 On Windows, otherwise valid values for various
  arguments are silently replaced as follows:
\preformatted{  mc.cores <- 1L
  mc.share.vectors <- Inf
  mc.shm.ipc <- FALSE
  mc.force.fork <- FALSE
  mc.progress <- FALSE
  if (mc.stdout == "output") mc.stdout <- "ignore"
  if (mc.warnings == "output") mc.warnings <- "ignore"
  if (mc.messages == "output") mc.messages <- "ignore"}

  \bold{Note:} \code{\link[parallel:mclapply]{parallel::mclapply}} demands
  \code{mc.cores} to be exactly 1 on Windows; \code{bettermc::mclapply} sets
  it to 1 on Windows.

  Furthermore, \code{\link[parallel:mclapply]{parallel::mclapply}} ignores
  the following arguments on Windows: \code{mc.preschedule, mc.silent,
  mc.cleanup, mc.allow.recursive, affinity.list}. For \code{mc.set.seed},
  only the values \code{TRUE} and \code{FALSE} are ignored (by
  \code{\link[parallel:mclapply]{parallel::mclapply}}); the other values are
  handled by \code{bettermc::mclapply} as documented above.
}

\section{Lifecycle}{

  \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options:
   alt='[Stable]'}}}{\strong{[Stable]}}
}

\seealso{
\code{\link{copy2shm}}, \code{\link{char_map}},
  \code{\link[parallel:mclapply]{parallel::mclapply}}
}
\keyword{datasets}
