help.search <-
function(pattern, fields = c("alias", "concept", "title"),
	 apropos, keyword, whatis, ignore.case = TRUE,
	 package = NULL, lib.loc = NULL,
	 help.db = getOption("help.db"),
	 verbose = getOption("verbose"),
	 rebuild = FALSE, agrep = NULL)
{
    ### Argument handling.
    TABLE <- c("alias", "concept", "keyword", "name", "title")

    if(!missing(pattern)) {
	if(!is.character(pattern) || (length(pattern) > 1))
	    stop(sQuote("pattern"), " must be a single character string")
	i <- pmatch(fields, TABLE)
	if(any(is.na(i)))
	    stop("incorrect field specification")
	else
	    fields <- TABLE[i]
    } else if(!missing(apropos)) {
	if(!is.character(apropos) || (length(apropos) > 1))
	    stop(sQuote("apropos"), " must be a single character string")
	else {
	    pattern <- apropos
	    fields <- c("alias", "title")
	}
    } else if(!missing(keyword)) {
	if(!is.character(keyword) || (length(keyword) > 1))
	    stop(sQuote("keyword"), " must be a single character string")
	else {
	    pattern <- keyword
	    fields <- "keyword"
            if(is.null(agrep)) agrep <- FALSE
	}
    } else if(!missing(whatis)) {
	if(!is.character(whatis) || (length(whatis) > 1))
	    stop(sQuote("whatis"), " must be a single character string")
	else {
	    pattern <- whatis
	    fields <- "alias"
	}
    } else {
	stop("don't know what to search")
    }

    if(is.null(lib.loc))
	lib.loc <- .libPaths()

    ## <FIXME>
    ## Currently, the information used for help.search is stored in
    ## package-level CONTENTS files.  As it is expensive to build the
    ## help.search db, we use a global file cache for this information
    ## if possible.  This is wrong because multiple processes or threads
    ## use the same cache (no locking!), and we should really save the
    ## information in one (thread-local) global table, e.g. as a local
    ## variable in the environment of help.search(), or something that
    ## can go in a 'shelf' (but not necessarily to a specific file) if
    ## memory usage is an issue.  Argh.
    ## </FIXME>

    ### Set up the help db.
    if(is.null(help.db) || !file.exists(help.db))
	rebuild <- TRUE
    else if(!rebuild) {
	## Try using the saved help db.
        db <- try(.readRDS(file = help.db), silent = TRUE)
        if(inherits(db, "try-error"))
            load(file = help.db)
	## If not a list (pre 1.7 format), rebuild.
	if(!is.list(db) ||
        ## If no information on concepts (pre 1.8 format), rebuild.
           length(db) < 4 ||
	## Need to find out whether this has the info we need.
	## Note that when looking for packages in libraries we always
	## use the first location found.  Hence if the library search
	## path changes we might find different versions of a package.
	## Thus we need to rebuild the help db in case the specified
	## library path is different from the one used when building the
	## help db (stored as its "LibPaths" attribute).
           !identical(lib.loc, attr(db, "LibPaths")) ||
	## We also need to rebuild the help db in case an existing dir
	## in the library path was modified more recently than the db,
	## as packages might have been installed or removed.
           any(file.info(help.db)$mtime <
	       file.info(lib.loc[file.exists(lib.loc)])$mtime)
           )
	    rebuild <- TRUE
    }
    if(rebuild) {
	## Check whether we can save the help db lateron.
	save.db <- FALSE
        dir <- file.path(tempdir(), ".R")
	dbfile <- file.path(dir, "help.db")
	if((tools::fileTest("-d", dir)
            || ((unlink(dir) == 0) && dir.create(dir)))
	   && (unlink(dbfile) == 0))
	    save.db <- TRUE

        ## If we cannot save the help db only use the given packages.
        ## <FIXME>
        ## Why don't we just use the given packages?  The current logic
        ## for rebuilding cannot figure out that rebuilding is needed
        ## the next time (unless we use the same given packages) ...
        packagesInHelpDB <- if(!is.null(package) && !save.db)
            package
        else
            .packages(all.available = TRUE, lib.loc = lib.loc)
        ## </FIXME>

	## Create the help db.
	contentsDCFFields <-
	    c("Entry", "Aliases", "Description", "Keywords")
        np <- 0
	if(verbose)
	    cat("Packages:\n")

        ## Starting with R 1.8.0, prebuilt hsearch indices are available
        ## in Meta/hsearch.rds, and the code to build this from the Rd
        ## contents (as obtained from both new and old style Rd indices)
        ## has been moved to tools:::.buildHsearchIndex(), which creates
        ## a per-package list of base, aliases and keywords information.
        ## When building the global index, it again (see e.g. also the
        ## code in tools:::Rdcontents()), it seems most efficient to
        ## create a list *matrix* (dbMat below), stuff the individual
        ## indices into its rows, and finally create the base, aliases
        ## and keyword information in rbind() calls on the columns.
        ## This is *much* more efficient than building incrementally.
        dbMat <- vector("list", length(packagesInHelpDB) * 4)
        dim(dbMat) <- c(length(packagesInHelpDB), 4)

	for(p in packagesInHelpDB) {
            np <- np + 1
	    if(verbose)
		cat("", p, if((np %% 5) == 0) "\n")
	    path <- .find.package(p, lib.loc, quiet = TRUE)
	    if(length(path) == 0)
		stop("could not find package ", sQuote(p))

            if(file.exists(hsearchFile <-
                           file.path(path, "Meta", "hsearch.rds"))) {
                hDB <- .readRDS(hsearchFile)
            }
            else {
                hDB <- contents <- NULL
                ## Read the contents info from the respective Rd meta
                ## files.
                if(file.exists(contentsFile <-
                               file.path(path, "Meta", "Rd.rds"))) {
                    contents <- .readRDS(contentsFile)
                }
                else if(file.exists(contentsFile
                                    <- file.path(path, "CONTENTS"))) {
                    contents <-
                        read.dcf(contentsFile,
                                 fields = contentsDCFFields)
                }
                ## If we found Rd contents information ...
                if(!is.null(contents)) {
                    ## build the hsearch index from it;
                    hDB <- tools:::.buildHsearchIndex(contents, p,
                                                      dirname(path))
                }
                else {
                    ## otherwise, issue a warning.
                    warning("No Rd contents for package ",
                            sQuote(p), " in ", sQuote(dirname(path)))
                }
            }
            if(!is.null(hDB)) {
                ## Put the hsearch index for the np-th package into the
                ## np-th row of the matrix used for aggregating.
                dbMat[np, seq(along = hDB)] <- hDB
            }
        }

        if(verbose)
	    cat(ifelse(np %% 5 == 0, "\n", "\n\n"))

        ## Create the global base, aliases and keywords tables via calls
        ## to rbind() on the columns of the matrix used for aggregating.
        db <- list(Base = do.call("rbind", dbMat[, 1]),
                   Aliases = do.call("rbind", dbMat[, 2]),
                   Keywords = do.call("rbind", dbMat[, 3]),
                   Concepts = do.call("rbind", dbMat[, 4]))
        if(is.null(db$Concepts))
            db$Concepts <-
                matrix(character(), nc = 3,
                       dimnames = list(NULL,
                       c("Concepts", "ID", "Package")))
        ## And finally, make the IDs globally unique by prefixing them
        ## with the number of the package in the global index.
        for(i in which(sapply(db, NROW) > 0)) {
            db[[i]][, "ID"] <-
                paste(rep.int(seq(along = packagesInHelpDB),
                              sapply(dbMat[, i], NROW)),
                      db[[i]][, "ID"],
                      sep = "/")
        }

	## Maybe save the help db
	## <FIXME>
	## Shouldn't we serialize instead?
	if(save.db) {
	    attr(db, "LibPaths") <- lib.loc
	    .saveRDS(db, file = dbfile)
	    options(help.db = dbfile)
	}
	## </FIXME>
    }

    ### Matching.
    if(verbose)
	cat("Database of ",
	    NROW(db$Base), " Rd objects (",
	    NROW(db$Aliases), " aliases, ",
            NROW(db$Concepts), " concepts, ",
	    NROW(db$Keywords), " keywords),\n",
	    sep = "")
    if(!is.null(package)) {
	## Argument 'package' was given but we built a larger help db to
	## save for future invocations.	 Need to check that all given
	## packages exist, and only search the given ones.
	posInHelpDB <-
	    match(package, unique(db$Base[, "Package"]), nomatch = 0)
	if(any(posInHelpDB) == 0)
	    stop("could not find package ",
                 sQuote(package[posInHelpDB == 0][1]))
	db <-
	    lapply(db,
		   function(x) {
		       x[x[, "Package"] %in% package, , drop = FALSE]
		   })
    }

    ## <FIXME>
    ## No need continuing if there are no objects in the data base.
    ## But shouldn't we return something of class "hsearch"?
    if(!length(db$Base)) return(invisible())
    ## </FIXME>

    ## If agrep is NULL (default), we want to use fuzzy matching iff
    ## 'pattern' contains no characters special to regular expressions.
    ## We use the following crude approximation: if pattern contains
    ## only alphanumeric characters or whitespace or a '-', it is taken
    ## 'as is', and fuzzy matching is used unless turned off explicitly,
    ## or pattern has very few (currently, less than 5) characters.
    if(is.null(agrep) || is.na(agrep))
	agrep <-
	    ((regexpr("^([[:alnum:]]|[[:space:]]|-)+$", pattern) > 0)
             && (nchar(pattern) > 4))
    if(is.logical(agrep)) {
	if(agrep)
	    max.distance <- 0.1
    }
    else if(is.numeric(agrep) || is.list(agrep)) {
	max.distance <- agrep
	agrep <- TRUE
    }
    else
	stop("incorrect agrep specification")

    searchFun <- function(x) {
	if(agrep)
	    agrep(pattern, x, ignore.case = ignore.case,
		  max.distance = max.distance)
	else
	    grep(pattern, x, ignore.case = ignore.case)
    }
    dbBase <- db$Base
    searchDbField <- function(field) {
	switch(field,
	       alias = {
		   aliases <- db$Aliases
		   match(aliases[searchFun(aliases[, "Aliases"]),
                                 "ID"],
			 dbBase[, "ID"])
	       },
	       concept = {
		   concepts <- db$Concepts
		   match(concepts[searchFun(concepts[, "Concepts"]),
                                  "ID"],
			 dbBase[, "ID"])
	       },

	       keyword = {
		   keywords <- db$Keywords
		   match(keywords[searchFun(keywords[, "Keywords"]),
				  "ID"],
			 dbBase[, "ID"])
	       },
	       searchFun(db$Base[, field]))
    }

    i <- NULL
    for(f in fields) i <- c(i, searchDbField(f))
    db <- dbBase[sort(unique(i)),
		 c("topic", "title", "Package", "LibPath"),
		 drop = FALSE]
    if(verbose) cat("matched", NROW(db), "objects.\n")

    ## Retval.
    y <- list(pattern = pattern, fields = fields,
              type = if(agrep) "fuzzy" else "regexp",
              matches = db)
    class(y) <- "hsearch"
    y
}

print.hsearch <-
function(x, ...)
{
    fields <- paste(x$fields, collapse = " or ")
    type <- switch(x$type, fuzzy = "fuzzy", "regular expression")
    db <- x$matches
    if(NROW(db) > 0) {
	outFile <- tempfile()
	outConn <- file(outFile, open = "w")
	writeLines(c(strwrap(paste("Help files with", fields,
                                   "matching", sQuote(x$pattern),
                                   "using", type, "matching:")),
                     "\n\n"),
		   outConn)
	dbnam <- paste(db[ , "topic"], "(",
		       db[, "Package"], ")",
		       sep = "")
	dbtit <- paste(db[ , "title"], sep = "")
	writeLines(formatDL(dbnam, dbtit), outConn)
        writeLines(c("\n\n",
                     strwrap(paste("Type 'help(FOO, package = PKG)' to",
                                   "inspect entry 'FOO(PKG) TITLE'."))),
                   outConn)
	close(outConn)
	file.show(outFile, delete.file = TRUE)
    } else {
	writeLines(strwrap(paste("No help files found with", fields,
                                 "matching", sQuote(x$pattern),
                                 "using", type, "matching.")))
    }
}
