% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/bayes_clustering.R
\name{crimeClust_bayes}
\alias{crimeClust_bayes}
\title{Bayesian model-based partially-supervised clustering for crime series identification}
\usage{
crimeClust_bayes(crimeID, spatial, t1, t2, Xcat, Xnorm, maxcriminals = 1000,
  iters = 10000, burn = 5000, plot = TRUE, update = 100, seed = NULL,
  use_space = TRUE, use_time = TRUE, use_cats = TRUE)
}
\arguments{
\item{crimeID}{n-vector of criminal IDs for the n crimes in the dataset.
  For unsolved crimes, the value should be \code{NA}.}

\item{spatial}{(n x 2) matrix of spatial locations. Represent missing
  locations with \code{NA}.}

\item{t1}{earliest possible time for crime}

\item{t2}{latest possible time for crime. Crime occurred between \code{t1}
  and \code{t2}.}

\item{Xcat}{(n x q) matrix of categorical crime features.  Each column is a
  variable, such as mode of entry.  The different factors (window, door, etc)
  should be coded as integers 1,2,\dots,m.}

\item{Xnorm}{(n x p) matrix of continuous crime features.}

\item{maxcriminals}{maximum number of clusters in the model.}

\item{iters}{Number of MCMC samples to generate.}

\item{burn}{Number of MCMC samples to discard as burn-in.}

\item{plot}{(logical) Should plots be produced during run.}

\item{update}{Number of MCMC iterations between graphical displays.}

\item{seed}{seed for random number generation}

\item{use_space}{(logical) should the spatial locations be used in clustering?}

\item{use_time}{(logical) should the event times be used in clustering?}

\item{use_cats}{(logical) should the categorical crime features be used in
  clustering?}
}
\value{
(list) \code{p.equal} is the (n x n) matrix of probabilities that each pair
   of crimes was committed by the same criminal.

   If \code{plot=TRUE}, then progress plots are produced.
}
\description{
Bayesian model-based partially-supervised clustering for crime series identification
}
\examples{
# Toy dataset with 12 crimes and three criminals.

 # True IDs: criminal 1 committed crimes 1-4, criminal 2 committed crimes 5-8,
 # and criminal 3 committed crimes 9-12.
 id_true <- c(1,1,1,1,
              2,2,2,2,
              3,3,3,3)

 # Spatial locations of the crimes (the first coordinate is 0 for every crime):
 s <- c(0.8,0.9,1.1,1.2,
        1.8,1.9,2.1,2.2,
        2.8,2.9,3.1,3.2)
 s <- cbind(0,s)

 # Categorical crime features, say mode of entry (1=door, 2=other) and
 # type of residence (1=apartment, 2=other)
 Mode <- c(1,1,1,1,  #Different distribution by criminal
           1,2,1,2,
           2,2,2,2)
 Type <- c(1,2,1,2,  #Same distribution for all criminals
           1,2,1,2,
           1,2,1,2)
 Xcat <- cbind(Mode,Type)

 # Times of the crimes (not called 't', so base::t is not masked)
 crime_time <- c(1,2,3,4,
                 2,3,4,5,
                 3,4,5,6)

 # Now let's pretend we don't know the criminal for crimes 1, 4, 6, 8, and 12:
 # unsolved crimes get an ID of NA.
 id <- id_true
 id[c(1,4,6,8,12)] <- NA

 # Fit the model (nb: use much larger iters and burn on real problem).
 # Each crime time is known exactly here, so t1 and t2 are the same vector.
 fit <- crimeClust_bayes(crimeID=id, spatial=s, t1=crime_time, t2=crime_time,
                   Xcat=Xcat, maxcriminals=12,iters=500,burn=100,update=100)

 # Plot the posterior probability matrix that each pair of crimes was
 # committed by the same criminal. requireNamespace() only checks that the
 # optional 'fields' package is installed; it does not attach it, and the
 # call below is namespace-qualified.
 if(requireNamespace("fields",quietly=TRUE)){
 fields::image.plot(1:12,1:12,fit$p.equal,
            xlab="Crime",ylab="Crime",
            main="Probability crimes are from the same criminal")
 }

 # Extract the crimes with the largest posterior probability
 bayesPairs(fit$p.equal)
 bayesProb(fit$p.equal[1,])
}
\author{
Brian J. Reich
}
\references{
Reich, B. J. and Porter, M. D. (2015), Partially supervised spatiotemporal
   clustering for burglary crime series identification.
   \emph{Journal of the Royal Statistical Society: Series A (Statistics in Society)}.
   178:2, 465--480.
 \url{http://www4.stat.ncsu.edu/~reich/papers/CrimeClust.pdf}
}
\seealso{
\code{\link{bayesPairs}}
}

