% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/names_geo.R
\name{preproc}
\alias{preproc}
\alias{proc_zip}
\alias{proc_state}
\alias{proc_name}
\title{Preprocess Last Names and Geographic Identifiers}
\usage{
proc_zip(x)

proc_state(x)

proc_name(x, to_latin = TRUE)
}
\arguments{
\item{x}{A character vector of names or geographic identifiers to process}

\item{to_latin}{If \code{TRUE}, convert names to Latin characters only. Strongly
recommended if non-Latin characters are present, since these will not match
Census tables. However, the conversion is slightly time-consuming and so
can be disabled with this flag.}
}
\value{
A processed character vector
}
\description{
These functions are called automatically by \code{\link[=bisg]{bisg()}} but may also be
useful to call directly, especially when geographic variables are included in a
\code{\link[=birdie]{birdie()}} model.
\code{proc_zip()} and \code{proc_state()} preprocess their corresponding geographic
identifiers. States are partially matched to state names and abbreviations
and are returned as FIPS codes. ZIP codes are crosswalked to Census ZIP Code
Tabulation Areas (ZCTAs).
Missing identifiers are replaced with \code{"<none>"}.
\code{proc_name()} processes last names in accordance with Census processing rules
(\url{https://www2.census.gov/topics/genealogy/2010surnames/surnames.pdf}).
Names are converted to Latin characters (unless \code{to_latin = FALSE}),
capitalized, stripped of prefixes and suffixes, and otherwise standardized.
}
\section{Functions}{
\itemize{
\item \code{proc_zip()}: Match ZIP codes to ZCTAs and fill in missing values.

\item \code{proc_state()}: Match state names and abbreviations and fill in missing values.

\item \code{proc_name()}: Process names to a Census-standardized format.

}}
\examples{
proc_name("Smith Jr.")
proc_zip("00501")
proc_state("Washington")
}
\concept{preproc}
