% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/parse_element.R, R/parse_person.R
\name{parseElement}
\alias{parseElement}
\alias{parsePmidStatus}
\alias{parseArticleId}
\alias{parseArticle}
\alias{parsePubHistory}
\alias{parseJournal}
\alias{parsePubType}
\alias{parseMesh}
\alias{parseKeyword}
\alias{parseGrant}
\alias{parseChemical}
\alias{parseDataBank}
\alias{parseComment}
\alias{parseAbstract}
\alias{parseAuthor}
\alias{parseInvestigator}
\title{Parse elements from a PubMed XML file}
\usage{
parsePmidStatus(rawXml, filename, con = NULL, tableSuffix = NULL)

parseArticleId(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseArticle(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parsePubHistory(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseJournal(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parsePubType(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseMesh(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseKeyword(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseGrant(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseChemical(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseDataBank(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseComment(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseAbstract(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseAuthor(pmXml, dPmid, con = NULL, tableSuffix = NULL)

parseInvestigator(pmXml, dPmid, con = NULL, tableSuffix = NULL)
}
\arguments{
\item{rawXml}{An xml document obtained by loading a PubMed XML file using
\code{\link[xml2:read_xml]{xml2::read_xml()}}.}

\item{filename}{A string that will be added to a column \code{xml_filename}.}

\item{con}{Connection to the database, created using \code{\link[DBI:dbConnect]{DBI::dbConnect()}}.}

\item{tableSuffix}{String to append to the table names.}

\item{pmXml}{An xml nodeset derived from \code{rawXml}, such as that returned by
\code{parsePmidStatus()}, where each node corresponds to a PMID.}

\item{dPmid}{A data.table with one row for each node of \code{pmXml}. It should
have columns \code{pmid}, \code{version}, and possibly \code{xml_filename}.}
}
\value{
\code{parsePmidStatus()} returns a list of two objects. The first is an
xml nodeset in which each node corresponds to a PubmedArticle in the
\code{rawXml} object. The second is a data.table with columns \code{pmid}, \code{version},
\code{xml_filename}, and \code{status}, in which each row corresponds to a
PubmedArticle in the \code{rawXml} object or a deleted pmid. The \code{status} column
is parsed from the DeleteCitation and MedlineCitation sections.

The following functions return a data.table or list of data.tables with
columns from \code{dPmid} plus the columns specified.

\code{parseArticleId()}: a data.table with columns \code{id_type} and \code{id_value},
parsed from the ArticleIdList section. Only \code{id_type}s "doi"
and "pmc" are retained.

\code{parseArticle()}: a data.table with columns \code{title}, \code{pub_date}, and
\code{pub_model}, parsed from the Article section.

\code{parsePubHistory()}: a data.table with columns \code{pub_status} and \code{pub_date},
parsed from the History section.

\code{parseJournal()}: a data.table with columns \code{journal_name}, \code{journal_iso},
\code{pub_date}, \code{pub_year}, \code{pub_month}, \code{pub_day}, \code{medline_date}, \code{volume},
\code{issue}, and \code{cited_medium}, parsed from the Journal section.

\code{parsePubType()}: a data.table with columns \code{type_name} and \code{type_id},
parsed from the PublicationTypeList section.

\code{parseMesh()}: a list of three data.tables parsed mostly from the
MeshHeadingList section. The first has column \code{indexing_method} (parsed
from the MedlineCitation section). The second has columns \code{descriptor_pos},
\code{descriptor_name}, \code{descriptor_ui}, and \code{descriptor_major_topic}. The
third has columns \code{descriptor_pos}, \code{qualifier_name}, \code{qualifier_ui}, and
\code{qualifier_major_topic}.

\code{parseKeyword()}: a list of two data.tables parsed from the KeywordList
section. The first has column \code{list_owner}. The second has columns
\code{keyword_name} and \code{major_topic}.

\code{parseGrant()}: a list of two data.tables parsed from the GrantList
section. The first has column \code{complete}. The second has columns
\code{grant_id}, \code{acronym}, \code{agency}, and \code{country}.

\code{parseChemical()}: a data.table with columns \code{registry_number},
\code{substance_name}, and \code{substance_ui}, parsed from the ChemicalList section.

\code{parseDataBank()}: a data.table with columns \code{data_bank_name} and
\code{accession_number}, parsed from the DataBankList section.

\code{parseComment()}: a data.table with columns \code{ref_type} and \code{ref_pmid},
parsed from the CommentsCorrectionsList section.

\code{parseAbstract()}: a list of two data.tables parsed from the Abstract
section. The first has column \code{copyright}. The second has columns \code{text},
\code{label}, and \code{nlm_category}.

\code{parseAuthor()}: a list of five data.tables parsed from the AuthorList
section. The first is for authors and has columns \code{author_pos}, \code{last_name},
\code{fore_name}, \code{initials}, \code{suffix}, \code{valid}, \code{equal_contrib}, and
\code{collective_name}. The second is for affiliations and has columns
\code{author_pos}, \code{affiliation_pos}, and \code{affiliation}. The third is for author
identifiers and has columns \code{author_pos}, \code{source}, and \code{identifier}. The
fourth is for author affiliation identifiers and has columns \code{author_pos},
\code{affiliation_pos}, \code{source}, and \code{identifier}. The fifth is for the author
list itself and has a column \code{complete}.

\code{parseInvestigator()}: a list of four data.tables similar to the first
four returned by \code{parseAuthor()}, except that they are parsed from the
InvestigatorList section, their column names contain "investigator" instead of
"author", and the first data.table lacks columns for \code{equal_contrib} and
\code{collective_name}. The fifth data.table does not exist.
}
\description{
Elements are parsed according to the
\href{https://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html}{MEDLINE®PubMed® XML Element Descriptions and their Attributes}.
These functions should not normally be called directly, as they are called by
\code{\link[=modifyPubmedDb]{modifyPubmedDb()}}.
}
\examples{
# data.table provides the subsetting syntax used on dPmidRaw below, and xml2
# provides read_xml().
library('data.table')
library('xml2')

# Example PubMed XML file, looked up in the extdata directory of the pmparser
# package.
filename = 'pubmed20n1016.xml.gz'
rawXml = read_xml(system.file('extdata', filename, package = 'pmparser'))

# parsePmidStatus() returns a list of two objects: an xml nodeset, and a
# data.table of pmids with their status.
pmidStatusList = parsePmidStatus(rawXml, filename)
pmXml = pmidStatusList[[1L]]
dPmidRaw = pmidStatusList[[2L]]
# Keep rows whose status is not 'Deleted' and drop the status column.
# NOTE(review): presumably deleted pmids have no node in pmXml, so removing
# them leaves the one row per node that the functions below expect; confirm.
dPmid = dPmidRaw[status != 'Deleted', !'status']

# Parse each element type from the same nodeset. Results named d* are single
# data.tables, and results named *Res are lists of data.tables.
dArticleId = parseArticleId(pmXml, dPmid)
dArticle = parseArticle(pmXml, dPmid)
dJournal = parseJournal(pmXml, dPmid)
dPubType = parsePubType(pmXml, dPmid)
dPubHistory = parsePubHistory(pmXml, dPmid)
meshRes = parseMesh(pmXml, dPmid)
keywordRes = parseKeyword(pmXml, dPmid)
grantRes = parseGrant(pmXml, dPmid)
dChemical = parseChemical(pmXml, dPmid)
dDataBank = parseDataBank(pmXml, dPmid)
dComment = parseComment(pmXml, dPmid)
abstractRes = parseAbstract(pmXml, dPmid)
authorRes = parseAuthor(pmXml, dPmid)
investigatorRes = parseInvestigator(pmXml, dPmid)

}
\seealso{
\code{\link[=getCitation]{getCitation()}}, \code{\link[=modifyPubmedDb]{modifyPubmedDb()}}
}
