% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/semantic_enrich.R
\name{metavariable_agg}
\alias{metavariable_agg}
\title{Aggregate Data by Metavariable}
\usage{
metavariable_agg(graph, data, label_attr = "name", normalize_vals = TRUE)
}
\arguments{
\item{graph}{Graph containing ontological and dataset nodes. Must be in
\code{\link[tidygraph:tidygraph]{tidygraph}} format or coercible to this
format. Must have been processed using \code{\link{metavariable_info}}.}

\item{data}{Numeric data frame or matrix containing variables which are also
in \code{graph}.}

\item{label_attr}{Node attribute containing labels used for column names when
creating metavariable aggregations. Default: \code{"name"}}

\item{normalize_vals}{Should values be normalized before aggregation?
Default: \code{TRUE}}
}
\value{
\code{data} with semantic aggregations derived from common
  ontological ancestry (metavariables) appended as new columns, each
  prefixed with \code{MV_} and suffixed with its aggregation function
  (e.g. \code{_SUM}).
}
\description{
Variables in a numeric data frame are aggregated into metavariables via
their most informative common ancestors identified in an ontological graph
object (see \code{\link{metavariable_info}}). Metavariables are appended to
the data frame.
}
\details{
Metavariables are created from the aggregation of data variables via their
most informative common ancestor (expected to have been calculated by
\code{\link{metavariable_info}}). Metavariables are labelled using the
syntax: \code{MV_[label_attr]_[Aggregation function]}. The data variables are
aggregated row-wise by their maximum, minimum, mean, sum, and product.
Metavariables with zero entropy (no information) are not appended to the
data. See the Examples section for where this function should be applied in
the semantic enrichment workflow.
}
\note{
A warning may be shown regarding the '.add' argument being deprecated.
  This is believed to be an issue with
  \code{\link[tidygraph:tidygraph]{tidygraph}} which may be resolved in a
  future release: \url{https://github.com/thomasp85/tidygraph/issues/131}.
  Another warning may be shown regarding the 'neimode' argument being
  deprecated. This is also believed to be an issue with
  \code{\link[tidygraph:tidygraph]{tidygraph}} which may be resolved in a
  future release: \url{https://github.com/thomasp85/tidygraph/issues/156}.
  These warning messages are not believed to have an effect on the
  functionality of 'eHDPrep'.
}
\examples{
# Attach magrittr (for the pipe operator) and dplyr. library() stops with an
# error if a package is missing, whereas require() would only warn and let
# the example fail later at the first pipe.
library(magrittr)
library(dplyr)

# load the example datasets used below
data(example_ontology)
data(example_mapping_file)
data(example_data)

# define datatypes: one row per variable name with its datatype label
# (passed to apply_quality_ctrl() below)
tibble::tribble(~"var", ~"datatype",
"patient_id", "id",
"tumoursize", "numeric",
"t_stage", "ordinal_tstage",
"n_stage", "ordinal_nstage",
"diabetes_merged", "character",
"hypertension", "factor",
"rural_urban", "factor",
"marital_status", "factor",
"SNP_a", "genotype",
"SNP_b", "genotype",
"free_text", "freetext") -> data_types

# create post-QC data: merge the two diabetes columns, then apply quality
# control (messages suppressed) and store the result as post_qc_data
example_data \%>\%
  merge_cols(diabetes_type, diabetes, "diabetes_merged", rm_in_vars = TRUE) \%>\%
  apply_quality_ctrl(patient_id, data_types,
                     bin_cats = c("No" = "Yes", "rural" = "urban"),
                     to_numeric_matrix = TRUE) \%>\%
  suppressMessages() ->
  post_qc_data

# minimal example on the first ten rows and first four columns of the
# post-QC data, using a subset of the ontology and mapping file:
dplyr::slice(example_ontology, 1:7,24) \%>\%
   join_vars_to_ontol(example_mapping_file[1:3,], root = "root") \%>\%
   metavariable_info() \%>\%
   metavariable_agg(post_qc_data[1:10,1:4]) -> res
# see Note section of documentation for information on possible warnings.

# summary of result:
tibble::glimpse(res)

\donttest{
# full example on the complete ontology, mapping file, and post-QC data:
example_ontology \%>\%
   join_vars_to_ontol(example_mapping_file, root = "root") \%>\%
   metavariable_info() \%>\%
   metavariable_agg(post_qc_data) -> res
# see Note section of documentation for information on possible warnings.

# summary of result:
tibble::glimpse(res)
}
}
\seealso{
Other semantic enrichment functions: 
\code{\link{join_vars_to_ontol}()},
\code{\link{metavariable_info}()},
\code{\link{metavariable_variable_descendants}()}
}
\concept{semantic enrichment functions}
