% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/genDataGetPart.R
\name{genDataGetPart}
\alias{genDataGetPart}
\title{Extracting part of genetic data}
\usage{
genDataGetPart(data.in = stop("No data given!", call. = FALSE),
  design = stop("Design type must be given!"), markers, indiv.ids, rows,
  cc, sex, file.out = "my_data_part", dir.out = ".",
  overwrite = NULL, ...)
}
\arguments{
\item{data.in}{The data object (in the same format as the output of \link{genDataRead}).}

\item{design}{The design used in the study - choose from:
  \itemize{
    \item \emph{triad} - data includes genotypes of mother, father and child;
    \item \emph{cc} - classical case-control;
    \item \emph{cc.triad} - hybrid design: triads with cases and controls.
  }

Any of the following can be given to narrow down the dataset:}

\item{markers}{Numeric vector with numbers indicating which markers to choose.}

\item{indiv.ids}{Character vector giving IDs of individuals. \strong{CAUTION:}
in a standard PED file, individual IDs are not unique, so this will select
all individuals with given IDs.}

\item{rows}{Numeric vector giving the positions - this will select only these rows.}

\item{cc}{One or more values to choose based on case-control status ('cc' column).}

\item{sex}{One or more values to choose based on the 'sex' column.}

\item{file.out}{The base for the output filename (default: "my_data_part").}

\item{dir.out}{The path to the directory where the output files will be saved.}

\item{overwrite}{Whether to overwrite the output files: if \code{NULL} (default), the
user will be prompted for an answer; if \code{TRUE}, any existing files will be
overwritten automatically; if \code{FALSE}, the function will stop if the output files exist.}

\item{...}{If any additional covariate data are available in \code{data.in}, 
the user can choose based on values of these (see the Examples section).}
}
\value{
A list object with three elements:
  \itemize{
    \item \emph{cov.data} - a \code{data.frame} with covariate data (if available in
       the input file)
    \item \emph{gen.data} - a list with chunks of the genetic data; the data is divided
       column-wise, using 10,000 columns per chunk; each element of this list is a
       \link[ff]{ff} matrix
    \item \emph{aux} - a list with meta-data and important parameters.
  }
  The returned object contains only the selected subset of the data.
}
\description{
This function allows the user to extract (and save for later use) part of the
genetic data read in with \link{genDataRead}.
}
\details{
The genetic data from GWAS studies can be quite large, and thus the analysis
is time-consuming. If a user knows where they want to focus the analysis,
they can use this function to extract part of the entire dataset and use
only this part in subsequent Haplin analysis.
}
\section{Warning}{

No checks are performed when choosing a subset of the data - it is the user's
responsibility to check whether the data subset contains the correct number of individuals
(especially important when using the \code{triad} study design) and/or markers!
}

\examples{
  # All output files are written to the temporary directory of the R session,
  # so that running the examples leaves the working directory untouched.
  # The argument 'overwrite' is set to TRUE, so that existing output files
  # are overwritten automatically instead of prompting the user.
  out.dir <- tempdir()
  # Read the data:
  examples.dir <- system.file( "extdata", package = "Haplin" )
  example.file <- file.path( examples.dir, "HAPLIN.trialdata2.txt" )
  my.gen.data.read <- genDataRead( file.in = example.file, file.out = "trial_data",
    dir.out = out.dir, format = "haplin", allele.sep = "", n.vars = 2,
    cov.header = c( "smoking", "sex" ), overwrite = TRUE )
  my.gen.data.read
  # Extract part with only men:
  men.subset <- genDataGetPart( my.gen.data.read, design = "triad", sex = 1,
    dir.out = out.dir, file.out = "gen_data_men_only", overwrite = TRUE )
  men.subset
  # Extract the part with only smoking women; 'file.out' is not given here,
  # so the default base name "my_data_part" is used:
  women.smoke.subset <- genDataGetPart( my.gen.data.read, design = "triad",
    sex = 0, smoking = c( 1,2 ), dir.out = out.dir, overwrite = TRUE )
  women.smoke.subset

}
