## ----include = FALSE----------------------------------------------------------
# Chunk options applied to the whole vignette: collapsed chunk output, with
# "#>" as the prefix on output lines.
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

# Locate a data file used by this vignette.
#
# The path components given in `...` are joined with file.path() and looked
# up, in this order, in: the path as given, "vignettes/", "inst/extdata/",
# "<PWD>/inst/extdata/", and the "extdata" and "doc" directories of the
# oncoPredict package as resolved by system.file().
#
# Returns the first candidate path that exists. Stops with an error (without
# the call in the message) when none of the candidates exists.
vignette_file <- function(...) {
  requested <- file.path(...)
  search_paths <- c(
    requested,
    file.path("vignettes", ...),
    file.path("inst", "extdata", ...),
    file.path(Sys.getenv("PWD"), "inst", "extdata", ...),
    system.file("extdata", ..., package = "oncoPredict"),
    system.file("doc", ..., package = "oncoPredict")
  )

  # system.file() yields "" when it finds nothing, so empty strings are
  # rejected in addition to paths that do not exist.
  is_hit <- nzchar(search_paths) & file.exists(search_paths)
  if (!any(is_hit)) {
    stop("Could not find vignette file: ", requested, call. = FALSE)
  }

  # Earlier candidates take precedence over later ones.
  search_paths[[which(is_hit)[1L]]]
}

## ----setup--------------------------------------------------------------------
library(oncoPredict)

# This vignette demonstrates how to prepare predicted drug response and mutation
# data for mutation-based IDWAS with idwas(cnv = FALSE).

# Determine the parameters of the idwas() function...
# Set the drug_prediction parameter.
# Make sure rownames() are samples, and colnames() are drugs. Also make sure
# this data is a data frame (read.table() already returns one).
drug_prediction <- read.table(
  vignette_file("DrugPredictions.txt"),
  header = TRUE,
  row.names = 1
)

# In this example, replace '.' with '-' so the TCGA sample identifiers match the
# format used in the mutation data. The dots were presumably introduced by
# read.table()'s column-name checking -- confirm against the input file.
colnames(drug_prediction) <- gsub(
  ".", "-", colnames(drug_prediction),
  fixed = TRUE
)

# Make sure the sample identifiers in the 'drug prediction' data are of similar
# form as the sample identifiers in the 'data' parameter: here the first two
# characters of every identifier are dropped.
cols <- colnames(drug_prediction)
colnames(drug_prediction) <- substring(cols, 3, nchar(cols))

# Transpose so the identifiers cleaned above become the row names. t() returns
# a matrix, so convert the result back to a data frame.
drug_prediction <- as.data.frame(t(drug_prediction))


## ----mutation-download, eval=FALSE--------------------------------------------
# library(TCGAbiolinks)
# 
# query_maf <- GDCquery(project = "TCGA-GBM",
#                       data.category = "Simple Nucleotide Variation",
#                       access = "open",
#                       data.type = "Simple somatic mutation",
#                       legacy = TRUE)
# 
# GDCdownload(query_maf)
# maf <- GDCprepare(query_maf)

## ----mutation-formatting, eval=FALSE------------------------------------------
# #Make sure this data is a data frame with mutation annotations in columns.
# #For idwas(cnv=FALSE), the data should include Variant_Classification,
# #Hugo_Symbol, and Tumor_Sample_Barcode.
# data<-as.data.frame(maf)
# samps<-data$Tumor_Sample_Barcode
# data$Tumor_Sample_Barcode<-substr(samps,1,nchar(samps)-12) #Make sure these sample ids are of the same form as the sample ids in your prediction data.
# 
# #Set n, the number of samples in which a mutation must occur. The default is 10.
# n=10
# 
# #Indicate whether or not you would like to test CNA amplification data. If TRUE, you will test CNA amplifications. If FALSE, you will test mutation data.
# cnv=FALSE