## ----eval=TRUE,echo=FALSE,out.width="100%",fig.cap="Predictor design."--------
knitr::opts_chunk$set(crop=NULL)
knitr::include_graphics("vignette_psn.png")

## ----eval=TRUE,echo=FALSE,out.width="80%",fig.cap="Workflow."-----------------
knitr::include_graphics("vignette_workflow.png")

## ----eval=TRUE----------------------------------------------------------------
suppressWarnings(suppressMessages(require(netDx)))

## ----eval=TRUE----------------------------------------------------------------
suppressMessages(library(curatedTCGAData))

## ----eval=TRUE----------------------------------------------------------------
curatedTCGAData(diseaseCode="BRCA", assays="*",dry.run=TRUE,version="1.1.38")

## ----eval=TRUE----------------------------------------------------------------
brca <- suppressMessages(
   curatedTCGAData("BRCA",
               c("mRNAArray","miRNA*","Methylation_methyl27*"),
	dry.run=FALSE,version="1.1.38"))

## ---- class.source="codeblock",eval=TRUE--------------------------------------
brca

## ---- class.source="codeblock",eval=TRUE--------------------------------------
summary(assays(brca))

## ---- class.source="codeblock",eval=TRUE--------------------------------------
names(assays(brca))

## ---- class.source="codeblock",eval=TRUE--------------------------------------
mir <- assays(brca)[["BRCA_miRNASeqGene-20160128"]]
head(mir[,1:5])

## ---- class.source="codeblock",eval=TRUE--------------------------------------
pheno <- colData(brca)
colnames(pheno)[1:20]
head(pheno[,1:5])

## ----eval=TRUE----------------------------------------------------------------
source("prepare_data.R")
brca <- prepareData(brca,setBinary=TRUE)

## ----eval=TRUE----------------------------------------------------------------
pID <- colData(brca)$patientID
colData(brca)$ID <- pID

## ----eval=TRUE----------------------------------------------------------------
set.seed(123)
dsets <- subsampleValidationData(brca,pctValidation=0.1)
brca <- dsets$trainMAE
holdout <- dsets$validationMAE

## ----eval=TRUE----------------------------------------------------------------
groupList <- list()

# genes in mRNA data are grouped by pathways
pathFile <- sprintf("%s/extdata/pathway_ex3.gmt", path.package("netDx"))
pathList <- suppressMessages(readPathways(pathFile))
groupList[["BRCA_mRNAArray-20160128"]] <- pathList

## ---- class.source="codeblock",eval=TRUE--------------------------------------
summary(groupList)

## ---- class.source="codeblock",eval=TRUE--------------------------------------
names(groupList[["BRCA_mRNAArray-20160128"]])

## ----eval=TRUE----------------------------------------------------------------
length(groupList[["BRCA_mRNAArray-20160128"]][[1]])

## ----eval=TRUE----------------------------------------------------------------
head(groupList[["BRCA_mRNAArray-20160128"]][[1]])

## ----eval=TRUE----------------------------------------------------------------
groupList[["clinical"]] <- list(
    age="patient.age_at_initial_pathologic_diagnosis",
	  stage="STAGE"
)

## ----eval=TRUE----------------------------------------------------------------
tmp <- list(rownames(experiments(brca)[[1]]));
names(tmp) <- names(brca)[1]
groupList[[names(brca)[[1]]]] <- tmp

tmp <- list(rownames(experiments(brca)[[2]]));
names(tmp) <- names(brca)[2]
groupList[[names(brca)[2]]] <- tmp

tmp <- list(rownames(experiments(brca)[[3]]));
names(tmp) <- names(brca)[3]
groupList[[names(brca)[3]]] <- tmp

## ----eval=TRUE----------------------------------------------------------------
sims <- list(
  a="pearsonCorr",
  b="normDiff",
  c="pearsonCorr",
  d="pearsonCorr"
  )

# map layer names to sims
names(sims) <- names(groupList)

## ----eval=TRUE----------------------------------------------------------------
nco <- round(parallel::detectCores()*0.75) # use 75% available cores
message(sprintf("Using %i of %i cores", nco, parallel::detectCores()))

outDir <- paste(tempdir(),"pred_output",sep=getFileSep()) # use absolute path
if (file.exists(outDir)) unlink(outDir,recursive=TRUE)
numSplits <- 2L

## ----eval=TRUE----------------------------------------------------------------
t0 <- Sys.time()
model <- suppressMessages(
buildPredictor(
	dataList=brca,			## your data
	groupList=groupList,	## grouping strategy
	sims=sims,
	outDir=outDir, 			## output directory
	trainProp=0.8,			## pct of samples to use to train model in
							    ## each split
	numSplits=2L,			 ## number of train/test splits
 	featSelCutoff=1L,		## threshold for calling something
							    ## feature-selected
	featScoreMax=2L,	## max score for feature selection
 numCores=nco,			  ## set higher for parallelizing
 debugMode=FALSE,
 keepAllData=FALSE,	    ## set to TRUE for debugging
 logging="none"     ## set to "default" for messages
  ))
t1 <- Sys.time()
print(t1-t0)

## ----eval=TRUE----------------------------------------------------------------
results <- getResults(
    model,
    unique(colData(brca)$STATUS),
    featureSelCutoff=2L,
    featureSelPct=0.50
  )

## ---- class.source="codeblock",eval=TRUE--------------------------------------
summary(results)

## ---- class.source="codeblock",eval=TRUE--------------------------------------
results$performance

## ---- class.source="codeblock", eval=TRUE-------------------------------------
results$featureScores

## ---- class.source="codeblock",eval=TRUE--------------------------------------
confMat <- confusionMatrix(model)

## ---- class.source="codeblock",eval=TRUE--------------------------------------
results$selectedFeatures

## ----eval=TRUE----------------------------------------------------------------
outDir <- paste(tempdir(), randAlphanumString(), 
  sep = getFileSep())
if (file.exists(outDir)) unlink(outDir,recursive=TRUE)
dir.create(outDir)

predModel <- suppressMessages(
  predict(trainMAE=brca, testMAE=holdout, 
    groupList=groupList, 
    selectedFeatures=results$selectedFeatures, 
    sims=sims,
    outDir=outDir, verbose = FALSE)
)

## ----eval=TRUE----------------------------------------------------------------
perf <- getPerformance(predModel, 
  unique(colData(brca)$STATUS))
summary(perf)

## ----eval=TRUE----------------------------------------------------------------
plotPerf_multi(list(perf$rocCurve),
  plotTitle = sprintf(
    "BRCA Validation: %i samples", 
    nrow(colData(holdout))))
plotPerf_multi(list(perf$prCurve), 
  plotType = "PR",
  plotTitle = sprintf(
    "BRCA Validation: %i samples", 
    nrow(colData(holdout))))


## ---- class.source="codeblock",fig.width=8,fig.height=8, eval=TRUE------------
## this call doesn't work in Rstudio; for now we've commented this out and saved the PSN file. 
psn <- suppressMessages(getPSN(
  brca,
  groupList,
  sims=sims,
  selectedFeatures=results$selectedFeatures
))

## -----------------------------------------------------------------------------
library(Rtsne)
tsne <- tSNEPlotter(
	psn$patientSimNetwork_unpruned, 
	colData(brca)
	)

## ----eval=TRUE----------------------------------------------------------------
sessionInfo()