## ----global_options, include=FALSE--------------------------------------------
knitr::opts_chunk$set(fig.width = 6,
                      fig.height = 6,
                      fig.path = 'figures/')

## ----load, message = FALSE----------------------------------------------------
library(ropls)

## ----sacurine, message = FALSE------------------------------------------------
data(sacurine)
names(sacurine)

## ----attach_code, message = FALSE---------------------------------------------
attach(sacurine)

## ----strF---------------------------------------------------------------------
view(dataMatrix)
view(sampleMetadata)
view(variableMetadata)

## ----pca_code, eval = FALSE---------------------------------------------------
#  sacurine.pca <- opls(dataMatrix)

## ----pca_result, echo = FALSE-------------------------------------------------
sacurine.pca <- opls(dataMatrix, fig.pdfC = "none")

## ----pca_figure, echo = FALSE, fig.show = 'hold'------------------------------
plot(sacurine.pca)

## ----pca-col------------------------------------------------------------------
genderFc <- sampleMetadata[, "gender"]
plot(sacurine.pca,
     typeVc = "x-score",
     parAsColFcVn = genderFc)

## ----pca-col-personalized-----------------------------------------------------
plot(sacurine.pca,
     typeVc = "x-score",
     parAsColFcVn = genderFc,
     parLabVc = as.character(sampleMetadata[, "age"]),
     parPaletteVc = c("green4", "magenta"))

## ----plsda--------------------------------------------------------------------
sacurine.plsda <- opls(dataMatrix, genderFc)

## ----oplsda-------------------------------------------------------------------
sacurine.oplsda <- opls(dataMatrix, genderFc,
                        predI = 1, orthoI = NA)

## ----oplsda_subset, warning=FALSE---------------------------------------------
sacurine.oplsda <- opls(dataMatrix, genderFc,
                        predI = 1, orthoI = NA,
                        subset = "odd")

## ----train--------------------------------------------------------------------
trainVi <- getSubsetVi(sacurine.oplsda)
confusion_train.tb <- table(genderFc[trainVi], fitted(sacurine.oplsda))
confusion_train.tb

## ----test---------------------------------------------------------------------
confusion_test.tb <- table(genderFc[-trainVi],
                           predict(sacurine.oplsda, dataMatrix[-trainVi, ]))
confusion_test.tb

## ----overfit, echo = FALSE----------------------------------------------------
set.seed(123)
obsI <- 20
featVi <- c(2, 20, 200)
featMaxI <- max(featVi)
xRandMN <- matrix(runif(obsI * featMaxI), nrow = obsI)
yRandVn <- sample(c(rep(0, obsI / 2), rep(1, obsI / 2)))

layout(matrix(1:4, nrow = 2, byrow = TRUE))
for (featI in featVi) {
  randPlsi <- opls(xRandMN[, 1:featI], yRandVn,
                   predI = 2,
                   permI = ifelse(featI == featMaxI, 100, 0),
                   fig.pdfC = "none",
                   info.txtC = "none")
  plot(randPlsi, typeVc = "x-score",
       parCexN = 1.3, parTitleL = FALSE,
       parCexMetricN = 0.5)
  mtext(featI/obsI, font = 2, line = 2)
  if (featI == featMaxI)
    plot(randPlsi,
         typeVc = "permutation",
         parCexN = 1.3)
}
mtext(" obs./feat. ratio:",
      adj = 0, at = 0, font = 2,
      line = -2, outer = TRUE)

## ----vip----------------------------------------------------------------------
ageVn <- sampleMetadata[, "age"]

pvaVn <- apply(dataMatrix, 2,
               function(feaVn) cor.test(ageVn, feaVn)[["p.value"]])

vipVn <- getVipVn(opls(dataMatrix, ageVn,
                       predI = 1, orthoI = NA,
                       fig.pdfC = "none"))

quantVn <- qnorm(1 - pvaVn / 2)
rmsQuantN <- sqrt(mean(quantVn^2))

opar <- par(font = 2, font.axis = 2, font.lab = 2,
            las = 1,
            mar = c(5.1, 4.6, 4.1, 2.1),
            lwd = 2, pch = 16)

plot(pvaVn, vipVn,
     col = "red",
     pch = 16,
     xlab = "p-value", ylab = "VIP", xaxs = "i", yaxs = "i")

box(lwd = 2)

curve(qnorm(1 - x / 2) / rmsQuantN, 0, 1, add = TRUE, col = "red", lwd = 3)

abline(h = 1, col = "blue")
abline(v = 0.05, col = "blue")

par(opar)

## ----get_se-------------------------------------------------------------------
data(sacurine)
sac.se <- sacurine[["se"]]

## ----se_plsda, echo = TRUE, results = "hide"----------------------------------
sac.se <- opls(sac.se, "gender")

## ----se_updated, message = FALSE----------------------------------------------
library(SummarizedExperiment)
SummarizedExperiment::colData(sac.se)
SummarizedExperiment::rowData(sac.se)

## ----se_model-----------------------------------------------------------------
sac_opls.ls <- getOpls(sac.se)
names(sac_opls.ls)
plot(sac_opls.ls[["gender_PLSDA"]], typeVc = "x-score")

## ----get_eset-----------------------------------------------------------------
data("sacurine")
sac.set <- sacurine[["eset"]]
# viewing the ExpressionSet
# ropls::view(sac.set)

## ----eset_plsda, echo = TRUE, results = "hide"--------------------------------
# performing the PLS-DA
sac.plsda <- opls(sac.set, "gender")

## ----eset_updated-------------------------------------------------------------
sac.set <- getEset(sac.plsda)
library(Biobase)
head(Biobase::pData(sac.set))

## ----nci60_mae, message = FALSE-----------------------------------------------
data("NCI60")
nci.mae <- NCI60[["mae"]]

## ----mae_plsda----------------------------------------------------------------
nci.mae <- opls(nci.mae, "cancer",
                predI = 2, fig.pdfC = "none")

## ----mae_updated--------------------------------------------------------------
SummarizedExperiment::colData(nci.mae[["agilent"]])

## ----mae_model----------------------------------------------------------------
mae_opls.ls <- getOpls(nci.mae)
names(mae_opls.ls)
plot(mae_opls.ls[["agilent"]][["cancer_PLSDA"]], typeVc = "x-score")

## ----get_mds------------------------------------------------------------------
data("NCI60")
nci.mds <- NCI60[["mds"]]

## ----mds_plsda, echo = TRUE, results = "hide"---------------------------------
# Restricting to the "agilent" and "hgu95" datasets
nci.mds <- nci.mds[, c("agilent", "hgu95")]
# Restricting to the 'ME' and 'LE' cancer types
library(Biobase)
sample_names.vc <- Biobase::sampleNames(nci.mds[["agilent"]])
cancer_type.vc <- Biobase::pData(nci.mds[["agilent"]])[, "cancer"]
nci.mds <- nci.mds[sample_names.vc[cancer_type.vc %in% c("ME", "LE")], ]
# Building PLS-DA models for the cancer type
nci.plsda <- ropls::opls(nci.mds, "cancer", predI = 2)

## ----mds_getmset--------------------------------------------------------------
nci.mds <- ropls::getMset(nci.plsda)

## ----phenomis, eval = FALSE---------------------------------------------------
#  install.packages("devtools")
#  devtools::install_github("https://github.com/odisce/phenomis")
#  data(sacurine)
#  sacurine.se <- sacurine[["se"]]
#  library(phenomis)
#  phenomis::writing(sacurine.se, dir.c = getwd())

## ----detach-------------------------------------------------------------------
detach(sacurine)

## ----se_build-----------------------------------------------------------------
# Preparing the data (matrix) and sample and variable metadata (data frames):
data(sacurine, package = "ropls")
data.mn <- sacurine[["dataMatrix"]] # matrix: samples x variables
samp.df <- sacurine[["sampleMetadata"]] # data frame: samples x sample metadata
feat.df <- sacurine[["variableMetadata"]] # data frame: features x feature metadata

# Creating the SummarizedExperiment (package SummarizedExperiment)
library(SummarizedExperiment)
sac.se <- SummarizedExperiment(assays = list(sacurine = t(data.mn)),
                               colData = samp.df,
                               rowData = feat.df)
# note that colData and rowData main format is DataFrame, but data frames are accepted when building the object
stopifnot(validObject(sac.se))

# Viewing the SummarizedExperiment
# ropls::view(sac.se)

## ----mae_build_load-----------------------------------------------------------
data("NCI60_4arrays", package = "omicade4")

## ----mae_build, message = FALSE, warning=FALSE--------------------------------
library(MultiAssayExperiment)
# Building the individual SummarizedExperiment instances
experiment.ls <- list()
sampleMap.ls <- list()
for (set.c in names(NCI60_4arrays)) {
  # Getting the data and metadata
  assay.mn <- as.matrix(NCI60_4arrays[[set.c]])
  coldata.df <- data.frame(row.names = colnames(assay.mn),
                           .id = colnames(assay.mn),
                           stringsAsFactors = FALSE) # the 'cancer' information will be stored in the main colData of the mae, not the individual SummarizedExperiments
  rowdata.df <- data.frame(row.names = rownames(assay.mn),
                           name = rownames(assay.mn),
                           stringsAsFactors = FALSE)
  # Building the SummarizedExperiment
  assay.ls <- list(se = assay.mn)
  names(assay.ls) <- set.c
  se <- SummarizedExperiment(assays = assay.ls,
                             colData = coldata.df,
                             rowData = rowdata.df)
  experiment.ls[[set.c]] <- se
  sampleMap.ls[[set.c]] <- data.frame(primary = colnames(se),
                                      colname = colnames(se)) # both datasets use identical sample names
}
sampleMap <- listToMap(sampleMap.ls)

# The sample metadata are stored in the colData data frame (both datasets have the same samples)
stopifnot(identical(colnames(NCI60_4arrays[[1]]),
                    colnames(NCI60_4arrays[[2]])))
sample_names.vc <- colnames(NCI60_4arrays[[1]])
colData.df <- data.frame(row.names = sample_names.vc,
                         cancer = substr(sample_names.vc, 1, 2))

nci.mae <- MultiAssayExperiment(experiments = experiment.ls,
                                colData = colData.df,
                                sampleMap = sampleMap)

stopifnot(validObject(nci.mae))

## ----eset_build, message = FALSE, warning = FALSE-----------------------------
# Preparing the data (matrix) and sample and variable metadata (data frames):
data(sacurine)
data.mn <- sacurine[["dataMatrix"]] # matrix: samples x variables
samp.df <- sacurine[["sampleMetadata"]] # data frame: samples x sample metadata
feat.df <- sacurine[["variableMetadata"]] # data frame: features x feature metadata
# Creating the ExpressionSet (package Biobase)
sac.set <- Biobase::ExpressionSet(assayData = t(data.mn))
Biobase::pData(sac.set) <- samp.df
Biobase::fData(sac.set) <- feat.df
stopifnot(validObject(sac.set))
# Viewing the ExpressionSet
# ropls::view(sac.set)

## ----mset_build_load----------------------------------------------------------
data("NCI60_4arrays", package = "omicade4")

## ----mset_build, message = FALSE, warning=FALSE-------------------------------
library(MultiDataSet)
# Creating the MultiDataSet instance
nci.mds <- MultiDataSet::createMultiDataSet()
# Adding the two datasets as ExpressionSet instances
for (set.c in names(NCI60_4arrays)) {
  # Getting the data
  expr.mn <- as.matrix(NCI60_4arrays[[set.c]])
  pdata.df <- data.frame(row.names = colnames(expr.mn),
                        cancer = substr(colnames(expr.mn), 1, 2),
                        stringsAsFactors = FALSE)
  fdata.df <- data.frame(row.names = rownames(expr.mn),
                        name = rownames(expr.mn),
                        stringsAsFactors = FALSE)
  # Building the ExpressionSet
  eset <- Biobase::ExpressionSet(assayData = expr.mn,
                                 phenoData = new("AnnotatedDataFrame",
                                                 data = pdata.df),
                                 featureData = new("AnnotatedDataFrame",
                                                   data = fdata.df),
                                 experimentData = new("MIAME",
                                                      title = set.c))
  # Adding to the MultiDataSet
  nci.mds <- MultiDataSet::add_eset(nci.mds, eset, dataset.type = set.c,
                                     GRanges = NA, warnings = FALSE)
}
stopifnot(validObject(nci.mds))

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()