## ----style, echo = FALSE, results = 'asis'--------------------------------------------------------
BiocStyle::markdown()
options(width = 100, max.print = 1000)
knitr::opts_chunk$set(
    eval = as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache = as.logical(Sys.getenv("KNITR_CACHE", "TRUE")), 
    tidy.opts = list(width.cutoff = 100), tidy = FALSE)

## ----setting, eval=TRUE, echo=FALSE---------------------------------------------------------------
if (file.exists("spr_project")) unlink("spr_project", recursive = TRUE)
is_win <- Sys.info()[['sysname']] != "Windows"

## ----load_library, eval=TRUE, include=FALSE-------------------------------------------------------
suppressPackageStartupMessages({
    library(systemPipeR)
})

## ----utilities, eval=TRUE, warning= FALSE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Important functionalities of systemPipeR. (A) Illustration of workflow design concepts, and (B) examples of visualization functionalities for NGS data.", warning=FALSE----
knitr::include_graphics("images/utilities.png")

## ----sysargslistImage, warning= FALSE, eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Workflow management class. Workflows are defined and managed by the `SYSargsList` (`SAL`) control class. Components of `SAL` include `SYSargs2` and/or `LineWise` for defining CL- and R-based workflow steps, respectively. The former are constructed from a `targets` and two CWL *param* files, and the latter comprises mainly R code.", warning=FALSE----

knitr::include_graphics("images/SYSargsList.png")

## ----sprandCWL, warning=FALSE, eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Parameter files. Illustration how the different fields in cwl, yml and targets files are connected to assemble command-line calls, here for 'Hello World' example.", warning=FALSE----

knitr::include_graphics("images/SPR_CWL_hello.png")

## ----general, warning= FALSE, eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Overview of `systemPipeR` workflows management instances. (A) A typical analysis workflow requires multiple software tools (red), metadata for describing the input (green) and output data, and analysis reports for interpreting the results (purple). B) The environment provides utilities for designing and building workflows containing R and/or command-line steps, for managing the workflow runs. C) Options are provided to execute single or multiple workflow steps. This includes a high level of scalability, functionalities for checkpointing, and generating of technical and scientific reports.", warning=FALSE----

knitr::include_graphics("images/general.png")

## ----install, eval=FALSE--------------------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE)) install.packages("BiocManager")
# BiocManager::install("systemPipeR")
# BiocManager::install("systemPipeRdata")

## ----eval=FALSE-----------------------------------------------------------------------------------
# systemPipeRdata::genWorkenvir(workflow = "rnaseq")
# setwd("rnaseq")

## ----eval=FALSE-----------------------------------------------------------------------------------
# library(systemPipeR)
# # Initialize workflow project
# sal <- SPRproject()
# ## Creating directory '/home/myuser/systemPipeR/rnaseq/.SPRproject'
# ## Creating file '/home/myuser/systemPipeR/rnaseq/.SPRproject/SYSargsList.yml'
# sal <- importWF(sal, file_path = "systemPipeRNAseq.Rmd") # import into sal the WF steps defined by chosen Rmd file
# 
# ## The following print statements, issued during the import, are shortened for brevity
# ## Import messages for first 3 of 20 steps total
# ## Parse chunk code
# ## Now importing step 'load_SPR'
# ## Now importing step 'preprocessing'
# ## Now importing step 'trimming'
# ## Now importing step '...'
# ## ...
# 
# ## Now check if required CL tools are installed
# ## Messages for 4 of 7 CL tools total
# ##        step_name         tool in_path
# ## 1       trimming  trimmomatic    TRUE
# ## 2   hisat2_index hisat2-build    TRUE
# ## 3 hisat2_mapping       hisat2    TRUE
# ## 4 hisat2_mapping     samtools    TRUE
# ## ...

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal
# ## Instance of 'SYSargsList':
# ##     WF Steps:
# ##        1. load_SPR --> Status: Pending
# ##        2. preprocessing --> Status: Pending
# ##            Total Files: 36 | Existing: 0 | Missing: 36
# ##          2.1. preprocessReads-pe
# ##              cmdlist: 18 | Pending: 18
# ##        3. trimming --> Status: Pending
# ##            Total Files: 72 | Existing: 0 | Missing: 72
# ##        4. - 20. not shown here for brevity

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal <- runWF(sal)

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal

## ----wf-status-image, warning=FALSE, eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Status check of workflow. The run status flags of each workflow step are given in its summary view.", warning=FALSE----

knitr::include_graphics("images/runwf.png")

## ----eval=FALSE-----------------------------------------------------------------------------------
# plotWF(sal)

## ----rnaseq-toplogy, eval=TRUE, warning= FALSE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Toplogy graph of RNA-Seq workflow.", warning=FALSE----
knitr::include_graphics("images/plotWF.png")

## ----eval=FALSE-----------------------------------------------------------------------------------
# # Scietific report
# sal <- renderReport(sal)
# rmarkdown::render("systemPipeRNAseq.Rmd", clean = TRUE, output_format = "BiocStyle::html_document")
# 
# # Technical (log) report
# sal <- renderLogs(sal)

## ----dir, eval=TRUE, echo=FALSE, warning= FALSE, out.width="100%", fig.align = "center", fig.cap= "Directory structure of workflows.", warning=FALSE----
knitr::include_graphics("images/spr_project.png")  

## ----targetsSE, eval=TRUE-------------------------------------------------------------------------
targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR") 
showDF(read.delim(targetspath, comment.char = "#"))

## ----targetsPE, eval=TRUE-------------------------------------------------------------------------
targetspath <- system.file("extdata", "targetsPE.txt", package = "systemPipeR")
showDF(read.delim(targetspath, comment.char = "#"))

## ----comment_lines, echo=TRUE---------------------------------------------------------------------
readLines(targetspath)[1:4]

## ----targetscomp, eval=TRUE-----------------------------------------------------------------------
readComp(file = targetspath, format = "vector", delim = "-")

## ----targetsFig, eval=TRUE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "_`systemPipeR`_ automatically creates the downstream `targets` files based on the previous steps outfiles. A) Usually, users provide the initial `targets` files, and this step will generate some outfiles, as demonstrated on B. Then, those files are used to build the new `targets` files as inputs in the next step. _`systemPipeR`_ (C) manages this connectivity among the steps automatically for the users.", warning=FALSE----
knitr::include_graphics("images/targets_con.png")  

## ----cleaning1, eval=TRUE, include=FALSE----------------------------------------------------------
if (file.exists(".SPRproject")) unlink(".SPRproject", recursive = TRUE)
## NOTE: Removes previous project created in the quick-start section

## ----SPRproject1a, eval=FALSE---------------------------------------------------------------------
# sal <- SPRproject()

## ----SPRproject1, eval=TRUE-----------------------------------------------------------------------
sal <- SPRproject(projPath = getwd(), overwrite = TRUE) 

## ----SPRproject_dir, eval=FALSE-------------------------------------------------------------------
# sal <- SPRproject(data = "data", param = "param", results = "results")

## ----SPRproject_logs, eval=FALSE------------------------------------------------------------------
# sal <- SPRproject(logs.dir= ".SPRproject", sys.file=".SPRproject/SYSargsList.yml")

## ----SPRproject_env, eval=FALSE-------------------------------------------------------------------
# sal <- SPRproject(envir = new.env())

## ----projectInfo, eval=TRUE-----------------------------------------------------------------------
sal
projectInfo(sal)

## ----length, eval=TRUE----------------------------------------------------------------------------
length(sal)

## ----sal_check, eval=TRUE-------------------------------------------------------------------------
sal

## ----firstStep_R, eval=TRUE-----------------------------------------------------------------------
appendStep(sal) <- LineWise(code = {
                              mapply(function(x, y) write.csv(x, y),
                                     split(iris, factor(iris$Species)),
                                     file.path("results", paste0(names(split(iris, factor(iris$Species))), ".csv"))
                                     ) 
                            },
                            step_name = "export_iris")

## ----show, eval=TRUE------------------------------------------------------------------------------
sal

## ----codeLine, eval=TRUE--------------------------------------------------------------------------
codeLine(sal)

## ----gzip_secondStep, eval=TRUE-------------------------------------------------------------------
targetspath <- system.file("extdata/cwl/gunzip", "targets_gunzip.txt", package = "systemPipeR")
appendStep(sal) <- SYSargsList(step_name = "gzip", 
                      targets = targetspath, dir = TRUE,
                      wf_file = "gunzip/workflow_gzip.cwl", input_file = "gunzip/gzip.yml",
                      dir_path = system.file("extdata/cwl", package = "systemPipeR"),
                      inputvars = c(FileName = "_FILE_PATH_", SampleName = "_SampleName_"), 
                      dependency = "export_iris")

## -------------------------------------------------------------------------------------------------
sal

## -------------------------------------------------------------------------------------------------
cmdlist(sal, step = "gzip")
# cmdlist(sal, step = "gzip", targets=c("SE"))

## -------------------------------------------------------------------------------------------------
# outfiles(sal) # output files of all steps in sal
outfiles(sal)['gzip'] # output files of 'gzip' step
# colnames(outfiles(sal)$gzip) # returns column name passed on to `inputvars`

## ----gunzip, eval=TRUE----------------------------------------------------------------------------
appendStep(sal) <- SYSargsList(step_name = "gunzip", 
                      targets = "gzip", dir = TRUE,
                      wf_file = "gunzip/workflow_gunzip.cwl", input_file = "gunzip/gunzip.yml",
                      dir_path = system.file("extdata/cwl", package = "systemPipeR"),
                      inputvars = c(gzip_file = "_FILE_PATH_", SampleName = "_SampleName_"), 
                      rm_targets_col = "FileName", 
                      dependency = "gzip")

## -------------------------------------------------------------------------------------------------
sal

## ----targetsWF_3, eval=TRUE-----------------------------------------------------------------------
targetsWF(sal['gunzip'])

## ----outfiles_2, eval=TRUE------------------------------------------------------------------------
outfiles(sal['gunzip'])

## ----eval=TRUE------------------------------------------------------------------------------------
cmdlist(sal["gunzip"], targets = 1)

## ----getColumn, eval=TRUE-------------------------------------------------------------------------
getColumn(sal, step = "gunzip", 'outfiles')

## ----iris_stats, eval=TRUE------------------------------------------------------------------------
appendStep(sal) <- LineWise(code = {
                    df <- lapply(getColumn(sal, step = "gunzip", 'outfiles'), function(x) read.delim(x, sep = ",")[-1])
                    df <- do.call(rbind, df)
                    stats <- data.frame(cbind(mean = apply(df[,1:4], 2, mean), sd = apply(df[,1:4], 2, sd)))
                    stats$size <- rownames(stats)
                    
                    plot <- ggplot2::ggplot(stats, ggplot2::aes(x = size, y = mean, fill = size)) + 
                      ggplot2::geom_bar(stat = "identity", color = "black", position = ggplot2::position_dodge()) +
                      ggplot2::geom_errorbar(ggplot2::aes(ymin = mean-sd, ymax = mean+sd), width = .2, position = ggplot2::position_dodge(.9)) 
                    },
                    step_name = "iris_stats", 
                    dependency = "gzip")

## -------------------------------------------------------------------------------------------------
sal

## ----importWF_rmd, eval=TRUE----------------------------------------------------------------------
sal_rmd <- SPRproject(logs.dir = ".SPRproject_rmd") 

sal_rmd <- importWF(sal_rmd, 
                file_path = system.file("extdata", "spr_simple_wf.Rmd", package = "systemPipeR"))

## ----importWF_details, eval=FALSE-----------------------------------------------------------------
# sal_rmd
# stepsWF(sal_rmd)
# dependency(sal_rmd)
# cmdlist(sal_rmd)
# codeLine(sal_rmd)
# targetsWF(sal_rmd)
# statusWF(sal_rmd)

## ----fromFile_example_rules_cmd, eval=FALSE-------------------------------------------------------
# targetspath <- system.file("extdata/cwl/example/targets_example.txt", package = "systemPipeR")
# appendStep(sal) <- SYSargsList(step_name = "Example",
#                       targets = targetspath,
#                       wf_file = "example/example.cwl", input_file = "example/example.yml",
#                       dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                       inputvars = c(Message = "_STRING_", SampleName = "_SAMPLE_"))

## ----fromFile_example_rules_r, eval=FALSE---------------------------------------------------------
# appendStep(sal) <- LineWise(code = {
#                               library(systemPipeR)
#                             },
#                             step_name = "load_lib")

## ----runWF, eval=is_win---------------------------------------------------------------------------
sal <- runWF(sal)

## ----runWF_error, eval=FALSE----------------------------------------------------------------------
# sal <- runWF(sal, steps = c(1,3))

## ----runWF_force, eval=FALSE----------------------------------------------------------------------
# sal <- runWF(sal, force = TRUE, warning.stop = FALSE, error.stop = TRUE)

## ----runWF_env, eval=FALSE------------------------------------------------------------------------
# viewEnvir(sal)

## ----runWF_saveenv, eval=FALSE--------------------------------------------------------------------
# sal <- runWF(sal, saveEnv = TRUE)

## ----show_statusWF_details1, eval=TRUE------------------------------------------------------------
sal

## ----show_statusWF_details2, eval=FALSE-----------------------------------------------------------
# stepsWF(sal)
# dependency(sal)
# cmdlist(sal)
# codeLine(sal)
# targetsWF(sal)
# statusWF(sal)
# projectInfo(sal)

## ----save_sal, eval=FALSE-------------------------------------------------------------------------
# sal <- write_SYSargsList(sal)

## ----module_cmds, eval=FALSE----------------------------------------------------------------------
# module(action_type="load", module_name="hisat2")
# moduleload("hisat2") # alternative command
# moduleunload("hisat2")
# modulelist() # list software loaded into current session
# moduleAvail() # list all software available in module system

## ----list_module, eval=FALSE----------------------------------------------------------------------
# listCmdModules(sal)
# listCmdTools(sal)

## ----runWF_cluster, eval=FALSE--------------------------------------------------------------------
# resources <- list(conffile=".batchtools.conf.R",
#                   template="batchtools.slurm.tmpl",
#                   Njobs=18,
#                   walltime=120, ## in minutes
#                   ntasks=1,
#                   ncpus=4,
#                   memory=1024, ## in Mb
#                   partition = "short"
#                   )
# sal <- addResources(sal, step=c("hisat2_mapping"), resources = resources)
# sal <- runWF(sal)

## ----eval=TRUE------------------------------------------------------------------------------------
plotWF(sal, show_legend = TRUE, width = "80%", rstudio = TRUE)

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal <- renderReport(sal)
# 
# rmarkdown::render("my.Rmd", clean = TRUE, output_format = "BiocStyle::html_document")

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal <- renderLogs(sal)

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal2rmd(sal)

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal2bash(sal)

## ----SPR_resume, eval=FALSE-----------------------------------------------------------------------
# sal <- SPRproject(resume = TRUE)

## ----resume_load, eval=FALSE----------------------------------------------------------------------
# sal <- SPRproject(resume = TRUE, load.envir = TRUE)

## ----envir, eval=FALSE----------------------------------------------------------------------------
# viewEnvir(sal)
# copyEnvir(sal, list="plot", new.env = globalenv())

## ----restart_load, eval=FALSE---------------------------------------------------------------------
# sal <- SPRproject(restart = TRUE)

## ----SPR_overwrite, eval=FALSE--------------------------------------------------------------------
# sal <- SPRproject(overwrite = TRUE)

## -------------------------------------------------------------------------------------------------
length(sal)

## -------------------------------------------------------------------------------------------------
stepName(sal)

## -------------------------------------------------------------------------------------------------
listCmdTools(sal)

## -------------------------------------------------------------------------------------------------
listCmdModules(sal)
modules(stepsWF(sal)$gzip)

## -------------------------------------------------------------------------------------------------
names(sal)

## -------------------------------------------------------------------------------------------------
stepsWF(sal)

## -------------------------------------------------------------------------------------------------
names(stepsWF(sal)$gzip)

## -------------------------------------------------------------------------------------------------
statusWF(sal)

## -------------------------------------------------------------------------------------------------
targetsWF(sal[2])

## ----eval=FALSE-----------------------------------------------------------------------------------
# targetsheader(sal, step = "Quality")

## -------------------------------------------------------------------------------------------------
outfiles(sal[2])

## -------------------------------------------------------------------------------------------------
dependency(sal)

## -------------------------------------------------------------------------------------------------
SampleName(sal, step = "gzip")

## -------------------------------------------------------------------------------------------------
getColumn(sal, "outfiles", step = "gzip", column = "gzip_file")
getColumn(sal, "targetsWF", step = "gzip", column = "FileName")

## -------------------------------------------------------------------------------------------------
yamlinput(sal, step = "gzip")

## -------------------------------------------------------------------------------------------------
cmdlist(sal, step = c(2,3), targets = 1)

## -------------------------------------------------------------------------------------------------
codeLine(sal, step = "export_iris")

## -------------------------------------------------------------------------------------------------
viewEnvir(sal)

## -------------------------------------------------------------------------------------------------
copyEnvir(sal, list = c("plot"), new.env = globalenv(), silent = FALSE)

## -------------------------------------------------------------------------------------------------
length(sal)
sal[1:2]

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal_sub <- subset(sal, subset_steps = c(2,3), input_targets = c("SE", "VE"), keep_steps = TRUE)
# stepsWF(sal_sub)
# targetsWF(sal_sub)
# outfiles(sal_sub)

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal[1] + sal[2] + sal[3]

## ----eval=TRUE------------------------------------------------------------------------------------
## create a copy of sal for testing
sal_c <- sal
## view original value, here restricted to 'ext' slot
yamlinput(sal_c, step = "gzip")$ext
## Replace value under 'ext' 
yamlinput(sal_c, step = "gzip", paramName = "ext") <- "txt.gz"
## view modified value, here restricted to 'ext' slot
yamlinput(sal_c, step = "gzip")$ext
## Evaluate resulting CL call
cmdlist(sal_c, step = "gzip", targets = 1)

## ----sal_lw_rep, eval=TRUE------------------------------------------------------------------------
appendCodeLine(sal_c, step = "export_iris", after = 1) <- "log_cal_100 <- log(100)"
codeLine(sal_c, step = "export_iris")

## ----sal_lw_rep2, eval=TRUE-----------------------------------------------------------------------
replaceCodeLine(sal_c, step="export_iris", line = 2) <- LineWise(code={
                    log_cal_100 <- log(50)
                    })
codeLine(sal_c, step = "export_iris")

## -------------------------------------------------------------------------------------------------
renameStep(sal_c, c(1, 2)) <- c("newStep2", "newIndex")
sal_c
names(outfiles(sal_c))
names(targetsWF(sal_c))
dependency(sal_c)

## ----eval=FALSE-----------------------------------------------------------------------------------
# sal_test <- sal[c(1,2)]
# replaceStep(sal_test, step = 1, step_name = "gunzip" ) <- sal[3]
# sal_test

## -------------------------------------------------------------------------------------------------
sal_test <- sal[-2]
sal_test

## -------------------------------------------------------------------------------------------------
dir_path <- system.file("extdata/cwl", package = "systemPipeR")
cwl <- yaml::read_yaml(file.path(dir_path, "example/example.cwl"))

## -------------------------------------------------------------------------------------------------
cwl[1:2]

## -------------------------------------------------------------------------------------------------
cwl[3]

## -------------------------------------------------------------------------------------------------
cwl[4]

## -------------------------------------------------------------------------------------------------
cwl[5]

## -------------------------------------------------------------------------------------------------
cwl[6]

## -------------------------------------------------------------------------------------------------
cwl.wf <- yaml::read_yaml(file.path(dir_path, "example/workflow_example.cwl"))

## -------------------------------------------------------------------------------------------------
cwl.wf[1:2]

## -------------------------------------------------------------------------------------------------
cwl.wf[3]

## -------------------------------------------------------------------------------------------------
cwl.wf[4]

## -------------------------------------------------------------------------------------------------
cwl.wf[5]

## -------------------------------------------------------------------------------------------------
yaml::read_yaml(file.path(dir_path, "example/example_single.yml"))

## ----fromFile, eval=TRUE--------------------------------------------------------------------------
HW <- SYSargsList(wf_file = "example/workflow_example.cwl", 
                  input_file = "example/example_single.yml", 
                  dir_path = system.file("extdata/cwl", package = "systemPipeR"))
HW
cmdlist(HW)

## -------------------------------------------------------------------------------------------------
yml <- yaml::read_yaml(file.path(dir_path, "example/example.yml"))
yml

## -------------------------------------------------------------------------------------------------
targetspath <- system.file("extdata/cwl/example/targets_example.txt", package = "systemPipeR")
read.delim(targetspath, comment.char = "#")

## ----fromFile_example, eval=TRUE------------------------------------------------------------------
HW_mul <- SYSargsList(step_name = "echo", 
                      targets=targetspath, 
                      wf_file="example/workflow_example.cwl", input_file="example/example.yml", 
                      dir_path = dir_path, 
                      inputvars = c(Message = "_STRING_", SampleName = "_SAMPLE_"))
HW_mul

## ----fromFile_example2, eval=TRUE-----------------------------------------------------------------
cmdlist(HW_mul)

## ----cmd_orig, eval=FALSE-------------------------------------------------------------------------
# hisat2 -S ./results/M1A.sam -x ./data/tair10.fasta -k 1 -min-intronlen 30 -max-intronlen 3000 -threads 4 -U ./data/SRR446027_1.fastq.gz

## ----cmd, eval=TRUE-------------------------------------------------------------------------------
command <- "
    hisat2 \
    -S <F, out: ./results/M1A.sam> \
    -x <F: ./data/tair10.fasta> \
    -k <int: 1> \
    -min-intronlen <int: 30> \
    -max-intronlen <int: 3000> \
    -threads <int: 4> \
    -U <F: ./data/SRR446027_1.fastq.gz>
"

## -------------------------------------------------------------------------------------------------
cmd <- createParam(command, writeParamFiles = TRUE, overwrite=TRUE, confirm=TRUE) 

## -------------------------------------------------------------------------------------------------
cmdlist(cmd)

## ----saving, eval=FALSE---------------------------------------------------------------------------
# writeParamFiles(cmd, overwrite = TRUE)

## ----sysargs2, eval=TRUE, results="hide"----------------------------------------------------------
command2 <- "
    hisat2 \
    -S <F, out: _SampleName_.sam> \
    -x <F: ./data/tair10.fasta> \
    -k <int: 1> \
    -min-intronlen <int: 30> \
    -max-intronlen <int: 3000> \
    -threads <int: 4> \
    -U <F: _FASTQ_PATH1_>
"
WF <- createParam(command2, overwrite = TRUE, writeParamFiles = TRUE, confirm = TRUE) 
targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR")
WF_test <- loadWorkflow(targets = targetspath, wf_file="hisat2.cwl",
                   input_file="hisat2.yml", dir_path = "param/cwl/hisat2/")
WF_test <- renderWF(WF_test, inputvars = c(FileName = "_FASTQ_PATH1_"))

## ----sysargs2b, eval=TRUE-------------------------------------------------------------------------
WF_test
cmdlist(WF_test)[1:2]

## ----eval=FALSE-----------------------------------------------------------------------------------
# printParam(cmd, position = "baseCommand") ## Return baseCommand
# printParam(cmd, position = "outputs") ## Return outputs
# printParam(cmd, position = "inputs", index = 1:2) ## Return components by index
# printParam(cmd, position = "inputs", index = -1:-2) ## Negative index subsetting

## ----eval=FALSE-----------------------------------------------------------------------------------
# cmd2 <- subsetParam(cmd, position = "inputs", index = 1:2, trim = TRUE)
# cmdlist(cmd2)
# 
# cmd2 <- subsetParam(cmd, position = "inputs", index = c("S", "x"), trim = TRUE)
# cmdlist(cmd2)

## ----eval=FALSE-----------------------------------------------------------------------------------
# cmd3 <- replaceParam(cmd, "base", index = 1, replace = list(baseCommand = "bwa"))
# cmdlist(cmd3) ## Replacement changed baseCommand

## ----eval=FALSE-----------------------------------------------------------------------------------
# new_inputs <- new_inputs <- list(
#     "new_input1" = list(type = "File", preF="-b", yml ="myfile"),
#     "new_input2" = "-L <int: 4>"
# )
# cmd4 <- replaceParam(cmd, "inputs", index = 1:2, replace = new_inputs)
# cmdlist(cmd4)

## ----eval=FALSE-----------------------------------------------------------------------------------
# newIn <- new_inputs <- list(
#     "new_input1" = list(type = "File", preF="-b1", yml ="myfile1"),
#     "new_input2" = list(type = "File", preF="-b2", yml ="myfile2"),
#     "new_input3" = "-b3 <F: myfile3>"
# )
# cmd5 <- appendParam(cmd, "inputs", index = 1:2, append = new_inputs)
# cmdlist(cmd5)
# 
# cmd6 <- appendParam(cmd, "inputs", index = 1:2, after=0, append = new_inputs)
# cmdlist(cmd6)

## ----eval=FALSE-----------------------------------------------------------------------------------
# new_outs <- list(
#     "sam_out" = "<F: $(inputs.results_path)/test.sam>"
# )
# cmd7 <- replaceParam(cmd, "outputs", index = 1, replace = new_outs)
# output(cmd7)

## ----SYSargs2_structure, eval=TRUE----------------------------------------------------------------
library(systemPipeR)
targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR")
dir_path <- system.file("extdata/cwl", package = "systemPipeR")
WF <- loadWF(targets = targetspath, wf_file = "hisat2/hisat2-mapping-se.cwl",
                   input_file = "hisat2/hisat2-mapping-se.yml",
                   dir_path = dir_path)

WF <- renderWF(WF, inputvars = c(FileName = "_FASTQ_PATH1_", 
                                 SampleName = "_SampleName_"))

## ----cmdlist, eval=TRUE---------------------------------------------------------------------------
cmdlist(WF)[1]

## ----names_WF, eval=TRUE--------------------------------------------------------------------------
names(WF)

## ----output_WF, eval=TRUE-------------------------------------------------------------------------
output(WF)[1]

## ----targets_WF, eval=TRUE------------------------------------------------------------------------
targets(WF)[1]
as(WF, "DataFrame")

## ----module_WF, eval=TRUE-------------------------------------------------------------------------
modules(WF)

## ----other_WF, eval=FALSE-------------------------------------------------------------------------
# files(WF)
# inputvars(WF)

## ----lw, eval=TRUE--------------------------------------------------------------------------------
rmd <- system.file("extdata", "spr_simple_lw.Rmd", package = "systemPipeR")
sal_lw <- SPRproject(overwrite = TRUE)
sal_lw <- importWF(sal_lw, rmd, verbose = FALSE)
codeLine(sal_lw)

## ----lw_coerce, eval=TRUE-------------------------------------------------------------------------
lw <- stepsWF(sal_lw)[[2]]
## Coerce
ll <- as(lw, "list")
class(ll)
lw <- as(ll, "LineWise")
lw

## ----lw_access, eval=TRUE-------------------------------------------------------------------------
length(lw)
names(lw)
codeLine(lw)
codeChunkStart(lw)
rmdPath(lw)

## ----lw_sub, eval=TRUE----------------------------------------------------------------------------
l <- lw[2]
codeLine(l)
l_sub <- lw[-2]
codeLine(l_sub)

## ----lw_rep, eval=TRUE----------------------------------------------------------------------------
replaceCodeLine(lw, line = 2) <- "5+5"
codeLine(lw)
appendCodeLine(lw, after = 0) <- "6+7"
codeLine(lw)

## ----sal_rep_append, eval=FALSE-------------------------------------------------------------------
# replaceCodeLine(sal_lw, step = 2, line = 2) <- LineWise(code={
#                                                              "5+5"
#                                                                 })
# codeLine(sal_lw, step = 2)
# 
# appendCodeLine(sal_lw, step = 2) <- "66+55"
# codeLine(sal_lw, step = 2)
# 
# appendCodeLine(sal_lw, step = 1, after = 1) <- "66+55"
# codeLine(sal_lw, step = 1)

## ----table_tools, echo=FALSE, message=FALSE-------------------------------------------------------
library(magrittr)
SPR_software <- system.file("extdata", "SPR_software.csv", package = "systemPipeR")
software <- read.delim(SPR_software, sep = ",", comment.char = "#")
colors <- colorRampPalette((c("darkseagreen", "indianred1")))(length(unique(software$Category)))
id <- as.numeric(c((unique(software$Category))))
software %>%
  dplyr::mutate(Step = kableExtra::cell_spec(Step, color = "white", bold = TRUE,
   background = factor(Category, id, colors))) %>%
   dplyr::select(Tool, Step, Description) %>%
   dplyr::arrange(Tool) %>% 
  kableExtra::kable("html", escape = FALSE, col.names = c("Tool Name", "Description", "Step")) %>%
    kableExtra::kable_material(c("striped", "hover", "condensed")) %>%
    kableExtra::scroll_box(width = "80%", height = "500px")

## ----cleaning3, eval=TRUE, include=FALSE----------------------------------------------------------
if (file.exists(".SPRproject")) unlink(".SPRproject", recursive = TRUE)
## NOTE: Removing previous project create in the quick starts section

## ----SPRproject2, eval=FALSE----------------------------------------------------------------------
# sal <- SPRproject(projPath = getwd(), overwrite = TRUE)

## ----load_SPR, message=FALSE, eval=FALSE, spr=TRUE------------------------------------------------
# appendStep(sal) <- LineWise({
#                             library(systemPipeR)
#                             },
#                             step_name = "load_SPR")

## ----preprocessing, message=FALSE, eval=FALSE, spr=TRUE-------------------------------------------
# targetspath <- system.file("extdata", "targetsPE.txt", package = "systemPipeR")
# appendStep(sal) <- SYSargsList(
#     step_name = "preprocessing",
#     targets = targetspath, dir = TRUE,
#     wf_file = "preprocessReads/preprocessReads-pe.cwl",
#     input_file = "preprocessReads/preprocessReads-pe.yml",
#     dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#     inputvars = c(
#         FileName1 = "_FASTQ_PATH1_",
#         FileName2 = "_FASTQ_PATH2_",
#         SampleName = "_SampleName_"
#     ),
#     dependency = c("load_SPR"))

## ----custom_preprocessing_function, eval=FALSE----------------------------------------------------
# appendStep(sal) <- LineWise(
#     code = {
#         filterFct <- function(fq, cutoff = 20, Nexceptions = 0) {
#             qcount <- rowSums(as(quality(fq), "matrix") <= cutoff, na.rm = TRUE)
#             # Retains reads where Phred scores are >= cutoff with N exceptions
#             fq[qcount <= Nexceptions]
#         }
#         save(list = ls(), file = "param/customFCT.RData")
#     },
#     step_name = "custom_preprocessing_function",
#     dependency = "preprocessing"
# )

## ----editing_preprocessing, message=FALSE, eval=FALSE---------------------------------------------
# yamlinput(sal, "preprocessing")$Fct
# yamlinput(sal, "preprocessing", "Fct") <- "'filterFct(fq, cutoff=20, Nexceptions=0)'"
# yamlinput(sal, "preprocessing")$Fct ## check the new function
# cmdlist(sal, "preprocessing", targets = 1) ## check if the command line was updated with success

## ----trimGalore, eval=FALSE, spr=TRUE-------------------------------------------------------------
# targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR")
# appendStep(sal) <- SYSargsList(step_name = "trimGalore",
#                                targets = targetspath, dir = TRUE,
#                                wf_file = "trim_galore/trim_galore-se.cwl",
#                                input_file = "trim_galore/trim_galore-se.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars = c(FileName = "_FASTQ_PATH1_", SampleName = "_SampleName_"),
#                                dependency = "load_SPR",
#                                run_step = "optional")

## ----trimmomatic, eval=FALSE, spr=TRUE------------------------------------------------------------
# targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR")
# appendStep(sal) <- SYSargsList(step_name = "trimmomatic",
#                                targets = targetspath, dir = TRUE,
#                                wf_file = "trimmomatic/trimmomatic-se.cwl",
#                                input_file = "trimmomatic/trimmomatic-se.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars = c(FileName = "_FASTQ_PATH1_", SampleName = "_SampleName_"),
#                                dependency = "load_SPR",
#                                run_step = "optional")

## ----fastq_report, eval=FALSE, message=FALSE, spr=TRUE--------------------------------------------
# appendStep(sal) <- LineWise(code = {
#                 fastq <- getColumn(sal, step = "preprocessing", "targetsWF", column = 1)
#                 fqlist <- seeFastq(fastq = fastq, batchsize = 10000, klength = 8)
#                 pdf("./results/fastqReport.pdf", height = 18, width = 4*length(fqlist))
#                 seeFastqPlot(fqlist)
#                 dev.off()
#                 }, step_name = "fastq_report",
#                 dependency = "preprocessing")

## ----hisat_index, eval=FALSE, spr=TRUE------------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "hisat_index",
#                                targets = NULL, dir = FALSE,
#                                wf_file = "hisat2/hisat2-index.cwl",
#                                input_file = "hisat2/hisat2-index.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars = NULL,
#                                dependency = "preprocessing")

## ----hisat_mapping_samtools, eval=FALSE, spr=TRUE-------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "hisat_mapping",
#                                targets = "preprocessing", dir = TRUE,
#                                wf_file = "workflow-hisat2/workflow_hisat2-se.cwl",
#                                input_file = "workflow-hisat2/workflow_hisat2-se.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars=c(FileName1="_FASTQ_PATH1_", SampleName="_SampleName_"),
#                                dependency = c("hisat_index"),
#                                run_session = "compute")

## ----bowtie_index, eval=FALSE, spr=TRUE-----------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "bowtie_index",
#                                targets = NULL, dir = FALSE,
#                                wf_file = "bowtie2/bowtie2-index.cwl",
#                                input_file = "bowtie2/bowtie2-index.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars = NULL,
#                                dependency = "preprocessing",
#                                run_step = "optional")

## ----tophat2_mapping, eval=FALSE, spr=TRUE--------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "tophat2_mapping",
#                                targets = "preprocessing", dir = TRUE,
#                                wf_file = "tophat2/workflow_tophat2-mapping-se.cwl",
#                                input_file = "tophat2/tophat2-mapping-se.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars=c(preprocessReads_se="_FASTQ_PATH1_", SampleName="_SampleName_"),
#                                dependency = c("bowtie_index"),
#                                run_session = "remote",
#                                run_step = "optional")

## ----bowtie2_mapping, eval=FALSE, spr=TRUE--------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "bowtie2_mapping",
#                                targets = "preprocessing", dir = TRUE,
#                                wf_file = "bowtie2/workflow_bowtie2-mapping-se.cwl",
#                                input_file = "bowtie2/bowtie2-mapping-se.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars=c(preprocessReads_se="_FASTQ_PATH1_", SampleName="_SampleName_"),
#                                dependency = c("bowtie_index"),
#                                run_session = "remote",
#                                run_step = "optional")

## ----bwa_index, eval=FALSE, spr=TRUE--------------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "bwa_index",
#                                targets = NULL, dir = FALSE,
#                                wf_file = "bwa/bwa-index.cwl",
#                                input_file = "bwa/bwa-index.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars = NULL,
#                                dependency = "preprocessing",
#                                run_step = "optional")

## ----bwa_mapping, eval=FALSE, spr=TRUE------------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "bwa_mapping",
#                                targets = "preprocessing", dir = TRUE,
#                                wf_file = "bwa/bwa-se.cwl",
#                                input_file = "bwa/bwa-se.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars=c(preprocessReads_se="_FASTQ_PATH1_", SampleName="_SampleName_"),
#                                dependency = c("bwa_index"),
#                                run_session = "remote",
#                                run_step = "optional")

## ----rsubread_index, eval=FALSE, spr=TRUE---------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "rsubread_index",
#                                targets = NULL, dir = FALSE,
#                                wf_file = "rsubread/rsubread-index.cwl",
#                                input_file = "rsubread/rsubread-index.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars = NULL,
#                                dependency = "preprocessing",
#                                run_step = "optional")

## ----rsubread_mapping, eval=FALSE, spr=TRUE-------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "rsubread",
#                                targets = "preprocessing", dir = TRUE,
#                                wf_file = "rsubread/rsubread-mapping-se.cwl",
#                                input_file = "rsubread/rsubread-mapping-se.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars=c(FileName1="_FASTQ_PATH1_", SampleName="_SampleName_"),
#                                dependency = c("rsubread_index"),
#                                run_session = "compute",
#                                run_step = "optional")

## ----gsnap_index, eval=FALSE, spr=TRUE------------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "gsnap_index",
#                                targets = NULL, dir = FALSE,
#                                wf_file = "gsnap/gsnap-index.cwl",
#                                input_file = "gsnap/gsnap-index.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                inputvars = NULL,
#                                dependency = "preprocessing",
#                                run_step = "optional")

## ----gsnap_mapping, eval=FALSE, spr=TRUE----------------------------------------------------------
# appendStep(sal) <- SYSargsList(step_name = "gsnap",
#                                targets = "targetsPE.txt", dir = TRUE,
#                                wf_file = "gsnap/gsnap-mapping-pe.cwl",
#                                input_file = "gsnap/gsnap-mapping-pe.yml",
#                                dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#                                 inputvars = c(FileName1 = "_FASTQ_PATH1_",
#                                               FileName2 = "_FASTQ_PATH2_", SampleName = "_SampleName_"),
#                                dependency = c("gsnap_index"),
#                                run_session = "remote",
#                                run_step = "optional")

## ----bam_IGV, eval=FALSE, spr=TRUE----------------------------------------------------------------
# appendStep(sal) <- LineWise(
#     code = {
#         bampaths <- getColumn(sal, step = "hisat2_mapping", "outfiles",
#                   column = "samtools_sort_bam")
#         symLink2bam(
#             sysargs = bampaths, htmldir = c("~/.html/", "somedir/"),
#             urlbase = "https://cluster.hpcc.ucr.edu/<username>/",
#             urlfile = "./results/IGVurl.txt")
#     },
#     step_name = "bam_IGV",
#     dependency = "hisat_mapping",
#     run_step = "optional"
# )

## ----create_txdb, message=FALSE, eval=FALSE, spr=TRUE---------------------------------------------
# appendStep(sal) <- LineWise(code = {
#                             library(txdbmaker)
#                             txdb <- makeTxDbFromGFF(file="data/tair10.gff", format="gff",
#                                                     dataSource="TAIR", organism="Arabidopsis thaliana")
#                             saveDb(txdb, file="./data/tair10.sqlite")
#                             },
#                             step_name = "create_txdb",
#                             dependency = "hisat_mapping")

## ----read_counting, message=FALSE, eval=FALSE, spr=TRUE-------------------------------------------
# appendStep(sal) <- LineWise({
#                             library(BiocParallel)
#                             txdb <- loadDb("./data/tair10.sqlite")
#                             eByg <- exonsBy(txdb, by="gene")
#                             outpaths <- getColumn(sal, step = "hisat_mapping", 'outfiles', column = 2)
#                             bfl <- BamFileList(outpaths, yieldSize=50000, index=character())
#                             multicoreParam <- MulticoreParam(workers=4); register(multicoreParam); registered()
#                             counteByg <- bplapply(bfl, function(x) summarizeOverlaps(eByg, x, mode="Union",
#                                                                                      ignore.strand=TRUE,
#                                                                                      inter.feature=TRUE,
#                                                                                      singleEnd=TRUE))
#                             # Note: for strand-specific RNA-Seq set 'ignore.strand=FALSE' and for PE data set 'singleEnd=FALSE'
#                             countDFeByg <- sapply(seq(along=counteByg),
#                                                   function(x) assays(counteByg[[x]])$counts)
#                             rownames(countDFeByg) <- names(rowRanges(counteByg[[1]]))
#                             colnames(countDFeByg) <- names(bfl)
#                             rpkmDFeByg <- apply(countDFeByg, 2, function(x) returnRPKM(counts=x, ranges=eByg))
#                             write.table(countDFeByg, "results/countDFeByg.xls",
#                                         col.names=NA, quote=FALSE, sep="\t")
#                             write.table(rpkmDFeByg, "results/rpkmDFeByg.xls",
#                                         col.names=NA, quote=FALSE, sep="\t")
#                             },
#                             step_name = "read_counting",
#                             dependency = "create_txdb")

## ----align_stats, message=FALSE, eval=FALSE, spr=TRUE---------------------------------------------
# appendStep(sal) <- LineWise({
#                             read_statsDF <- alignStats(args)
#                             write.table(read_statsDF, "results/alignStats.xls",
#                                         row.names = FALSE, quote = FALSE, sep = "\t")
#                             },
#                             step_name = "align_stats",
#                             dependency = "hisat_mapping")

## ----align_stats2, eval=TRUE----------------------------------------------------------------------
read.table(system.file("extdata", "alignStats.xls", package = "systemPipeR"), header = TRUE)[1:4,]

## ----read_counting_mirna, message=FALSE, eval=FALSE, spr=TRUE-------------------------------------
# appendStep(sal) <- LineWise({
#                             system("wget https://www.mirbase.org/download/ath.gff3 -P ./data/")
#                             gff <- rtracklayer::import.gff("./data/ath.gff3")
#                             gff <- split(gff, elementMetadata(gff)$ID)
#                             bams <- getColumn(sal, step = "bowtie2_mapping", 'outfiles', column = 2)
#                             bfl <- BamFileList(bams, yieldSize=50000, index=character())
#                             countDFmiR <- summarizeOverlaps(gff, bfl, mode="Union",
#                                                             ignore.strand = FALSE, inter.feature = FALSE)
#                             countDFmiR <- assays(countDFmiR)$counts
#                             # Note: inter.feature=FALSE important since pre and mature miRNA ranges overlap
#                             rpkmDFmiR <- apply(countDFmiR, 2, function(x) returnRPKM(counts = x, ranges = gff))
#                             write.table(assays(countDFmiR)$counts, "results/countDFmiR.xls",
#                                         col.names=NA, quote=FALSE, sep="\t")
#                             write.table(rpkmDFmiR, "results/rpkmDFmiR.xls", col.names=NA, quote=FALSE, sep="\t")
#                             },
#                             step_name = "read_counting_mirna",
#                             dependency = "bowtie2_mapping")

## ----sample_tree_rlog, message=FALSE, eval=FALSE, spr=TRUE----------------------------------------
# appendStep(sal) <- LineWise({
#                             library(DESeq2, warn.conflicts=FALSE, quietly=TRUE)
#                             library(ape, warn.conflicts=FALSE)
#                             countDFpath <- system.file("extdata", "countDFeByg.xls", package="systemPipeR")
#                             countDF <- as.matrix(read.table(countDFpath))
#                             colData <- data.frame(row.names = targetsWF(sal)[[2]]$SampleName,
#                                                   condition=targetsWF(sal)[[2]]$Factor)
#                             dds <- DESeqDataSetFromMatrix(countData = countDF, colData = colData,
#                                                           design = ~ condition)
#                             d <- cor(assay(rlog(dds)), method = "spearman")
#                             hc <- hclust(dist(1-d))
#                             plot.phylo(as.phylo(hc), type = "p", edge.col = 4, edge.width = 3,
#                                        show.node.label = TRUE, no.margin = TRUE)
#                             },
#                             step_name = "sample_tree_rlog",
#                             dependency = "read_counting")

## ----edger, message=FALSE, eval=FALSE, spr=TRUE---------------------------------------------------
# appendStep(sal) <- LineWise({
#                             targetspath <- system.file("extdata", "targets.txt", package = "systemPipeR")
#                             targets <- read.delim(targetspath, comment = "#")
#                             cmp <- readComp(file = targetspath, format = "matrix", delim = "-")
#                             countDFeBygpath <- system.file("extdata", "countDFeByg.xls", package = "systemPipeR")
#                             countDFeByg <- read.delim(countDFeBygpath, row.names = 1)
#                             edgeDF <- run_edgeR(countDF = countDFeByg, targets = targets, cmp = cmp[[1]],
#                                                 independent = FALSE, mdsplot = "")
#                             DEG_list <- filterDEGs(degDF = edgeDF, filter = c(Fold = 2, FDR = 10))
#                             },
#                             step_name = "edger",
#                             dependency = "read_counting")

## ----deseq2, message=FALSE, eval=FALSE, spr=TRUE--------------------------------------------------
# appendStep(sal) <- LineWise({
#                             degseqDF <- run_DESeq2(countDF=countDFeByg, targets=targets, cmp=cmp[[1]],
#                                                    independent=FALSE)
#                             DEG_list2 <- filterDEGs(degDF=degseqDF, filter=c(Fold=2, FDR=10))
#                             },
#                             step_name = "deseq2",
#                             dependency = "read_counting")

## ----vennplot, message=FALSE, eval=FALSE, spr=TRUE------------------------------------------------
# appendStep(sal) <- LineWise({
#                             vennsetup <- overLapper(DEG_list$Up[6:9], type="vennsets")
#                             vennsetdown <- overLapper(DEG_list$Down[6:9], type="vennsets")
#                             vennPlot(list(vennsetup, vennsetdown), mymain="", mysub="",
#                                      colmode=2, ccol=c("blue", "red"))
#                             },
#                             step_name = "vennplot",
#                             dependency = "edger")

## ----get_go_biomart, message=FALSE, eval=FALSE, spr=TRUE------------------------------------------
# appendStep(sal) <- LineWise({
#                             library("biomaRt")
#                             listMarts() # To choose BioMart database
#                             listMarts(host="plants.ensembl.org")
#                             m <- useMart("plants_mart", host="https://plants.ensembl.org")
#                             listDatasets(m)
#                             m <- useMart("plants_mart", dataset="athaliana_eg_gene", host="https://plants.ensembl.org")
#                             listAttributes(m) # Choose data types you want to download
#                             go <- getBM(attributes=c("go_id", "tair_locus", "namespace_1003"), mart=m)
#                             go <- go[go[,3]!="",]; go[,3] <- as.character(go[,3])
#                             go[go[,3]=="molecular_function", 3] <- "F"
#                             go[go[,3]=="biological_process", 3] <- "P"
#                             go[go[,3]=="cellular_component", 3] <- "C"
#                             go[1:4,]
#                             dir.create("./data/GO")
#                             write.table(go, "data/GO/GOannotationsBiomart_mod.txt",
#                                         quote=FALSE, row.names=FALSE, col.names=FALSE, sep="\t")
#                             catdb <- makeCATdb(myfile="data/GO/GOannotationsBiomart_mod.txt",
#                                                lib=NULL, org="", colno=c(1,2,3), idconv=NULL)
#                             save(catdb, file="data/GO/catdb.RData")
#                             },
#                             step_name = "get_go_biomart",
#                             dependency = "edger")

## ----go_enrichment, message=FALSE, eval=FALSE, spr=TRUE-------------------------------------------
# appendStep(sal) <- LineWise({
#                             load("data/GO/catdb.RData")
#                             DEG_list <- filterDEGs(degDF=edgeDF, filter=c(Fold=2, FDR=50), plot=FALSE)
#                             up_down <- DEG_list$UporDown; names(up_down) <- paste(names(up_down), "_up_down", sep="")
#                             up <- DEG_list$Up; names(up) <- paste(names(up), "_up", sep="")
#                             down <- DEG_list$Down; names(down) <- paste(names(down), "_down", sep="")
#                             DEGlist <- c(up_down, up, down)
#                             DEGlist <- DEGlist[sapply(DEGlist, length) > 0]
#                             BatchResult <- GOCluster_Report(catdb=catdb, setlist=DEGlist, method="all",
#                                                             id_type="gene", CLSZ=2, cutoff=0.9,
#                                                             gocats=c("MF", "BP", "CC"), recordSpecGO=NULL)
#                             library("biomaRt")
#                             m <- useMart("plants_mart", dataset="athaliana_eg_gene", host="https://plants.ensembl.org")
#                             goslimvec <- as.character(getBM(attributes=c("goslim_goa_accession"), mart=m)[,1])
#                             BatchResultslim <- GOCluster_Report(catdb=catdb, setlist=DEGlist, method="slim",
#                                                                 id_type="gene", myslimv=goslimvec, CLSZ=10,
#                                                                 cutoff=0.01, gocats=c("MF", "BP", "CC"),
#                                                                 recordSpecGO=NULL)
#                             gos <- BatchResultslim[grep("M6-V6_up_down", BatchResultslim$CLID), ]
#                             gos <- BatchResultslim
#                             pdf("GOslimbarplotMF.pdf", height=8, width=10); goBarplot(gos, gocat="MF"); dev.off()
#                             goBarplot(gos, gocat="BP")
#                             goBarplot(gos, gocat="CC")
#                             },
#                             step_name = "go_enrichment",
#                             dependency = "get_go_biomart")

## ----hierarchical_clustering, message=FALSE, eval=FALSE, spr=TRUE---------------------------------
# appendStep(sal) <- LineWise({
#                             library(pheatmap)
#                             geneids <- unique(as.character(unlist(DEG_list[[1]])))
#                             y <- assay(rlog(dds))[geneids, ]
#                             pdf("heatmap1.pdf")
#                             pheatmap(y, scale="row", clustering_distance_rows="correlation",
#                                      clustering_distance_cols="correlation")
#                             dev.off()
#                             },
#                             step_name = "hierarchical_clustering",
#                             dependency = c("sample_tree_rlog", "edger"))

## ----sessionInfo----------------------------------------------------------------------------------
sessionInfo()