## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
    collapse = FALSE,
    message = FALSE,
    warning = FALSE,
    echo = TRUE,
    eval = TRUE,
    cache = TRUE,
    fig.align = 'center', out.width="65%"
    
)

knitr::knit_hooks$set(time_it = local({
  now <- NULL
  function(before, options) {
    if (before) {
      # record the current time before each chunk
      now <<- Sys.time()
    } else {
      # calculate the time difference after a chunk
      res <- difftime(Sys.time(), now, units = "secs")
      # return a character string to show the time
      msg <- paste("Time for this code chunk to run:", round(res,
        2), "seconds")
      message(msg)  # <-- print to console
      return(msg)   # <-- also return to appear in knitted output
    }
  }
}))

## ----load, time_it = TRUE-----------------------------------------------------
library(standR)
library(SummarizedExperiment)
library(ExperimentHub)
library(readr)
library(dplyr)
library(stats)

eh <- ExperimentHub()
AnnotationHub::query(eh, "standR")
countFilePath <- eh[["EH7364"]]
sampleAnnoFilePath <- eh[["EH7365"]]

countFile <- readr::read_delim(unname(countFilePath), na = character())
sampleAnnoFile <- readr::read_delim(unname(sampleAnnoFilePath), na = character())

## -----------------------------------------------------------------------------
colnames(sampleAnnoFile)
sampleAnnoFile$disease_status %>% unique()
sampleAnnoFile$region %>% unique()

## ----new_sampleAnnoFile, time_it = TRUE---------------------------------------
new_sampleAnnoFile <- sampleAnnoFile %>%
    tidyr::unite(
        "disease_region", # newly created grouped variable
        c("disease_status","region"), # variables to combine
        sep = "_"
    )

new_sampleAnnoFile$disease_region %>% unique()

## ----readGeoMx, time_it = TRUE------------------------------------------------
spe <- standR::readGeoMx(as.data.frame(countFile),
                         as.data.frame(new_sampleAnnoFile))

## ----selectedTypes, time_it = TRUE--------------------------------------------
selectedTypes <- c("DKD_glomerulus", "DKD_tubule", "normal_tubule", "normal_glomerulus")
toKeep <- colData(spe) %>%
    tibble::as_tibble() %>%
    pull(disease_region)

spe <- spe[, grepl(paste(selectedTypes, collapse = "|"), toKeep)]

## ----filter, time_it = TRUE---------------------------------------------------
filter <- lapply("SequencingSaturation", function(column) {
    cutoff_value <- 85
    if(!is.na(cutoff_value)) {
        return(colData(spe)[[column]] > cutoff_value)
    } else {
        return(NULL)
    }
})

filtered_spe <- spe[,unlist(filter)]

colData(spe) %>% dim()
colData(filtered_spe) %>% dim()

## ----speQ3, time_it=TRUE------------------------------------------------------
speQ3 <- standR::geomxNorm(filtered_spe, method = "upperquartile")
speQ3 <- scater::runPCA(filtered_spe)

speQ3_compute <- SingleCellExperiment::reducedDim(speQ3, "PCA")

## ----speRUV, time_it=TRUE-----------------------------------------------------
speRuv_NCGs <- standR::findNCGs(filtered_spe,
        batch_name = "SlideName",
        top_n = 200)


speRuvBatchCorrection <- standR::geomxBatchCorrection(speRuv_NCGs,
            factors = "disease_region",
            NCGs = S4Vectors::metadata(speRuv_NCGs)$NCGs, k = 2
        )

speRuv <- scater::runPCA(speRuvBatchCorrection)

speRuv_compute <- SingleCellExperiment::reducedDim(speRuv, "PCA")


## ----.PCAFunction, echo=FALSE-------------------------------------------------
.PCAFunction <- function(spe, precomputed, colourShapeBy, selectedVar,
                         ROIshapes, ROIcolours) {
    standR::drawPCA(spe, precomputed = precomputed) +

        ## really need as.name() plus !! because of factor()
        ggplot2::geom_point(ggplot2::aes(
            shape = factor(!!as.name(colourShapeBy), levels = selectedVar),
            fill = factor(!!as.name(colourShapeBy), levels = selectedVar)
        ), size = 3, colour = "black", stroke = 0.5) +

        ggplot2::scale_shape_manual(colourShapeBy,
            values = as.integer(unlist(ROIshapes)) # as.integer() crucial
        ) +
        ggplot2::scale_fill_manual(colourShapeBy,
            ## colour() is a base function, so must be avoided
            values = unlist(ROIcolours)
        ) +
        ggplot2::scale_y_continuous(
            labels = scales::number_format(accuracy = 0.1)
        ) +
        ggplot2::scale_x_continuous(
            labels = scales::number_format(accuracy = 0.1)
        ) +
        ggplot2::theme(
            panel.grid.minor = ggplot2::element_blank(),
            panel.grid.major = ggplot2::element_blank(),
            axis.text = ggplot2::element_text(color = "black", size = 16),
            axis.line = ggplot2::element_blank(),
            axis.ticks = ggplot2::element_line(colour = "black"),
            axis.title = ggplot2::element_text(size = 16),
            legend.title = ggplot2::element_text(
                size = 16, vjust = 0.5, hjust = 0.5,
                face = "bold", family = "sans"
            ),
            legend.text = ggplot2::element_text(
                size = 16, vjust = 0.5, hjust = 0,
                face = "bold", family = "sans"
            ),
            plot.margin = grid::unit(c(1, 1, 1, 1), "mm"),
            plot.background = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            plot.title = ggplot2::element_text(
                size = 16, hjust = 0.5, face = "bold",
                family = "sans"
            ),
            panel.border = ggplot2::element_rect(
                colour = "black",
                linewidth = 0.4
            ),
            panel.background = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            legend.background = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            legend.box.background = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            legend.key = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            legend.position = "bottom",
            aspect.ratio = 1
        ) +
        ggplot2::guides(
            fill = ggplot2::guide_legend(
                nrow = length(selectedVar),
                title.position = "top"
            ),
            shape = ggplot2::guide_legend(
                nrow = length(selectedVar),
                title.position = "top"
            )
        )
}

## ----PCAFunction, time_it = TRUE----------------------------------------------
.PCAFunction(speRuv, speRuv_compute, "disease_region", selectedTypes, c(21,22,23,24), c("blue","dodgerblue1","black","grey75"))

.PCAFunction(speQ3, speQ3_compute, "disease_region", selectedTypes, c(21,22,23,24), c("blue","dodgerblue1","black","grey75"))

## ----design, time_it = TRUE---------------------------------------------------
design <- model.matrix(~0+disease_region+ruv_W1+ruv_W2, data = colData(speRuv))

# Clean up column name
colnames(design) <- gsub("disease_region", "", colnames(design))

## ----convert, time_it = TRUE--------------------------------------------------
library(edgeR)
dge <- SE2DGEList(speRuv)
keep <- filterByExpr(dge, design)

dge <- dge[keep, , keep.lib.sizes = FALSE]

dge <- estimateDisp(dge, design = design, robust = TRUE)

## ----contrast, time_it = TRUE-------------------------------------------------
# In case there are spaces
selectedTypes_underscore <- gsub(" ", "_", selectedTypes)

comparisons <- list()

comparisons <- lapply(
    seq_len(choose(length(selectedTypes_underscore), 2)),
    function(i) {
        noquote(
            paste0(
                utils::combn(selectedTypes_underscore, 2,
                    simplify = FALSE
                )[[i]][1],
                "-",
                utils::combn(selectedTypes_underscore, 2,
                    simplify = FALSE
                )[[i]][2]
            )
        )
    }
)

con <- makeContrasts(
    # Must use as.character()
    contrasts = as.character(unlist(comparisons)),
    levels = colnames(design)
)

colnames(con) <- sub("-", "_vs_", colnames(con))

con

## ----limma, time_it = TRUE----------------------------------------------------
library(limma)
block_by <- colData(speRuv)[["SlideName"]]

v <- voom(dge, design)
corfit <- duplicateCorrelation(v, design,
    block = block_by
)

v2 <- voom(dge, design,
    block = block_by,
    correlation = corfit$consensus
)

corfit2 <- duplicateCorrelation(v, design,
    block = block_by
)

fit <- lmFit(v, design,
    block = block_by,
    correlation = corfit2$consensus
)

fit_contrast <- contrasts.fit(fit,
    contrasts = con
)

efit <- eBayes(fit_contrast, robust = TRUE)

## ----tables, time_it = TRUE---------------------------------------------------
# Keep track of how many comparisons there are
numeric_vector <- seq_len(ncol(con))
new_list <- as.list(numeric_vector)

# This adds n+1th index to new_list where n = ncol(con)
# This index contains seq_len(ncol(con)) 
# ex. new_list[[7]] = 1 2 3 4 5 6
# This coefficient allows ANOVA-like comparison in toptable()
if (length(selectedTypes) > 2) {
    new_list[[length(new_list) + 1]] <- numeric_vector
}

topTabDF <- lapply(new_list, function(i) {
    limma::topTable(efit,
        coef = i, number = Inf, p.value = 0.05,
        adjust.method = "BH", lfc = 1
    ) %>%
        tibble::rownames_to_column(var = "Gene")
})

# Adds names to topTabDF
if (length(selectedTypes) > 2) {
    names(topTabDF) <- c(
        colnames(con),
        colnames(con) %>% stringr::str_split(., "_vs_") %>%
            unlist() %>% unique() %>% paste(., collapse = "_vs_")
    )
} else {
    names(topTabDF) <- colnames(con)
}


## ----tableExample-------------------------------------------------------------
colnames(con)
head(topTabDF[[1]])

## ----.volcanoFunction, echo = FALSE-------------------------------------------
.volcanoFunction <- function(volcano, delabSize, maxOverlap, title,
                             logFCcutoff, PvalCutoff,
                             DnCol, notDEcol, UpCol) {
    logFC <- adj.P.Val <- deLab <- NULL
    ggplot2::ggplot(
        data = volcano,
        ggplot2::aes(
            x = logFC,
            y = -log10(adj.P.Val),
            col = de,
            label = deLab
        )
    ) +
        ggplot2::geom_point() +
        ggplot2::theme_bw() +
        ggplot2::theme(
            axis.ticks = ggplot2::element_line(colour = "black"),
            panel.border = ggplot2::element_rect(colour = "black"),
            text = ggplot2::element_text(size = 16, color = "black"),
            title = ggplot2::element_text(size = 16, hjust = 0.5),
            axis.text = ggplot2::element_text(size = 16, color = "black"),
            axis.title = ggplot2::element_text(size = 16),
            plot.margin = grid::unit(c(1, 1, 1, 1), "mm"),
            plot.background = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            panel.background = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            panel.grid = ggplot2::element_blank(),
            legend.background = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            legend.box.background = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            legend.key = ggplot2::element_rect(
                fill = "transparent",
                colour = NA
            ),
            legend.position = "none"
        ) +
        ggplot2::geom_vline(
            xintercept = c(-(logFCcutoff), logFCcutoff), col = "black",
            linetype = 3
        ) +
        ggplot2::geom_hline(
            yintercept = -log10(PvalCutoff), col = "black",
            linetype = 3
        ) +
        ggrepel::geom_text_repel(
            size = delabSize,
            segment.colour = "black",
            segment.linetype = 3,
            data = volcano %>%
                dplyr::filter(logFC >= logFCcutoff | logFC <= -(logFCcutoff)),
            ggplot2::aes(label = deLab),
            min.segment.length = 0,
            max.overlaps = maxOverlap
        ) +
        ggplot2::scale_color_manual(
            values = c(DnCol, notDEcol, UpCol),
            breaks = c("DN", "NA", "UP")
        ) +
        ggplot2::xlab(expression(log[2] ~ fold ~ change)) +
        ggplot2::ylab(expression(-log[10] ~ italic(P) ~ value)) +
        ggplot2::theme(aspect.ratio = 1, rect = ggplot2::element_rect(
            fill = "transparent"
        )) +
        ggplot2::labs(title = title)
}

## ----VolcanoFunction, time_it = TRUE------------------------------------------
volcanoDF <- lapply(seq_len(ncol(con)), function(i) {
                limma::topTable(efit, coef = i, number = Inf) %>%
                    tibble::rownames_to_column(var = "Target.name") %>%
                    dplyr::select("Target.name", "logFC", "adj.P.Val") %>%
                    dplyr::mutate(de = ifelse(logFC >= 1 &
                        adj.P.Val < 0.05, "UP",
                    ifelse(logFC <= -(1) &
                        adj.P.Val < 0.05, "DN",
                    "NO"
                    )
                    )) %>%
                    dplyr::mutate(
                        logFC_threshold = stats::quantile(abs(logFC), 0.99,
                            na.rm = TRUE
                        ),
                        pval_threshold = stats::quantile(adj.P.Val, 0.01,
                            na.rm = TRUE
                        ),
                        deLab = ifelse(abs(logFC) > logFC_threshold &
                            adj.P.Val < pval_threshold &
                            abs(logFC) >= 0.05 &
                            adj.P.Val < 0.05, Target.name, NA)
                    )
            })

plots <- lapply(seq_along(volcanoDF), function(i) {
    .volcanoFunction(
        volcanoDF[[i]], 12, 5,
        colnames(con)[i],
        1, 0.05,
        "Blue", "Grey", "Red"
    ) +
        ggplot2::xlim(
            -2,
            2
        ) +
        ggplot2::ylim(
            0, 20
        )
})

plots[1]


## ----heatmapSetup, time_it = TRUE---------------------------------------------
lcpmSubScaleTopGenes <- lapply(names(topTabDF), function(name) {
            columns <- stringr::str_split_1(name, "_vs_") %>%
                lapply(function(.) {
                    which(SummarizedExperiment::colData(speRuv) %>%
                        tibble::as_tibble() %>%
                        dplyr::pull(disease_region) == .)
                }) %>%
                unlist()

            table <- SummarizedExperiment::assay(speRuv, 2)[
                topTabDF[[name]] %>%
                    dplyr::slice_head(n = 50) %>%
                    dplyr::select(Gene) %>%
                    unlist() %>%
                    unname(),
                columns
            ] %>%
                data.frame() %>%
                t() %>%
                scale() %>%
                t()

            return(table)
        })

        names(lcpmSubScaleTopGenes) <- names(topTabDF)
        
columnSplit <- lapply(names(topTabDF), function(name) {
    columnSplit <- stringr::str_split_1(name, "_vs_") %>%
        lapply(function(.){
            which(
                SummarizedExperiment::colData(speRuv) %>%
                    tibble::as_tibble() %>% dplyr::select(disease_region) == .
            )
        } ) %>%
        as.vector() %>%
        summary() %>%
        .[, "Length"]
})

names(columnSplit) <- names(lcpmSubScaleTopGenes)


## ----heatmap, fig.height = 6, fig.width = 4, time_it = TRUE-------------------
colFunc <- circlize::colorRamp2(
            c(
                -3, 0,
                3
            ),
            hcl_palette = "Inferno"
        )

heatmap <- lapply(names(lcpmSubScaleTopGenes), function(name) {
    ComplexHeatmap::Heatmap(lcpmSubScaleTopGenes[[name]],
        cluster_columns = FALSE, col = colFunc,
        heatmap_legend_param = list(
            border = "black",
            title = "Z score",
            title_gp = grid::gpar(
                fontsize = 12,
                fontface = "plain",
                fontfamily = "sans"
            ),
            labels_gp = grid::gpar(
                fontsize = 12,
                fontface = "plain",
                fontfamily = "sans"
            ),
            legend_height = grid::unit(
                3 * as.numeric(30),
                units = "mm"
            )
        ),
        top_annotation = ComplexHeatmap::HeatmapAnnotation(
            foo = ComplexHeatmap::anno_block(
                gp = grid::gpar(lty = 0, fill = "transparent"),
                labels = columnSplit[[name]] %>% names(),
                labels_gp = grid::gpar(
                    col = "black", fontsize = 14,
                    fontfamily = "sans",
                    fontface = "bold"
                ),
                labels_rot = 0, labels_just = "center",
                labels_offset = grid::unit(4.5, "mm")
            )
        ),
        border_gp = grid::gpar(col = "black", lwd = 0.2),
        row_names_gp = grid::gpar(
            fontfamily = "sans",
            fontface = "italic",
            fontsize = 10
        ),
        show_column_names = FALSE,
        column_title = NULL,
        column_split = rep(
            LETTERS[seq_len(columnSplit[[name]] %>% length())],
            columnSplit[[name]] %>% unname() %>% as.numeric()
        )

    )
})

names(heatmap) <- names(lcpmSubScaleTopGenes)

heatmap[[1]]