## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = FALSE, message = FALSE, warning = FALSE, echo = TRUE, eval = TRUE, cache = TRUE, fig.align = 'center', out.width="65%" ) knitr::knit_hooks$set(time_it = local({ now <- NULL function(before, options) { if (before) { # record the current time before each chunk now <<- Sys.time() } else { # calculate the time difference after a chunk res <- difftime(Sys.time(), now, units = "secs") # return a character string to show the time msg <- paste("Time for this code chunk to run:", round(res, 2), "seconds") message(msg) # <-- print to console return(msg) # <-- also return to appear in knitted output } } })) ## ----load, time_it = TRUE----------------------------------------------------- library(standR) library(SummarizedExperiment) library(ExperimentHub) library(readr) library(dplyr) library(stats) eh <- ExperimentHub() AnnotationHub::query(eh, "standR") countFilePath <- eh[["EH7364"]] sampleAnnoFilePath <- eh[["EH7365"]] countFile <- readr::read_delim(unname(countFilePath), na = character()) sampleAnnoFile <- readr::read_delim(unname(sampleAnnoFilePath), na = character()) ## ----------------------------------------------------------------------------- colnames(sampleAnnoFile) sampleAnnoFile$disease_status %>% unique() sampleAnnoFile$region %>% unique() ## ----new_sampleAnnoFile, time_it = TRUE--------------------------------------- new_sampleAnnoFile <- sampleAnnoFile %>% tidyr::unite( "disease_region", # newly created grouped variable c("disease_status","region"), # variables to combine sep = "_" ) new_sampleAnnoFile$disease_region %>% unique() ## ----readGeoMx, time_it = TRUE------------------------------------------------ spe <- standR::readGeoMx(as.data.frame(countFile), as.data.frame(new_sampleAnnoFile)) ## ----selectedTypes, time_it = TRUE-------------------------------------------- selectedTypes <- c("DKD_glomerulus", "DKD_tubule", "normal_tubule", "normal_glomerulus") toKeep <- colData(spe) %>% tibble::as_tibble() %>% pull(disease_region) spe <- spe[, grepl(paste(selectedTypes, collapse = "|"), toKeep)] ## ----filter, time_it = TRUE--------------------------------------------------- filter <- lapply("SequencingSaturation", function(column) { cutoff_value <- 85 if(!is.na(cutoff_value)) { return(colData(spe)[[column]] > cutoff_value) } else { return(NULL) } }) filtered_spe <- spe[,unlist(filter)] colData(spe) %>% dim() colData(filtered_spe) %>% dim() ## ----speQ3, time_it=TRUE------------------------------------------------------ speQ3 <- standR::geomxNorm(filtered_spe, method = "upperquartile") speQ3 <- scater::runPCA(filtered_spe) speQ3_compute <- SingleCellExperiment::reducedDim(speQ3, "PCA") ## ----speRUV, time_it=TRUE----------------------------------------------------- speRuv_NCGs <- standR::findNCGs(filtered_spe, batch_name = "SlideName", top_n = 200) speRuvBatchCorrection <- standR::geomxBatchCorrection(speRuv_NCGs, factors = "disease_region", NCGs = S4Vectors::metadata(speRuv_NCGs)$NCGs, k = 2 ) speRuv <- scater::runPCA(speRuvBatchCorrection) speRuv_compute <- SingleCellExperiment::reducedDim(speRuv, "PCA") ## ----.PCAFunction, echo=FALSE------------------------------------------------- .PCAFunction <- function(spe, precomputed, colourShapeBy, selectedVar, ROIshapes, ROIcolours) { standR::drawPCA(spe, precomputed = precomputed) + ## really need as.name() plus !! because of factor() ggplot2::geom_point(ggplot2::aes( shape = factor(!!as.name(colourShapeBy), levels = selectedVar), fill = factor(!!as.name(colourShapeBy), levels = selectedVar) ), size = 3, colour = "black", stroke = 0.5) + ggplot2::scale_shape_manual(colourShapeBy, values = as.integer(unlist(ROIshapes)) # as.integer() crucial ) + ggplot2::scale_fill_manual(colourShapeBy, ## colour() is a base function, so must be avoided values = unlist(ROIcolours) ) + ggplot2::scale_y_continuous( labels = scales::number_format(accuracy = 0.1) ) + ggplot2::scale_x_continuous( labels = scales::number_format(accuracy = 0.1) ) + ggplot2::theme( panel.grid.minor = ggplot2::element_blank(), panel.grid.major = ggplot2::element_blank(), axis.text = ggplot2::element_text(color = "black", size = 16), axis.line = ggplot2::element_blank(), axis.ticks = ggplot2::element_line(colour = "black"), axis.title = ggplot2::element_text(size = 16), legend.title = ggplot2::element_text( size = 16, vjust = 0.5, hjust = 0.5, face = "bold", family = "sans" ), legend.text = ggplot2::element_text( size = 16, vjust = 0.5, hjust = 0, face = "bold", family = "sans" ), plot.margin = grid::unit(c(1, 1, 1, 1), "mm"), plot.background = ggplot2::element_rect( fill = "transparent", colour = NA ), plot.title = ggplot2::element_text( size = 16, hjust = 0.5, face = "bold", family = "sans" ), panel.border = ggplot2::element_rect( colour = "black", linewidth = 0.4 ), panel.background = ggplot2::element_rect( fill = "transparent", colour = NA ), legend.background = ggplot2::element_rect( fill = "transparent", colour = NA ), legend.box.background = ggplot2::element_rect( fill = "transparent", colour = NA ), legend.key = ggplot2::element_rect( fill = "transparent", colour = NA ), legend.position = "bottom", aspect.ratio = 1 ) + ggplot2::guides( fill = ggplot2::guide_legend( nrow = length(selectedVar), title.position = "top" ), shape = ggplot2::guide_legend( nrow = length(selectedVar), title.position = "top" ) ) } ## ----PCAFunction, time_it = TRUE---------------------------------------------- .PCAFunction(speRuv, speRuv_compute, "disease_region", selectedTypes, c(21,22,23,24), c("blue","dodgerblue1","black","grey75")) .PCAFunction(speQ3, speQ3_compute, "disease_region", selectedTypes, c(21,22,23,24), c("blue","dodgerblue1","black","grey75")) ## ----design, time_it = TRUE--------------------------------------------------- design <- model.matrix(~0+disease_region+ruv_W1+ruv_W2, data = colData(speRuv)) # Clean up column name colnames(design) <- gsub("disease_region", "", colnames(design)) ## ----convert, time_it = TRUE-------------------------------------------------- library(edgeR) dge <- SE2DGEList(speRuv) keep <- filterByExpr(dge, design) dge <- dge[keep, , keep.lib.sizes = FALSE] dge <- estimateDisp(dge, design = design, robust = TRUE) ## ----contrast, time_it = TRUE------------------------------------------------- # In case there are spaces selectedTypes_underscore <- gsub(" ", "_", selectedTypes) comparisons <- list() comparisons <- lapply( seq_len(choose(length(selectedTypes_underscore), 2)), function(i) { noquote( paste0( utils::combn(selectedTypes_underscore, 2, simplify = FALSE )[[i]][1], "-", utils::combn(selectedTypes_underscore, 2, simplify = FALSE )[[i]][2] ) ) } ) con <- makeContrasts( # Must use as.character() contrasts = as.character(unlist(comparisons)), levels = colnames(design) ) colnames(con) <- sub("-", "_vs_", colnames(con)) con ## ----limma, time_it = TRUE---------------------------------------------------- library(limma) block_by <- colData(speRuv)[["SlideName"]] v <- voom(dge, design) corfit <- duplicateCorrelation(v, design, block = block_by ) v2 <- voom(dge, design, block = block_by, correlation = corfit$consensus ) corfit2 <- duplicateCorrelation(v, design, block = block_by ) fit <- lmFit(v, design, block = block_by, correlation = corfit2$consensus ) fit_contrast <- contrasts.fit(fit, contrasts = con ) efit <- eBayes(fit_contrast, robust = TRUE) ## ----tables, time_it = TRUE--------------------------------------------------- # Keep track of how many comparisons there are numeric_vector <- seq_len(ncol(con)) new_list <- as.list(numeric_vector) # This adds n+1th index to new_list where n = ncol(con) # This index contains seq_len(ncol(con)) # ex. new_list[[7]] = 1 2 3 4 5 6 # This coefficient allows ANOVA-like comparison in toptable() if (length(selectedTypes) > 2) { new_list[[length(new_list) + 1]] <- numeric_vector } topTabDF <- lapply(new_list, function(i) { limma::topTable(efit, coef = i, number = Inf, p.value = 0.05, adjust.method = "BH", lfc = 1 ) %>% tibble::rownames_to_column(var = "Gene") }) # Adds names to topTabDF if (length(selectedTypes) > 2) { names(topTabDF) <- c( colnames(con), colnames(con) %>% stringr::str_split(., "_vs_") %>% unlist() %>% unique() %>% paste(., collapse = "_vs_") ) } else { names(topTabDF) <- colnames(con) } ## ----tableExample------------------------------------------------------------- colnames(con) head(topTabDF[[1]]) ## ----.volcanoFunction, echo = FALSE------------------------------------------- .volcanoFunction <- function(volcano, delabSize, maxOverlap, title, logFCcutoff, PvalCutoff, DnCol, notDEcol, UpCol) { logFC <- adj.P.Val <- deLab <- NULL ggplot2::ggplot( data = volcano, ggplot2::aes( x = logFC, y = -log10(adj.P.Val), col = de, label = deLab ) ) + ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::theme( axis.ticks = ggplot2::element_line(colour = "black"), panel.border = ggplot2::element_rect(colour = "black"), text = ggplot2::element_text(size = 16, color = "black"), title = ggplot2::element_text(size = 16, hjust = 0.5), axis.text = ggplot2::element_text(size = 16, color = "black"), axis.title = ggplot2::element_text(size = 16), plot.margin = grid::unit(c(1, 1, 1, 1), "mm"), plot.background = ggplot2::element_rect( fill = "transparent", colour = NA ), panel.background = ggplot2::element_rect( fill = "transparent", colour = NA ), panel.grid = ggplot2::element_blank(), legend.background = ggplot2::element_rect( fill = "transparent", colour = NA ), legend.box.background = ggplot2::element_rect( fill = "transparent", colour = NA ), legend.key = ggplot2::element_rect( fill = "transparent", colour = NA ), legend.position = "none" ) + ggplot2::geom_vline( xintercept = c(-(logFCcutoff), logFCcutoff), col = "black", linetype = 3 ) + ggplot2::geom_hline( yintercept = -log10(PvalCutoff), col = "black", linetype = 3 ) + ggrepel::geom_text_repel( size = delabSize, segment.colour = "black", segment.linetype = 3, data = volcano %>% dplyr::filter(logFC >= logFCcutoff | logFC <= -(logFCcutoff)), ggplot2::aes(label = deLab), min.segment.length = 0, max.overlaps = maxOverlap ) + ggplot2::scale_color_manual( values = c(DnCol, notDEcol, UpCol), breaks = c("DN", "NA", "UP") ) + ggplot2::xlab(expression(log[2] ~ fold ~ change)) + ggplot2::ylab(expression(-log[10] ~ italic(P) ~ value)) + ggplot2::theme(aspect.ratio = 1, rect = ggplot2::element_rect( fill = "transparent" )) + ggplot2::labs(title = title) } ## ----VolcanoFunction, time_it = TRUE------------------------------------------ volcanoDF <- lapply(seq_len(ncol(con)), function(i) { limma::topTable(efit, coef = i, number = Inf) %>% tibble::rownames_to_column(var = "Target.name") %>% dplyr::select("Target.name", "logFC", "adj.P.Val") %>% dplyr::mutate(de = ifelse(logFC >= 1 & adj.P.Val < 0.05, "UP", ifelse(logFC <= -(1) & adj.P.Val < 0.05, "DN", "NO" ) )) %>% dplyr::mutate( logFC_threshold = stats::quantile(abs(logFC), 0.99, na.rm = TRUE ), pval_threshold = stats::quantile(adj.P.Val, 0.01, na.rm = TRUE ), deLab = ifelse(abs(logFC) > logFC_threshold & adj.P.Val < pval_threshold & abs(logFC) >= 0.05 & adj.P.Val < 0.05, Target.name, NA) ) }) plots <- lapply(seq_along(volcanoDF), function(i) { .volcanoFunction( volcanoDF[[i]], 12, 5, colnames(con)[i], 1, 0.05, "Blue", "Grey", "Red" ) + ggplot2::xlim( -2, 2 ) + ggplot2::ylim( 0, 20 ) }) plots[1] ## ----heatmapSetup, time_it = TRUE--------------------------------------------- lcpmSubScaleTopGenes <- lapply(names(topTabDF), function(name) { columns <- stringr::str_split_1(name, "_vs_") %>% lapply(function(.) { which(SummarizedExperiment::colData(speRuv) %>% tibble::as_tibble() %>% dplyr::pull(disease_region) == .) }) %>% unlist() table <- SummarizedExperiment::assay(speRuv, 2)[ topTabDF[[name]] %>% dplyr::slice_head(n = 50) %>% dplyr::select(Gene) %>% unlist() %>% unname(), columns ] %>% data.frame() %>% t() %>% scale() %>% t() return(table) }) names(lcpmSubScaleTopGenes) <- names(topTabDF) columnSplit <- lapply(names(topTabDF), function(name) { columnSplit <- stringr::str_split_1(name, "_vs_") %>% lapply(function(.){ which( SummarizedExperiment::colData(speRuv) %>% tibble::as_tibble() %>% dplyr::select(disease_region) == . ) } ) %>% as.vector() %>% summary() %>% .[, "Length"] }) names(columnSplit) <- names(lcpmSubScaleTopGenes) ## ----heatmap, fig.height = 6, fig.width = 4, time_it = TRUE------------------- colFunc <- circlize::colorRamp2( c( -3, 0, 3 ), hcl_palette = "Inferno" ) heatmap <- lapply(names(lcpmSubScaleTopGenes), function(name) { ComplexHeatmap::Heatmap(lcpmSubScaleTopGenes[[name]], cluster_columns = FALSE, col = colFunc, heatmap_legend_param = list( border = "black", title = "Z score", title_gp = grid::gpar( fontsize = 12, fontface = "plain", fontfamily = "sans" ), labels_gp = grid::gpar( fontsize = 12, fontface = "plain", fontfamily = "sans" ), legend_height = grid::unit( 3 * as.numeric(30), units = "mm" ) ), top_annotation = ComplexHeatmap::HeatmapAnnotation( foo = ComplexHeatmap::anno_block( gp = grid::gpar(lty = 0, fill = "transparent"), labels = columnSplit[[name]] %>% names(), labels_gp = grid::gpar( col = "black", fontsize = 14, fontfamily = "sans", fontface = "bold" ), labels_rot = 0, labels_just = "center", labels_offset = grid::unit(4.5, "mm") ) ), border_gp = grid::gpar(col = "black", lwd = 0.2), row_names_gp = grid::gpar( fontfamily = "sans", fontface = "italic", fontsize = 10 ), show_column_names = FALSE, column_title = NULL, column_split = rep( LETTERS[seq_len(columnSplit[[name]] %>% length())], columnSplit[[name]] %>% unname() %>% as.numeric() ) ) }) names(heatmap) <- names(lcpmSubScaleTopGenes) heatmap[[1]]