## ----setup, message=FALSE----------------------------------------------------- library(DOTSeq) library(SummarizedExperiment, quietly = TRUE) ## ----dir---------------------------------------------------------------------- dir <- system.file("extdata", package = "DOTSeq") list.files(dir) ## ----read-in-count-file------------------------------------------------------- cnt <- read.table( file.path(dir, "featureCounts.cell_cycle_subset.txt.gz"), header = TRUE, comment.char = "#" ) names(cnt) <- gsub(".*(SRR[0-9]+).*", "\\1", names(cnt)) head(cnt) ## ----ref---------------------------------------------------------------------- gtf <- file.path(dir, "gencode.v47.orf_flattened_subset.gtf.gz") bed <- file.path(dir, "gencode.v47.orf_flattened_subset.bed.gz") ## ----condition-table---------------------------------------------------------- cond <- read.table(file.path(dir, "metadata.txt.gz")) # Ensure required column names for DOTSeq names(cond) <- c("run", "strategy", "replicate", "treatment", "condition") # Filter to include only chx-treated samples cond <- cond[cond$treatment == "chx", ] head(cond) ## ----dotseq, warning=FALSE---------------------------------------------------- # Create a DOTSeqDataSets object datasets <- DOTSeqDataSetsFromFeatureCounts( count_table = cnt, condition_table = cond, flattened_gtf = gtf, flattened_bed = bed, verbose = FALSE ) # Run the DOTSeq workflow d <- DOTSeq(datasets = datasets) ## ----dotseq-objects----------------------------------------------------------- show(d) ## ----posthoc------------------------------------------------------------------ results <- getContrasts(d, type = "interaction") results ## ----rowdata------------------------------------------------------------------ rowData(getDOU(d)) ## ----rowranges---------------------------------------------------------------- rowRanges(getDOU(d)) ## ----merge-------------------------------------------------------------------- ou <- results$DOU[results$DOU$contrast == "Mitotic_Cycling - Interphase", ] te <- results$DTE[results$DTE$contrast == "Mitotic_Cycling - Interphase", ] results <- merge(ou, te, by = c("orf_id", "contrast"), all = TRUE) ## ----plot-venn, eval = requireNamespace("eulerr", quietly = TRUE), fig.small = TRUE---- plotDOT(plot_type = "venn", results = results, force_new_device = FALSE) ## ----plot-composite-by-significance, fig.small = TRUE------------------------- plotDOT( plot_type = "composite", results = results, plot_params = list(color_by = "significance", legend_position = "bottomright"), force_new_device = FALSE ) ## ----plot-composite-by-orfs, fig.small = TRUE--------------------------------- plotDOT( plot_type = "composite", results = results, data = getDOU(d), plot_params = list(color_by = "orf_type", legend_position = "bottomright"), force_new_device = FALSE ) ## ----plot-volcano-by-significance, fig.small = TRUE--------------------------- mapping <- plotDOT( plot_type = "volcano", results = results, id_mapping = TRUE, plot_params = list(color_by = "significance", top_hits = 3, legend_position = "topright"), force_new_device = FALSE ) ## ----plot-volcano-by-orfs, fig.small = TRUE----------------------------------- plotDOT( plot_type = "volcano", results = results, data = getDOU(d), id_mapping = mapping, plot_params = list(color_by = "orf_type", top_hits = 3, legend_position = "topright"), force_new_device = FALSE ) ## ----plot-heatmap, warning=FALSE, fig.width=4, fig.height=6, fig.align="center", out.width="325px"---- plotDOT( plot_type = "heatmap", results = results, data = getDOU(d), id_mapping = mapping, plot_params = list(rank_by = "score", top_hits = 20), force_new_device = FALSE ) ## ----plot-usage-gene-symbol, eval = requireNamespace("ggplot2", quietly = TRUE) && requireNamespace("ggsignif", quietly = TRUE)---- orderby <- c("Mitotic_Cycling", "Mitotic_Arrest", "Interphase") id <- "TAF2" plotDOT( plot_type = "usage", data = getDOU(d), gene_id = id, id_mapping = mapping, plot_params = list(order_by = orderby), force_new_device = FALSE ) ## ----plot-usage-gene-id, eval = requireNamespace("ggsignif", quietly = TRUE)---- id <- "ENSG00000060339" plotDOT( plot_type = "usage", data = getDOU(d), gene_id = id, id_mapping = mapping, plot_params = list(order_by = orderby), force_new_device = FALSE ) ## ----get-orfs, eval = FALSE--------------------------------------------------- # library(withr) # # falink <- "https://ftp.ensembl.org/pub/release-78/fasta/drosophila_melanogaster/cdna/Drosophila_melanogaster.BDGP5.cdna.all.fa.gz" # gtflink <- "https://ftp.ensembl.org/pub/release-78/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP5.78.gtf.gz" # # annotation <- basename(gtflink) # sequences <- basename(falink) # # download.file(gtflink, destfile=annotation) # download.file(falink, destfile=sequences) # # # This will generate flattened ORF-level annotation # gr <- getORFs( # sequences = sequences, # annotation = annotation, # organism = "Drosophila melanogaster" # ) # # withr::defer(unlink(c(annotation, sequences))) ## ----pasilla, eval = FALSE---------------------------------------------------- # library(pasillaBamSubset) # library(GenomeInfoDb) # # bam_list <- c(untreated1_chr4(), untreated3_chr4()) # # # Keep only reads mapped to the exons of coding genes # temp_dir <- tempdir() # getExonicReads(gr = gr, seqlevels_style = "UCSC", bam_files = bam_list, bam_output_dir = temp_dir, coding_genes_only = TRUE) # # # Get the list of filtered BAM files # bam_list <- list.files( # path = temp_dir, # pattern = "*exonic.*", # full.names = TRUE # ) # # cnt <- countReads(gr = gr, bam_files = bam_list) # withr::defer(unlink(bam_list)) ## ----expand, eval = FALSE----------------------------------------------------- # set.seed(42) # # # Create count_table # # Create two replicates for each condition with random scaling # rna_treated_reps <- do.call(cbind, replicate(2, cnt[["untreated3_chr4.exonic.sorted.bam"]] * runif(nrow(cnt), min = 0.5, max = 2), simplify = FALSE)) # rna_control_reps <- do.call(cbind, replicate(2, cnt[["untreated3_chr4.exonic.sorted.bam"]] * runif(nrow(cnt), min = 0.1, max = 0.5), simplify = FALSE)) # ribo_treated_reps <- do.call(cbind, replicate(2, cnt[["untreated1_chr4.exonic.sorted.bam"]] * runif(nrow(cnt), min = 0.5, max = 2), simplify = FALSE)) # ribo_control_reps <- do.call(cbind, replicate(2, cnt[["untreated1_chr4.exonic.sorted.bam"]] * runif(nrow(cnt), min = 0.1, max = 0.5), simplify = FALSE)) # # # Combine and name columns # colnames(rna_treated_reps) <- paste0("rna_treated", 1:2) # colnames(rna_control_reps) <- paste0("rna_control", 1:2) # colnames(ribo_treated_reps) <- paste0("ribo_treated", 1:2) # colnames(ribo_control_reps) <- paste0("ribo_control", 1:2) # # cnt_expanded <- cbind( # rna_treated_reps, # rna_control_reps, # ribo_treated_reps, # ribo_control_reps # ) # # # Convert numbers to integer # cnt_expanded <- round(cnt_expanded) # storage.mode(cnt_expanded) <- "integer" # # rownames(cnt_expanded) <- rownames(cnt) # cnt_expanded <- as.data.frame(cnt_expanded) # # # # Create condition_table # # Sample names from cnt_expanded # sample_names <- colnames(cnt_expanded) # # # Define condition and strategy for each sample # condition <- c( # rep("treated", 2), # rep("control", 2), # rep("treated", 2), # rep("control", 2) # ) # strategy <- c(rep("RNA", 4), rep("Ribo", 4)) # # cond <- data.frame( # run = sample_names, # replicate = c(1,2), # condition = factor(condition, levels = c("control", "treated")), # strategy = factor(strategy, levels = c("RNA", "Ribo")) # ) # # # Create a DOTSeqDataSets object # d <- DOTSeqDataSetsFromSummarizeOverlaps( # count_table = cnt_expanded, # condition_table = cond, # annotation = gr # ) # # invisible(file.remove(bam_list)) # invisible(file.remove(sub("\\.gz$", ".sqlite", annotation))) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()