## ----echo=FALSE, warning=FALSE, message=FALSE, out.width="100%"---------------
knitr::include_graphics(system.file("scATAC-seq_workflow.png", package = "scPipe"))

## ----message=FALSE------------------------------------------------------------
library(scPipe)
data.folder <- system.file("extdata", package = "scPipe", mustWork = TRUE)


## -----------------------------------------------------------------------------
output_folder <- paste0(tempdir(), "/scPipeATACVignette")

## -----------------------------------------------------------------------------
# data fastq files
r1      <- file.path(data.folder, "small_chr21_R1.fastq.gz") 
r2      <- file.path(data.folder, "small_chr21_R3.fastq.gz") 

# barcodes in fastq format
barcode_fastq      <- file.path(data.folder, "small_chr21_R2.fastq.gz") 

# barcodes in .csv format
barcode_1000       <- file.path(data.folder, "chr21_modified_barcode_1000.csv")


## -----------------------------------------------------------------------------
sc_atac_trim_barcode (r1            = r1, 
                      r2            = r2, 
                      bc_file       = barcode_fastq, 
                      rmN           = FALSE,
                      rmlow         = FALSE,
                      output_folder = output_folder)

## ----eval=FALSE---------------------------------------------------------------
#  sc_atac_trim_barcode (r1            = r1,
#                        r2            = r2,
#                        bc_file       = barcode_1000,
#                        id1_st        = -1,
#                        id1_len       = -1,
#                        id2_st        = -1,
#                        id2_len       = -10,
#                        output_folder = output_folder,
#                        rmN           = TRUE)

## -----------------------------------------------------------------------------
demux_r1        <- file.path(output_folder, "demux_completematch_small_chr21_R1.fastq.gz")
demux_r2        <- file.path(output_folder, "demux_completematch_small_chr21_R3.fastq.gz")
reference = file.path(data.folder, "small_chr21.fa")


aligned_bam <- sc_aligning(ref = reference, 
                R1 = demux_r1, 
                R2 = demux_r2, 
                nthreads  = 12,
                output_folder = output_folder)

## -----------------------------------------------------------------------------

sorted_tagged_bam <- sc_atac_bam_tagging (inbam = aligned_bam, 
                       output_folder =  output_folder, 
                       bam_tags      = list(bc="CB", mb="OX"), 
                       nthreads      =  12)


## ----eval=FALSE---------------------------------------------------------------
#  
#  removed <- sc_atac_remove_duplicates(sorted_tagged_bam,
#                            output_folder = output_folder)
#  if (!isFALSE(removed))
#    sorted_tagged_bam <- removed
#  

## -----------------------------------------------------------------------------
sc_atac_create_fragments(inbam = sorted_tagged_bam,
                         output_folder = output_folder)


## ----eval=FALSE---------------------------------------------------------------
#  # CHECK IF THE .narrowPeak file is small enough to include in the package,
#  # otherwise, we need to make this basilisk call work??
#  sc_atac_peak_calling(inbam = sorted_tagged_bam,
#                       ref = reference,
#                       genome_size = NULL,
#                       output_folder = output_folder)
#  

## ----eval=FALSE---------------------------------------------------------------
#  features          <- file.path(output_folder, "NA_peaks.narrowPeak")
#  
#  sc_atac_feature_counting (fragment_file = file.path(output_folder, "fragments.bed"),
#                            feature_input = features,
#                            bam_tags      = list(bc="CB", mb="OX"),
#                            feature_type  = "peak",
#                            organism      = "hg38",
#                            yieldsize     = 1000000,
#                            exclude_regions = TRUE,
#                            output_folder = output_folder,
#                            fix_chr       = "none"
#                            )

## ----eval=FALSE---------------------------------------------------------------
#  reference       <- file.path(data.folder, "small_chr21.fa")
#  sc_atac_feature_counting (fragment_file = file.path(output_folder, "fragments.bed"),
#                            feature_input = reference,
#                            bam_tags      = list(bc="CB", mb="OX"),
#                            feature_type  = "genome_bin",
#                            organism      = "hg38",
#                            cell_calling  = FALSE,
#                            yieldsize     = 1000000,
#                            exclude_regions = TRUE,
#                            output_folder = output_folder,
#                            fix_chr       = "none"
#                            )
#  

## ----eval=FALSE---------------------------------------------------------------
#  features          <- file.path(output_folder, "NA_peaks.narrowPeak")

## ----eval=TRUE, echo=FALSE----------------------------------------------------
features          <- file.path(data.folder, "NA_peaks.narrowPeak")

## -----------------------------------------------------------------------------
sc_atac_feature_counting (fragment_file = file.path(output_folder, "fragments.bed"),
                          feature_input = features, 
                          bam_tags      = list(bc="CB", mb="OX"), 
                          feature_type  = "peak",
                          organism      = "hg38",
                          cell_calling  = "filter",
                          min_uniq_frags = 0,
                          min_frac_peak = 0,
                          min_frac_promoter = 0,
                          yieldsize     = 1000000,
                          exclude_regions = TRUE,
                          output_folder = output_folder,
                          fix_chr       = "none"
                          )

## -----------------------------------------------------------------------------
feature_matrix <- readRDS(file.path(output_folder, "unfiltered_feature_matrix.rds"))
dplyr::glimpse(feature_matrix)

sparseM <- readRDS(file.path(output_folder, "sparse_matrix.rds"))
dplyr::glimpse(sparseM)

## -----------------------------------------------------------------------------
sce <- sc_atac_create_sce(input_folder = output_folder,
                   organism     = "hg38",
                   feature_type = "peak",
                   pheno_data   = NULL,
                   report       = FALSE)

## ----echo=FALSE, warning=FALSE, message=FALSE, out.width="100%"---------------
knitr::include_graphics(system.file("report_example.png", package = "scPipe"))

## ----eval = FALSE-------------------------------------------------------------
#  data.folder <- system.file("extdata", package = "scPipe", mustWork = TRUE)
#  output_folder <- tempdir()
#  sce <- sc_atac_pipeline(r1 = file.path(data.folder, "small_chr21_R1.fastq.gz"),
#                          r2 = file.path(data.folder, "small_chr21_R3.fastq.gz"),
#                          barcode_fastq = file.path(data.folder, "small_chr21_R2.fastq.gz"),
#                          organism = "hg38",
#                          reference = file.path(data.folder, "small_chr21.fa"),
#                          feature_type = "peak",
#                          remove_duplicates = FALSE,
#                          min_uniq_frags = 0, # set to 0 as the sample dataset only has a small number of reads
#                          min_frac_peak = 0,
#                          min_frac_promoter = 0,
#                          output_folder = output_folder)

## ----echo=FALSE, results='hide'-----------------------------------------------
# cleanup tempfiles
unlink(output_folder, recursive=TRUE)

## -----------------------------------------------------------------------------
sessionInfo()