## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE))
#     install.packages("BiocManager")
# BiocManager::install("metabinR")

## ----eval=TRUE, message=FALSE-------------------------------------------------
options(java.parameters="-Xmx1500M")
unloadNamespace("metabinR")
library(metabinR)
library(data.table)
library(dplyr)
library(ggplot2)
library(gridExtra)
library(cvms)
library(sabre)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
abundances <- read.table(
    system.file("extdata", "distribution_0.txt",package = "metabinR"),
    col.names = c("genome_id", "abundance" ,"AB_id"))

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
reads.mapping <- fread(system.file("extdata", "reads_mapping.tsv.gz",
                                   package = "metabinR")) %>%
    merge(abundances[, c("genome_id","AB_id")], by = "genome_id") %>%
    arrange(anonymous_read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
assignments.AB <- abundance_based_binning(
        system.file("extdata","reads.metagenome.fasta.gz", package="metabinR"),
        numOfClustersAB = 2, 
        kMerSizeAB = 10, 
        dryRun = FALSE, 
        outputAB = "vignette") %>%
    arrange(read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
histogram.AB <- read.table("vignette__AB.histogram.tsv", header = TRUE)
ggplot(histogram.AB, aes(x=counts, y=frequency)) + 
    geom_area() +
    labs(title = "kmer counts histogram") + 
    theme_bw()

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
eval.AB.cvms <- cvms::evaluate(data = data.frame(
                                    prediction=as.character(assignments.AB$AB),
                                    target=as.character(reads.mapping$AB_id),
                                    stringsAsFactors = FALSE),
                                target_col = "target",
                                prediction_cols = "prediction",
                                type = "binomial"
)
eval.AB.sabre <- sabre::vmeasure(as.character(assignments.AB$AB),
                                as.character(reads.mapping$AB_id))

p <- cvms::plot_confusion_matrix(eval.AB.cvms) +
        labs(title = "Confusion Matrix", 
                x = "Target Abundance Class", 
                y = "Predicted Abundance Class")
tab <- as.data.frame(
    c(
        Accuracy =  round(eval.AB.cvms$Accuracy,4),
        Specificity =  round(eval.AB.cvms$Specificity,4),
        Sensitivity =  round(eval.AB.cvms$Sensitivity,4),
        Fscore =  round(eval.AB.cvms$F1,4),
        Kappa =  round(eval.AB.cvms$Kappa,4),
        Vmeasure = round(eval.AB.sabre$v_measure,4)
    )
)
grid.arrange(p, ncol = 1)
knitr::kable(tab, caption = "AB binning evaluation", col.names = NULL)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
reads.mapping <- fread(
        system.file("extdata", "reads_mapping.tsv.gz",package = "metabinR")) %>%
    arrange(anonymous_read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
assignments.CB <- composition_based_binning(
        system.file("extdata","reads.metagenome.fasta.gz",package ="metabinR"),
        numOfClustersCB = 10, 
        kMerSizeCB = 4, 
        dryRun = TRUE, 
        outputCB = "vignette") %>%
    arrange(read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
eval.CB.sabre <- sabre::vmeasure(as.character(assignments.CB$CB),
                                as.character(reads.mapping$genome_id))
tab <- as.data.frame(
    c(
        Vmeasure = round(eval.AB.sabre$v_measure,4),
        Homogeneity = round(eval.AB.sabre$homogeneity,4),
        Completeness = round(eval.AB.sabre$completeness,4)
    )
)
knitr::kable(tab, caption = "CB binning evaluation", col.names = NULL)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
reads.mapping <- fread(
        system.file("extdata", "reads_mapping.tsv.gz",package = "metabinR")) %>%
    arrange(anonymous_read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
assignments.ABxCB <- hierarchical_binning(
        system.file("extdata","reads.metagenome.fasta.gz",package ="metabinR"),
        numOfClustersAB = 2,
        kMerSizeAB = 10,
        kMerSizeCB = 4, 
        dryRun = TRUE, 
        outputC = "vignette") %>%
    arrange(read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
eval.ABxCB.sabre <- sabre::vmeasure(as.character(assignments.ABxCB$ABxCB),
                                    as.character(reads.mapping$genome_id))
tab <- as.data.frame(
    c(
        Vmeasure = round(eval.ABxCB.sabre$v_measure,4),
        Homogeneity = round(eval.ABxCB.sabre$homogeneity,4),
        Completeness = round(eval.ABxCB.sabre$completeness,4)
    )
)
knitr::kable(tab, caption = "ABxCB binning evaluation", col.names = NULL)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
unlink("vignette__*")

## ----setup--------------------------------------------------------------------
utils::sessionInfo()