## ----setup,include=FALSE------------------------------------------------------ knitr::opts_chunk$set(echo = TRUE) ## ----pkg-overview, echo=FALSE,out.width="120%",fig.align='center',fig.cap="tidyexposomics pipeline overview. QC, association testing, integration, and enrichment steps on a MultiAssayExperiment."---- knitr::include_graphics("./overview.png") ## ----install, eval=FALSE------------------------------------------------------ # # install the package # BiocManager::install("tidyexposomics") # # # load the package # library(tidyexposomics) ## ----command-str, echo=FALSE,fig.align='center',fig.cap= "Command naming conventions used throughout `tidyexposomics`. More complex pipelines begin with the `run_` prefix, visualizations with `plot_`, and data processing with `filter_`, `transform_`, `pivot_`, or `extract_` prefixes."---- knitr::include_graphics("./command_str.png") ## ----ontology-app,eval=FALSE-------------------------------------------------- # # Launch the shiny app to annotate exposure variables # shiny::runApp(build_ont_annot_app()) ## ----ontology-app-img, echo=FALSE,out.width="150%",fig.align='center',fig.cap="Screenshot of the ontology annotation app. The sidebar has an upload button to load your exposure metadata file. The main panel displays the uploaded exposure metadata where users can select variables to annotate and categorize. The sidebar also contains buttons to apply annotations/categorizations and download the annotated file."---- knitr::include_graphics("./ont_annot.png") ## ----mae-overview,echo=FALSE,fig.align='center',fig.cap="Overview of the MultiAssayExperiment structure linking samples, assays, and metadata."---- knitr::include_graphics("./mae_rep.png") ## ----load-libraries,message=FALSE,warning=FALSE------------------------------- # Load Libraries library(tidyverse) library(tidyexposomics) ## ----load-data---------------------------------------------------------------- # Load example data data("tidyexposomics_example") # Create exposomic set object expom <- create_exposomicset( codebook = tidyexposomics_example$annotated_cb, exposure = tidyexposomics_example$meta, omics = list( "Gene Expression" = tidyexposomics_example$exp_filt, "Methylation" = tidyexposomics_example$methyl_filt ), row_data = list( "Gene Expression" = tidyexposomics_example$exp_fdata, "Methylation" = tidyexposomics_example$methyl_fdata ) ) ## ----exp-vars----------------------------------------------------------------- # Grab exposure variables exp_vars <- tidyexposomics_example$annotated_cb |> filter(category %in% c( "aerosol", "main group molecular entity", "polyatomic entity" )) |> pull(variable) |> as.character() ## ----missing-bar,fig.height=3,fig.width=6,fig.cap="Count of features with missing data above a 0% missingness threshold by data layer. Exposure data have variables with missingness."---- # Plot the missingness summary plot_missing( exposomicset = expom, plot_type = "summary", threshold = 0 ) ## ----missing-bar-lollipop,fig.height=2,fig.width=4,fig.cap= "Percent missingness per exposure variable. Parity, `h_parity_None`, shows the highest missingness."---- # Plot missing variables withing exposure group plot_missing( exposomicset = expom, plot_type = "lollipop", threshold = 0, layers = "Exposure" ) ## ----impute-missing----------------------------------------------------------- # Impute missing values expom <- run_impute_missing( exposomicset = expom, exposure_impute_method = "missforest", exposure_cols = exp_vars ) ## ----filter-omics------------------------------------------------------------- # filter omics layers by variance and expression # Methylation filtering expom <- filter_omics( exposomicset = expom, method = "variance", assays = "Methylation", assay_name = 1, min_var = 0.05 ) # Gene expression filtering expom <- filter_omics( exposomicset = expom, method = "expression", assays = "Gene Expression", assay_name = 1, min_value = 1, min_prop = 0.3 ) ## ----normality-check---------------------------------------------------------- # Check variable normality expom <- run_normality_check( exposomicset = expom, action = "add" ) ## ----trasform-vars------------------------------------------------------------ # Transform variables expom <- transform_exposure( exposomicset = expom, transform_method = "boxcox_best", exposure_cols = exp_vars ) ## ----norm-plot,fig.width= 4,fig.height= 4, fig.cap= "Normality status of numeric exposure variables after Box-Cox transformation."---- # Examine normality summary plot_normality_summary( exposomicset = expom, transformed = TRUE ) ## ----pca---------------------------------------------------------------------- # Perform principal component analysis expom <- run_pca( exposomicset = expom, log_trans_exp = TRUE, log_trans_omics = TRUE, action = "add" ) ## ----pca-plot, fig.align='center', fig.width= 9, fig.height= 7,fig.cap= "PCA of sample and feature space with sample outlier detection."---- # Plot the PCA plot of sample and feature space plot_pca(exposomicset = expom) ## ----filt-outliers------------------------------------------------------------ # Filter out sample outliers expom <- filter_sample_outliers( exposomicset = expom, outliers = c("s1231") ) ## ----corr-pcs----------------------------------------------------------------- # Run the correlation analysis expom <- run_correlation( exposomicset = expom, feature_type = "pcs", exposure_cols = exp_vars, n_pcs = 20, action = "add", correlation_cutoff = 0, pval_cutoff = 1 ) ## ----plot-pc-corr,fig.width= 7,fig.height= 2.2,fig.cap= "Correlation heatmap of exposures versus principal components. Child lead levels (`hs_pb_c_Log2`) and maternal BPA levels (`hs_bpa_madj_Log2`) are associated with the most principal components."---- # Plot the correlation tile plot plot_correlation_tile( exposomicset = expom, feature_type = "pcs", pval_cutoff = 0.05 ) ## ----exp-sum, fig.width=6, fig.height=4--------------------------------------- # Summarize exposure data run_summarize_exposures( exposomicset = expom, action = "get", exposure_cols = exp_vars ) |> head() ## ----plot-aerosol, fig.width=4, fig.height=3.5, fig.cap="Distribution of aerosol exposures by sex."---- # Plot aerosol exposure distributions by sex plot_exposures( exposomicset = expom, group_by = "e3_sex_None", exposure_cat = "aerosol", plot_type = "boxplot", ylab = "Values", title = "Aerosol Exposure by Sex" ) ## ----sample-clusters---------------------------------------------------------- # Sample clustering expom <- run_cluster_samples( exposomicset = expom, exposure_cols = exp_vars, clustering_approach = "dynamic", action = "add" ) ## ----plot-sample-clusters, fig.height=4, fig.width=12, fig.cap="Sample clustering heatmap using exposure profiles (z-scored). Clusters appear mostly driven by aerosol exposure during pregnancy."---- # Plot the sample clusters plot_sample_clusters( exposomicset = expom, exposure_cols = exp_vars ) ## ----correlate-exposures------------------------------------------------------ # Run correlation analysis expom <- run_correlation( exposomicset = expom, feature_type = "exposures", action = "add", exposure_cols = exp_vars, correlation_cutoff = 0.3 ) ## ----exposure-circos-corr, fig.height=10,fig.width=10,fig.align='center',fig.cap="Circos view of exposure-exposure correlations (threshold 0.3)."---- # Plot exposure correlation circos plot plot_circos_correlation( exposomicset = expom, feature_type = "exposures", corr_threshold = 0.3, exposure_cols = exp_vars ) ## ----assoc-exposures---------------------------------------------------------- # Perform ExWAS expom <- run_association( exposomicset = expom, source = "exposures", outcome = "hs_asthma", feature_set = exp_vars, action = "add", family = "binomial" ) ## ----plot-exposure-assoc,fig.height=3, fig.width=5, fig.align='center',fig.cap="ExWAS associations of exposures with asthma status. No exposures are significantly associated (P < 0.05) with asthma status."---- # Plot the association forest plot plot_association( exposomicset = expom, source = "exposures", terms = exp_vars, filter_thresh = 0.05, filter_col = "p.value", r2_col = "r2" ) ## ----associate-top-omics,fig.height=3,fig.width=4.5,fig.align='center'-------- # Perform ExWAS expom <- run_association( exposomicset = expom, outcome = "hs_asthma", source = "omics", top_n = 500, action = "add", family = "binomial" ) ## ----plot-manhattan,fig.height=7, fig.width=5, fig.align='center',fig.cap="Manhattan plot of omics-wide associations with asthma status."---- # Plot the manhattan plot plot_manhattan( exposomicset = expom, min_per_cat = 0, feature_col = "feature_clean", vars_to_label = c( "TC19001180.hg.1", "TC01000565.hg.1", "cg01937701", "hs_mibp_cadj_Log2" ), panel_sizes = c(1, 3, 1, 3, 1, 1, 1), facet_angle = 0 ) ## ----calc-exposome, results='hide'-------------------------------------------- # determine which aerosol variables to use aerosols <- c("h_pm25_ratio_preg_None", "h_pm10_ratio_preg_None") # Create exposome scores expom <- expom |> run_exposome_score( exposure_cols = aerosols, score_type = "median", score_column_name = "exposome_median_score" ) |> run_exposome_score( exposure_cols = aerosols, score_type = "pca", score_column_name = "exposome_pca_score" ) |> run_exposome_score( exposure_cols = aerosols, score_type = "irt", score_column_name = "exposome_irt_score" ) |> run_exposome_score( exposure_cols = aerosols, score_type = "quantile", score_column_name = "exposome_quantile_score" ) |> run_exposome_score( exposure_cols = aerosols, score_type = "var", score_column_name = "exposome_var_score" ) ## ----associate-exposome-score------------------------------------------------- # Associate exposome scores with outcome expom <- run_association( exposomicset = expom, outcome = "hs_asthma", source = "exposures", feature_set = c( "exposome_median_score", "exposome_pca_score", "exposome_irt_score", "exposome_quantile_score", "exposome_var_score" ), action = "add", family = "binomial" ) ## ----plot-exposome-scores, fig.height=2.5, fig.width=5,fig.cap="Associations of aerosol exposome scores with asthma status. The variance-based score has the strongest association with asthma status."---- # Plot the association forest plot plot_association( exposomicset = expom, source = "exposures", terms = c( "exposome_median_score", "exposome_pca_score", "exposome_irt_score", "exposome_quantile_score", "exposome_var_score" ), filter_col = "p.value", filter_thresh = 0.05, r2_col = "r2" ) ## ----diff-abundance,results='hide'-------------------------------------------- # Run differential abundance analysis expom <- run_differential_abundance( exposomicset = expom, formula = ~hs_asthma, method = "limma_trend", scaling_method = "none", action = "add" ) ## ----volcano-plot,fig.height=3.5, fig.width=6, fig.align='center',fig.cap="Volcano plot of differentially abundant features across omics layers."---- # Plot the volcano plot plot_volcano( exposomicset = expom, top_n_label = 3, feature_col = "feature_clean", logFC_thresh = log2(1), pval_thresh = 0.05, pval_col = "P.Value", logFC_col = "logFC", nrow = 1 ) ## ----sensitivity-analysis,results='hide'-------------------------------------- # Perform Sensitivity Analysis expom <- run_sensitivity_analysis( exposomicset = expom, base_formula = ~hs_asthma, methods = c("limma_trend"), scaling_methods = c("none"), pval_col = "P.Value", logfc_col = "logFC", logFC_threshold = log2(1), pval_threshold = 0.05, stability_metric = "stability_score", bootstrap_n = 10, action = "add" ) ## ----plot-stability-score,fig.height=2.5, fig.width=8, fig.cap="Sensitivity summary highlighting robust features by stability score."---- # Plot distriubtion of stability scores plot_sensitivity_summary( exposomicset = expom, stability_score_thresh = 0.34, stability_metric = "stability_score" ) ## ----multiomics-int----------------------------------------------------------- # Perform multi-omics integration expom <- run_multiomics_integration( exposomicset = expom, method = "DIABLO", n_factors = 5, outcome = "hs_asthma", action = "add" ) ## ----factor-summary, fig.height=2, fig.width=5, fig.align='center',fig.cap="Contribution of each omics layer to DIABLO latent factors."---- # Plot factor summary plot_factor_summary( exposomicset = expom, midpoint = 2.5 ) ## ----associate-factors-------------------------------------------------------- # Identify factors that are associated with the outcome expom <- run_association( exposomicset = expom, source = "factors", outcome = "hs_asthma", feature_set = exp_vars, action = "add", family = "binomial" ) ## ----plot-associate-factors,fig.width=5, fig.height=3,fig.cap="Associations of latent factors with asthma after covariate adjustment."---- # Plot the forest plot plot_association( exposomicset = expom, source = "factors", filter_col = "p_adjust", filter_thresh = 0.05, r2_col = "r2" ) ## ----extract-top-features----------------------------------------------------- # Extract top features that contribute to a factor expom <- extract_top_factor_features( exposomicset = expom, method = "percentile", pval_col = "p_adjust", pval_thresh = 0.05, percentile = 0.7, action = "add" ) ## ----plot-top-features,fig.height=8,fig.width=6,fig.align='center',fig.cap="Top-loading features per factor and omics layer."---- # Plot the top factor features plot_top_factor_features( exposomicset = expom, top_n = 5, feature_col = "feature_clean" ) ## ----omics-exp-corr----------------------------------------------------------- # Grab top common factor features and ensure # feature is renamed to variable for the variable_map top_factor_features <- expom |> extract_results(result = "multiomics_integration") |> pluck("top_factor_features") |> dplyr::select(variable = feature, exp_name) # Correlate top factor features with exposures # Perform correlation analysis between # factor features and exposures expom <- run_correlation( exposomicset = expom, feature_type = "omics", variable_map = top_factor_features, exposure_cols = exp_vars, action = "add", correlation_cutoff = 0.3, pval_cutoff = 0.05, cor_pval_column = "p.value" ) # Perform correlation analysis between factor features expom <- run_correlation( exposomicset = expom, feature_type = "omics", variable_map = top_factor_features, exposure_cols = exp_vars, feature_cors = TRUE, action = "add", correlation_cutoff = 0.3, pval_cutoff = 0.05, cor_pval_column = "p.value" ) ## ----exp-omic-corr-summary,fig.height=7,fig.width=12,fig.align='center',fig.cap="Summary of exposure-omics and feature-feature associations."---- # Plot correlation summary plot_correlation_summary( exposomicset = expom, mode = "summary", feature_type = "omics" ) ## ----exposure-shared-corr, fig.height=10,fig.width=10,fig.cap="Circos of exposures sharing correlated molecular features."---- # Plot the circos plot of shared correlated features plot_circos_correlation( exposomicset = expom, feature_type = "omics", shared_cutoff = 1, midpoint = 1.5 ) ## ----create-network, fig.width=10,fig.height=7-------------------------------- # Create omics within feature correlation network expom <- run_create_network( exposomicset = expom, feature_type = "omics_feature_cor", action = "add" ) # Create omics-exposure correlation network expom <- run_create_network( exposomicset = expom, feature_type = "omics", action = "add" ) ## ----plot-network,fig.width=7, fig.height=7,fig.cap="Bipartite network of exposures and correlated omics features where node labels reflect most central nodes."---- # Plot the exposure-omics network # Setting the seed so that the graph layout is consistent # given that layouts can have random starting points set.seed(42) plot_network( exposomicset = expom, network = "omics", top_n_nodes = 50, include_stats = TRUE, cor_thresh = 0.2, node_color_var = "group", label = TRUE, label_top_n = 5 ) ## ----exposure-impact---------------------------------------------------------- # Run exposure-omics impact analysis expom <- run_exposure_impact( exposomicset = expom, feature_type = "omics" ) ## ----plot-exposure-impact,fig.width=5.5,fig.height=5, fig.cap= "Network centrality metrics of omics features associated with each exposure."---- # Plot the exposure impact summary plot_exposure_impact( exposomicset = expom, feature_type = "omics", min_per_group = 10 ) ## ----enrichment--------------------------------------------------------------- # Run enrichment analysis on factor features correlated with exposures expom <- run_enrichment( exposomicset = expom, feature_type = c("omics_cor"), feature_col = "feature_clean", db = c("GO"), species = "goa_human", fenr_col = "gene_symbol", padj_method = "none", pval_thresh = 0.1, min_set = 1, max_set = 800, clustering_approach = "diana", action = "add" ) ## ----enrichment-summary,fig.width=14,fig.height=5, fig.cap="Summary of enriched GO terms grouped by overlap and exposure category."---- # Plot the summary diagram plot_enrichment( exposomicset = expom, feature_type = "omics_cor", plot_type = "summary" ) ## ----enrichment-dotplot,fig.height=11, fig.width=9, fig.cap="Dotplot of top enriched GO terms by omics layer and exposure category."---- # Plot a dotplot of terms plot_enrichment( exposomicset = expom, feature_type = "omics_cor", plot_type = "dotplot", top_n = 15, add_top_genes = TRUE, top_n_genes = 5 ) ## ----enrichment-term-network, fig.width=6, fig.height=5, fig.cap="Network of enriched GO terms connected by shared genes."---- # Plot the term network plot # Setting a seed so that the plot layout is consistent set.seed(42) plot_enrichment( exposomicset = expom, feature_type = "omics_cor", plot_type = "network", label_top_n = 1 ) ## ----enrichment-heatmap, fig.height=3, fig.width=8, fig.cap="Heatmap of genes driving enriched GO terms (Group 1) with log2 fold-change overlay."---- # Plot a heatmap of genes and corresponding GO terms plot_enrichment( exposomicset = expom, feature_type = "omics_cor", go_groups = "Group 1", plot_type = "heatmap", heatmap_fill = TRUE, feature_col = "feature_clean" ) ## ----enrichment-cnetplot, fig.width=6, fig.height=6, fig.cap="Cnet plot linking enriched terms to contributing genes (Group 1)."---- # Plot the gene-term network # Setting a seed so that the plot layout is consistent set.seed(42) plot_enrichment( exposomicset = expom, feature_type = "omics_cor", go_groups = "Group 1", plot_type = "cnet", feature_col = "feature_clean" ) ## ----pivot-sample------------------------------------------------------------- # Pivot sample data to a tibble expom |> pivot_sample() |> head() ## ----pivot-sample-example----------------------------------------------------- # Count the number of asthma cases by sex status expom |> pivot_sample() |> group_by(hs_asthma, e3_sex_None) |> reframe(n = n()) ## ----pivot-feature------------------------------------------------------------ # Pivot feature data to a tibble expom |> pivot_feature() |> head() ## ----pivot-feature-example---------------------------------------------------- # Count the number of features per omic layer expom |> pivot_feature() |> group_by(.exp_name) |> summarise(n = n()) ## ----pivot-exp---------------------------------------------------------------- # Pivot experiment data to a tibble expom |> pivot_exp( exp_name = "Gene Expression", features = "TC01004453.hg.1" ) |> head() ## ----pivot-exp-example, fig.height=4, fig.width=4, fig.align='center',fig.cap="IL23R gene expression levels (probe TC01004453.hg.1) by asthma status."---- # Plot expression of IL23R expom |> pivot_exp( exp_name = "Gene Expression", features = "TC01004453.hg.1" ) |> ggplot(aes( x = as.character(hs_asthma), y = log2(counts + 1), color = as.character(hs_asthma), fill = as.character(hs_asthma) )) + geom_boxplot(alpha = 0.5) + geom_jitter(alpha = 0.1) + ggpubr::geom_pwc(label = "{p.adj.format}{p.adj.signif}") + theme_minimal() + ggpubr::rotate_x_text(angle = 45) + ggsci::scale_color_cosmic() + ggsci::scale_fill_cosmic() + labs( x = "", y = expression(Log[2] * "Abd."), fill = "Asthma Status", color = "Asthma Status" ) ## ----pipeline-summary--------------------------------------------------------- # Run the pipeline summary expom |> run_pipeline_summary(console_print = TRUE, include_notes = TRUE) ## ----save-results------------------------------------------------------------- # Save results extract_results_excel( exposomicset = expom, file = tempfile(), result_types = "all" ) ## ----session_info------------------------------------------------------------- sessionInfo()