vignettes/articles/ibd_enrichment.Rmd
ibd_enrichment.Rmd
library(bugphyzzAnalyses)
library(bugphyzz)
library(curatedMetagenomicData)
library(dplyr)
library(EnrichmentBrowser)
library(DT)
library(purrr)
library(tibble)Use the HMP_2019_bidmdb study in cMD:
dataset_name <- "HallAB_2017.relative_abundance"
tse <- curatedMetagenomicData(
pattern = dataset_name,
dryrun = FALSE, rownames = 'NCBI',
counts = TRUE
)[[1]]
colData(tse)$GROUP <- ifelse(colData(tse)$study_condition == 'control', 0, 1)
## Filter taxa with at least 1 count as abundance in at least 20% of samples
min_n_samples <- round(ncol(tse) * 0.2)
tse_subset <- tse[rowSums(assay(tse) >= 1) >= min_n_samples,]
tse_subset
#> class: TreeSummarizedExperiment
#> dim: 117 259
#> metadata(1): agglomerated_by_rank
#> assays(1): relative_abundance
#> rownames(117): 853 820 ... 1262992 329854
#> rowData names(7): superkingdom phylum ... genus species
#> colnames(259): p8582_mo1 p8582_mo10 ... SKST041_2_G103027
#> SKST041_3_G103028
#> colData names(25): study_name subject_id ... SCCAI GROUP
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):
#> rowLinks: a LinkDataFrame (117 rows)
#> rowTree: 1 phylo tree(s) (10430 leaves)
#> colLinks: NULL
#> colTree: NULL
bp <- importBugphyzz()
aer <- bp$aerophilicity
sp_sigs <- makeSignatures(dat = aer, tax_id_type = 'NCBI_ID', tax_level = 'species') |>
discard(is.null)DA:
assay(tse_subset) <- assay(tse_subset) + 1
edger <- deAna(
expr = tse_subset, de.method = 'edgeR', padj.method = 'BH',
filter.by.expr = FALSE
)
dim(edger)
#> [1] 117 259Almost all bacteria found in the SummarizedExperiment are anaerobic. Maybe a different approach would be more convenient, like merging several datasets in CuratedMetagenomicData.
edger <- limmaVoom(edger)
gsea <- sbea(
method = 'gsea', se = edger, gs = sp_sigs, alpha = 0.1, perm = 1000,
padj.method = 'BH'
)
gsea_res <- as.data.frame(gsea$res.tbl)
caption1 <- c(
'Table 1. GSEA results comparing the gut microbiome of subjects with
a normal gut micriobiome (healthy controls) and subjects with IBD.
ES = Enrichment score, NES = Normalized Enrichment Score. Positive and
negative scores (ES and NES) indicate enrichment in the IBD and
healthy control samples, respectively. Bug sets marked with an asterik (*)
indicate significant enrichment results (adjusted p-value < 0.1).
Benjamini & Hochberg (BH) was used for P value adjustment.'
)
datatable(
data = gsea_res,
filter = 'top',
rownames = FALSE,
extensions = 'Buttons',
options = list(
dom = 'Bft',
buttons = list('copy', 'print'),
iDisplayLength = 10,
keys = TRUE,
autoWidth = TRUE
),
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: left;',
caption1
)
)
biosis <- importBiosis()
row_data <- as.data.frame(rowData(tse_subset))
row_data <- rownames_to_column(row_data, var = 'rowname')
# row_data <- modify_at(row_data, 'genus', ~ as.character(.x))
row_data <- left_join(row_data, biosis, by = c('genus' = 'GenusID'))
biosis_sigs <- split(row_data, factor(row_data$Attribute))
biosis_sigs <- map(biosis_sigs, ~ unique(.x$rowname))
biosis_sigs
#> $anaerobic
#> [1] "820" "301301" "357276" "39491" "40520" "418240" "28116"
#> [8] "674529" "360807" "371601" "47678" "74426" "818" "1262986"
#> [15] "821" "1262756" "1262989" "301302" "166486" "39492" "1263005"
#> [22] "1535" "165179" "147207" "46506" "33035" "53443" "29466"
#> [29] "39778" "1911679" "39777" "218538" "1532" "1262948" "817"
#> [36] "338188" "1262992" "329854"
#>
#> $facultatively_anaerobic
#> [1] "39491" "1304" "1262986" "729" "1318" "1262989" "39492"
#> [8] "28026" "1263005" "1535" "1308" "216816" "1680" "1681"
#> [15] "1358" "1262992"No aerobic taxa, which I think should be expected?
gsea2 <- sbea(
method = 'gsea', se = edger, gs = biosis_sigs, alpha = 0.1, perm = 1000,
padj.method = 'BH'
)
gsea2_res <- as.data.frame(gsea2$res.tbl)
caption2 <- c(
'Table 2. GSEA results comparing the gut microbiome of subjects with
a normal gut micriobiome (healthy controls) and subjects with IBD using
biosis (nychanes) signatures.
ES = Enrichment score, NES = Normalized Enrichment Score. Positive and
negative scores (ES and NES) indicate enrichment in the IBD and
healthy control samples, respectively. Bug sets marked with an asterik (*)
indicate significant enrichment results (adjusted p-value < 0.1).
Benjamini & Hochberg (BH) was used for P value adjustment.'
)
datatable(
data = gsea2_res,
filter = 'top',
rownames = FALSE,
extensions = 'Buttons',
options = list(
dom = 'Bft',
buttons = list('copy', 'print'),
iDisplayLength = 10,
keys = TRUE,
autoWidth = TRUE
),
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: left;',
caption2
)
)
sessionInfo()
#> R version 4.3.3 (2024-02-29)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] tibble_3.2.1 purrr_1.0.2
#> [3] DT_0.33 EnrichmentBrowser_2.32.0
#> [5] graph_1.80.0 dplyr_1.1.4
#> [7] curatedMetagenomicData_3.10.0 TreeSummarizedExperiment_2.10.0
#> [9] Biostrings_2.70.3 XVector_0.42.0
#> [11] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
#> [13] Biobase_2.62.0 GenomicRanges_1.54.1
#> [15] GenomeInfoDb_1.38.8 IRanges_2.36.0
#> [17] S4Vectors_0.40.2 BiocGenerics_0.48.1
#> [19] MatrixGenerics_1.14.0 matrixStats_1.2.0
#> [21] bugphyzz_0.99.0 bugphyzzAnalyses_0.1.1
#>
#> loaded via a namespace (and not attached):
#> [1] jsonlite_1.8.8 MultiAssayExperiment_1.28.0
#> [3] magrittr_2.0.3 ggbeeswarm_0.7.2
#> [5] rmarkdown_2.26 fs_1.6.3
#> [7] zlibbioc_1.48.2 ragg_1.3.0
#> [9] vctrs_0.6.5 memoise_2.0.1
#> [11] DelayedMatrixStats_1.24.0 RCurl_1.98-1.14
#> [13] htmltools_0.5.8.1 S4Arrays_1.2.1
#> [15] AnnotationHub_3.10.1 curl_5.2.1
#> [17] BiocNeighbors_1.20.2 SparseArray_1.2.4
#> [19] sass_0.4.9 bslib_0.7.0
#> [21] htmlwidgets_1.6.4 desc_1.4.3
#> [23] plyr_1.8.9 DECIPHER_2.30.0
#> [25] cachem_1.0.8 igraph_2.0.3
#> [27] mime_0.12 lifecycle_1.0.4
#> [29] pkgconfig_2.0.3 rsvd_1.0.5
#> [31] Matrix_1.6-5 R6_2.5.1
#> [33] fastmap_1.1.1 GenomeInfoDbData_1.2.11
#> [35] shiny_1.8.1.1 digest_0.6.35
#> [37] colorspace_2.1-0 AnnotationDbi_1.64.1
#> [39] scater_1.30.1 irlba_2.3.5.1
#> [41] ExperimentHub_2.10.0 crosstalk_1.2.1
#> [43] textshaping_0.3.7 RSQLite_2.3.6
#> [45] vegan_2.6-4 beachmat_2.18.1
#> [47] filelock_1.0.3 fansi_1.0.6
#> [49] mgcv_1.9-1 httr_1.4.7
#> [51] abind_1.4-5 compiler_4.3.3
#> [53] withr_3.0.0 bit64_4.0.5
#> [55] BiocParallel_1.36.0 viridis_0.6.5
#> [57] DBI_1.2.2 MASS_7.3-60.0.1
#> [59] rappdirs_0.3.3 DelayedArray_0.28.0
#> [61] bluster_1.12.0 permute_0.9-7
#> [63] tools_4.3.3 vipor_0.4.7
#> [65] beeswarm_0.4.0 ape_5.7-1
#> [67] interactiveDisplayBase_1.40.0 httpuv_1.6.15
#> [69] glue_1.7.0 nlme_3.1-164
#> [71] promises_1.3.0 grid_4.3.3
#> [73] mia_1.10.0 reshape2_1.4.4
#> [75] cluster_2.1.6 generics_0.1.3
#> [77] gtable_0.3.4 tidyr_1.3.1
#> [79] BiocSingular_1.18.0 ScaledMatrix_1.10.0
#> [81] utf8_1.2.4 stringr_1.5.1
#> [83] ggrepel_0.9.5 BiocVersion_3.18.1
#> [85] pillar_1.9.0 limma_3.58.1
#> [87] yulab.utils_0.1.4 later_1.3.2
#> [89] splines_4.3.3 BiocFileCache_2.10.2
#> [91] treeio_1.26.0 lattice_0.22-6
#> [93] bit_4.0.5 annotate_1.80.0
#> [95] tidyselect_1.2.1 DirichletMultinomial_1.44.0
#> [97] locfit_1.5-9.9 scuttle_1.12.0
#> [99] knitr_1.46 gridExtra_2.3
#> [101] edgeR_4.0.16 xfun_0.43
#> [103] statmod_1.5.0 KEGGgraph_1.62.0
#> [105] stringi_1.8.3 lazyeval_0.2.2
#> [107] yaml_2.3.8 evaluate_0.23
#> [109] codetools_0.2-20 Rgraphviz_2.46.0
#> [111] BiocManager_1.30.22 cli_3.6.2
#> [113] xtable_1.8-4 systemfonts_1.0.6
#> [115] munsell_0.5.1 jquerylib_0.1.4
#> [117] Rcpp_1.0.12 dbplyr_2.5.0
#> [119] png_0.1-8 XML_3.99-0.16.1
#> [121] parallel_4.3.3 pkgdown_2.0.8
#> [123] ggplot2_3.5.0 blob_1.2.4
#> [125] sparseMatrixStats_1.14.0 bitops_1.0-7
#> [127] GSEABase_1.64.0 decontam_1.22.0
#> [129] viridisLite_0.4.2 tidytree_0.4.6
#> [131] scales_1.3.0 crayon_1.5.2
#> [133] rlang_1.1.3 KEGGREST_1.42.0