Expected result

  • Enrichment of anaerobic bacteria in healthy samples (gut).
  • Enrichment of facultative anaerobic bacteria in IBD samples (gut).

Data

Use the HMP_2019_bidmdb study in cMD:

dataset_name <- "HallAB_2017.relative_abundance"
tse <- curatedMetagenomicData(
    pattern = dataset_name, 
    dryrun = FALSE, rownames = 'NCBI',
    counts = TRUE
)[[1]]
colData(tse)$GROUP <- ifelse(colData(tse)$study_condition == 'control', 0, 1)
## Filter taxa with at least 1 count as abundance in at least 20% of samples
min_n_samples <- round(ncol(tse) * 0.2)
tse_subset <- tse[rowSums(assay(tse) >= 1) >= min_n_samples,]
tse_subset
#> class: TreeSummarizedExperiment 
#> dim: 117 259 
#> metadata(1): agglomerated_by_rank
#> assays(1): relative_abundance
#> rownames(117): 853 820 ... 1262992 329854
#> rowData names(7): superkingdom phylum ... genus species
#> colnames(259): p8582_mo1 p8582_mo10 ... SKST041_2_G103027
#>   SKST041_3_G103028
#> colData names(25): study_name subject_id ... SCCAI GROUP
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):
#> rowLinks: a LinkDataFrame (117 rows)
#> rowTree: 1 phylo tree(s) (10430 leaves)
#> colLinks: NULL
#> colTree: NULL

bugphyzz data

bp <- importBugphyzz()
aer <- bp$aerophilicity
sp_sigs <- makeSignatures(dat = aer, tax_id_type = 'NCBI_ID', tax_level = 'species') |> 
    discard(is.null)

Differential abundance

DA:

assay(tse_subset) <- assay(tse_subset) + 1
edger <- deAna(
    expr = tse_subset, de.method = 'edgeR', padj.method = 'BH',
    filter.by.expr = FALSE 
)
dim(edger)
#> [1] 117 259

Almost all bacteria found in the SummarizedExperiment are anaerobic. Maybe a different approach would be more convenient, like merging several datasets in CuratedMetagenomicData.

lapply(sp_sigs, function(x) sum(x %in% rownames(edger)))
#> $`bugphyzz:aerophilicity|aerobic`
#> [1] 4
#> 
#> $`bugphyzz:aerophilicity|anaerobic`
#> [1] 88
#> 
#> $`bugphyzz:aerophilicity|facultatively anaerobic`
#> [1] 5

Enrichment (GSEA) - bugphyzz:

edger <- limmaVoom(edger)
gsea <- sbea(
    method = 'gsea', se = edger, gs = sp_sigs, alpha = 0.1, perm = 1000,
    padj.method = 'BH'
)
gsea_res <- as.data.frame(gsea$res.tbl)

caption1 <- c(
  'Table 1. GSEA results comparing the gut microbiome of subjects with
  a normal gut micriobiome (healthy controls) and subjects with IBD.
  ES = Enrichment score, NES = Normalized Enrichment Score. Positive and
  negative scores (ES and NES) indicate enrichment in the IBD and
  healthy control samples, respectively. Bug sets marked with an asterik (*) 
  indicate significant enrichment results (adjusted p-value < 0.1). 
  Benjamini & Hochberg (BH) was used for P value adjustment.'
)

datatable(
  data = gsea_res,
  filter = 'top',
  rownames = FALSE,
  extensions = 'Buttons',
  options = list(
    dom = 'Bft',
    buttons = list('copy', 'print'),
    iDisplayLength = 10,
    keys = TRUE,
    autoWidth = TRUE
  ),
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: left;',
    caption1
  )
)

Import biosis data

biosis <- importBiosis()
row_data <- as.data.frame(rowData(tse_subset))
row_data <- rownames_to_column(row_data, var = 'rowname')
# row_data <- modify_at(row_data, 'genus', ~ as.character(.x))
row_data <- left_join(row_data, biosis, by = c('genus' = 'GenusID'))
biosis_sigs <- split(row_data, factor(row_data$Attribute))
biosis_sigs <- map(biosis_sigs, ~ unique(.x$rowname))
biosis_sigs
#> $anaerobic
#>  [1] "820"     "301301"  "357276"  "39491"   "40520"   "418240"  "28116"  
#>  [8] "674529"  "360807"  "371601"  "47678"   "74426"   "818"     "1262986"
#> [15] "821"     "1262756" "1262989" "301302"  "166486"  "39492"   "1263005"
#> [22] "1535"    "165179"  "147207"  "46506"   "33035"   "53443"   "29466"  
#> [29] "39778"   "1911679" "39777"   "218538"  "1532"    "1262948" "817"    
#> [36] "338188"  "1262992" "329854" 
#> 
#> $facultatively_anaerobic
#>  [1] "39491"   "1304"    "1262986" "729"     "1318"    "1262989" "39492"  
#>  [8] "28026"   "1263005" "1535"    "1308"    "216816"  "1680"    "1681"   
#> [15] "1358"    "1262992"

No aerobic taxa, which I think should be expected?

Enrichemnt (GSEA) - biosis

gsea2 <- sbea(
    method = 'gsea', se = edger, gs = biosis_sigs, alpha = 0.1, perm = 1000,
    padj.method = 'BH'
)
gsea2_res <- as.data.frame(gsea2$res.tbl)

caption2 <- c(
  'Table 2. GSEA results comparing the gut microbiome of subjects with
  a normal gut micriobiome (healthy controls) and subjects with IBD using
  biosis (nychanes) signatures.
  ES = Enrichment score, NES = Normalized Enrichment Score. Positive and
  negative scores (ES and NES) indicate enrichment in the IBD and
  healthy control samples, respectively. Bug sets marked with an asterik (*) 
  indicate significant enrichment results (adjusted p-value < 0.1). 
  Benjamini & Hochberg (BH) was used for P value adjustment.'
)

datatable(
  data = gsea2_res,
  filter = 'top',
  rownames = FALSE,
  extensions = 'Buttons',
  options = list(
    dom = 'Bft',
    buttons = list('copy', 'print'),
    iDisplayLength = 10,
    keys = TRUE,
    autoWidth = TRUE
  ),
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: left;',
    caption2
  )
)

Session Information

sessionInfo()
#> R version 4.3.3 (2024-02-29)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] tibble_3.2.1                    purrr_1.0.2                    
#>  [3] DT_0.33                         EnrichmentBrowser_2.32.0       
#>  [5] graph_1.80.0                    dplyr_1.1.4                    
#>  [7] curatedMetagenomicData_3.10.0   TreeSummarizedExperiment_2.10.0
#>  [9] Biostrings_2.70.3               XVector_0.42.0                 
#> [11] SingleCellExperiment_1.24.0     SummarizedExperiment_1.32.0    
#> [13] Biobase_2.62.0                  GenomicRanges_1.54.1           
#> [15] GenomeInfoDb_1.38.8             IRanges_2.36.0                 
#> [17] S4Vectors_0.40.2                BiocGenerics_0.48.1            
#> [19] MatrixGenerics_1.14.0           matrixStats_1.2.0              
#> [21] bugphyzz_0.99.0                 bugphyzzAnalyses_0.1.1         
#> 
#> loaded via a namespace (and not attached):
#>   [1] jsonlite_1.8.8                MultiAssayExperiment_1.28.0  
#>   [3] magrittr_2.0.3                ggbeeswarm_0.7.2             
#>   [5] rmarkdown_2.26                fs_1.6.3                     
#>   [7] zlibbioc_1.48.2               ragg_1.3.0                   
#>   [9] vctrs_0.6.5                   memoise_2.0.1                
#>  [11] DelayedMatrixStats_1.24.0     RCurl_1.98-1.14              
#>  [13] htmltools_0.5.8.1             S4Arrays_1.2.1               
#>  [15] AnnotationHub_3.10.1          curl_5.2.1                   
#>  [17] BiocNeighbors_1.20.2          SparseArray_1.2.4            
#>  [19] sass_0.4.9                    bslib_0.7.0                  
#>  [21] htmlwidgets_1.6.4             desc_1.4.3                   
#>  [23] plyr_1.8.9                    DECIPHER_2.30.0              
#>  [25] cachem_1.0.8                  igraph_2.0.3                 
#>  [27] mime_0.12                     lifecycle_1.0.4              
#>  [29] pkgconfig_2.0.3               rsvd_1.0.5                   
#>  [31] Matrix_1.6-5                  R6_2.5.1                     
#>  [33] fastmap_1.1.1                 GenomeInfoDbData_1.2.11      
#>  [35] shiny_1.8.1.1                 digest_0.6.35                
#>  [37] colorspace_2.1-0              AnnotationDbi_1.64.1         
#>  [39] scater_1.30.1                 irlba_2.3.5.1                
#>  [41] ExperimentHub_2.10.0          crosstalk_1.2.1              
#>  [43] textshaping_0.3.7             RSQLite_2.3.6                
#>  [45] vegan_2.6-4                   beachmat_2.18.1              
#>  [47] filelock_1.0.3                fansi_1.0.6                  
#>  [49] mgcv_1.9-1                    httr_1.4.7                   
#>  [51] abind_1.4-5                   compiler_4.3.3               
#>  [53] withr_3.0.0                   bit64_4.0.5                  
#>  [55] BiocParallel_1.36.0           viridis_0.6.5                
#>  [57] DBI_1.2.2                     MASS_7.3-60.0.1              
#>  [59] rappdirs_0.3.3                DelayedArray_0.28.0          
#>  [61] bluster_1.12.0                permute_0.9-7                
#>  [63] tools_4.3.3                   vipor_0.4.7                  
#>  [65] beeswarm_0.4.0                ape_5.7-1                    
#>  [67] interactiveDisplayBase_1.40.0 httpuv_1.6.15                
#>  [69] glue_1.7.0                    nlme_3.1-164                 
#>  [71] promises_1.3.0                grid_4.3.3                   
#>  [73] mia_1.10.0                    reshape2_1.4.4               
#>  [75] cluster_2.1.6                 generics_0.1.3               
#>  [77] gtable_0.3.4                  tidyr_1.3.1                  
#>  [79] BiocSingular_1.18.0           ScaledMatrix_1.10.0          
#>  [81] utf8_1.2.4                    stringr_1.5.1                
#>  [83] ggrepel_0.9.5                 BiocVersion_3.18.1           
#>  [85] pillar_1.9.0                  limma_3.58.1                 
#>  [87] yulab.utils_0.1.4             later_1.3.2                  
#>  [89] splines_4.3.3                 BiocFileCache_2.10.2         
#>  [91] treeio_1.26.0                 lattice_0.22-6               
#>  [93] bit_4.0.5                     annotate_1.80.0              
#>  [95] tidyselect_1.2.1              DirichletMultinomial_1.44.0  
#>  [97] locfit_1.5-9.9                scuttle_1.12.0               
#>  [99] knitr_1.46                    gridExtra_2.3                
#> [101] edgeR_4.0.16                  xfun_0.43                    
#> [103] statmod_1.5.0                 KEGGgraph_1.62.0             
#> [105] stringi_1.8.3                 lazyeval_0.2.2               
#> [107] yaml_2.3.8                    evaluate_0.23                
#> [109] codetools_0.2-20              Rgraphviz_2.46.0             
#> [111] BiocManager_1.30.22           cli_3.6.2                    
#> [113] xtable_1.8-4                  systemfonts_1.0.6            
#> [115] munsell_0.5.1                 jquerylib_0.1.4              
#> [117] Rcpp_1.0.12                   dbplyr_2.5.0                 
#> [119] png_0.1-8                     XML_3.99-0.16.1              
#> [121] parallel_4.3.3                pkgdown_2.0.8                
#> [123] ggplot2_3.5.0                 blob_1.2.4                   
#> [125] sparseMatrixStats_1.14.0      bitops_1.0-7                 
#> [127] GSEABase_1.64.0               decontam_1.22.0              
#> [129] viridisLite_0.4.2             tidytree_0.4.6               
#> [131] scales_1.3.0                  crayon_1.5.2                 
#> [133] rlang_1.1.3                   KEGGREST_1.42.0