Analysis objectives

  1. Import, recode, and subset data from bugsigdb.org
  2. Create a table of studies
  3. Create a clustered heatmap showing similarity of signatures from independent studies
  4. Calculate the frequency of appearance of each taxon in independent signatures, and identify the most frequently reported taxa
  5. Estimate the probability of the most frequently identified taxa occurring so frequently by chance

Packages installation

Install packages (not evaluated in vignette)

# One-time setup: CRAN dependencies first, then the waldronlab packages
# that are installed from GitHub.
install.packages(
  c("devtools", "tidyverse", "kableExtra", "gt", "glue")
)
invisible(lapply(
  c(
    "waldronlab/bugSigSimple",
    "waldronlab/BugSigDBStats",
    "waldronlab/bugsigdbr"
  ),
  devtools::install_github
))

Data import, recoding, and subset

# Attach bugSigSimple so its helpers can be called unqualified later on.
library(bugSigSimple)
# Import the full BugSigDB table; cache = TRUE reuses a previously
# downloaded copy when one is available.
dat <- bugsigdbr::importBugSigDB(cache = TRUE) 
## Using cached version from 2025-05-05 19:22:53
# Rows and columns of the imported table.
dim(dat)
## [1] 8163   50
# Column names available for the recoding and subsetting steps.
names(dat)
##  [1] "BSDB ID"                    "Study"                     
##  [3] "Study design"               "PMID"                      
##  [5] "DOI"                        "URL"                       
##  [7] "Authors list"               "Title"                     
##  [9] "Journal"                    "Year"                      
## [11] "Keywords"                   "Experiment"                
## [13] "Location of subjects"       "Host species"              
## [15] "Body site"                  "UBERON ID"                 
## [17] "Condition"                  "EFO ID"                    
## [19] "Group 0 name"               "Group 1 name"              
## [21] "Group 1 definition"         "Group 0 sample size"       
## [23] "Group 1 sample size"        "Antibiotics exclusion"     
## [25] "Sequencing type"            "16S variable region"       
## [27] "Sequencing platform"        "Statistical test"          
## [29] "Significance threshold"     "MHT correction"            
## [31] "LDA Score above"            "Matched on"                
## [33] "Confounders controlled for" "Pielou"                    
## [35] "Shannon"                    "Chao1"                     
## [37] "Simpson"                    "Inverse Simpson"           
## [39] "Richness"                   "Signature page name"       
## [41] "Source"                     "Curated date"              
## [43] "Curator"                    "Revision editor"           
## [45] "Description"                "Abundance in Group 1"      
## [47] "MetaPhlAn taxon names"      "NCBI Taxonomy IDs"         
## [49] "State"                      "Reviewer"
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
##  dplyr     1.1.4      readr     2.1.5
##  forcats   1.0.0      stringr   1.5.1
##  ggplot2   3.5.2      tibble    3.2.1
##  lubridate 1.9.4      tidyr     1.3.1
##  purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
##  dplyr::filter() masks stats::filter()
##  dplyr::lag()    masks stats::lag()
##  Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
 # Load the "efo" ontology; it is passed to subsetByOntology() when
 # selecting the COVID-19 records.
 efo <- bugsigdbr::getOntology("efo")
## Loading required namespace: ontologyIndex
## Using cached version from 2025-05-05 19:17:56
# Keep the records whose Condition matches "COVID-19" in the EFO ontology,
# then derive three helper columns in one pass:
#   studyexp    - "<Study>_<Experiment>" identifier
#   site        - body site collapsed to Gut / aURT / LRT (values not listed
#                 in the mapping are left unchanged by recode())
#   comparison1 - "<Group 0 name> vs <Group 1 name>" label
covid_all <- dat %>%
  bugsigdbr::subsetByOntology(column = "Condition", "COVID-19", efo) %>%
  mutate(
    studyexp = paste(Study, Experiment, sep = "_"),
    site = recode(
      `Body site`,
      # gut
      "Feces" = "Gut",
      "Rectum" = "Gut",
      # upper respiratory tract / oral sites
      "Nasopharynx" = "aURT",
      "Oropharynx" = "aURT",
      "Nasopharynx,Oropharynx" = "aURT",
      "Nasal cavity" = "aURT",
      "Surface of tongue" = "aURT",
      "Nasopharynx,Throat" = "aURT",
      "Throat" = "aURT",
      "Tongue" = "aURT",
      # lower respiratory tract
      "Lung" = "LRT",
      "Sputum" = "LRT"
    ),
    comparison1 = paste(`Group 0 name`, `Group 1 name`, sep = " vs ")
  )

Table of studies

# Print one summary row per study for the COVID-19 subset.
covid_all %>%
  bugSigSimple::createStudyTable()
## # A tibble: 47 × 9
##    `Study code` MaxCases MaxControls `Study design` Condition N_signatures PMID 
##    <chr>           <dbl>       <dbl> <chr>          <chr>            <int> <chr>
##  1 BellatoM_20…      156          36 cross-section… COVID-19…            3 3780…
##  2 BraunT_2021        26          29 cross-section… COVID-19             1 3390…
##  3 CaoJ_2021          13           8 case-control   COVID-19             6 3367…
##  4 ChenY_2021         30          30 time series /… COVID-19             2 3383…
##  5 DeMaioF_2020       18          22 cross-section… COVID-19             1 3272…
##  6 EngenPA_2021        9          10 cross-section… COVID-19             2 3405…
##  7 Feehan_2021        79          81 cross-section… COVID-19             3 NA   
##  8 GaibaniP_20…       24          24 case-control   COVID-19             2 3398…
##  9 GalperineT_…       57          43 time series /… COVID-19             4 3759…
## 10 GaoM_2021          94          48 prospective c… COVID-19             2 3442…
## # ℹ 37 more rows
## # ℹ 2 more variables: DOI <chr>, URL <chr>

Taxon frequency tables by body site

library(dplyr)

# Split the COVID-19 records by recoded body site, dropping rows that
# have no Source recorded.
gut_sigs <- covid_all %>%
  filter(site == "Gut") %>%
  drop_na(Source)

# Upper respiratory tract / oral sites (recoded as "aURT").
naso_sigs <- covid_all %>%
  filter(site == "aURT") %>%
  drop_na(Source)

# Lower respiratory tract sites (recoded as "LRT").
resp_sigs <- covid_all %>%
  filter(site == "LRT") %>%
  drop_na(Source)

In this table, the Binomial Test p-value corresponds to the null hypothesis

H0: the proportion of signatures in which the taxon is reported increased or decreased, relative to the total number of signatures in which it is reported, is equal to 0.5

# Taxon frequency table for the gut signatures, rendered with kable.
gut_sigs %>%
  bugSigSimple::createTaxonTable() %>%
  kableExtra::kbl()
## Warning: Expected 7 pieces. Additional pieces discarded in 1 rows [4].
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Clostridium genus 15 6 9 0.61000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Clostridiaceae Clostridium 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Clostridiaceae|g__Clostridium
Faecalibacterium genus 15 1 14 0.00098 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Faecalibacterium 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Faecalibacterium
Anaerostipes genus 15 4 11 0.12000 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Anaerostipes 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Anaerostipes
Faecalibacterium prausnitzii species 13 0 13 0.00024 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Faecalibacterium 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Faecalibacterium|s__Faecalibacterium prausnitzii
Ruminococcus genus 13 3 10 0.09200 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Ruminococcus 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Ruminococcus
Streptococcus genus 12 10 2 0.03900 Bacteria Bacillati Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus
Lachnospiraceae family 12 4 8 0.39000 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae NA 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae
Coprococcus genus 12 0 12 0.00049 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Coprococcus 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Coprococcus
Dorea genus 12 4 8 0.39000 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Dorea 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Dorea
Roseburia genus 12 0 12 0.00049 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Roseburia 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Roseburia
# Taxon frequency table for the aURT signatures, rendered with kable.
naso_sigs %>%
  bugSigSimple::createTaxonTable() %>%
  kableExtra::kbl()
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Veillonella genus 15 12 3 0.035 Bacteria Bacillati Bacillota Negativicutes Veillonellales Veillonellaceae Veillonella 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Negativicutes|o__Veillonellales|f__Veillonellaceae|g__Veillonella
Prevotella genus 12 6 6 1.000 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Prevotellaceae Prevotella 0 d__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella
Haemophilus genus 11 2 9 0.065 Bacteria Pseudomonadati Pseudomonadota Gammaproteobacteria Pasteurellales Pasteurellaceae Haemophilus 0 d__Bacteria|k__Pseudomonadati|p__Pseudomonadota|c__Gammaproteobacteria|o__Pasteurellales|f__Pasteurellaceae|g__Haemophilus
Streptococcus genus 10 4 6 0.750 Bacteria Bacillati Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus
Oribacterium genus 10 2 8 0.110 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Oribacterium 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Oribacterium
Rothia genus 9 3 6 0.510 Bacteria Bacillati Actinomycetota Actinomycetes Micrococcales Micrococcaceae Rothia 0 d__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Micrococcales|f__Micrococcaceae|g__Rothia
Neisseria genus 9 1 8 0.039 Bacteria Pseudomonadati Pseudomonadota Betaproteobacteria Neisseriales Neisseriaceae Neisseria 0 d__Bacteria|k__Pseudomonadati|p__Pseudomonadota|c__Betaproteobacteria|o__Neisseriales|f__Neisseriaceae|g__Neisseria
Filifactor genus 8 1 7 0.070 Bacteria Bacillati Bacillota Clostridia Peptostreptococcales Filifactoraceae Filifactor 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Peptostreptococcales|f__Filifactoraceae|g__Filifactor
Alloprevotella genus 8 4 4 1.000 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Prevotellaceae Alloprevotella 0 d__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Alloprevotella
Bifidobacterium genus 7 0 7 0.016 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 0 d__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium
# Taxon frequency table for the LRT signatures, rendered with kable.
resp_sigs %>%
  bugSigSimple::createTaxonTable() %>%
  kableExtra::kbl()
## Warning: Expected 7 pieces. Additional pieces discarded in 4 rows [1, 3,
## 9, 10].
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Rothia mucilaginosa species 2 1 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Micrococcales Micrococcaceae Rothia 0 d__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Micrococcales|f__Micrococcaceae|g__Rothia|s__Rothia mucilaginosa
Streptococcus genus 2 1 1 1.0 Bacteria Bacillati Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus
Lachnoclostridium phytofermentans species 2 1 1 1.0 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Lachnoclostridium 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Lachnoclostridium|s__Lachnoclostridium phytofermentans
Porphyromonas genus 2 0 2 0.5 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Porphyromonadaceae Porphyromonas 0 d__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Porphyromonadaceae|g__Porphyromonas
Tannerella genus 2 0 2 0.5 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Tannerellaceae Tannerella 0 d__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Tannerellaceae|g__Tannerella
Capnocytophaga genus 2 1 1 1.0 Bacteria Pseudomonadati Bacteroidota Flavobacteriia Flavobacteriales Flavobacteriaceae Capnocytophaga 0 d__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Flavobacteriia|o__Flavobacteriales|f__Flavobacteriaceae|g__Capnocytophaga
Neisseria genus 2 0 2 0.5 Bacteria Pseudomonadati Pseudomonadota Betaproteobacteria Neisseriales Neisseriaceae Neisseria 0 d__Bacteria|k__Pseudomonadati|p__Pseudomonadota|c__Betaproteobacteria|o__Neisseriales|f__Neisseriaceae|g__Neisseria
Actinomyces genus 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Actinomycetales Actinomycetaceae Actinomyces 0 d__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces
Actinomyces pacaensis species 1 1 0 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Actinomycetales Actinomycetaceae Actinomyces 0 d__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces pacaensis
Schaalia meyeri species 1 1 0 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Actinomycetales Actinomycetaceae Schaalia 0 d__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Actinomycetales|f__Actinomycetaceae|g__Schaalia|s__Schaalia meyeri

Gut microbiota analysis

Look specifically at case-control comparisons

# Flag comparisons whose label mentions both "healthy" and "COVID"
# (either order, case-insensitive), then keep only those rows.
healthy <- grepl(
  ".*(healthy.*COVID|COVID.*healthy).*",
  gut_sigs$comparison1,
  ignore.case = TRUE
)

# `healthy` is a logical vector aligned row-for-row with gut_sigs.
cc_gut_sigs1 <- filter(gut_sigs, healthy)

Overall frequencies of taxa increased in cases for case/control feces studies

Identifying a taxon reported consistently in 8 out of 8 studies is much more compelling than the FDR value here would suggest, since this taxon also passed a significance threshold in every one of those studies.

# Build the taxon table for the case-control gut signatures and add an
# FDR-adjusted p-value column immediately after the raw binomial p-value.
# NOTE(review): the adjustment only covers the rows createTaxonTable()
# returns, not every taxon tested - confirm this is intended.
cc_gut_sigs1_taxontable <- cc_gut_sigs1 %>%
  bugSigSimple::createTaxonTable() %>%
  mutate(
    FDR = p.adjust(p = `Binomial Test pval`, method = "fdr"),
    .after = `Binomial Test pval`
  )
## Warning: Expected 7 pieces. Additional pieces discarded in 1 rows [10].
# Render the FDR-annotated taxon table.
cc_gut_sigs1_taxontable %>%
  kableExtra::kbl()
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval FDR kingdom phylum class order family genus species n_signatures metaphlan_name
Faecalibacterium genus 9 0 9 0.0039 0.0156000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Faecalibacterium 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Faecalibacterium
Anaerostipes genus 9 1 8 0.0390 0.0557143 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Anaerostipes 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Anaerostipes
Roseburia genus 9 0 9 0.0039 0.0156000 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Roseburia 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Roseburia
Enterococcus genus 8 8 0 0.0078 0.0156000 Bacteria Bacillati Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Enterococcaceae|g__Enterococcus
Lactobacillaceae family 8 5 3 0.7300 0.7300000 Bacteria Bacillati Bacillota Bacilli Lactobacillales Lactobacillaceae NA 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae
Streptococcus genus 8 8 0 0.0078 0.0156000 Bacteria Bacillati Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus
Clostridium genus 8 3 5 0.7300 0.7300000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Clostridiaceae Clostridium 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Clostridiaceae|g__Clostridium
Coprococcus genus 8 0 8 0.0078 0.0156000 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Coprococcus 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Coprococcus
Ruminococcus genus 7 1 6 0.1300 0.1625000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Ruminococcus 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Ruminococcus
Blautia obeum species 7 0 7 0.0160 0.0266667 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Blautia 0 d__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Blautia|s__Blautia obeum

Monte-Carlo simulation for increased abundance taxa

Just for the increased cc_gut_sigs1 for now. I am inclined to skip this analysis in favor of the clustering and binomial test analysis.

library(bugSigSimple)

# Extract the signatures (as taxon names) reported as increased in
# Group 1 for the case-control gut comparisons.
gut.sigs.increased <- cc_gut_sigs1 %>%
  filter(`Abundance in Group 1` == "increased") %>%
  bugsigdbr::getSignatures(tax.id.type = "taxname")

# Number of taxa in each signature. lengths() always returns an integer
# vector, whereas sapply(x, length) returns an empty list when there are
# no signatures.
my.siglengths.inc <- lengths(gut.sigs.increased)

# Critical frequency threshold for these signatures and their lengths.
getCriticalN(gut.sigs.increased, my.siglengths.inc)
## 95% 
##  10
# Compare to observed frequencies: Enterococcus and Streptococcus are the
# most frequently reported taxa.
# NOTE(review): an earlier note said these two taxa equal the critical
# limit; re-check that against the getCriticalN() result.
frequencySigs(gut.sigs.increased)
##            Enterococcus           Streptococcus      Enterobacteriaceae 
##                       8                       8                       5 
## Enterocloster citroniae         Enterococcaceae        Lactobacillaceae 
##                       5                       5                       5 
##           Lactobacillus                  Rothia          Actinomycetota 
##                       5                       5                       4 
##               Atopobium 
##                       4

Overall frequencies of taxa decreased in cases for case/control feces studies

# Extended taxon table (top 40 rows) for the case-control gut signatures.
cc_gut_sigs1 %>%
  bugSigSimple::createTaxonTable(n = 40)

Nasopharyngeal microbiota analysis

Look specifically at case-control comparisons

library(dplyr)

# Flag comparisons whose label mentions both "control" and "COVID"
# (either order, case-insensitive).
healthy <- grepl(
  ".*(control.*COVID|COVID.*control).*",
  naso_sigs$comparison1,
  ignore.case = TRUE
)

# Keep the flagged rows and exclude "Study 458".
# NOTE(review): the reason for excluding this study is not recorded here.
cc_naso_sigs1 <- filter(naso_sigs, healthy, Study != "Study 458")

Cluster analysis

Note, this EDA should really be done before hypothesis testing.

First calculate pairwise overlaps for all signatures of length > 1:

# All COVID-19 signatures as character vectors of taxon names.
allsigs <- bugsigdbr::getSignatures(covid_all, tax.id.type = "taxname")
# Require length > 1. lengths() is type-stable, unlike sapply(x, length),
# which returns a list when the input is empty.
allsigs <- allsigs[lengths(allsigs) > 1]
# NOTE(review): allsigs has no dim attribute, so dim() returns NULL;
# length(allsigs) would report the number of signatures retained.
dim(allsigs)
## NULL
# Pairwise overlap statistics between the retained signatures.
# NOTE(review): presumably one row per signature pair - confirm against
# the BugSigDBStats documentation.
mydists <- BugSigDBStats::calcPairwiseOverlaps(allsigs)
dim(mydists)
## [1] 3020    8

Create a matrix of Jaccard similarities (0 for no overlap, 1 for 100% overlap)

# Signature-by-signature matrix of Jaccard similarities.
jmat <- BugSigDBStats::calcJaccardSimilarity(allsigs)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.24.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
# Signature sizes, shown as bar plots along both heatmap margins.
# lengths() is the type-stable equivalent of sapply(x, length).
siglengths <- lengths(allsigs)
ha <- HeatmapAnnotation(`Signature Length` = anno_barplot(siglengths))
hr <- rowAnnotation(`Signature Length` = anno_barplot(siglengths))

# Clustered heatmap of the Jaccard similarity matrix. Row and column labels
# drop everything up to and including the last ":" in each signature name.
hm <- Heatmap(
  jmat,
  top_annotation = ha,
  left_annotation = hr,
  row_names_max_width = unit(20, "cm"),
  column_names_max_height = unit(20, "cm"),
  row_labels = sub(".+:", "", rownames(jmat)),
  column_labels = sub(".+:", "", colnames(jmat))
)
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
# Auto-print the heatmap object built above.
hm

Use this interactively to make an interactive heatmap (not evaluated in vignette). Some expanding of the default size is required to see anything. Creating a sub-heatmap, then exporting it as a table, allows in-depth identification of the subgroups.

# jmat holds Jaccard *similarities* (0 = no overlap, 1 = identical), but
# hclust() expects dissimilarities. Convert with 1 - similarity so that
# signatures sharing many taxa end up close together in the tree.
# NOTE(review): cut heights and any rendered cluster listings derived from
# the earlier similarity-based tree need to be regenerated and re-tuned.
hc <- hclust(as.dist(1 - jmat))
plot(hc)

This tree can be cut to show the clusters, for example. The clusters of more than 1 signature but less than ~10 signatures are most likely to be something interesting.

# Cut the tree at height 0.05 and order signatures by cluster id.
# NOTE(review): h is interpreted on the scale `hc` was built with - confirm
# the threshold matches that scale.
clusts <- sort(cutree(hc, h = 0.05))
# List the member signature names of each cluster, in cluster-id order.
unname(split(names(clusts), clusts))
## [[1]]
##  [1] "bsdb:513/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_UP"                                                                             
##  [2] "bsdb:32442562/1/1_COVID-19:Antibiotic-naive-COVID-19-patients_vs_Uninfected-controls,-pneumonia-patients,-antibiotics-treated-COVID-19-patients_UP"            
##  [3] "bsdb:32442562/2/1_COVID-19:Antibiotic-treated-COVID-19-patients_vs_Antibiotic-naive-COVID-19-patients,-pneumonia-patients,-uninfected-controls_DOWN"           
##  [4] "bsdb:32497191/3/2_COVID-19:COVID-19-patients_vs_H1N1-patients_DOWN"                                                                                            
##  [5] "bsdb:32497191/4/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                                                                              
##  [6] "bsdb:32690600/1/1_COVID-19:Patients-with-high-SARS-CoV-2-infectivity_vs_Patients-with-low-to-none-SARS-CoV-2-infectivity_DOWN"                                 
##  [7] "bsdb:32690600/1/2_COVID-19:Patients-with-high-SARS-CoV-2-infectivity_vs_Patients-with-low-to-none-SARS-CoV-2-infectivity_UP"                                   
##  [8] "bsdb:33431578/1/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                                                                              
##  [9] "bsdb:33431578/2/1_COVID-19:Recovered-COVID-19-cases_vs_Healthy-controls_UP"                                                                                    
## [10] "bsdb:33577896/1/2_COVID-19:Symptomatic,-mild-to-moderate-COVID-19-patients_vs_Asymptomatic-uninfected-controls_DOWN"                                           
## [11] "bsdb:33596245/2/1_COVID-19:COVID-positive-patients-admitted-to-ICU-(i-COVID-19)_vs_COVID-positive-patients-admitted-to-infectious-disease-ward-(w-COVID-19)_UP"
## [12] "bsdb:33678150/1/2_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                                                                         
## [13] "bsdb:33789966/1/2_COVID-19:Confirmed-COVID-19-patients_vs_Healthy-controls_UP"                                                                                 
## [14] "bsdb:33977168/1/2_COVID-19:COVID-19-patients_vs_Non-COVID-patients-with-URTI-or-COPD_DOWN"                                                                     
## [15] "bsdb:33978940/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-patients-with-fever-and-cough_UP"                                                   
## [16] "bsdb:33980943/1/1_COVID-19:COVID-19-patients_vs_COVID-19-negative-patients-with-pneumonia_UP"                                                                  
## [17] "bsdb:33986253/3/2_COVID-19:Flu-patients_vs_Healthy-controls_DOWN"                                                                                              
## [18] "bsdb:34031657/3/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-healthcare-workers-(controls)_UP"                                                   
## [19] "bsdb:34058978/1/2_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_UP"                                                                        
## [20] "bsdb:34100340/3/1_COVID-19:Recovered-COVID-19-patients_vs_Non-infected-patients-with-unrelated-respiratory-medical-conditions_UP"                              
## [21] "bsdb:34100340/4/1_COVID-19:Positive-for-COVID-19-viral-RNA-in-feces_vs_Negative-for-COVID-19-viral-RNA-in-feces_UP"                                            
## [22] "bsdb:34151035/1/1_COVID-19:COVID-19-patients_vs_Non-COVID-19-controls_DOWN"                                                                                    
## [23] "bsdb:34168484/1/1_COVID-19:COVID-19-patients-with-fever_vs_COVID-19-patients-without-fever_UP"                                                                 
## [24] "bsdb:34294722/3/2_COVID-19:COVID-19-patients-not-treated-with-antibiotics_vs_Healthy-controls_DOWN"                                                            
## [25] "bsdb:35411293/1/2_COVID-19:COVID-19-cases_vs_Healthy-controls_DOWN"                                                                                            
## [26] "bsdb:36869345/3/1_COVID-19:Feces-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_UP"                                                           
## [27] "bsdb:36894986/1/1_COVID-19:COVID-19-infection-cases._vs_Control-group_UP"                                                                                      
## [28] "bsdb:36894986/1/2_COVID-19:COVID-19-infection-cases._vs_Control-group_DOWN"                                                                                    
## [29] "bsdb:37119437/1/1_COVID-19:Healthy-Controls-Group_vs_Covid-19-and-Post-Covid-19-Groups_UP"                                                                     
## [30] "bsdb:37119437/3/1_COVID-19:Post-Covid-19-Group_vs_Healthy-Controls-and-Covid-19-Groups_UP"                                                                     
## [31] "bsdb:37119437/9/1_COVID-19:Post-Covid-19-Group_vs_Healthy-Controls_UP"                                                                                         
## [32] "bsdb:37596518/1/1_COVID-19:COVID-19-Patients_vs_Non-Covid-19-Patients---pneumonia-patients_UP"                                                                 
## 
## [[2]]
##  [1] "bsdb:513/2/1_COVID-19:COVID-19-positive-patients-with-breathing-assistance_vs_COVID-19-positive-patients-without-breathing-assistance_UP"
##  [2] "bsdb:32442562/3/1_COVID-19:Pneumonia-controls_vs_Uninfected-controls,-COVID-19-patients_DOWN"                                            
##  [3] "bsdb:32497191/1/2_COVID-19:COVID-19-cases_vs_Healthy-controls_DOWN"                                                                      
##  [4] "bsdb:33431578/1/2_COVID-19:COVID-19-cases_vs_Healthy-controls_DOWN"                                                                      
##  [5] "bsdb:33431578/3/1_COVID-19:Recovered-COVID-19-cases_vs_Healthy-controls_UP"                                                              
##  [6] "bsdb:33577896/1/1_COVID-19:Symptomatic,-mild-to-moderate-COVID-19-patients_vs_Asymptomatic-uninfected-controls_UP"                       
##  [7] "bsdb:33659220/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_DOWN"                                                
##  [8] "bsdb:33672177/2/1_COVID-19:Mild-moderate-COVID-19-patients_vs_Healthy-controls_DOWN"                                                     
##  [9] "bsdb:33850111/1/2_COVID-19:Severe-COVID-19-patients_vs_Mild-COVID-19-patients_UP"                                                        
## [10] "bsdb:34031657/2/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_UP"                                                  
## [11] "bsdb:34100340/3/2_COVID-19:Recovered-COVID-19-patients_vs_Non-infected-patients-with-unrelated-respiratory-medical-conditions_DOWN"      
## [12] "bsdb:34200249/1/1_COVID-19:Recovered-COVID-19-samples-(respiratory-negative)_vs_Infected-COVID-19-samples-(respiratory-positive)_UP"     
## [13] "bsdb:34294722/2/1_COVID-19:COVID-19-patients-(severe/critical)_vs_COVID-19-patients-(mild/moderate)_DOWN"                                
## [14] "bsdb:37119437/2/1_COVID-19:Covid-19-Group_vs_Healthy-Controls-and-Post-Covid-19-Groups_UP"                                               
## [15] "bsdb:37119437/4/2_COVID-19:Covid-19-N-group_vs_Control-group_DOWN"                                                                       
## [16] "bsdb:37119437/8/1_COVID-19:Covid-19-Group_vs_Healthy-Controls_UP"                                                                        
## [17] "bsdb:37803040/3/1_COVID-19,Respiratory-failure-requiring-assisted-ventilation:Intubated_vs_Non-intubated_UP"                             
## 
## [[3]]
##  [1] "bsdb:513/3/1_COVID-19:COVID-19-positive-patients-with-antibiotic-use_vs_COVID-19-positive-patients-without-antibiotic-use_UP"
##  [2] "bsdb:32497191/1/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                                            
##  [3] "bsdb:33431578/2/2_COVID-19:Recovered-COVID-19-cases_vs_Healthy-controls_DOWN"                                                
##  [4] "bsdb:33431578/4/2_COVID-19:COVID-19-patients-treated-with-antibiotics_vs_Healthy-controls_UP"                                
##  [5] "bsdb:33577896/2/2_COVID-19:COVID-19-patients-with-high-viral-load_vs_COVID-19-patients-with-low-viral-load_DOWN"             
##  [6] "bsdb:33672177/2/2_COVID-19:Mild-moderate-COVID-19-patients_vs_Healthy-controls_UP"                                           
##  [7] "bsdb:33789966/2/2_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_UP"                                               
##  [8] "bsdb:33789966/3/1_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                                       
##  [9] "bsdb:33815323/1/1_COVID-19:Severe-COVID-19-patients_vs_COVID-19-negative-controls_UP"                                        
## [10] "bsdb:33978940/2/1_COVID-19:COVID-19-positive-patients_vs_Healthy-controls_UP"                                                
## [11] "bsdb:34100340/4/2_COVID-19:Positive-for-COVID-19-viral-RNA-in-feces_vs_Negative-for-COVID-19-viral-RNA-in-feces_DOWN"        
## [12] "bsdb:34382150/1/1_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_UP"                                               
## [13] "bsdb:37119437/4/1_COVID-19:Covid-19-N-group_vs_Control-group_UP"                                                             
## [14] "bsdb:37119437/5/2_COVID-19:Covid-19-C-group_vs_Control-group_DOWN"                                                           
## [15] "bsdb:37119437/10/2_COVID-19:Post-Covid-19-Group_vs_Covid-19-Group_DOWN"                                                      
## 
## [[4]]
## [1] "bsdb:32497191/3/1_COVID-19:COVID-19-patients_vs_H1N1-patients_UP"                                                                                       
## [2] "bsdb:33431578/4/1_COVID-19:COVID-19-patients-treated-with-antibiotics_vs_Healthy-controls_DOWN"                                                         
## [3] "bsdb:33596245/1/1_COVID-19:COVID-positive-patients-admitted-to-ICU-(i-COVID-19)_vs_Pneumonia-control-patients-(COVID-negative)_UP"                      
## [4] "bsdb:33977168/1/1_COVID-19:COVID-19-patients_vs_Non-COVID-patients-with-URTI-or-COPD_UP"                                                                
## [5] "bsdb:33986253/1/2_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                                                                    
## [6] "bsdb:34103263/1/1_COVID-19:COVID-19-patients_vs_Non-intubated-patients-with-non-incubation-viral-pneumonia-or-non-incubation-non-infectious-diseases_UP"
## [7] "bsdb:37119437/5/1_COVID-19:Covid-19-C-group_vs_Control-group_UP"                                                                                        
## [8] "bsdb:37119437/8/2_COVID-19:Covid-19-Group_vs_Healthy-Controls_DOWN"                                                                                     
## 
## [[5]]
##  [1] "bsdb:33431578/3/2_COVID-19:Recovered-COVID-19-cases_vs_Healthy-controls_DOWN"                                                                 
##  [2] "bsdb:33596245/3/1_COVID-19:COVID-positive-patients-admitted-to-infectious-disease-ward-(w-COVID-19)_vs_Pneumonia-controls-(COVID-negative)_UP"
##  [3] "bsdb:33672177/3/1_COVID-19:ICU-COVID-19-patients_vs_Mild-Moderate-COVID-19-patients_DOWN"                                                     
##  [4] "bsdb:33789966/2/1_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_DOWN"                                                              
##  [5] "bsdb:33850111/1/1_COVID-19:Severe-COVID-19-patients_vs_Mild-COVID-19-patients_DOWN"                                                           
##  [6] "bsdb:33986253/1/1_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                                                        
##  [7] "bsdb:34168484/2/2_COVID-19:COVID-19-patients-with-fever_vs_COVID-19-patients-without-fever_DOWN"                                              
##  [8] "bsdb:36744910/1/1_COVID-19:Moderate-severity_vs_Mild-severity_UP"                                                                             
##  [9] "bsdb:37119437/7/1_COVID-19:Covid-19-Group_vs_Healthy-controls_UP"                                                                             
## [10] "bsdb:37119437/7/2_COVID-19:Covid-19-Group_vs_Healthy-controls_DOWN"                                                                           
## 
## [[6]]
## [1] "bsdb:33577896/2/1_COVID-19:COVID-19-patients-with-high-viral-load_vs_COVID-19-patients-with-low-viral-load_UP"                                  
## [2] "bsdb:33596245/3/2_COVID-19:COVID-positive-patients-admitted-to-infectious-disease-ward-(w-COVID-19)_vs_Pneumonia-controls-(COVID-negative)_DOWN"
## [3] "bsdb:33678150/3/1_COVID-19:Severe-COVID-19-patients_vs_Mild-COVID-19-patients_UP"                                                               
## [4] "bsdb:33986253/2/1_COVID-19:COVID-19-patients_vs_Flu-patients_DOWN"                                                                              
## [5] "bsdb:33986253/4/2_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                                                            
## [6] "bsdb:34055851/1/2_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_DOWN"                                                                
## [7] "bsdb:34100340/1/1_COVID-19:SARS-CoV-2-infected-patients_vs_Non-infected-patients-with-unrelated-respiratory-medical-conditions_UP"              
## [8] "bsdb:34423593/1/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                                                               
## [9] "bsdb:37119437/9/2_COVID-19:Post-Covid-19-Group_vs_Healthy-Controls_DOWN"                                                                        
## 
## [[7]]
##  [1] "bsdb:33596245/1/2_COVID-19:COVID-positive-patients-admitted-to-ICU-(i-COVID-19)_vs_Pneumonia-control-patients-(COVID-negative)_DOWN"
##  [2] "bsdb:33672177/1/1_COVID-19:ICU-COVID-19-patients_vs_Healthy-controls_DOWN"                                                          
##  [3] "bsdb:33678150/1/1_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                                                
##  [4] "bsdb:33978940/4/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-patients-with-fever-and-cough_UP"                        
##  [5] "bsdb:33986253/3/1_COVID-19:Flu-patients_vs_Healthy-controls_UP"                                                                     
##  [6] "bsdb:34055851/1/1_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_UP"                                                      
##  [7] "bsdb:34199203/3/1_COVID-19:Severe-COVID-19-patients_vs_Healthy-controls_UP"                                                         
##  [8] "bsdb:34294722/1/2_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                                              
##  [9] "bsdb:36869345/2/1_COVID-19:Sputum-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_UP"                               
## [10] "bsdb:37119437/10/1_COVID-19:Post-Covid-19-Group_vs_Covid-19-Group_UP"                                                               
## 
## [[8]]
## [1] "bsdb:33596245/2/2_COVID-19:COVID-positive-patients-admitted-to-ICU-(i-COVID-19)_vs_COVID-positive-patients-admitted-to-infectious-disease-ward-(w-COVID-19)_DOWN"
## [2] "bsdb:33986253/5/1_COVID-19:Flu-patients_vs_Healthy-controls_DOWN"                                                                                                
## [3] "bsdb:33986253/5/2_COVID-19:Flu-patients_vs_Healthy-controls_UP"                                                                                                  
## [4] "bsdb:34423593/1/2_COVID-19:COVID-19-cases_vs_Healthy-controls_DOWN"                                                                                              
## [5] "bsdb:36744910/2/1_COVID-19:Severe/Critical-severity_vs_Mild-severity_UP"                                                                                         
## [6] "bsdb:37596518/2/1_COVID-19:COVID-19-patients---Ventilated_vs_COVID-19-patients---Non-Ventilated_UP"                                                              
## 
## [[9]]
## [1] "bsdb:33672177/1/2_COVID-19:ICU-COVID-19-patients_vs_Healthy-controls_UP"                                   
## [2] "bsdb:33678150/2/1_COVID-19:Antibiotic-treated-COVID-19-patients_vs_Antibiotic-naive-COVID-19-patients_UP"  
## [3] "bsdb:33678150/2/2_COVID-19:Antibiotic-treated-COVID-19-patients_vs_Antibiotic-naive-COVID-19-patients_DOWN"
## [4] "bsdb:33789966/3/2_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                       
## [5] "bsdb:34294722/4/2_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                     
## [6] "bsdb:36869345/2/2_COVID-19:Sputum-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_DOWN"    
## [7] "bsdb:37158877/1/2_COVID-19:RPs-(Recovered-Patients)_vs_HCs-(Healthy-Controls)_DOWN"                        
## 
## [[10]]
## [1] "bsdb:33672177/3/2_COVID-19:ICU-COVID-19-patients_vs_Mild-Moderate-COVID-19-patients_UP"          
## [2] "bsdb:33789966/1/1_COVID-19:Confirmed-COVID-19-patients_vs_Healthy-controls_DOWN"                 
## [3] "bsdb:34168484/1/2_COVID-19:COVID-19-patients-with-fever_vs_COVID-19-patients-without-fever_DOWN" 
## [4] "bsdb:34294722/3/1_COVID-19:COVID-19-patients-not-treated-with-antibiotics_vs_Healthy-controls_UP"
## [5] "bsdb:34382150/1/2_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_DOWN"                 
## 
## [[11]]
## [1] "bsdb:33672177/4/1_COVID-19:ICU-COVID-19-patients_vs_Patients-infected-with-other-human-coronaviruses_DOWN"
## [2] "bsdb:33672177/4/2_COVID-19:ICU-COVID-19-patients_vs_Patients-infected-with-other-human-coronaviruses_UP"  
## [3] "bsdb:33980943/1/2_COVID-19:COVID-19-patients_vs_COVID-19-negative-patients-with-pneumonia_DOWN"           
## [4] "bsdb:36350127/3/2_COVID-19:Covid-19-patients_vs_non-Covid-19-patients_DOWN"                               
## 
## [[12]]
## [1] "bsdb:33986253/4/1_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                             
## [2] "bsdb:34100340/2/2_COVID-19:SARS-CoV-2-recovered-individuals_vs_SARS-CoV-2-infected-patients_UP"                    
## [3] "bsdb:34168484/2/1_COVID-19:COVID-19-patients-with-fever_vs_COVID-19-patients-without-fever_UP"                     
## [4] "bsdb:36744910/1/2_COVID-19:Moderate-severity_vs_Mild-severity_DOWN"                                                
## [5] "bsdb:38289047/2/2_COVID-19:COVID-19-patients_vs_NO-COVID-19-(Patients-with-diagnosis-different-from-COVID-19)_DOWN"
## 
## [[13]]
## [1] "bsdb:34031657/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls-and-healthcare-workers_UP"                                            
## [2] "bsdb:34100340/2/1_COVID-19:SARS-CoV-2-recovered-individuals_vs_SARS-CoV-2-infected-patients_DOWN"                                                         
## [3] "bsdb:34103263/1/2_COVID-19:COVID-19-patients_vs_Non-intubated-patients-with-non-incubation-viral-pneumonia-or-non-incubation-non-infectious-diseases_DOWN"
## [4] "bsdb:34199203/1/1_COVID-19:Asymptomatic-COVID-19-patients_vs_Healthy-controls_UP"                                                                         
## [5] "bsdb:34200249/2/1_COVID-19:Recovered-COVID-19-samples-(respiratory-negative)_vs_Healthy-controls_DOWN"                                                    
## [6] "bsdb:36744910/3/1_COVID-19:Severe/Critical-severity_vs_Moderate-severity_UP"                                                                              
## 
## [[14]]
## [1] "bsdb:34058978/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_DOWN"             
## [2] "bsdb:34173452/2/2_COVID-19:COVID-19-patients_vs_Seasonal-flu-patients_DOWN"                           
## [3] "bsdb:34199203/2/1_COVID-19:Asymptomatic-COVID-19-patients_vs_Healthy-controls_UP"                     
## [4] "bsdb:35411293/1/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                     
## [5] "bsdb:36869345/3/2_COVID-19:Feces-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_DOWN"
## 
## [[15]]
## [1] "bsdb:34100340/1/2_COVID-19:SARS-CoV-2-infected-patients_vs_Non-infected-patients-with-unrelated-respiratory-medical-conditions_DOWN"
## [2] "bsdb:36350127/3/1_COVID-19:Covid-19-patients_vs_non-Covid-19-patients_UP"                                                           
## [3] "bsdb:37438797/1/2_COVID-19:Severe-COVID-19_vs_Moderate-COVID-19_DOWN"                                                               
## [4] "bsdb:37596518/1/2_COVID-19:COVID-19-Patients_vs_Non-Covid-19-Patients---pneumonia-patients_DOWN"                                    
## 
## [[16]]
## [1] "bsdb:34173452/1/1_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"               
## [2] "bsdb:34199203/1/2_COVID-19:Asymptomatic-COVID-19-patients_vs_Healthy-controls_DOWN"
## [3] "bsdb:34425246/1/2_COVID-19:COVID-19-cases_vs_Non-infected-controls_DOWN"           
## [4] "bsdb:36744910/2/2_COVID-19:Severe/Critical-severity_vs_Mild-severity_DOWN"         
## 
## [[17]]
## [1] "bsdb:34173452/1/2_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                              
## [2] "bsdb:34200249/2/2_COVID-19:Recovered-COVID-19-samples-(respiratory-negative)_vs_Healthy-controls_UP"
## [3] "bsdb:34294722/1/1_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                
## 
## [[18]]
## [1] "bsdb:34173452/2/1_COVID-19:COVID-19-patients_vs_Seasonal-flu-patients_UP"                          
## [2] "bsdb:34294722/5/2_COVID-19:COVID-19-patients-not-treated-with-antibiotics_vs_Healthy-controls_DOWN"
## 
## [[19]]
## [1] "bsdb:34199203/2/2_COVID-19:Asymptomatic-COVID-19-patients_vs_Healthy-controls_DOWN"
## [2] "bsdb:34294722/4/1_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"               
## 
## [[20]]
## [1] "bsdb:34199203/3/2_COVID-19:Severe-COVID-19-patients_vs_Healthy-controls_DOWN"                            
## [2] "bsdb:36869345/1/2_COVID-19:Pharyngeal-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_UP"
## 
## [[21]]
## [1] "bsdb:34200249/3/1_COVID-19:Infected-COVID-19-samples-(respiratory-positive)_vs_Healthy-controls_DOWN"      
## [2] "bsdb:34200249/3/2_COVID-19:Infected-COVID-19-samples-(respiratory-positive)_vs_Healthy-controls_UP"        
## [3] "bsdb:36350127/6/1_COVID-19:Hospitalized-patients_vs_local-controls_UP"                                     
## [4] "bsdb:36744910/3/2_COVID-19:Severe/Critical-severity_vs_Moderate-severity_DOWN"                             
## [5] "bsdb:36869345/1/1_COVID-19:Pharyngeal-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_DOWN"
## [6] "bsdb:37596518/2/2_COVID-19:COVID-19-patients---Ventilated_vs_COVID-19-patients---Non-Ventilated_DOWN"      
## 
## [[22]]
## [1] "bsdb:34294722/5/1_COVID-19:COVID-19-patients-not-treated-with-antibiotics_vs_Healthy-controls_UP"
## [2] "bsdb:36350127/1/2_COVID-19:COVID-19-patients_vs_local-controls_DOWN"                             
## [3] "bsdb:37158877/1/1_COVID-19:RPs-(Recovered-Patients)_vs_HCs-(Healthy-Controls)_UP"                
## 
## [[23]]
## [1] "bsdb:36350127/1/1_COVID-19:COVID-19-patients_vs_local-controls_UP"      
## [2] "bsdb:36350127/6/2_COVID-19:Hospitalized-patients_vs_local-controls_DOWN"
## 
## [[24]]
## [1] "bsdb:38289047/1/1_COVID-19:COVID-19-patients_vs_healthy-controls_UP"  
## [2] "bsdb:38289047/1/2_COVID-19:COVID-19-patients_vs_healthy-controls_DOWN"

Create a wide-format dataframe

This would be suitable for regression analysis.

# Keep only the rows that carry a curated signature: rows whose
# `NCBI Taxonomy IDs` is NA have no taxa to extract. The column is referenced
# through filter()'s data mask rather than `covid_all$...`, so the condition
# is always evaluated on the rows being filtered.
covid_withsigs <- filter(covid_all, !is.na(`NCBI Taxonomy IDs`))
# One signature (set of taxon names) per remaining row
sigs <- bugsigdbr::getSignatures(covid_withsigs, tax.id.type = "taxname")
# Membership matrix of taxa in signatures, transposed so that rows are
# signatures and columns are taxa. min.size = 0 and prune = FALSE presumably
# keep every signature and taxon -- confirm against the safe::getCmatrix docs.
cmat <- t(safe::getCmatrix(sigs, as.matrix = TRUE, min.size = 0, prune = FALSE))
## WARNING: rows are sorted elements of keyword.list
## 184 categories formed
# Turn the signature-by-taxon matrix into a data frame; check.names = FALSE
# keeps taxon names such as "[Clostridium] colinum" from being rewritten into
# syntactic R names.
cdf <- data.frame(cmat, stringsAsFactors = FALSE, check.names = FALSE)
# Put the study/experiment metadata in front of the taxon columns. cbind()
# pairs rows by position, so this relies on cmat having one row per row of
# covid_withsigs, in the same order.
cdf <- cbind(covid_withsigs, cdf)
# Show the metadata column names plus the first taxon column; the count is
# derived from covid_withsigs instead of being hard-coded as 54.
colnames(cdf)[seq_len(ncol(covid_withsigs) + 1L)]
##  [1] "BSDB ID"                    "Study"                     
##  [3] "Study design"               "PMID"                      
##  [5] "DOI"                        "URL"                       
##  [7] "Authors list"               "Title"                     
##  [9] "Journal"                    "Year"                      
## [11] "Keywords"                   "Experiment"                
## [13] "Location of subjects"       "Host species"              
## [15] "Body site"                  "UBERON ID"                 
## [17] "Condition"                  "EFO ID"                    
## [19] "Group 0 name"               "Group 1 name"              
## [21] "Group 1 definition"         "Group 0 sample size"       
## [23] "Group 1 sample size"        "Antibiotics exclusion"     
## [25] "Sequencing type"            "16S variable region"       
## [27] "Sequencing platform"        "Statistical test"          
## [29] "Significance threshold"     "MHT correction"            
## [31] "LDA Score above"            "Matched on"                
## [33] "Confounders controlled for" "Pielou"                    
## [35] "Shannon"                    "Chao1"                     
## [37] "Simpson"                    "Inverse Simpson"           
## [39] "Richness"                   "Signature page name"       
## [41] "Source"                     "Curated date"              
## [43] "Curator"                    "Revision editor"           
## [45] "Description"                "Abundance in Group 1"      
## [47] "MetaPhlAn taxon names"      "NCBI Taxonomy IDs"         
## [49] "State"                      "Reviewer"                  
## [51] "studyexp"                   "site"                      
## [53] "comparison1"                "[Clostridium] colinum"

Note that this has a number of columns that are mostly zeros; it could be filtered substantially before any regression or machine learning analysis:

# "[Brevibacterium] frigoritolerans" is not a column of `cdf` for this data
# snapshot: the lookup returns NULL and table() prints "< table of extent 0 >".
# Tabulate the first taxon column instead, which exists whenever cmat has at
# least one column.
table(cdf[[colnames(cmat)[1]]])

Create another heatmap, this time of correlations between signatures based on the presence/absence of taxa. This is not strictly necessary, because the previous Jaccard index heatmap is probably better; it is just a demonstration of doing something with the taxa presence/absence directly.

# Pairwise correlation between signatures: cor() correlates the columns of
# its argument, and t(cmat) puts one signature in each column.
sigcors <- cor(t(cmat))
# Number of taxa in each signature. lengths() always returns an integer
# vector, whereas sapply(sigs, length) returns a list for an empty input.
siglengths <- lengths(sigs)
# The same bar plot of signature lengths is shown on the top and on the left
ha <- HeatmapAnnotation(`Signature Length` = anno_barplot(siglengths))
hr <- rowAnnotation(`Signature Length` = anno_barplot(siglengths))
hm <- Heatmap(
  sigcors,
  top_annotation = ha,
  left_annotation = hr,
  row_names_max_width = unit(20, "cm"),
  column_names_max_height = unit(20, "cm"),
  # Drop everything up to the last ":" (the study prefix) so that the
  # signature names stay legible
  row_labels = sub(".+:", "", rownames(sigcors)),
  column_labels = sub(".+:", "", colnames(sigcors))
)
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
# Evaluate the heatmap object at top level so that it is rendered
hm

Use this interactively to make an interactive heatmap: