Analysis objectives

  1. Import, recode, and subset data from bugsigdb.org
  2. Create a table of studies
  3. Create a clustered heatmap showing similarity of signatures from independent studies
  4. Calculate the frequency of appearance of each taxon in independent signatures, and identify the most frequently reported taxa
  5. Estimate the probability of the most frequently identified taxa occurring so frequently by chance

Packages installation

Install packages (not evaluated in vignette)

# One-time setup: CRAN dependencies first, then the GitHub-hosted packages
# (one install_github() call per repository, in the order listed).
install.packages(
  c("devtools", "tidyverse", "kableExtra", "gt", "glue")
)
invisible(lapply(
  c(
    "waldronlab/bugSigSimple",
    "waldronlab/BugSigDBStats",
    "waldronlab/bugsigdbr"
  ),
  devtools::install_github
))

Data import, recoding, and subset

library(bugSigSimple)
# Import the BugSigDB table; cache = TRUE lets a cached copy be used.
dat <- bugsigdbr::importBugSigDB(cache = TRUE)
## Using cached version from 2025-01-30 21:57:36
# Number of rows and columns of the imported table.
c(nrow(dat), ncol(dat))
## [1] 5520   50
# Column names available for recoding and subsetting.
colnames(dat)
##  [1] "BSDB ID"                    "Study"                     
##  [3] "Study design"               "PMID"                      
##  [5] "DOI"                        "URL"                       
##  [7] "Authors list"               "Title"                     
##  [9] "Journal"                    "Year"                      
## [11] "Keywords"                   "Experiment"                
## [13] "Location of subjects"       "Host species"              
## [15] "Body site"                  "UBERON ID"                 
## [17] "Condition"                  "EFO ID"                    
## [19] "Group 0 name"               "Group 1 name"              
## [21] "Group 1 definition"         "Group 0 sample size"       
## [23] "Group 1 sample size"        "Antibiotics exclusion"     
## [25] "Sequencing type"            "16S variable region"       
## [27] "Sequencing platform"        "Statistical test"          
## [29] "Significance threshold"     "MHT correction"            
## [31] "LDA Score above"            "Matched on"                
## [33] "Confounders controlled for" "Pielou"                    
## [35] "Shannon"                    "Chao1"                     
## [37] "Simpson"                    "Inverse Simpson"           
## [39] "Richness"                   "Signature page name"       
## [41] "Source"                     "Curated date"              
## [43] "Curator"                    "Revision editor"           
## [45] "Description"                "Abundance in Group 1"      
## [47] "MetaPhlAn taxon names"      "NCBI Taxonomy IDs"         
## [49] "State"                      "Reviewer"
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
##  dplyr     1.1.4      readr     2.1.5
##  forcats   1.0.0      stringr   1.5.1
##  ggplot2   3.5.1      tibble    3.2.1
##  lubridate 1.9.4      tidyr     1.3.1
##  purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
##  dplyr::filter() masks stats::filter()
##  dplyr::lag()    masks stats::lag()
##  Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
 efo <- bugsigdbr::getOntology("efo")
## Loading required namespace: ontologyIndex
## Using cached version from 2025-01-30 21:55:38
# Subset the table on its Condition column using the EFO term "COVID-19".
covid_all <- bugsigdbr::subsetByOntology(dat, column = "Condition", "COVID-19", efo)
# Add helper columns in one step (needs dplyr attached, e.g. via tidyverse):
#   studyexp    - "<Study>_<Experiment>" label
#   site        - body sites collapsed into Gut / aURT / LRT; body sites not
#                 listed below keep their original value
#   comparison1 - "<Group 0 name> vs <Group 1 name>" label
covid_all <- mutate(
  covid_all,
  studyexp = paste(Study, Experiment, sep = "_"),
  site = recode(
    `Body site`,
    # gut
    "Feces" = "Gut",
    "Rectum" = "Gut",
    # recoded as "aURT"
    "Nasopharynx" = "aURT",
    "Oropharynx" = "aURT",
    "Nasopharynx,Oropharynx" = "aURT",
    "Nasal cavity" = "aURT",
    "Surface of tongue" = "aURT",
    "Nasopharynx,Throat" = "aURT",
    "Throat" = "aURT",
    "Tongue" = "aURT",
    # recoded as "LRT"
    "Lung" = "LRT",
    "Sputum" = "LRT"
  ),
  comparison1 = paste(`Group 0 name`, `Group 1 name`, sep = " vs ")
)

Table of studies

# Print a per-study summary of the COVID-19 subset (the output below lists
# study, condition, cases, controls and study design).
bugSigSimple::createStudyTable(covid_all)
## # A tibble: 44 × 5
##    Study          Condition Cases Controls `Study Design`                       
##    <chr>          <chr>     <dbl>    <dbl> <chr>                                
##  1 Bellato 2023   COVID-19    156       36 cross-sectional observational, not c…
##  2 Braun 2021     COVID-19     26       29 cross-sectional observational, not c…
##  3 Cao 2021       COVID-19     13        8 case-control                         
##  4 Chen 2021      COVID-19     30       30 time series / longitudinal observati…
##  5 De 2020        COVID-19     18       22 cross-sectional observational, not c…
##  6 Engen 2021     COVID-19      9       10 cross-sectional observational, not c…
##  7 Gaibani 2021   COVID-19     24       24 case-control                         
##  8 Galperine 2023 COVID-19     57       43 time series / longitudinal observati…
##  9 Gao 2021       COVID-19     94       48 prospective cohort                   
## 10 Gu 2020        COVID-19     30       30 cross-sectional observational, not c…
## # ℹ 34 more rows

Taxon frequency tables by body site

library(dplyr)
# Split the COVID-19 signatures by recoded body site, dropping rows whose
# Source is missing.
gut_sigs <- covid_all %>%
  filter(site == "Gut") %>%
  drop_na(Source)

naso_sigs <- covid_all %>%
  filter(site == "aURT") %>%
  drop_na(Source)

resp_sigs <- covid_all %>%
  filter(site == "LRT") %>%
  drop_na(Source)

In this table, the Binomial Test p-value corresponds to the null hypothesis

H0: the proportion of signatures in which the taxon is reported increased or decreased, relative to the total number of signatures in which it is reported, is equal to 0.5

# Render the taxon frequency table for the gut signatures.
bugSigSimple::createTaxonTable(gut_sigs) %>%
  kableExtra::kbl()
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Faecalibacterium genus 15 1 14 0.00098 Bacteria Bacillota Clostridia Eubacteriales Oscillospiraceae Faecalibacterium NA 21 k__Bacteria|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Faecalibacterium
Anaerostipes genus 15 4 11 0.12000 Bacteria Bacillota Clostridia Lachnospirales Lachnospiraceae Anaerostipes NA 17 k__Bacteria|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Anaerostipes
Streptococcus genus 12 10 2 0.03900 Bacteria Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus NA 18 k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus
Clostridium genus 12 6 6 1.00000 Bacteria Bacillota Clostridia Eubacteriales Clostridiaceae Clostridium NA 18 k__Bacteria|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Clostridiaceae|g__Clostridium
Roseburia genus 12 0 12 0.00049 Bacteria Bacillota Clostridia Lachnospirales Lachnospiraceae Roseburia NA 19 k__Bacteria|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Roseburia
Enterococcus genus 11 11 0 0.00098 Bacteria Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus NA 18 k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Enterococcaceae|g__Enterococcus
Ruminococcus genus 10 3 7 0.34000 Bacteria Bacillota Clostridia Eubacteriales Oscillospiraceae Ruminococcus NA 20 k__Bacteria|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Ruminococcus
Lactobacillaceae family 9 5 4 1.00000 Bacteria Bacillota Bacilli Lactobacillales Lactobacillaceae NA NA 19 k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae
Faecalibacterium prausnitzii species 9 0 9 0.00390 Bacteria Bacillota Clostridia Eubacteriales Oscillospiraceae Faecalibacterium Faecalibacterium prausnitzii 9 k__Bacteria|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Faecalibacterium|s__Faecalibacterium prausnitzii
Anaerobutyricum hallii species 9 3 6 0.51000 Bacteria Bacillota Clostridia Lachnospirales Lachnospiraceae Anaerobutyricum Anaerobutyricum hallii 9 k__Bacteria|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Anaerobutyricum|s__Anaerobutyricum hallii
# Render the taxon frequency table for the aURT signatures.
bugSigSimple::createTaxonTable(naso_sigs) %>%
  kableExtra::kbl()
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Veillonella genus 15 12 3 0.035 Bacteria Bacillota Negativicutes Veillonellales Veillonellaceae Veillonella NA 17 k__Bacteria|p__Bacillota|c__Negativicutes|o__Veillonellales|f__Veillonellaceae|g__Veillonella
Prevotella genus 12 6 6 1.000 Bacteria Bacteroidota Bacteroidia Bacteroidales Prevotellaceae Prevotella NA 22 k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella
Haemophilus genus 11 2 9 0.065 Bacteria Pseudomonadota Gammaproteobacteria Pasteurellales Pasteurellaceae Haemophilus NA 13 k__Bacteria|p__Pseudomonadota|c__Gammaproteobacteria|o__Pasteurellales|f__Pasteurellaceae|g__Haemophilus
Streptococcus genus 10 4 6 0.750 Bacteria Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus NA 15 k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus
Oribacterium genus 10 2 8 0.110 Bacteria Bacillota Clostridia Lachnospirales Lachnospiraceae Oribacterium NA 10 k__Bacteria|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Oribacterium
Rothia genus 9 3 6 0.510 Bacteria Actinomycetota Actinomycetes Micrococcales Micrococcaceae Rothia NA 12 k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Micrococcales|f__Micrococcaceae|g__Rothia
Neisseria genus 9 1 8 0.039 Bacteria Pseudomonadota Betaproteobacteria Neisseriales Neisseriaceae Neisseria NA 13 k__Bacteria|p__Pseudomonadota|c__Betaproteobacteria|o__Neisseriales|f__Neisseriaceae|g__Neisseria
Filifactor genus 8 1 7 0.070 Bacteria Bacillota Clostridia Peptostreptococcales Filifactoraceae Filifactor NA 8 k__Bacteria|p__Bacillota|c__Clostridia|o__Peptostreptococcales|f__Filifactoraceae|g__Filifactor
Alloprevotella genus 8 4 4 1.000 Bacteria Bacteroidota Bacteroidia Bacteroidales Prevotellaceae Alloprevotella NA 8 k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Alloprevotella
Bifidobacterium genus 7 0 7 0.016 Bacteria Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium NA 7 k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium
# Render the taxon frequency table for the LRT signatures.
bugSigSimple::createTaxonTable(resp_sigs) %>%
  kableExtra::kbl()
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Rothia mucilaginosa species 2 1 1 1.0 Bacteria Actinomycetota Actinomycetes Micrococcales Micrococcaceae Rothia Rothia mucilaginosa 2 k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Micrococcales|f__Micrococcaceae|g__Rothia|s__Rothia mucilaginosa
Streptococcus genus 2 1 1 1.0 Bacteria Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus NA 3 k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus
Lachnoclostridium phytofermentans species 2 1 1 1.0 Bacteria Bacillota Clostridia Lachnospirales Lachnospiraceae Lachnoclostridium Lachnoclostridium phytofermentans 2 k__Bacteria|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Lachnoclostridium|s__Lachnoclostridium phytofermentans
Porphyromonas genus 2 0 2 0.5 Bacteria Bacteroidota Bacteroidia Bacteroidales Porphyromonadaceae Porphyromonas NA 2 k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Porphyromonadaceae|g__Porphyromonas
Tannerella genus 2 0 2 0.5 Bacteria Bacteroidota Bacteroidia Bacteroidales Tannerellaceae Tannerella NA 2 k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Tannerellaceae|g__Tannerella
Capnocytophaga genus 2 1 1 1.0 Bacteria Bacteroidota Flavobacteriia Flavobacteriales Flavobacteriaceae Capnocytophaga NA 2 k__Bacteria|p__Bacteroidota|c__Flavobacteriia|o__Flavobacteriales|f__Flavobacteriaceae|g__Capnocytophaga
Neisseria genus 2 0 2 0.5 Bacteria Pseudomonadota Betaproteobacteria Neisseriales Neisseriaceae Neisseria NA 2 k__Bacteria|p__Pseudomonadota|c__Betaproteobacteria|o__Neisseriales|f__Neisseriaceae|g__Neisseria
Actinomyces genus 1 0 1 1.0 Bacteria Actinomycetota Actinomycetes Actinomycetales Actinomycetaceae Actinomyces NA 2 k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces
Actinomyces pacaensis species 1 1 0 1.0 Bacteria Actinomycetota Actinomycetes Actinomycetales Actinomycetaceae Actinomyces Actinomyces pacaensis 1 k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces pacaensis
Schaalia meyeri species 1 1 0 1.0 Bacteria Actinomycetota Actinomycetes Actinomycetales Actinomycetaceae Schaalia Schaalia meyeri 1 k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Actinomycetales|f__Actinomycetaceae|g__Schaalia|s__Schaalia meyeri

gut microbiota analysis

Look specifically at case-control comparisons

# Flag comparisons whose label mentions both "healthy" and "COVID" (either
# order, case-insensitive), then keep only those gut signatures.
healthy <- grepl(
  ".*(healthy.*COVID|COVID.*healthy).*",
  gut_sigs$comparison1,
  ignore.case = TRUE
)

cc_gut_sigs1 <- filter(gut_sigs, healthy)

Overall frequencies of taxa increased in cases for case/control feces studies

Identifying a taxon reported consistently in 8 out of 8 studies is much more compelling than the FDR value here would suggest, since this taxon also passed a significance threshold in every one of those studies.

# Tabulate taxa for the case-control gut signatures and insert an
# FDR-adjusted p-value column directly after the raw binomial test p-value.
cc_gut_sigs1_taxontable <- mutate(
  bugSigSimple::createTaxonTable(cc_gut_sigs1),
  FDR = p.adjust(p = `Binomial Test pval`, method = "fdr"),
  .after = `Binomial Test pval`
)
kableExtra::kbl(cc_gut_sigs1_taxontable)
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval FDR kingdom phylum class order family genus species n_signatures metaphlan_name
Faecalibacterium genus 9 0 9 0.0039 0.0156000 Bacteria Bacillota Clostridia Eubacteriales Oscillospiraceae Faecalibacterium NA 14 k__Bacteria|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Faecalibacterium
Anaerostipes genus 9 1 8 0.0390 0.0557143 Bacteria Bacillota Clostridia Lachnospirales Lachnospiraceae Anaerostipes NA 10 k__Bacteria|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Anaerostipes
Roseburia genus 9 0 9 0.0039 0.0156000 Bacteria Bacillota Clostridia Lachnospirales Lachnospiraceae Roseburia NA 11 k__Bacteria|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Roseburia
Enterococcus genus 8 8 0 0.0078 0.0156000 Bacteria Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus NA 11 k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Enterococcaceae|g__Enterococcus
Lactobacillaceae family 8 5 3 0.7300 0.7300000 Bacteria Bacillota Bacilli Lactobacillales Lactobacillaceae NA NA 12 k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae
Streptococcus genus 8 8 0 0.0078 0.0156000 Bacteria Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus NA 10 k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus
Clostridium genus 8 3 5 0.7300 0.7300000 Bacteria Bacillota Clostridia Eubacteriales Clostridiaceae Clostridium NA 8 k__Bacteria|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Clostridiaceae|g__Clostridium
Coprococcus genus 8 0 8 0.0078 0.0156000 Bacteria Bacillota Clostridia Lachnospirales Lachnospiraceae Coprococcus NA 13 k__Bacteria|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Coprococcus
Ruminococcus genus 7 1 6 0.1300 0.1625000 Bacteria Bacillota Clostridia Eubacteriales Oscillospiraceae Ruminococcus NA 14 k__Bacteria|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Ruminococcus
Blautia obeum species 7 0 7 0.0160 0.0266667 Bacteria Bacillota Clostridia Lachnospirales Lachnospiraceae Blautia Blautia obeum 7 k__Bacteria|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Blautia|s__Blautia obeum

Monte-Carlo simulation for increased abundance taxa

Just for the increased cc_gut_sigs1 for now. I am inclined to skip this analysis in favor of the clustering and binomial test analysis.

library(bugSigSimple)
# Signatures (as taxon names) flagged "increased" in Group 1 among the
# case-control gut comparisons.
gut.sigs.increased <- cc_gut_sigs1 %>%
  filter(`Abundance in Group 1` == "increased") %>%
  bugsigdbr::getSignatures(tax.id.type = "taxname")
# Number of taxa per signature. lengths() always yields an integer vector,
# whereas sapply(x, length) degrades to list() when x is empty.
my.siglengths.inc <- lengths(gut.sigs.increased)

getCriticalN(gut.sigs.increased, my.siglengths.inc)
## 95% 
##  10
# Compare to observed: in the output shown here the most frequent taxa
# (Enterococcus and Streptococcus, 8 signatures each) stay below the
# critical N of 10, so no taxon reaches the critical limit.
frequencySigs(gut.sigs.increased)
##            Enterococcus           Streptococcus      Enterobacteriaceae 
##                       8                       8                       5 
## Enterocloster citroniae         Enterococcaceae        Lactobacillaceae 
##                       5                       5                       5 
##           Lactobacillus                  Rothia          Actinomycetota 
##                       5                       5                       4 
##               Atopobium 
##                       4

Overall frequencies of taxa decreased in cases for case/control feces studies

# Taxon table for the case-control gut signatures with n = 40
# (presumably the number of taxa listed — TODO confirm).
# NOTE(review): cc_gut_sigs1 is not filtered by direction here, so the table
# is not restricted to decreased taxa — confirm this matches the heading.
bugSigSimple::createTaxonTable(cc_gut_sigs1, n = 40)

nasopharyngeal microbiota analysis

Look specifically at case-control comparisons

library(dplyr)
# Flag comparisons whose label mentions both "control" and "COVID" (either
# order, case-insensitive).
healthy <- grepl(
  ".*(control.*COVID|COVID.*control).*",
  naso_sigs$comparison1,
  ignore.case = TRUE
)

# Keep the flagged aURT signatures, then leave out Study 458.
cc_naso_sigs1 <- filter(naso_sigs, healthy)
cc_naso_sigs1 <- subset(cc_naso_sigs1, Study != "Study 458")

Cluster analysis

Note, this EDA should really be done before hypothesis testing.

First calculate pairwise overlaps for all signatures of length > 1:

# Extract every COVID-19 signature as a vector of taxon names.
allsigs <- bugsigdbr::getSignatures(covid_all, tax.id.type = "taxname")
# Require length > 1. lengths() is the type-stable equivalent of
# sapply(allsigs, length).
allsigs <- allsigs[lengths(allsigs) > 1]
# allsigs is a list, so dim() only ever returned NULL; length() reports how
# many signatures were retained.
length(allsigs)
# Pairwise overlaps between the retained signatures.
mydists <- BugSigDBStats::calcPairwiseOverlaps(allsigs)
dim(mydists)
## [1] 2722    8

Create a matrix of Jaccard similarities (0 for no overlap, 1 for 100% overlap)

# Jaccard similarity for every pair of retained signatures.
jmat <- BugSigDBStats::calcJaccardSimilarity(allsigs)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.22.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
# Signature sizes feed the bar-plot annotations on the top and left margins.
# lengths() is the type-stable replacement for sapply(allsigs, length).
siglengths <- lengths(allsigs)
ha <- HeatmapAnnotation(`Signature Length` = anno_barplot(siglengths))
hr <- rowAnnotation(`Signature Length` = anno_barplot(siglengths))
# Heatmap of the Jaccard matrix. Row/column labels drop everything up to and
# including the last ":" of each signature name; the generous max
# width/height leaves room for the long labels.
hm <- Heatmap(
  jmat,
  top_annotation = ha, left_annotation = hr,
  row_names_max_width = unit(20, "cm"),
  column_names_max_height = unit(20, "cm"),
  row_labels = sub(".+:", "", rownames(jmat)),
  column_labels = sub(".+:", "", colnames(jmat))
)
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
# Print the heatmap object built above to draw it.
hm

Use this interactively to make an interactive heatmap (not evaluated in vignette). Some expanding of the default size is required to see anything. Creating a sub-heatmap, then exporting it as a table, allows in-depth identification of the subgroups.

# jmat holds Jaccard *similarities* (0 = no overlap, 1 = identical), but
# hclust() expects dissimilarities. Convert to Jaccard distance first;
# clustering the raw similarities groups signatures that share no taxa.
# NOTE(review): any cluster output rendered from the old tree is stale.
hc <- hclust(as.dist(1 - jmat))
plot(hc)

This tree can be cut to show the clusters, for example. The clusters of more than 1 signature but fewer than ~10 signatures are most likely to be something interesting.

# Cut the dendrogram at height 0.05 and list the signature names belonging to
# each cluster, in ascending cluster-id order.
# NOTE(review): h is on the scale of the values hc was built from — confirm
# that 0.05 is the intended threshold.
clusts <- sort(cutree(hc, h = 0.05))
unname(split(names(clusts), clusts))
## [[1]]
##  [1] "bsdb:428/1/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                                                                       
##  [2] "bsdb:428/1/2_COVID-19:COVID-19-cases_vs_Healthy-controls_DOWN"                                                                                     
##  [3] "bsdb:428/3/1_COVID-19:Recovered-COVID-19-cases_vs_Healthy-controls_UP"                                                                             
##  [4] "bsdb:430/1/1_COVID-19:Patients-with-high-SARS-CoV-2-infectivity_vs_Patients-with-low-to-none-SARS-CoV-2-infectivity_DOWN"                          
##  [5] "bsdb:441/1/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                                                                       
##  [6] "bsdb:441/3/2_COVID-19:COVID-19-patients_vs_H1N1-patients_DOWN"                                                                                     
##  [7] "bsdb:449/1/2_COVID-19:COVID-19-cases_vs_Healthy-controls_DOWN"                                                                                     
##  [8] "bsdb:453/3/1_COVID-19:Recovered-COVID-19-patients_vs_Non-infected-patients-with-unrelated-respiratory-medical-conditions_UP"                       
##  [9] "bsdb:453/3/2_COVID-19:Recovered-COVID-19-patients_vs_Non-infected-patients-with-unrelated-respiratory-medical-conditions_DOWN"                     
## [10] "bsdb:458/1/1_COVID-19:COVID-19-patients_vs_Non-intubated-patients-with-non-incubation-viral-pneumonia-or-non-incubation-non-infectious-diseases_UP"
## [11] "bsdb:459/1/2_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_UP"                                                                 
## [12] "bsdb:464/1/2_COVID-19:Confirmed-COVID-19-patients_vs_Healthy-controls_UP"                                                                          
## [13] "bsdb:469/1/1_COVID-19:COVID-19-patients_vs_Non-COVID-19-controls_DOWN"                                                                             
## [14] "bsdb:474/1/2_COVID-19:COVID-19-patients_vs_Non-COVID-patients-with-URTI-or-COPD_DOWN"                                                              
## [15] "bsdb:479/2/2_COVID-19:Mild-moderate-COVID-19-patients_vs_Healthy-controls_UP"                                                                      
## [16] "bsdb:482/1/2_COVID-19:Severe-COVID-19-patients_vs_Mild-COVID-19-patients_UP"                                                                       
## [17] "bsdb:483/1/1_COVID-19:Antibiotic-naive-COVID-19-patients_vs_Uninfected-controls,-pneumonia-patients,-antibiotics-treated-COVID-19-patients_UP"     
## [18] "bsdb:483/3/1_COVID-19:Pneumonia-controls_vs_Uninfected-controls,-COVID-19-patients_DOWN"                                                           
## [19] "bsdb:484/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-patients-with-fever-and-cough_UP"                                            
## [20] "bsdb:485/1/1_COVID-19:COVID-19-patients-with-fever_vs_COVID-19-patients-without-fever_UP"                                                          
## [21] "bsdb:511/1/2_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                                                                  
## [22] "bsdb:512/3/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-healthcare-workers-(controls)_UP"                                            
## [23] "bsdb:513/3/1_COVID-19:COVID-19-positive-patients-with-antibiotic-use_vs_COVID-19-positive-patients-without-antibiotic-use_UP"                      
## [24] "bsdb:734/1/1_COVID-19:COVID-19-infection-cases._vs_Control-group_UP"                                                                               
## [25] "bsdb:734/1/2_COVID-19:COVID-19-infection-cases._vs_Control-group_DOWN"                                                                             
## [26] "bsdb:831/1/1_COVID-19:COVID-19-Patients_vs_Non-Covid-19-Patients---pneumonia-patients_UP"                                                          
## [27] "bsdb:940/1/2_COVID-19:Covid-19-group_vs_Control-group_DOWN"                                                                                        
## [28] "bsdb:940/2/1_COVID-19:Post-Covid-19-group_vs_Covid-19-group_UP"                                                                                    
## 
## [[2]]
##  [1] "bsdb:428/2/1_COVID-19:Recovered-COVID-19-cases_vs_Healthy-controls_UP"                                                         
##  [2] "bsdb:428/2/2_COVID-19:Recovered-COVID-19-cases_vs_Healthy-controls_DOWN"                                                       
##  [3] "bsdb:441/1/2_COVID-19:COVID-19-cases_vs_Healthy-controls_DOWN"                                                                 
##  [4] "bsdb:441/4/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                                                   
##  [5] "bsdb:453/1/1_COVID-19:SARS-CoV-2-infected-patients_vs_Non-infected-patients-with-unrelated-respiratory-medical-conditions_UP"  
##  [6] "bsdb:464/2/2_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_UP"                                                      
##  [7] "bsdb:474/1/1_COVID-19:COVID-19-patients_vs_Non-COVID-patients-with-URTI-or-COPD_UP"                                            
##  [8] "bsdb:476/1/1_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_UP"                                                      
##  [9] "bsdb:478/1/2_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                                                
## [10] "bsdb:479/2/1_COVID-19:Mild-moderate-COVID-19-patients_vs_Healthy-controls_DOWN"                                                
## [11] "bsdb:481/1/1_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                                                
## [12] "bsdb:481/3/1_COVID-19:Severe-COVID-19-patients_vs_Mild-COVID-19-patients_UP"                                                   
## [13] "bsdb:487/1/1_COVID-19:Recovered-COVID-19-samples-(respiratory-negative)_vs_Infected-COVID-19-samples-(respiratory-positive)_UP"
## [14] "bsdb:498/1/2_COVID-19:Symptomatic,-mild-to-moderate-COVID-19-patients_vs_Asymptomatic-uninfected-controls_DOWN"                
## [15] "bsdb:511/3/2_COVID-19:COVID-19-patients-not-treated-with-antibiotics_vs_Healthy-controls_DOWN"                                 
## [16] "bsdb:512/2/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_UP"                                             
## [17] "bsdb:828/3/1_COVID-19,Respiratory-failure-requiring-assisted-ventilation:Intubated_vs_Non-intubated_UP"                        
## [18] "bsdb:940/1/1_COVID-19:Covid-19-group_vs_Control-group_UP"                                                                      
## [19] "bsdb:940/3/1_COVID-19:Post-Covid-19-group_vs_Control-group_UP"                                                                 
## [20] "bsdb:940/5/2_COVID-19:Covid-19-C-group_vs_Control-group_DOWN"                                                                  
## 
## [[3]]
##  [1] "bsdb:428/3/2_COVID-19:Recovered-COVID-19-cases_vs_Healthy-controls_DOWN"                                                                             
##  [2] "bsdb:453/2/1_COVID-19:SARS-CoV-2-recovered-individuals_vs_SARS-CoV-2-infected-patients_DOWN"                                                         
##  [3] "bsdb:458/1/2_COVID-19:COVID-19-patients_vs_Non-intubated-patients-with-non-incubation-viral-pneumonia-or-non-incubation-non-infectious-diseases_DOWN"
##  [4] "bsdb:478/2/1_COVID-19:COVID-19-patients_vs_Flu-patients_DOWN"                                                                                        
##  [5] "bsdb:479/1/1_COVID-19:ICU-COVID-19-patients_vs_Healthy-controls_DOWN"                                                                                
##  [6] "bsdb:479/4/2_COVID-19:ICU-COVID-19-patients_vs_Patients-infected-with-other-human-coronaviruses_UP"                                                  
##  [7] "bsdb:481/2/1_COVID-19:Antibiotic-treated-COVID-19-patients_vs_Antibiotic-naive-COVID-19-patients_UP"                                                 
##  [8] "bsdb:485/2/2_COVID-19:COVID-19-patients-with-fever_vs_COVID-19-patients-without-fever_DOWN"                                                          
##  [9] "bsdb:487/2/1_COVID-19:Recovered-COVID-19-samples-(respiratory-negative)_vs_Healthy-controls_DOWN"                                                    
## [10] "bsdb:782/3/1_COVID-19:Feces-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_UP"                                                      
## [11] "bsdb:940/2/2_COVID-19:Post-Covid-19-group_vs_Covid-19-group_DOWN"                                                                                    
## [12] "bsdb:940/4/2_COVID-19:Covid-19-N-group_vs_Control-group_DOWN"                                                                                        
## 
## [[4]]
##  [1] "bsdb:428/4/1_COVID-19:COVID-19-patients-treated-with-antibiotics_vs_Healthy-controls_DOWN"                                     
##  [2] "bsdb:428/4/2_COVID-19:COVID-19-patients-treated-with-antibiotics_vs_Healthy-controls_UP"                                       
##  [3] "bsdb:430/1/2_COVID-19:Patients-with-high-SARS-CoV-2-infectivity_vs_Patients-with-low-to-none-SARS-CoV-2-infectivity_UP"        
##  [4] "bsdb:453/1/2_COVID-19:SARS-CoV-2-infected-patients_vs_Non-infected-patients-with-unrelated-respiratory-medical-conditions_DOWN"
##  [5] "bsdb:453/4/2_COVID-19:Positive-for-COVID-19-viral-RNA-in-feces_vs_Negative-for-COVID-19-viral-RNA-in-feces_DOWN"               
##  [6] "bsdb:464/2/1_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_DOWN"                                                    
##  [7] "bsdb:477/1/1_COVID-19:Severe-COVID-19-patients_vs_COVID-19-negative-controls_UP"                                               
##  [8] "bsdb:479/3/1_COVID-19:ICU-COVID-19-patients_vs_Mild-Moderate-COVID-19-patients_DOWN"                                           
##  [9] "bsdb:484/2/1_COVID-19:COVID-19-positive-patients_vs_Healthy-controls_UP"                                                       
## [10] "bsdb:498/1/1_COVID-19:Symptomatic,-mild-to-moderate-COVID-19-patients_vs_Asymptomatic-uninfected-controls_UP"                  
## [11] "bsdb:940/4/1_COVID-19:Covid-19-N-group_vs_Control-group_UP"                                                                    
## 
## [[5]]
##  [1] "bsdb:441/3/1_COVID-19:COVID-19-patients_vs_H1N1-patients_UP"                                           
##  [2] "bsdb:473/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_DOWN"                   
##  [3] "bsdb:478/3/1_COVID-19:Flu-patients_vs_Healthy-controls_UP"                                             
##  [4] "bsdb:479/1/2_COVID-19:ICU-COVID-19-patients_vs_Healthy-controls_UP"                                    
##  [5] "bsdb:481/2/2_COVID-19:Antibiotic-treated-COVID-19-patients_vs_Antibiotic-naive-COVID-19-patients_DOWN" 
##  [6] "bsdb:484/4/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-patients-with-fever-and-cough_UP"
##  [7] "bsdb:500/1/1_COVID-19:COVID-19-patients_vs_COVID-19-negative-patients-with-pneumonia_UP"               
##  [8] "bsdb:511/2/1_COVID-19:COVID-19-patients-(severe/critical)_vs_COVID-19-patients-(mild/moderate)_DOWN"   
##  [9] "bsdb:831/1/2_COVID-19:COVID-19-Patients_vs_Non-Covid-19-Patients---pneumonia-patients_DOWN"            
## [10] "bsdb:940/5/1_COVID-19:Covid-19-C-group_vs_Control-group_UP"                                            
## 
## [[6]]
## [1] "bsdb:449/1/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                                                 
## [2] "bsdb:453/2/2_COVID-19:SARS-CoV-2-recovered-individuals_vs_SARS-CoV-2-infected-patients_UP"                                   
## [3] "bsdb:459/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_DOWN"                                         
## [4] "bsdb:478/5/1_COVID-19:Flu-patients_vs_Healthy-controls_DOWN"                                                                 
## [5] "bsdb:481/1/2_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                                            
## [6] "bsdb:486/1/2_COVID-19:Asymptomatic-COVID-19-patients_vs_Healthy-controls_DOWN"                                               
## [7] "bsdb:486/2/1_COVID-19:Asymptomatic-COVID-19-patients_vs_Healthy-controls_UP"                                                 
## [8] "bsdb:489/1/1_COVID-19:COVID-positive-patients-admitted-to-ICU-(i-COVID-19)_vs_Pneumonia-control-patients-(COVID-negative)_UP"
## [9] "bsdb:513/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls_UP"                                           
## 
## [[7]]
## [1] "bsdb:453/4/1_COVID-19:Positive-for-COVID-19-viral-RNA-in-feces_vs_Negative-for-COVID-19-viral-RNA-in-feces_UP"                                              
## [2] "bsdb:464/3/2_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                                                                             
## [3] "bsdb:476/1/2_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_DOWN"                                                                                 
## [4] "bsdb:489/2/1_COVID-19:COVID-positive-patients-admitted-to-ICU-(i-COVID-19)_vs_COVID-positive-patients-admitted-to-infectious-disease-ward-(w-COVID-19)_UP"  
## [5] "bsdb:489/2/2_COVID-19:COVID-positive-patients-admitted-to-ICU-(i-COVID-19)_vs_COVID-positive-patients-admitted-to-infectious-disease-ward-(w-COVID-19)_DOWN"
## [6] "bsdb:498/2/1_COVID-19:COVID-19-patients-with-high-viral-load_vs_COVID-19-patients-with-low-viral-load_UP"                                                   
## [7] "bsdb:546/1/2_COVID-19:COVID-19-cases_vs_Non-infected-controls_DOWN"                                                                                         
## 
## [[8]]
## [1] "bsdb:464/1/1_COVID-19:Confirmed-COVID-19-patients_vs_Healthy-controls_DOWN"                                                                    
## [2] "bsdb:478/4/2_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                                                                
## [3] "bsdb:479/3/2_COVID-19:ICU-COVID-19-patients_vs_Mild-Moderate-COVID-19-patients_UP"                                                             
## [4] "bsdb:483/2/1_COVID-19:Antibiotic-treated-COVID-19-patients_vs_Antibiotic-naive-COVID-19-patients,-pneumonia-patients,-uninfected-controls_DOWN"
## [5] "bsdb:485/1/2_COVID-19:COVID-19-patients-with-fever_vs_COVID-19-patients-without-fever_DOWN"                                                    
## [6] "bsdb:831/2/1_COVID-19:COVID-19-patients---Ventilated_vs_COVID-19-patients---Non-Ventilated_UP"                                                 
## 
## [[9]]
## [1] "bsdb:464/3/1_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                                                          
## [2] "bsdb:478/1/1_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                                                          
## [3] "bsdb:478/5/2_COVID-19:Flu-patients_vs_Healthy-controls_UP"                                                                                 
## [4] "bsdb:489/3/2_COVID-19:COVID-positive-patients-admitted-to-infectious-disease-ward-(w-COVID-19)_vs_Pneumonia-controls-(COVID-negative)_DOWN"
## [5] "bsdb:511/5/1_COVID-19:COVID-19-patients-not-treated-with-antibiotics_vs_Healthy-controls_UP"                                               
## [6] "bsdb:965/1/1_COVID-19:COVID-19-patients_vs_healthy-controls_UP"                                                                            
## 
## [[10]]
## [1] "bsdb:478/3/2_COVID-19:Flu-patients_vs_Healthy-controls_DOWN"                                         
## [2] "bsdb:479/4/1_COVID-19:ICU-COVID-19-patients_vs_Patients-infected-with-other-human-coronaviruses_DOWN"
## [3] "bsdb:482/1/1_COVID-19:Severe-COVID-19-patients_vs_Mild-COVID-19-patients_DOWN"                       
## [4] "bsdb:485/2/1_COVID-19:COVID-19-patients-with-fever_vs_COVID-19-patients-without-fever_UP"            
## 
## [[11]]
## [1] "bsdb:478/4/1_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                          
## [2] "bsdb:486/1/1_COVID-19:Asymptomatic-COVID-19-patients_vs_Healthy-controls_UP"                               
## [3] "bsdb:498/2/2_COVID-19:COVID-19-patients-with-high-viral-load_vs_COVID-19-patients-with-low-viral-load_DOWN"
## [4] "bsdb:511/3/1_COVID-19:COVID-19-patients-not-treated-with-antibiotics_vs_Healthy-controls_UP"               
## [5] "bsdb:511/4/2_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                          
## [6] "bsdb:547/1/1_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_UP"                                  
## [7] "bsdb:548/1/2_COVID-19:COVID-19-cases_vs_Healthy-controls_DOWN"                                             
## 
## [[12]]
## [1] "bsdb:486/2/2_COVID-19:Asymptomatic-COVID-19-patients_vs_Healthy-controls_DOWN"                           
## [2] "bsdb:511/1/1_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                          
## [3] "bsdb:512/1/1_COVID-19:COVID-19-positive-patients_vs_COVID-19-negative-controls-and-healthcare-workers_UP"
## [4] "bsdb:782/2/1_COVID-19:Sputum-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_UP"         
## [5] "bsdb:782/2/2_COVID-19:Sputum-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_DOWN"       
## [6] "bsdb:1040/1/2_COVID-19:RPs-(Recovered-Patients)_vs_HCs-(Healthy-Controls)_DOWN"                          
## 
## [[13]]
## [1] "bsdb:486/3/1_COVID-19:Severe-COVID-19-patients_vs_Healthy-controls_UP"                                                         
## [2] "bsdb:489/1/2_COVID-19:COVID-positive-patients-admitted-to-ICU-(i-COVID-19)_vs_Pneumonia-control-patients-(COVID-negative)_DOWN"
## [3] "bsdb:496/2/1_COVID-19:COVID-19-patients_vs_Seasonal-flu-patients_UP"                                                           
## [4] "bsdb:977/3/2_COVID-19:Covid-19-patients_vs_non-Covid-19-patients_DOWN"                                                         
## 
## [[14]]
## [1] "bsdb:486/3/2_COVID-19:Severe-COVID-19-patients_vs_Healthy-controls_DOWN"                                      
## [2] "bsdb:511/4/1_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                                               
## [3] "bsdb:965/2/2_COVID-19:COVID-19-patients_vs_NO-COVID-19-(Patients-with-diagnosis-different-from-COVID-19)_DOWN"
## [4] "bsdb:977/6/2_COVID-19:Hospitalized-patients_vs_local-controls_DOWN"                                           
## 
## [[15]]
## [1] "bsdb:487/2/2_COVID-19:Recovered-COVID-19-samples-(respiratory-negative)_vs_Healthy-controls_UP"       
## [2] "bsdb:487/3/1_COVID-19:Infected-COVID-19-samples-(respiratory-positive)_vs_Healthy-controls_DOWN"      
## [3] "bsdb:782/1/1_COVID-19:Pharyngeal-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_DOWN"
## [4] "bsdb:831/2/2_COVID-19:COVID-19-patients---Ventilated_vs_COVID-19-patients---Non-Ventilated_DOWN"      
## [5] "bsdb:977/6/1_COVID-19:Hospitalized-patients_vs_local-controls_UP"                                     
## 
## [[16]]
## [1] "bsdb:487/3/2_COVID-19:Infected-COVID-19-samples-(respiratory-positive)_vs_Healthy-controls_UP"
## [2] "bsdb:977/1/1_COVID-19:COVID-19-patients_vs_local-controls_UP"                                 
## [3] "bsdb:977/1/2_COVID-19:COVID-19-patients_vs_local-controls_DOWN"                               
## [4] "bsdb:1040/1/1_COVID-19:RPs-(Recovered-Patients)_vs_HCs-(Healthy-Controls)_UP"                 
## 
## [[17]]
## [1] "bsdb:489/3/1_COVID-19:COVID-positive-patients-admitted-to-infectious-disease-ward-(w-COVID-19)_vs_Pneumonia-controls-(COVID-negative)_UP"
## [2] "bsdb:496/2/2_COVID-19:COVID-19-patients_vs_Seasonal-flu-patients_DOWN"                                                                   
## [3] "bsdb:548/1/1_COVID-19:COVID-19-cases_vs_Healthy-controls_UP"                                                                             
## [4] "bsdb:782/3/2_COVID-19:Feces-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_DOWN"                                        
## 
## [[18]]
## [1] "bsdb:496/1/1_COVID-19:COVID-19-patients_vs_Healthy-controls_UP"                               
## [2] "bsdb:511/5/2_COVID-19:COVID-19-patients-not-treated-with-antibiotics_vs_Healthy-controls_DOWN"
## 
## [[19]]
## [1] "bsdb:496/1/2_COVID-19:COVID-19-patients_vs_Healthy-controls_DOWN"                                   
## [2] "bsdb:782/1/2_COVID-19:Pharyngeal-samples-from-COVID-19-patients_vs_samples-from-Healthy-patients_UP"
## [3] "bsdb:977/3/1_COVID-19:Covid-19-patients_vs_non-Covid-19-patients_UP"                                
## 
## [[20]]
## [1] "bsdb:500/1/2_COVID-19:COVID-19-patients_vs_COVID-19-negative-patients-with-pneumonia_DOWN"
## [2] "bsdb:547/1/2_COVID-19:Recovered-COVID-19-patients_vs_Healthy-controls_DOWN"               
## 
## [[21]]
## [1] "bsdb:513/2/1_COVID-19:COVID-19-positive-patients-with-breathing-assistance_vs_COVID-19-positive-patients-without-breathing-assistance_UP"
## [2] "bsdb:965/1/2_COVID-19:COVID-19-patients_vs_healthy-controls_DOWN"

Create a wide-format dataframe

This would be suitable for regression analysis.

# Keep only records with curated taxa. filter() evaluates its condition inside
# the data frame, so the column can be referenced directly.
covid_withsigs <- filter(covid_all, !is.na(`NCBI Taxonomy IDs`))
# Extract one signature per remaining record, with taxa given by taxon name.
sigs <- bugsigdbr::getSignatures(covid_withsigs, tax.id.type = "taxname")
# getCmatrix() puts the (sorted) taxa in rows; transpose so that each row is a
# signature and each column is a taxon.
cmat <- t(
  safe::getCmatrix(sigs, as.matrix = TRUE, min.size = 0, prune = FALSE)
)
## WARNING: rows are sorted elements of keyword.list
## 170 categories formed
# Convert the signature-by-taxon matrix to a data frame. check.names = FALSE
# keeps taxon names such as "[Clostridium] colinum" unmodified.
cdf <- data.frame(cmat, stringsAsFactors = FALSE, check.names = FALSE)
# Prepend the study/experiment metadata; rows are assumed to be in the same
# order as the signatures in cmat.
cdf <- cbind(covid_withsigs, cdf)
# Show the metadata columns plus the first taxon column. The count is derived
# from the metadata width rather than hard-coded, so the preview stays correct
# if BugSigDB adds or removes metadata columns.
colnames(cdf)[seq_len(ncol(covid_withsigs) + 1L)]
##  [1] "BSDB ID"                    "Study"                     
##  [3] "Study design"               "PMID"                      
##  [5] "DOI"                        "URL"                       
##  [7] "Authors list"               "Title"                     
##  [9] "Journal"                    "Year"                      
## [11] "Keywords"                   "Experiment"                
## [13] "Location of subjects"       "Host species"              
## [15] "Body site"                  "UBERON ID"                 
## [17] "Condition"                  "EFO ID"                    
## [19] "Group 0 name"               "Group 1 name"              
## [21] "Group 1 definition"         "Group 0 sample size"       
## [23] "Group 1 sample size"        "Antibiotics exclusion"     
## [25] "Sequencing type"            "16S variable region"       
## [27] "Sequencing platform"        "Statistical test"          
## [29] "Significance threshold"     "MHT correction"            
## [31] "LDA Score above"            "Matched on"                
## [33] "Confounders controlled for" "Pielou"                    
## [35] "Shannon"                    "Chao1"                     
## [37] "Simpson"                    "Inverse Simpson"           
## [39] "Richness"                   "Signature page name"       
## [41] "Source"                     "Curated date"              
## [43] "Curator"                    "Revision editor"           
## [45] "Description"                "Abundance in Group 1"      
## [47] "MetaPhlAn taxon names"      "NCBI Taxonomy IDs"         
## [49] "State"                      "Reviewer"                  
## [51] "studyexp"                   "site"                      
## [53] "comparison1"                "[Clostridium] colinum"

Note that this data frame has many taxon columns that are mostly zeros; it could be filtered substantially before any regression or machine learning analysis:

# The previously hard-coded column "[Brevibacterium] frigoritolerans" produced
# an empty table (apparently it is no longer a column of cdf, so the lookup
# returned NULL). Tabulate the first taxon column instead, taking its name from
# cmat so the example keeps working when taxon names change upstream.
first_taxon <- colnames(cmat)[1]
table(cdf[[first_taxon]])

Create another heatmap based on correlations of presence/absence of taxa. This is not strictly necessary, because the previous Jaccard Index heatmap is probably better; it is just a demonstration of working with the taxa presence/absence matrix directly.

# Signature-by-signature correlation of taxon presence/absence. cor() works on
# columns, so cmat (signatures in rows) is transposed first.
sigcors <- cor(t(cmat))
# Number of taxa in each signature. lengths() always returns an integer
# vector, whereas sapply() would return a list for empty input.
siglengths <- lengths(sigs)
# Show the signature length as a barplot along the top and the left side.
ha <- HeatmapAnnotation(`Signature Length` = anno_barplot(siglengths))
hr <- rowAnnotation(`Signature Length` = anno_barplot(siglengths))
hm <- Heatmap(
  sigcors,
  top_annotation = ha,
  left_annotation = hr,
  row_names_max_width = unit(20, "cm"),
  column_names_max_height = unit(20, "cm"),
  # Drop everything up to the last ":" (the study prefix) so that the
  # signature names are legible.
  row_labels = sub(".+:", "", rownames(sigcors)),
  column_labels = sub(".+:", "", colnames(sigcors))
)
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
# Auto-print the Heatmap object built above so that it is drawn.
hm

Use this interactively to make an interactive heatmap: