Objectives of the study

To assess commonality or heterogeneity in socioeconomic and ethnic patterning of the fecal microbiome in studies undertaken in various parts of the world, through cluster analysis of results on differentially abundant taxa.

Data Analysis Plan

  1. Create frequency tables of the number of signatures in which each microbial taxon (species, genus, or other rank) has been reported – by race/ethnicity separately, by SES separately, and for all studies combined.

  2. Cluster analysis of the signatures. Similarity between all pairs of signatures will be calculated using Jaccard distance, to create a distance matrix. This distance matrix will be used for hierarchical clustering, to identify subgroups of similar signatures. This analysis will be performed separately for signatures of increased abundance and of decreased abundance (in low SES and non-white/Caucasian ethnicity), and will be repeated using semantic similarity as an alternative measure of signature similarity, as a sensitivity analysis.

  3. Common and distinct effects of SES and race/ethnicity on the gut microbiome will be identified using meta-regression for the most frequently identified taxa. Specifically, the signature will be the unit of analysis, with the presence or absence of the taxon in each signature coded as a 0/1 binary variable in a regression analysis.

Install Packages

This chunk is not evaluated; run it manually if needed.

# One-off installation of the packages used in this analysis.
# requireNamespace() only checks availability; unlike require(), it does not
# attach BiocManager to the search path as a side effect.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install()
BiocManager::version()
# remotes is needed for installing the "owner/repo" GitHub targets below.
BiocManager::install("remotes", dependencies = TRUE)
BiocManager::install("waldronlab/bugSigSimple")
BiocManager::install("curatedMetagenomicData")
# Use remotes (installed above) for the GitHub installs; devtools is never
# installed by this chunk, so calling it would fail on a fresh library.
remotes::install_github("waldronlab/BugSigDBStats")
remotes::install_github("waldronlab/bugsigdbr")

Import data from the BugSigDB database

# Fetch the "devel" version of BugSigDB without using the local cache
# (date noted by the author: 11/27/2022), then report its dimensions.
dat <- bugsigdbr::importBugSigDB(cache = FALSE, version = "devel")
dim(dat)
## [1] 7349   50

Subset Signatures by Curator and Conditions

# Signatures curated by Kaluifeanyi101, restricted to fecal samples; the
# body-site and condition labels are lower-cased in a single mutate() step.
dat.sub <- dat %>%
  subsetByCurator("Kaluifeanyi101") %>%
  dplyr::filter(`Body site` == "Feces") %>%
  mutate(
    `Body site` = tolower(`Body site`),
    Condition = tolower(Condition)
  )
# Number of signatures per condition label.
table(dat.sub[["Condition"]])
## 
##                                ethnic group 
##                                          31 
## place of residence measurement,ethnic group 
##                                           2 
##                                 risk factor 
##                                           2 
##                               sampling time 
##                                           2 
##                        socioeconomic status 
##                                          25 
##           socioeconomic status,ethnic group 
##                                           2

Table of studies

# Build the study summary table for the curated subset and render it as a
# styled kable.
kable_styling(
  kbl(
    bugSigSimple::createStudyTable(dat.sub)
  )
)
Study code MaxCases MaxControls Study design Condition N_signatures PMID DOI URL
AmaruddinAI_2020 74 66 cross-sectional observational, not case-control socioeconomic status 2 32604882 https://doi.org/10.3390/microorganisms8060961 NA
AngQY_2021 22 24 cross-sectional observational, not case-control ethnic group 2 34617511 https://doi.org/10.7554/eLife.70349 https://pubmed.ncbi.nlm.nih.gov/34617511
BalakrishnanB_2021 30 30 cross-sectional observational, not case-control ethnic group; socioeconomic status 3 33596768 https://doi.org/10.1080/19490976.2021.1882926 NA
BowyerRCE_2019 342 457 cross-sectional observational, not case-control socioeconomic status 2 30641975 https://doi.org/10.3390/microorganisms7010017 https://pubmed.ncbi.nlm.nih.gov/30641975/
BrooksAW_2018 88 1237 cross-sectional observational, not case-control ethnic group 12 30513082 https://doi.org/10.1371/journal.pbio.2006842 https://pubmed.ncbi.nlm.nih.gov/30513082/
CarsonTL_2018 47 33 cross-sectional observational, not case-control ethnic group 1 29901485 https://doi.org/10.1097/PSY.0000000000000614. https://pubmed.ncbi.nlm.nih.gov/29901485/
ChongCW_2015 20 24 cross-sectional observational, not case-control socioeconomic status 4 26290472 https://doi.org/10.1038/srep13338 https://pubmed.ncbi.nlm.nih.gov/26290472/
DeFilippoC_2010 14 15 cross-sectional observational, not case-control place of residence measurement,ethnic group 2 20679230 https://doi.org/10.1073/pnas.1005963107 https://pubmed.ncbi.nlm.nih.gov/20679230/
DwiyantoJ_2021 54 65 cross-sectional observational, not case-control ethnic group 9 33514807 https://doi.org/10.1038/s41598-021-82311-3 https://pubmed.ncbi.nlm.nih.gov/33514807/
EscobarJS_2014 30 54 cross-sectional observational, not case-control ethnic group 6 25495462 https://doi.org/10.1186/s12866-014-0311-6 https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4275940/
GreenhillAR_2015 86 29 cross-sectional observational, not case-control socioeconomic status 1 25658868 https://doi.org/10.1371/journal.pone.0117427 https://pubmed.ncbi.nlm.nih.gov/25658868/
GrześkowiakŁ_2012 44 31 cross-sectional observational, not case-control socioeconomic status 2 22228076 https://doi.org/10.1097/MPG.0b013e318249039c. https://pubmed.ncbi.nlm.nih.gov/22228076/
HeY_2018 7009 NA cross-sectional observational, not case-control socioeconomic status 2 30249275 https://doi.org/10.1186/s40168-018-0557-6. https://pubmed.ncbi.nlm.nih.gov/30249275/
KortekangasE_2020 NA NA prospective cohort sampling time 2 32011017 https://doi.org/10.1111/ppe.12623 NA
LapidotY_2021 70 69 cross-sectional observational, not case-control socioeconomic status 2 34444813 https://doi.org/10.3390/nu13082645 https://pubmed.ncbi.nlm.nih.gov/34444813/
LevinAM_2016 168 130 prospective cohort risk factor 2 27558272 https://doi.org/10.1038/srep31775 NA
LewisCR_2021 NA NA cross-sectional observational, not case-control socioeconomic status 2 34442687 https://doi.org/10.3390/microorganisms9081608 https://pubmed.ncbi.nlm.nih.gov/34442687/
LinA_2013 6 4 cross-sectional observational, not case-control socioeconomic status,ethnic group 2 23349750 https://doi.org/10.1371/journal.pone.0053838 https://pubmed.ncbi.nlm.nih.gov/23349750/
MelloCS_2016 100 30 cross-sectional observational, not case-control socioeconomic status 2 26982745 https://doi.org/10.1097/MPG.0000000000001186 https://pubmed.ncbi.nlm.nih.gov/26982745/
MillerGE_2016 NA 44 cross-sectional observational, not case-control socioeconomic status 2 26859894 https://doi.org/10.1371/journal.pone.0148952 https://pubmed.ncbi.nlm.nih.gov/26859894/
Moran-RamosS_2020 NA NA cross-sectional observational, not case-control socioeconomic status 2 31973685 https://doi.org/10.1080/19490976.2020.1712985 https://pubmed.ncbi.nlm.nih.gov/31973685/

Taxon Frequency Table by SES

# Keep only signatures whose condition is exactly "socioeconomic status".
# The column is referenced by bare name (dplyr data masking) rather than via
# `dat.sub$`, and filter() is namespace-qualified so that stats::filter()
# can never be dispatched by accident.
dat.sub_SES <- dplyr::filter(dat.sub, Condition == "socioeconomic status")
dim(dat.sub_SES)
## [1] 25 50
# Taxon frequency table for the SES signatures, rendered as a styled kable.
kable_styling(
  kbl(
    bugSigSimple::createTaxonTable(dat.sub_SES)
  )
)
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Prevotella genus 5 1 4 0.37 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Prevotellaceae Prevotella 5 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella
Eubacterium genus 4 3 1 0.63 Bacteria Bacillati Bacillota Clostridia Eubacteriales Eubacteriaceae Eubacterium 4 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Eubacteriaceae|g__Eubacterium
Ruminococcus genus 4 3 1 0.63 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Ruminococcus 4 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Ruminococcus
Bacteroidota phylum 4 4 0 0.12 Bacteria Pseudomonadati Bacteroidota NA NA NA NA 17 k__Bacteria|k__Pseudomonadati|p__Bacteroidota
Rikenellaceae family 4 3 1 0.63 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Rikenellaceae NA 5 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Rikenellaceae
Lachnospiraceae family 3 2 1 1.00 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae NA 6 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae
Bacteroides genus 3 1 2 1.00 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Bacteroidaceae Bacteroides 4 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides
Deltaproteobacteria class 2 2 0 0.50 Bacteria Deltaproteobacteria NA NA NA NA NA 2 k__Bacteria|c__Deltaproteobacteria
Bifidobacterium genus 2 2 0 0.50 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 3 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium
Atopobiaceae family 2 1 1 1.00 Bacteria Bacillati Actinomycetota Coriobacteriia Coriobacteriales Atopobiaceae NA 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Coriobacteriia|o__Coriobacteriales|f__Atopobiaceae

Taxon Frequency Table by Race-ethnicity

# Keep only signatures whose condition is exactly "ethnic group".
# The column is referenced by bare name (dplyr data masking) rather than via
# `dat.sub$`, and filter() is namespace-qualified so that stats::filter()
# can never be dispatched by accident.
dat.sub_ethnic <- dplyr::filter(dat.sub, Condition == "ethnic group")
dim(dat.sub_ethnic)
## [1] 31 50
# Taxon frequency table for the race/ethnicity signatures, rendered as a
# styled kable.
kable_styling(
  kbl(
    bugSigSimple::createTaxonTable(dat.sub_ethnic)
  )
)
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Eubacteriales order 7 2 5 0.450 Bacteria Bacillati Bacillota Clostridia Eubacteriales NA NA 10 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales
Coriobacteriaceae family 6 2 4 0.690 Bacteria Bacillati Actinomycetota Coriobacteriia Coriobacteriales Coriobacteriaceae NA 7 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae
Christensenellaceae family 6 1 5 0.220 Bacteria Bacillati Bacillota Clostridia Christensenellales Christensenellaceae NA 6 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Christensenellales|f__Christensenellaceae
Peptococcaceae family 6 3 3 1.000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Peptococcaceae NA 6 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Peptococcaceae
Veillonella genus 6 6 0 0.031 Bacteria Bacillati Bacillota Negativicutes Veillonellales Veillonellaceae Veillonella 6 k__Bacteria|k__Bacillati|p__Bacillota|c__Negativicutes|o__Veillonellales|f__Veillonellaceae|g__Veillonella
Odoribacteraceae family 6 3 3 1.000 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Odoribacteraceae NA 6 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Odoribacteraceae
Odoribacter genus 6 3 3 1.000 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Odoribacteraceae Odoribacter 6 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Odoribacteraceae|g__Odoribacter
Rikenellaceae family 6 2 4 0.690 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Rikenellaceae NA 6 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Rikenellaceae
Victivallaceae family 6 4 2 0.690 Bacteria Pseudomonadati Lentisphaerota Lentisphaeria Victivallales Victivallaceae NA 6 k__Bacteria|k__Pseudomonadati|p__Lentisphaerota|c__Lentisphaeria|o__Victivallales|f__Victivallaceae
Verrucomicrobiaceae family 6 0 6 0.031 Bacteria Pseudomonadati Verrucomicrobiota Verrucomicrobiia Verrucomicrobiales Verrucomicrobiaceae NA 6 k__Bacteria|k__Pseudomonadati|p__Verrucomicrobiota|c__Verrucomicrobiia|o__Verrucomicrobiales|f__Verrucomicrobiaceae

Taxon Frequency Table by all studies (SES and Ethnicity) combined.

# Taxon frequency table across the whole curated subset (n = 50 requested),
# rendered as a styled kable.
kable_styling(
  kbl(
    bugSigSimple::createTaxonTable(dat.sub, n = 50)
  )
)
## Warning: Expected 7 pieces. Additional pieces discarded in 11 rows [19, 22, 23, 24, 28,
## 29, 34, 35, 36, 37, 44].
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Prevotella genus 12 7 5 0.770 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Prevotellaceae Prevotella 13 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella
Eubacteriales order 10 4 6 0.750 Bacteria Bacillati Bacillota Clostridia Eubacteriales NA NA 32 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales
Rikenellaceae family 10 5 5 1.000 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Rikenellaceae NA 12 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Rikenellaceae
Bacteroides genus 9 3 6 0.510 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Bacteroidaceae Bacteroides 14 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides
Coriobacteriaceae family 7 2 5 0.450 Bacteria Bacillati Actinomycetota Coriobacteriia Coriobacteriales Coriobacteriaceae NA 9 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae
Bacillota phylum 7 3 4 1.000 Bacteria Bacillati Bacillota NA NA NA NA 47 k__Bacteria|k__Bacillati|p__Bacillota
Ruminococcus genus 7 4 3 1.000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Ruminococcus 7 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Ruminococcus
Veillonella genus 7 7 0 0.016 Bacteria Bacillati Bacillota Negativicutes Veillonellales Veillonellaceae Veillonella 7 k__Bacteria|k__Bacillati|p__Bacillota|c__Negativicutes|o__Veillonellales|f__Veillonellaceae|g__Veillonella
Odoribacter genus 7 4 3 1.000 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Odoribacteraceae Odoribacter 7 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Odoribacteraceae|g__Odoribacter
Christensenellaceae family 6 1 5 0.220 Bacteria Bacillati Bacillota Clostridia Christensenellales Christensenellaceae NA 6 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Christensenellales|f__Christensenellaceae
Clostridium genus 6 4 2 0.690 Bacteria Bacillati Bacillota Clostridia Eubacteriales Clostridiaceae Clostridium 8 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Clostridiaceae|g__Clostridium
Eubacterium genus 6 4 2 0.690 Bacteria Bacillati Bacillota Clostridia Eubacteriales Eubacteriaceae Eubacterium 6 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Eubacteriaceae|g__Eubacterium
Peptococcaceae family 6 3 3 1.000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Peptococcaceae NA 6 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Peptococcaceae
Odoribacteraceae family 6 3 3 1.000 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Odoribacteraceae NA 7 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Odoribacteraceae
Victivallaceae family 6 4 2 0.690 Bacteria Pseudomonadati Lentisphaerota Lentisphaeria Victivallales Victivallaceae NA 6 k__Bacteria|k__Pseudomonadati|p__Lentisphaerota|c__Lentisphaeria|o__Victivallales|f__Victivallaceae
Verrucomicrobiaceae family 6 0 6 0.031 Bacteria Pseudomonadati Verrucomicrobiota Verrucomicrobiia Verrucomicrobiales Verrucomicrobiaceae NA 6 k__Bacteria|k__Pseudomonadati|p__Verrucomicrobiota|c__Verrucomicrobiia|o__Verrucomicrobiales|f__Verrucomicrobiaceae
Bacteroidota phylum 5 5 0 0.062 Bacteria Pseudomonadati Bacteroidota NA NA NA NA 41 k__Bacteria|k__Pseudomonadati|p__Bacteroidota
Bifidobacterium genus 4 3 1 0.630 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 8 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium
Bifidobacterium longum species 4 3 1 0.630 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 4 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium longum
Oscillospiraceae family 4 4 0 0.120 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae NA 19 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae
Faecalibacterium genus 4 3 1 0.630 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Faecalibacterium 7 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Faecalibacterium
Faecalibacterium prausnitzii species 4 2 2 1.000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Faecalibacterium 4 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Faecalibacterium|s__Faecalibacterium prausnitzii
Bifidobacterium adolescentis species 3 1 2 1.000 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 3 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium adolescentis
Bifidobacterium bifidum species 3 2 1 1.000 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 3 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium bifidum
Lactobacillus genus 3 2 1 1.000 Bacteria Bacillati Bacillota Bacilli Lactobacillales Lactobacillaceae Lactobacillus 4 k__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae|g__Lactobacillus
Lachnospiraceae family 3 2 1 1.000 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae NA 12 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae
Blautia genus 3 2 1 1.000 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Blautia 4 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Blautia
Bacteroides fragilis species 3 1 2 1.000 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Bacteroidaceae Bacteroides 3 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides fragilis
Bacteroides thetaiotaomicron species 3 0 3 0.250 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Bacteroidaceae Bacteroides 3 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides thetaiotaomicron
Parabacteroides genus 3 3 0 0.250 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Tannerellaceae Parabacteroides 3 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Tannerellaceae|g__Parabacteroides
Enterobacteriaceae family 3 1 2 1.000 Bacteria Pseudomonadati Pseudomonadota Gammaproteobacteria Enterobacterales Enterobacteriaceae NA 7 k__Bacteria|k__Pseudomonadati|p__Pseudomonadota|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae
Deltaproteobacteria class 2 2 0 0.500 Bacteria Deltaproteobacteria NA NA NA NA NA 2 k__Bacteria|c__Deltaproteobacteria
Actinomycetota phylum 2 2 0 0.500 Bacteria Bacillati Actinomycetota NA NA NA NA 21 k__Bacteria|k__Bacillati|p__Actinomycetota
Bifidobacterium breve species 2 1 1 1.000 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium breve
Bifidobacterium catenulatum species 2 1 1 1.000 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium catenulatum
Bifidobacterium pseudocatenulatum species 2 1 1 1.000 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium pseudocatenulatum
Bifidobacterium ruminantium species 2 1 1 1.000 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium ruminantium
Atopobiaceae family 2 1 1 1.000 Bacteria Bacillati Actinomycetota Coriobacteriia Coriobacteriales Atopobiaceae NA 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Coriobacteriia|o__Coriobacteriales|f__Atopobiaceae
Slackia genus 2 2 0 0.500 Bacteria Bacillati Actinomycetota Coriobacteriia Eggerthellales Eggerthellaceae Slackia 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Coriobacteriia|o__Eggerthellales|f__Eggerthellaceae|g__Slackia
Staphylococcus genus 2 1 1 1.000 Bacteria Bacillati Bacillota Bacilli Bacillales Staphylococcaceae Staphylococcus 3 k__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Bacillales|f__Staphylococcaceae|g__Staphylococcus
Enterococcus genus 2 2 0 0.500 Bacteria Bacillati Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Enterococcaceae|g__Enterococcus
Lactococcus genus 2 1 1 1.000 Bacteria Bacillati Bacillota Bacilli Lactobacillales Streptococcaceae Lactococcus 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Lactococcus
Clostridiaceae family 2 0 2 0.500 Bacteria Bacillati Bacillota Clostridia Eubacteriales Clostridiaceae NA 11 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Clostridiaceae
Clostridium perfringens species 2 0 2 0.500 Bacteria Bacillati Bacillota Clostridia Eubacteriales Clostridiaceae Clostridium 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Clostridiaceae|g__Clostridium|s__Clostridium perfringens
Anaerotruncus genus 2 1 1 1.000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Anaerotruncus 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Anaerotruncus
Oscillospira genus 2 1 1 1.000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Oscillospiraceae Oscillospira 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Oscillospira
butyrate-producing bacterium M21/2 species 2 1 1 1.000 Bacteria Bacillati Bacillota Clostridia Eubacteriales butyrate-producing bacterium M21/2 NA 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|s__butyrate-producing bacterium M21/2
Clostridiales bacterium species 2 1 1 1.000 Bacteria Bacillati Bacillota Clostridia Eubacteriales Clostridiales bacterium NA 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Eubacteriales|s__Clostridiales bacterium
Anaerostipes genus 2 2 0 0.500 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Anaerostipes 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Anaerostipes
Butyrivibrio genus 2 2 0 0.500 Bacteria Bacillati Bacillota Clostridia Lachnospirales Lachnospiraceae Butyrivibrio 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Clostridia|o__Lachnospirales|f__Lachnospiraceae|g__Butyrivibrio

Cluster analysis of signatures of increased and of decreased abundance in low SES, using Jaccard distance to create a distance matrix.

# Signatures of the SES subset, expressed as taxon names.
allsigs <- bugsigdbr::getSignatures(dat.sub_SES, tax.id.type = "taxname")
# Require more than one taxon per signature. lengths() always yields an
# integer vector, whereas the return type of sapply() depends on its input.
allsigs <- allsigs[lengths(allsigs) > 1]
length(allsigs)
## [1] 19
# Pairwise overlap statistics between the retained signatures.
mydists <- BugSigDBStats::calcPairwiseOverlaps(allsigs)
dim(mydists)
## [1] 38  8

Visualize the distribution of the signature lengths

library(ggplot2)
# Number of taxa in each retained SES signature; lengths() is the type-stable
# equivalent of sapply(x, length).
siglengths <- lengths(allsigs)
siglengths.df <- data.frame(siglengths = siglengths)
# Bar chart of how many signatures have each length.
ggplot(siglengths.df, aes(x = siglengths)) +
  geom_bar()

table(siglengths)
## siglengths
##  2  3  4  5  6  7  8 10 15 
##  1  5  1  3  2  2  3  1  1

Create a matrix of Jaccard similarities (0 for no overlap, 1 for 100% overlap)

# Pairwise Jaccard similarity matrix for the retained SES signatures.
jmat <- BugSigDBStats::calcJaccardSimilarity(allsigs)

Create a Clustered heatmap

library(ComplexHeatmap)

# Get rid of study labels: drop everything from the first underscore onward,
# then remove the literal "bsdb:" prefix.
short_sig_label <- function(x) {
  sub("bsdb:", "", sub("_.+", "", x), fixed = TRUE)
}

# Bar-plot annotations of signature length for columns (ha) and rows (hr);
# hr is built but not attached to the heatmap below.
ha <- HeatmapAnnotation(`Signature Length` = anno_barplot(siglengths))
hr <- rowAnnotation(`Signature Length` = anno_barplot(siglengths))

# Options left disabled by the author: left_annotation = hr,
# column_names_max_height = unit(23, "cm"), row_names_max_width = unit(15, "cm").
hm <- Heatmap(
  jmat,
  top_annotation = ha,
  column_names_rot = 45,
  row_labels = short_sig_label(rownames(jmat)),
  column_labels = short_sig_label(colnames(jmat))
)

hm

Create a wide format dataframe for Regression Analysis

# Keep SES signatures with a non-missing `NCBI Taxonomy IDs` entry. The column
# is referenced by bare name (dplyr data masking) and filter() is
# namespace-qualified so that stats::filter() can never be dispatched.
dat_withsigs <- dplyr::filter(dat.sub_SES, !is.na(`NCBI Taxonomy IDs`))
sigs <- bugsigdbr::getSignatures(dat_withsigs, tax.id.type = "taxname")
# Transpose the category matrix so that rows line up with signatures.
cmat <- t(safe::getCmatrix(sigs, as.matrix = TRUE, min.size = 0, prune = FALSE))
## WARNING: rows are sorted elements of keyword.list
## 25 categories formed
cdf <- data.frame(cmat, stringsAsFactors = FALSE, check.names = FALSE)
# NOTE(review): cbind() pairs rows by position, so this assumes the rows of
# cmat are in the same order as the rows of dat_withsigs — confirm.
cdf <- cbind(dat_withsigs, cdf)
dim(cdf)
## [1]  25 129

An arbitrary example of meta-regression:

# Logistic regression on cdf (one row per signature): the outcome is the
# Prevotella presence/absence column, modelled on subject location,
# sequencing type and direction of abundance change in Group 1.
# NOTE(review): the printed summary shows standard errors around 1e+04 and
# 19 Fisher scoring iterations, which looks like (quasi-)complete separation —
# treat the location and sequencing-type coefficients as illustrative only.
fit <-
  glm(
    Prevotella ~ `Location of subjects` + `Sequencing type` + `Abundance in Group 1`,
    family = binomial(link = "logit"),
    data = cdf
  )
summary(fit)
## 
## Call:
## glm(formula = Prevotella ~ `Location of subjects` + `Sequencing type` + 
##     `Abundance in Group 1`, family = binomial(link = "logit"), 
##     data = cdf)
## 
## Coefficients:
##                                                  Estimate Std. Error z value
## (Intercept)                                    -1.977e+01  1.145e+04  -0.002
## `Location of subjects`China                     2.103e+01  1.145e+04   0.002
## `Location of subjects`Finland,Malawi            1.953e+01  1.983e+04   0.001
## `Location of subjects`Indonesia                 2.103e+01  1.145e+04   0.002
## `Location of subjects`Israel                    2.103e+01  1.145e+04   0.002
## `Location of subjects`Malaysia                  1.219e-09  1.402e+04   0.000
## `Location of subjects`Mexico                    2.103e+01  1.145e+04   0.002
## `Location of subjects`Papua New Guinea          1.729e+00  2.111e+04   0.000
## `Location of subjects`United Kingdom            1.224e-09  1.619e+04   0.000
## `Location of subjects`United States of America  1.953e+01  1.145e+04   0.002
## `Sequencing type`WMS                           -1.953e+01  1.145e+04  -0.002
## `Abundance in Group 1`increased                -2.527e+00  1.517e+00  -1.666
##                                                Pr(>|z|)  
## (Intercept)                                      0.9986  
## `Location of subjects`China                      0.9985  
## `Location of subjects`Finland,Malawi             0.9992  
## `Location of subjects`Indonesia                  0.9985  
## `Location of subjects`Israel                     0.9985  
## `Location of subjects`Malaysia                   1.0000  
## `Location of subjects`Mexico                     0.9985  
## `Location of subjects`Papua New Guinea           0.9999  
## `Location of subjects`United Kingdom             1.0000  
## `Location of subjects`United States of America   0.9986  
## `Sequencing type`WMS                             0.9986  
## `Abundance in Group 1`increased                  0.0958 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25.020  on 24  degrees of freedom
## Residual deviance: 12.082  on 13  degrees of freedom
## AIC: 36.082
## 
## Number of Fisher Scoring iterations: 19

Create another heatmap of correlations between signatures, based on presence/absence of taxa.

# cmat is transposed so that cor(), which correlates columns, compares the
# rows of cmat with one another.
sigcors <- cor(t(cmat))
# lengths() is the type-stable equivalent of sapply(x, length).
siglengths <- lengths(sigs)
ha <- HeatmapAnnotation(`Signature Length` = anno_barplot(siglengths))
hr <- rowAnnotation(`Signature Length` = anno_barplot(siglengths))
hm <- Heatmap(
  sigcors,
  top_annotation = ha, left_annotation = hr,
  row_names_max_width = unit(.05, "cm"),
  column_names_max_height = unit(.1, "cm"),
  # row_labels = sub(".+:", "", rownames(sigcors)), # removing study just to make signature names legible
  column_labels = sub(".+:", "", colnames(sigcors))
)
hm

Cluster analysis of signatures of increased and of decreased abundance in non-white/Caucasian ethnicity, using Jaccard distance to create a distance matrix.

# Signatures of the race/ethnicity subset, expressed as taxon names.
allsige <- bugsigdbr::getSignatures(dat.sub_ethnic, tax.id.type = "taxname")
# Require more than one taxon per signature. lengths() always yields an
# integer vector, whereas the return type of sapply() depends on its input.
allsige <- allsige[lengths(allsige) > 1]
length(allsige)
## [1] 15
# Pairwise overlap statistics between the retained signatures.
mydistse <- BugSigDBStats::calcPairwiseOverlaps(allsige)
dim(mydistse)
## [1] 52  8

Visualize the distribution of the signature lengths

library(ggplot2)
# Number of taxa in each retained race/ethnicity signature; lengths() is the
# type-stable equivalent of sapply(x, length).
siglengthse <- lengths(allsige)
siglengthse.df <- data.frame(siglengthse = siglengthse)
# Bar chart of how many signatures have each length.
ggplot(siglengthse.df, aes(x = siglengthse)) +
  geom_bar()

table(siglengthse)
## siglengthse
##  2  4  5  6  8  9 11 
##  4  1  4  2  2  1  1

Create a matrix of Jaccard similarities (0 for no overlap, 1 for 100% overlap)

# Pairwise Jaccard similarity matrix for the retained race/ethnicity signatures.
jmate <- BugSigDBStats::calcJaccardSimilarity(allsige)

Create a Clustered heatmap

library(ComplexHeatmap)

# Signature-length bar plots shown along the top (hae) and left (hre) margins.
hre <- rowAnnotation(`Signature Length` = anno_barplot(siglengthse))
hae <- HeatmapAnnotation(`Signature Length` = anno_barplot(siglengthse))

# Clustered heatmap of the Jaccard similarities. Column labels have everything
# up to the last colon removed; row labels are left as-is
# (disabled option: row_labels = sub(".+:", "", rownames(jmate)), to get rid
# of study labels).
hme <- Heatmap(
  jmate,
  left_annotation = hre,
  top_annotation = hae,
  column_labels = sub(".+:", "", colnames(jmate)),
  column_names_max_height = unit(1, "cm"),
  row_names_max_width = unit(0.1, "cm")
)

hme

Create a wide format dataframe

# Keep race/ethnicity signatures with a non-missing `NCBI Taxonomy IDs` entry.
# The column is referenced by bare name (dplyr data masking) and filter() is
# namespace-qualified so that stats::filter() can never be dispatched.
dat_withsigse <- dplyr::filter(dat.sub_ethnic, !is.na(`NCBI Taxonomy IDs`))
sigse <- bugsigdbr::getSignatures(dat_withsigse, tax.id.type = "taxname")
# Transpose the category matrix so that rows line up with signatures.
cmate <- t(safe::getCmatrix(sigse, as.matrix = TRUE, min.size = 0, prune = FALSE))
## WARNING: rows are sorted elements of keyword.list
## 31 categories formed
cdfe <- data.frame(cmate, stringsAsFactors = FALSE, check.names = FALSE)
# NOTE(review): cbind() pairs rows by position, so this assumes the rows of
# cmate are in the same order as the rows of dat_withsigse — confirm.
cdfe <- cbind(dat_withsigse, cdfe)
colnames(cdfe)[1:25]
##  [1] "BSDB ID"               "Study"                 "Study design"         
##  [4] "PMID"                  "DOI"                   "URL"                  
##  [7] "Authors list"          "Title"                 "Journal"              
## [10] "Year"                  "Keywords"              "Experiment"           
## [13] "Location of subjects"  "Host species"          "Body site"            
## [16] "UBERON ID"             "Condition"             "EFO ID"               
## [19] "Group 0 name"          "Group 1 name"          "Group 1 definition"   
## [22] "Group 0 sample size"   "Group 1 sample size"   "Antibiotics exclusion"
## [25] "Sequencing type"

Create another heatmap of correlations between signatures, based on presence/absence of taxa.

# cmate is transposed so that cor(), which correlates columns, compares the
# rows of cmate with one another.
sigcorse <- cor(t(cmate))
# lengths() is the type-stable equivalent of sapply(x, length).
siglengthse <- lengths(sigse)
hae <- HeatmapAnnotation(`Signature Length` = anno_barplot(siglengthse))
hre <- rowAnnotation(`Signature Length` = anno_barplot(siglengthse))
hme <- Heatmap(
  sigcorse,
  top_annotation = hae, left_annotation = hre,
  row_names_max_width = unit(.05, "cm"),
  column_names_max_height = unit(.1, "cm"),
  # Disabled option; it must use sigcorse (this section's matrix), not the
  # SES matrix sigcors, if it is ever re-enabled.
  # row_labels = sub(".+:", "", rownames(sigcorse)), # removing study just to make signature names legible
  column_labels = sub(".+:", "", colnames(sigcorse))
)
hme