Analysis objectives

  1. Import, recode, and subset data from bugsigdb.org
  2. Create a table of studies
  3. Calculate how often each taxon appears in independent signatures and identify the most frequently reported taxa

Making sure packages are installed

Not evaluated in vignette:

# Install BiocManager only when it is missing. requireNamespace() tests whether
# the package is available without attaching it to the search path, whereas
# require() would attach it as a side effect of the check.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# General-purpose dependencies used by this analysis.
BiocManager::install(c("devtools", "tidyverse", "kableExtra"))
# Packages given in "owner/repository" form.
BiocManager::install(c(
  "waldronlab/bugSigSimple",
  "waldronlab/BugSigDBStats",
  "waldronlab/bugsigdbr"
))

Load and subset data

# Request the "devel" version with caching disabled so that the latest data
# from bugsigdb.org are used.
dat <- bugsigdbr::importBugSigDB(
  version = "devel",
  cache = FALSE
)
# Dimensions of the imported table.
dim(dat)
## [1] 7349   50
# Column names available for subsetting below.
names(dat)
##  [1] "BSDB ID"                    "Study"                     
##  [3] "Study design"               "PMID"                      
##  [5] "DOI"                        "URL"                       
##  [7] "Authors list"               "Title"                     
##  [9] "Journal"                    "Year"                      
## [11] "Keywords"                   "Experiment"                
## [13] "Location of subjects"       "Host species"              
## [15] "Body site"                  "UBERON ID"                 
## [17] "Condition"                  "EFO ID"                    
## [19] "Group 0 name"               "Group 1 name"              
## [21] "Group 1 definition"         "Group 0 sample size"       
## [23] "Group 1 sample size"        "Antibiotics exclusion"     
## [25] "Sequencing type"            "16S variable region"       
## [27] "Sequencing platform"        "Statistical test"          
## [29] "Significance threshold"     "MHT correction"            
## [31] "LDA Score above"            "Matched on"                
## [33] "Confounders controlled for" "Pielou"                    
## [35] "Shannon"                    "Chao1"                     
## [37] "Simpson"                    "Inverse Simpson"           
## [39] "Richness"                   "Signature page name"       
## [41] "Source"                     "Curated date"              
## [43] "Curator"                    "Revision editor"           
## [45] "Description"                "Abundance in Group 1"      
## [47] "MetaPhlAn taxon names"      "NCBI Taxonomy IDs"         
## [49] "State"                      "Reviewer"

Subsetting

# PMIDs of the studies selected for this analysis.
included.pmid <- c(
  28018325, 24614698, 29207565, 29459704, 29538354,
  32012716, 20566857, 28512451, 28112736, 27362264
)
# Keep only the rows whose PMID is one of the selected studies.
subset.dat <- filter(dat, PMID %in% included.pmid)
# Distinct "Group 0 name" labels among the selected studies.
unique(subset.dat[["Group 0 name"]])
##  [1] "vaginal delivery (Va)"                        
##  [2] "vaginal delivery"                             
##  [3] ">/=33 weeks"                                  
##  [4] "vaginal delivery after 7 days of delivery"    
##  [5] "vaginal delivery after 3 months of delivery"  
##  [6] "vaginal delivery after 6 months of delivery"  
##  [7] "Maternal samples taken at delivery."          
##  [8] "Maternal samples taken at 6  weeks postpartum"
##  [9] "vaginal delivery at day 3"                    
## [10] "vaginal delivery at day 5"                    
## [11] "vaginal delivery at day 28"                   
## [12] "vaginal delivery at day 150"                  
## [13] "vaginal delivery at day 365"                  
## [14] "full-term delivery >39 weeks at day 3"        
## [15] "mecomium in vaginal delivery"                 
## [16] "mecomium in C-section delivery"               
## [17] "Antimicrobials use during delivery (no)"      
## [18] "Maternal consumption of probiotics (NO)"      
## [19] "Furry pets at home (NO)"
# "Group 0 name" label kept by the final filter further down.
# NOTE(review): the filter matches this label exactly, so the other labels
# listed above (e.g. "vaginal delivery (Va)") are not selected -- confirm that
# this is intended.
included.group0 <- "vaginal delivery"
# Distinct "Group 1 name" labels among the selected studies.
unique(subset.dat[["Group 1 name"]])
##  [1] "C-section (Cesarean section)"                   
##  [2] "C-section"                                      
##  [3] "infants <33 weeks gestational age"              
##  [4] "C-section delivery"                             
##  [5] "Neonatal samples taken at delivery"             
##  [6] "Infant samples taken at 6 weeks postpartum"     
##  [7] "c-section"                                      
##  [8] "late preterm 34-36 weeks"                       
##  [9] "overall time point infants samples of c-section"
## [10] "transitional stool"                             
## [11] "Antimicrobials use during delivery (yes)"       
## [12] "Maternal consumption of probiotics (YES)"       
## [13] "Furry pets at home (YES)"
# "Group 1 name" label kept by the filter below (exact match).
included.group1 <- "C-section"
# Keep rows that satisfy all three conditions (comma-separated conditions in
# filter() are combined with AND), then order the result by PMID.
subset.final <- subset.dat %>%
  filter(
    `Group 0 name` %in% included.group0,
    `Group 1 name` %in% included.group1,
    `Body site` == "Meconium"
  ) %>%
  arrange(PMID)

Show key characteristics of the included signatures:

# Call select() through the dplyr namespace so the result does not depend on
# which attached package currently provides `select` on the search path.
# This replaces detaching and re-attaching dplyr: the dplyr namespace is
# imported by other loaded packages and therefore cannot be unloaded, so the
# detach only produced a warning.
dplyr::select(
  subset.final,
  "PMID", "Source", "Group 0 name", "Group 1 name", "Abundance in Group 1"
)
##       PMID              Source     Group 0 name Group 1 name
## 1 24614698 Table S6 in File S1 vaginal delivery    C-section
## 2 27362264            Figure 1 vaginal delivery    C-section
## 3 28512451                <NA> vaginal delivery    C-section
## 4 29459704            Figure 1 vaginal delivery    C-section
## 5 29459704            Figure 1 vaginal delivery    C-section
## 6 29538354             Table 3 vaginal delivery    C-section
## 7 32012716     Figure 4 & text vaginal delivery    C-section
## 8 32012716     Figure 4 & text vaginal delivery    C-section
##   Abundance in Group 1
## 1            increased
## 2            decreased
## 3                 <NA>
## 4            increased
## 5            decreased
## 6            decreased
## 7            increased
## 8            decreased

Are any studies missing?

# Selected PMIDs that have no row at all in the PMID-based subset.
sort(setdiff(included.pmid, subset.dat[["PMID"]]))
## numeric(0)
# Selected PMIDs that have no row left in the final subset.
sort(setdiff(included.pmid, subset.final[["PMID"]]))
## [1] 20566857 28018325 28112736 29207565
# PMIDs present in the PMID-based subset but absent from the final subset.
sort(setdiff(subset.dat[["PMID"]], subset.final[["PMID"]]))
## [1] 20566857 28018325 28112736 29207565

Table of studies

These are the studies included in the review:

# Build the per-study summary from the final subset; the printed result below
# has one row per study, including its design, condition and number of
# signatures.
bugSigSimple::createStudyTable(subset.final)
## # A tibble: 6 × 9
##   `Study code`  MaxCases MaxControls `Study design` Condition N_signatures PMID 
##   <chr>            <dbl>       <dbl> <chr>          <chr>            <int> <chr>
## 1 ArdissoneAN_…       33          19 cross-section… Cesarean…            1 2461…
## 2 MartinR_2016        28          80 cross-section… Cesarean…            1 2736…
## 3 ShiYC_2018          10           8 cross-section… Cesarean…            2 2945…
## 4 TapiainenT_2…       40         172 cross-section… Cesarean…            1 2953…
## 5 WampachL_2017        6           4 time series /… Cesarean…            1 2851…
## 6 WongWSW_2020        43          62 prospective c… Cesarean…            2 3201…
## # ℹ 2 more variables: DOI <chr>, URL <chr>

Summary of taxa reported

This table summarizes the results for the 20 most frequently identified taxa.

# Build the taxon table for the 20 most frequent taxa and render it as a
# styled table.
# NOTE(review): the "Expected 7 pieces" warning printed below, together with
# the rank columns of the rendered table, suggests that some taxon names have
# more levels than the helper expects -- confirm upstream.
bugSigSimple::createTaxonTable(subset.final, n = 20) %>%
  kbl() %>%
  kable_styling()
## Warning: Expected 7 pieces. Additional pieces discarded in 5 rows [6,
## 14, 15, 16, 19].
Taxon Name Taxonomic Level total_signatures increased_signatures decreased_signatures Binomial Test pval kingdom phylum class order family genus species n_signatures metaphlan_name
Staphylococcus genus 3 1 2 1.0 Bacteria Bacillati Bacillota Bacilli Bacillales Staphylococcaceae Staphylococcus 3 k__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Bacillales|f__Staphylococcaceae|g__Staphylococcus
Corynebacterium genus 2 1 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Mycobacteriales Corynebacteriaceae Corynebacterium 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Mycobacteriales|f__Corynebacteriaceae|g__Corynebacterium
Propionibacterium genus 2 1 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Propionibacteriales Propionibacteriaceae Propionibacterium 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Propionibacteriales|f__Propionibacteriaceae|g__Propionibacterium
Enterococcus genus 2 0 2 0.5 Bacteria Bacillati Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Enterococcaceae|g__Enterococcus
Streptococcus genus 2 1 1 1.0 Bacteria Bacillati Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus 2 k__Bacteria|k__Bacillati|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus
Phocaeicola vulgatus species 2 1 1 1.0 Bacteria Pseudomonadati Bacteroidota Bacteroidia Bacteroidales Bacteroidaceae Phocaeicola 2 k__Bacteria|k__Pseudomonadati|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Phocaeicola|s__Phocaeicola vulgatus
Comamonas genus 2 1 1 1.0 Bacteria Pseudomonadati Pseudomonadota Betaproteobacteria Burkholderiales Comamonadaceae Comamonas 2 k__Bacteria|k__Pseudomonadati|p__Pseudomonadota|c__Betaproteobacteria|o__Burkholderiales|f__Comamonadaceae|g__Comamonas
Citrobacter genus 2 1 1 1.0 Bacteria Pseudomonadati Pseudomonadota Gammaproteobacteria Enterobacterales Enterobacteriaceae Citrobacter 2 k__Bacteria|k__Pseudomonadati|p__Pseudomonadota|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Citrobacter
Stenotrophomonas genus 2 2 0 0.5 Bacteria Pseudomonadati Pseudomonadota Gammaproteobacteria Lysobacterales Lysobacteraceae Stenotrophomonas 2 k__Bacteria|k__Pseudomonadati|p__Pseudomonadota|c__Gammaproteobacteria|o__Lysobacterales|f__Lysobacteraceae|g__Stenotrophomonas
Actinomycetota phylum 1 0 1 1.0 Bacteria Bacillati Actinomycetota NA NA NA NA 4 k__Bacteria|k__Bacillati|p__Actinomycetota
Actinomycetes class 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes NA NA NA 4 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes
Actinomycetales order 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Actinomycetales NA NA 1 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Actinomycetales
Bifidobacterium genus 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium
Bifidobacterium bifidum species 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 1 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium bifidum
Bifidobacterium catenulatum species 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 1 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium catenulatum
Bifidobacterium longum subsp. longum species 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Bifidobacterium 1 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Bifidobacterium|s__Bifidobacterium longum|s__Bifidobacterium longum subsp. longum
Pseudoscardovia genus 1 1 0 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Bifidobacteriales Bifidobacteriaceae Pseudoscardovia 1 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Pseudoscardovia
Propionibacteriaceae family 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Propionibacteriales Propionibacteriaceae NA 2 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Propionibacteriales|f__Propionibacteriaceae
Cutibacterium acnes species 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Propionibacteriales Propionibacteriaceae Cutibacterium 1 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium acnes
Pseudonocardiaceae family 1 0 1 1.0 Bacteria Bacillati Actinomycetota Actinomycetes Pseudonocardiales Pseudonocardiaceae NA 1 k__Bacteria|k__Bacillati|p__Actinomycetota|c__Actinomycetes|o__Pseudonocardiales|f__Pseudonocardiaceae

Long list of most frequently identified taxa

These lists are redundant with the taxon table above, but they cover a larger number of taxa, each reported by its numeric identifier.

# Counts for the 50 most frequently reported taxa across all signatures in the
# final subset.
getMostFrequentTaxa(subset.final, n = 50)
##    1279    1301    1350    1716    1743     283   40323     544     821  114248 
##       3       2       2       2       2       2       2       2       2       1 
##  117563  118964    1236    1239    1243   12916    1297    1298 1302778  135614 
##       1       1       1       1       1       1       1       1       1       1 
##    1357    1380    1385    1386    1390    1402    1485    1502  150247    1578 
##       1       1       1       1       1       1       1       1       1       1 
##  158851    1596    1598    1678    1679    1681    1686    1730    1747    1760 
##       1       1       1       1       1       1       1       1       1       1 
##  183710  186806  186817  188787  201174    2037    2070  216851     222    2737 
##       1       1       1       1       1       1       1       1       1       1
# Same count restricted to direction "UP"; with no n given, ten taxa are listed.
# NOTE(review): presumably "UP" means increased abundance in Group 1 -- the
# counts agree with the increased_signatures column of the taxon table above.
getMostFrequentTaxa(subset.final, direction = "UP")
##  40323 114248 117563 118964   1239   1243   1279  12916   1297   1298 
##      2      1      1      1      1      1      1      1      1      1
# Same count restricted to direction "DOWN" (presumably decreased abundance in
# Group 1 -- confirm).
getMostFrequentTaxa(subset.final, direction = "DOWN")
##   1279   1350   1236   1301   1380   1485   1502 158851   1596   1598 
##      2      2      1      1      1      1      1      1      1      1