Reading data

Get bulk export from bugsigdb.org:

full.dat <- bugsigdbr::importBugSigDB(version = "devel", cache = FALSE)
dim(full.dat)
## [1] 2942   50
colnames(full.dat)
##  [1] "BSDB ID"                    "Study"                     
##  [3] "Study design"               "PMID"                      
##  [5] "DOI"                        "URL"                       
##  [7] "Authors list"               "Title"                     
##  [9] "Journal"                    "Year"                      
## [11] "Keywords"                   "Experiment"                
## [13] "Location of subjects"       "Host species"              
## [15] "Body site"                  "UBERON ID"                 
## [17] "Condition"                  "EFO ID"                    
## [19] "Group 0 name"               "Group 1 name"              
## [21] "Group 1 definition"         "Group 0 sample size"       
## [23] "Group 1 sample size"        "Antibiotics exclusion"     
## [25] "Sequencing type"            "16S variable region"       
## [27] "Sequencing platform"        "Statistical test"          
## [29] "Significance threshold"     "MHT correction"            
## [31] "LDA Score above"            "Matched on"                
## [33] "Confounders controlled for" "Pielou"                    
## [35] "Shannon"                    "Chao1"                     
## [37] "Simpson"                    "Inverse Simpson"           
## [39] "Richness"                   "Signature page name"       
## [41] "Source"                     "Curated date"              
## [43] "Curator"                    "Revision editor"           
## [45] "Description"                "Abundance in Group 1"      
## [47] "MetaPhlAn taxon names"      "NCBI Taxonomy IDs"         
## [49] "State"                      "Reviewer"

Stripping illformed entries:

is.study <- grepl("^Study [0-9]+$", full.dat[["Study"]])
is.exp <- grepl("^Experiment [0-9]+$", full.dat[["Experiment"]])
full.dat <- full.dat[is.study & is.exp, ]

Curation output

Number of papers and signatures curated:

pmids <- unique(full.dat[,"PMID"])
length(pmids)
## [1] 726
nrow(full.dat)
## [1] 2942

Publication date of the curated papers:

pmids <- pmids[!is.na(pmids)]
pubyear1 <- pmid2pubyear(pmids[1:361])
pubyear2 <- pmid2pubyear(pmids[362:length(pmids)])
pubyear <- c(pubyear1, pubyear2)
head(cbind(pmids, pubyear))
##      pmids      pubyear
## [1,] "28038683" "2016" 
## [2,] "28173873" "2017" 
## [3,] "27015276" "2016" 
## [4,] "27625705" "2016" 
## [5,] "23071781" "2012" 
## [6,] "28467925" "2017"
tab <- table(pubyear)
tab <- tab[-length(tab)]
tab <- tab[order(as.integer(names(tab)))]
df <- data.frame(year = names(tab), papers = as.integer(tab))
ggbarplot(df, x = "year", y = "papers", 
          label = TRUE, fill = "steelblue",
          ggtheme = theme_bw())

Stripping empty signatures:

ind1 <- lengths(full.dat[["MetaPhlAn taxon names"]]) > 0
ind2 <- lengths(full.dat[["NCBI Taxonomy IDs"]]) > 0
dat <- full.dat[ind1 & ind2,]
nrow(dat)
## [1] 2942

Papers containing only empty UP and DOWN signatures (under curation?):

setdiff(pmids, unique(dat[,"PMID"]))
## numeric(0)

Progress over time:

dat[,"Curated date"] <- as.character(lubridate::dmy(dat[,"Curated date"]))
plotProgressOverTime(dat)
## Warning in rbind(cdbm, cpbm): number of columns of result is not a multiple of
## vector length (arg 2)

plotProgressOverTime(dat, diff = TRUE)
## Warning in rbind(cdbm, cpbm): number of columns of result is not a multiple of
## vector length (arg 2)

Stratified by curator:

npc <- stratifyByCurator(dat)
plotCuratorStats(dat, npc)

Number of complete and revised signatures:

table(df[["State"]])
## < table of extent 0 >
table(dat[,"Revision editor"])
## 
##                                   Adi13                               Aiyshaaaa 
##                                       8                                      21 
##                      Aiyshaaaa,Atrayees                  Aiyshaaaa,Claregrieve1 
##                                       2                                       4 
##                     Aiyshaaaa,WikiWorks               Aiyshaaaa,WikiWorks,Merit 
##                                       1                                       4 
##                                   Andre                             Andre,Chloe 
##                                       2                                       1 
##                   Annabelcute,Aiyshaaaa                                Atrayees 
##                                       1                                      93 
##                      Atrayees,Aiyshaaaa         Atrayees,Aiyshaaaa,Claregrieve1 
##                                       1                                       1 
##                   Atrayees,Claregrieve1                       Atrayees,Lwaldron 
##                                       6                                       2 
##          Atrayees,Lwaldron,Claregrieve1                      Atrayees,WikiWorks 
##                                       1                                      20 
##                Atrayees,WikiWorks,Merit                     Barakat Dindi,Chloe 
##                                       2                                       2 
##         Blessing Kaz,Aiyshaaaa,Atrayees                             BLESSING123 
##                                       1                                      15 
##                       BLESSING123,Chloe                           Brian,Suwaiba 
##                                       2                                       1 
##                  Brian,Suwaiba,Atrayees                                  Busayo 
##                                       1                                       1 
##                           Busayo,Fatima                         Busayo,Mcarlson 
##                                       1                                       2 
##                                  Chioma                           Chioma,Fatima 
##                                       4                                       2 
##                                   Chloe                         Chloe,Kwekuamoo 
##                                       7                                       1 
##           Chloe,Lorakasselman,Aiyshaaaa                             Chloe,Merit 
##                                       1                                       1 
##                         Chloe,WikiWorks                            Claregrieve1 
##                                       4                                     157 
##                  Claregrieve1,Aiyshaaaa                   Claregrieve1,Atrayees 
##                                       2                                       1 
##         Claregrieve1,Atrayees,WikiWorks   Claregrieve1,Atrayees,WikiWorks,Merit 
##                                       2                                       1 
##                      Claregrieve1,Chloe      Claregrieve1,Chloe,WikiWorks,Merit 
##                                       1                                       1 
##                     Claregrieve1,Fatima        Claregrieve1,Fatima,LGeistlinger 
##                                      10                                       1 
##             Claregrieve1,Fatima,Yu Wang                   Claregrieve1,Lwaldron 
##                                       1                                       2 
##           Claregrieve1,Lwaldron,Suwaiba                      Claregrieve1,Merit 
##                                       1                                       8 
##            Claregrieve1,Merit,WikiWorks            Claregrieve1,Rukky,WikiWorks 
##                                       2                                       1 
##                    Claregrieve1,Suwaiba              Claregrieve1,Suwaiba,Merit 
##                                       1                                       1 
##                  Claregrieve1,WikiWorks         Claregrieve1,WikiWorks,Atrayees 
##                                     262                                       1 
##            Claregrieve1,WikiWorks,Merit                                Cyberian 
##                                       6                                       4 
##                          Cyberian,Chloe                Cyberian,Chloe,Aiyshaaaa 
##                                       1                                       1 
##                        Cynthia Anderson           Cynthia Anderson,Claregrieve1 
##                                      28                                       2 
##                 Cynthia Anderson,Fatima Cynthia Anderson,LGeistlinger,WikiWorks 
##                                       2                                       2 
##     Cynthia Anderson,Lwaldron,WikiWorks                                Danyab56 
##                                       1                                       2 
##         Danyab56,Aiyshaaaa,Claregrieve1               Deacme,Aiyshaaaa,Atrayees 
##                                       5                                       8 
##                         Deacme,Atrayees                                    Dupe 
##                                       6                                       1 
##                 Dupe,Aiyshaaaa,Atrayees                           Dupe,Atrayees 
##                                       1                                       2 
##                           Dupe,Mcarlson                             Ellajessica 
##                                       2                                       2 
##                   Ellajessica,Aiyshaaaa                                  Fatima 
##                                       2                                      23 
##                        Fatima,Aiyshaaaa                     Fatima,Claregrieve1 
##                                       2                                       2 
##           Fatima,Claregrieve1,WikiWorks              Fatima,Kwekuamoo,WikiWorks 
##                                      11                                       2 
##           Fatima,LGeistlinger,WikiWorks               Fatima,Lwaldron,WikiWorks 
##                                       1                                       3 
##                  Fatima,Merit,WikiWorks                        Fatima,WikiWorks 
##                                       2                                      40 
##                  Fatima,WikiWorks,Merit                                Fcuevas3 
##                                       3                                      48 
##                      Fcuevas3,Aiyshaaaa             Fcuevas3,Aiyshaaaa,Atrayees 
##                                       2                                       1 
##                       Fcuevas3,Atrayees                   Fcuevas3,Claregrieve1 
##                                       1                                       6 
##                         Fcuevas3,Fatima             Fcuevas3,Lwaldron,Aiyshaaaa 
##                                       2                                       2 
##                         Fcuevas3,Rimsha                                    Gina 
##                                       8                                      14 
##                                Haoyanzh                       Haoyanzh,Lwaldron 
##                                      19                                       2 
##                             Itslanapark                   Itslanapark,Aiyshaaaa 
##                                      27                                       5 
##                    Itslanapark,Atrayees                       Itslanapark,Chloe 
##                                       1                                       3 
##       Itslanapark,Chloe,Aiyshaaaa,Merit                Itslanapark,Claregrieve1 
##                                       1                                       4 
##      Itslanapark,Claregrieve1,Aiyshaaaa       Itslanapark,Claregrieve1,Atrayees 
##                                       1                                       1 
##                      Itslanapark,Fatima          Itslanapark,Fatima,Chloe,Merit 
##                                       2                                       1 
##                      Itslanapark,Rimsha                         Jacquelynshevin 
##                                       1                                      37 
##                   Jacquelynshevin,Chloe         Jacquelynshevin,Chloe,WikiWorks 
##                                       1                                       1 
##                  Jacquelynshevin,Fatima                                 Jeshudy 
##                                       7                                      55 
##                       Jeshudy,Aiyshaaaa                    Jeshudy,Claregrieve1 
##                                       3                                      38 
##                          Jeshudy,Fatima                         Jeshudy,Suwaiba 
##                                       4                                       2 
##                                 Joyessa                       Joyessa,Aiyshaaaa 
##                                       1                                       3 
##                    Joyessa,Claregrieve1              Joyessa,Claregrieve1,Merit 
##                                      18                                       1 
##             Joyessa,Fatima,Claregrieve1       Kahvecirem,Aiyshaaaa,Claregrieve1 
##                                       2                                       2 
## Kahvecirem,Aiyshaaaa,Merit,Claregrieve1                     Kahvecirem,Atrayees 
##                                       3                                       2 
##  Kahvecirem,Atrayees,Merit,Claregrieve1           Kahvecirem,Merit,Claregrieve1 
##                                       1                                       2 
##                          Kaluifeanyi101                Kaluifeanyi101,Aiyshaaaa 
##                                      50                                       2 
##                 Kaluifeanyi101,Atrayees             Kaluifeanyi101,Claregrieve1 
##                                       3                                      16 
##                   Kaluifeanyi101,Fatima                  KathyWaldron,WikiWorks 
##                                       2                                       3 
##            KathyWaldron,WikiWorks,Merit                  Kelvin Joseph,Atrayees 
##                                       1                                       6 
##              Kelvin Joseph,Claregrieve1           Khadeeejah,Aiyshaaaa,Atrayees 
##                                       2                                       3 
##     Khadeeejah,Aiyshaaaa,Chloe,Atrayees     Khadeeejah,Atrayees,Chloe,Aiyshaaaa 
##                                       1                                       1 
##        Khadeeejah,Atrayees,Claregrieve1                               Kwekuamoo 
##                                       2                                      22 
##                     Kwekuamoo,Aiyshaaaa                      Kwekuamoo,Atrayees 
##                                       1                                       3 
##                  Kwekuamoo,Claregrieve1               Kwekuamoo,Merit,WikiWorks 
##                                       4                                       1 
##                Levitest,WikiWorks,Merit           Lorakasselman,Aiyshaaaa,Merit 
##                                       1                                       1 
##                     Lorakasselman,Chloe              Lorakasselman,Claregrieve1 
##                                       2                                       2 
##                                Lwaldron   Lwaldron,Atrayees,WikiWorks,Aiyshaaaa 
##                                      11                                       1 
##         Lwaldron,Claregrieve1,WikiWorks   Lwaldron,Claregrieve1,WikiWorks,Merit 
##                                       5                                       2 
##               Lwaldron,Fatima,WikiWorks                      Lwaldron,WikiWorks 
##                                       1                                      30 
##         Lwaldron,WikiWorks,LGeistlinger                Lwaldron,WikiWorks,Merit 
##                                       1                                       4 
##                           Madhubani Dey                 Madhubani Dey,Aiyshaaaa 
##                                      11                                       1 
##                  Madhubani Dey,Atrayees               Madhubani Dey,Chloe,Merit 
##                                       2                                       1 
##              Madhubani Dey,Claregrieve1       Madhubani Dey,Fatima,Claregrieve1 
##                                      26                                       2 
##                  Madhubani Dey,Lwaldron                     Madhubani Dey,Merit 
##                                       1                                       2 
##                                 Manuela                          Mary Bearkland 
##                                      11                                      24 
##                Mary Bearkland,Aiyshaaaa   Mary Bearkland,Aiyshaaaa,Claregrieve1 
##                                       1                                       1 
##             Mary Bearkland,Claregrieve1                   Mary Bearkland,Fatima 
##                                      20                                       3 
##             Mary Bearkland,Fatima,Merit                    Mary Bearkland,Merit 
##                                       1                                       6 
##                     Maryemzaki,Lwaldron                                   Merit 
##                                       2                                       2 
##                         Merit,Aiyshaaaa                      Merit,Claregrieve1 
##                                       1                                       1 
##                         Merit,WikiWorks                                  Mmarin 
##                                      18                                      21 
##                         Mmarin,Atrayees                     Mmarin,Claregrieve1 
##                                       1                                      13 
##                                  Nice25          Nnadichioma,Aiyshaaaa,Atrayees 
##                                       1                                       3 
##    Nnadichioma,Aiyshaaaa,Merit,Atrayees    Nnadichioma,Atrayees,Aiyshaaaa,Merit 
##                                       2                                       1 
##                         Ombati,Atrayees                   Ombati,Chloe,Atrayees 
##                                       3                                       1 
##                                  Rimsha    Rimsha,Fatima,LGeistlinger,WikiWorks 
##                                       4                                       1 
##                 Rimsha,Fatima,WikiWorks                         Rimsha,Lwaldron 
##                                       1                                       1 
##                             Samara.Khan                Samara.Khan,Claregrieve1 
##                                      38                                       9 
##                      Samara.Khan,Fatima                               Sharmilac 
##                                       1                                       5 
##                     Sharmilac,Aiyshaaaa                  Sharmilac,Claregrieve1 
##                                       1                                       2 
##                        Sharmilac,Fatima              Sharmilac,Fatima,Aiyshaaaa 
##                                       6                                       1 
##                         Sharmilac,Merit                                   Sophy 
##                                       2                                       3 
##            Sophy,Aiyshaaaa,Claregrieve1                          Sophy,Atrayees 
##                                       4                                       4 
##                             Sophy,Chloe                      Sophy,Claregrieve1 
##                                       3                                       4 
##                 Sophy,Mcarlson,Atrayees                                 Suwaiba 
##                                       2                                      14 
##                        Suwaiba,Atrayees                                  Tislam 
##                                       9                                      29 
##                        Tislam,Aiyshaaaa           Tislam,Aiyshaaaa,Claregrieve1 
##                                       2                                       1 
##                         Tislam,Atrayees                     Tislam,Claregrieve1 
##                                       2                                       6 
##                           Tislam,Fatima              Tislam,Fatima,Claregrieve1 
##                                       8                                       1 
##        Tislam,Rimsha,Claregrieve1,Merit                                   Titas 
##                                       2                                      11 
##                            Titas,Fatima                          Titas,Lwaldron 
##                                       1                                       1 
##                            Ufuoma Ejite                   Ufuoma Ejite,Atrayees 
##                                       1                                       8 
##            Ufuoma Ejite,Chloe,Aiyshaaaa   Ufuoma Ejite,Chloe,Aiyshaaaa,Atrayees 
##                                       1                                       1 
##         Ufuoma Ejite,Lwaldron,WikiWorks                            Uyokeeswaran 
##                                       7                                       2 
##                               Valentina                               WikiWorks 
##                                       2                                    1069 
##                     WikiWorks,Aiyshaaaa            WikiWorks,Aiyshaaaa,Atrayees 
##                                       2                                       1 
##                      WikiWorks,Atrayees                WikiWorks,Atrayees,Merit 
##                                      22                                       4 
##                  WikiWorks,Claregrieve1                        WikiWorks,Fatima 
##                                      13                                       2 
##                       WikiWorks,Jeshudy                         WikiWorks,Merit 
##                                       1                                      27 
##                WikiWorks,Merit,Atrayees                         WikiWorks,Rukky 
##                                       2                                       3 
##                       WikiWorks,Suwaiba                          Yu Wang,Fatima 
##                                       1                                       1 
##             Yu Wang,Fatima,Claregrieve1 
##                                       4

Study stats

Study design

spl <- split(dat[["Study"]], dat[["Study design"]])
sds <- lapply(spl, unique)
sort(lengths(sds), decreasing = FALSE)
##                                case-control,prospective cohort 
##                                                              1 
## laboratory experiment,time series / longitudinal observational 
##                                                              1 
##                                     case-control,meta-analysis 
##                                                              5 
##                                                  meta-analysis 
##                                                              5 
##                                    randomized controlled trial 
##                                                             30 
##                                          laboratory experiment 
##                                                             33 
##                       time series / longitudinal observational 
##                                                             60 
##                                             prospective cohort 
##                                                             73 
##                cross-sectional observational, not case-control 
##                                                            214 
##                                                   case-control 
##                                                            317

Experiment stats

Columns of the full dataset that describe experiments:

# Experiment ID
exp.cols <- c("Study", "Experiment")

# Subjects
sub.cols <- c("Host species",    
              "Location of subjects", 
              "Body site",
              "Condition", 
              "Antibiotics exclusion",
              "Group 0 sample size",
              "Group 1 sample size")

# Lab analysis              
lab.cols <-  c("Sequencing type",
              "16S variable region",
              "Sequencing platform")

# Statistical analysis
stat.cols <-  c("Statistical test",
              "MHT correction",
              "Significance threshold")

# Alpha diversity
div.cols <- c("Pielou",
              "Shannon",
              "Chao1",
              "Simpson", 
              "Inverse Simpson",
              "Richness")

Restrict dataset to experiment information:

exps <- dat[,c(exp.cols, sub.cols, lab.cols, stat.cols, div.cols)]
exps <- unique(exps)

Subjects

Number of experiments for the top 10 categories for each subjects column:

sub.tab <- lapply(sub.cols[1:5], tabCol, df = exps, n = 10)
names(sub.tab) <- sub.cols[1:5]
sub.tab
## $`Host species`
## 
##      Homo sapiens      Mus musculus Rattus norvegicus 
##              1601                72                 6 
## 
## $`Location of subjects`
## 
##                    China United States of America                    Italy 
##                      504                      446                       72 
##                    Spain                    Japan              South Korea 
##                       53                       49                       45 
##                  Finland                   Canada              Netherlands 
##                       38                       31                       31 
##                   Brazil 
##                       25 
## 
## $`Body site`
## 
##                     Feces                    Saliva                    Vagina 
##                       980                       112                        52 
##            Uterine cervix                     Mouth              Skin of body 
##                        40                        30                        29 
##               Nasopharynx                 Intestine                  Meconium 
##                        28                        26                        25 
## Subgingival dental plaque 
##                        24 
## 
## $Condition
## 
##                                      obesity 
##                                          101 
##                                     COVID-19 
##                                           89 
##                            colorectal cancer 
##                                           81 
##                          antimicrobial agent 
##                                           77 
##                          Parkinson's disease 
##                                           60 
##                                         diet 
##                                           56 
##                             smoking behavior 
##                                           52 
##              human papilloma virus infection 
##                                           42 
## cervical glandular intraepithelial neoplasia 
##                                           38 
##                               gastric cancer 
##                                           36 
## 
## $`Antibiotics exclusion`
## 
##       3 months        1 month       6 months       2 months        4 weeks 
##            206            119             85             61             54 
##        2 weeks           None        30 days        8 weeks None specified 
##             31             16             15             12             12

Proportions instead:

sub.tab <- lapply(sub.cols[1:5], tabCol, df = exps, n = 10, perc = TRUE)
names(sub.tab) <- sub.cols[1:5]
sub.tab
## $`Host species`
## 
##      Homo sapiens      Mus musculus Rattus norvegicus 
##           0.95400           0.04290           0.00357 
## 
## $`Location of subjects`
## 
##                    China United States of America                    Italy 
##                   0.3010                   0.2660                   0.0430 
##                    Spain                    Japan              South Korea 
##                   0.0316                   0.0293                   0.0269 
##                  Finland                   Canada              Netherlands 
##                   0.0227                   0.0185                   0.0185 
##                   Brazil 
##                   0.0149 
## 
## $`Body site`
## 
##                     Feces                    Saliva                    Vagina 
##                    0.5830                    0.0667                    0.0310 
##            Uterine cervix                     Mouth              Skin of body 
##                    0.0238                    0.0179                    0.0173 
##               Nasopharynx                 Intestine                  Meconium 
##                    0.0167                    0.0155                    0.0149 
## Subgingival dental plaque 
##                    0.0143 
## 
## $Condition
## 
##                                      obesity 
##                                       0.0603 
##                                     COVID-19 
##                                       0.0532 
##                            colorectal cancer 
##                                       0.0484 
##                          antimicrobial agent 
##                                       0.0460 
##                          Parkinson's disease 
##                                       0.0358 
##                                         diet 
##                                       0.0335 
##                             smoking behavior 
##                                       0.0311 
##              human papilloma virus infection 
##                                       0.0251 
## cervical glandular intraepithelial neoplasia 
##                                       0.0227 
##                               gastric cancer 
##                                       0.0215 
## 
## $`Antibiotics exclusion`
## 
##       3 months        1 month       6 months       2 months        4 weeks 
##         0.1990         0.1150         0.0820         0.0589         0.0521 
##        2 weeks           None        30 days        8 weeks None specified 
##         0.0299         0.0154         0.0145         0.0116         0.0116

Sample size:

ssize <- apply(exps[,sub.cols[6:7]], 2, summary)
ssize
##         Group 0 sample size Group 1 sample size
## Min.                0.00000             1.00000
## 1st Qu.            14.00000            13.00000
## Median             27.00000            25.00000
## Mean               64.62247            78.79228
## 3rd Qu.            54.00000            47.00000
## Max.             1883.00000          9623.00000
## NA's               53.00000            50.00000

Lab analysis

Number of experiments for the top 10 categories for each lab analysis column:

lab.tab <- lapply(lab.cols, tabCol, df = exps, n = 10)
names(lab.tab) <- lab.cols
lab.tab
## $`Sequencing type`
## 
##  16S  WMS  PCR 
## 1527  138    2 
## 
## $`16S variable region`
## 
##   34    4  123   12   45  345    3   56   23 1234 
##  513  402  128   77   63   61   38   18   15   12 
## 
## $`Sequencing platform`
## 
##                    Illumina                    Roche454 
##                        1172                         237 
##                 Ion Torrent                     RT-qPCR 
##                          95                          85 
## Human Intestinal Tract Chip                 MGISEQ-2000 
##                          12                          11 
##                      Sanger           Mass spectrometry 
##                           6                           5 
##       BGISEQ-500 Sequencing           HTF-Microbi.Array 
##                           4                           4

Proportions instead:

lab.tab <- lapply(lab.cols, tabCol, df = exps, n = 10, perc = TRUE)
names(lab.tab) <- lab.cols
lab.tab
## $`Sequencing type`
## 
##    16S    WMS    PCR 
## 0.9160 0.0828 0.0012 
## 
## $`16S variable region`
## 
##      34       4     123      12      45     345       3      56      23    1234 
## 0.37500 0.29400 0.09360 0.05630 0.04610 0.04460 0.02780 0.01320 0.01100 0.00877 
## 
## $`Sequencing platform`
## 
##                    Illumina                    Roche454 
##                     0.71100                     0.14400 
##                 Ion Torrent                     RT-qPCR 
##                     0.05760                     0.05150 
## Human Intestinal Tract Chip                 MGISEQ-2000 
##                     0.00728                     0.00667 
##                      Sanger           Mass spectrometry 
##                     0.00364                     0.00303 
##       BGISEQ-500 Sequencing           HTF-Microbi.Array 
##                     0.00243                     0.00243

Statistical analysis

Number of experiments for the top 10 categories for each statistical analysis column:

stat.tab <- lapply(stat.cols, tabCol, df = exps, n = 10)
names(stat.tab) <- stat.cols
stat.tab
## $`Statistical test`
## 
##                        LEfSe      Mann-Whitney (Wilcoxon) 
##                          461                          455 
##                       DESeq2              Kruskall-Wallis 
##                          123                          108 
##                       T-Test                        ANOVA 
##                           88                           71 
##            Linear Regression                    PERMANOVA 
##                           49                           33 
## Negative Binomial Regression                    Metastats 
##                           31                           28 
## 
## $`MHT correction`
## 
##  TRUE FALSE 
##   784   723 
## 
## $`Significance threshold`
## 
##  0.05   0.1  0.01 0.001  0.15   0.2     2 0.005     0  0.25 
##  1424    70    44    17    16    13    11     4     3     3

Proportions instead:

stat.tab <- lapply(stat.cols, tabCol, df = exps, n = 10, perc = TRUE)
names(stat.tab) <- stat.cols
stat.tab
## $`Statistical test`
## 
##                        LEfSe      Mann-Whitney (Wilcoxon) 
##                       0.2800                       0.2760 
##                       DESeq2              Kruskall-Wallis 
##                       0.0747                       0.0656 
##                       T-Test                        ANOVA 
##                       0.0535                       0.0431 
##            Linear Regression                    PERMANOVA 
##                       0.0298                       0.0200 
## Negative Binomial Regression                    Metastats 
##                       0.0188                       0.0170 
## 
## $`MHT correction`
## 
##  TRUE FALSE 
##  0.52  0.48 
## 
## $`Significance threshold`
## 
##    0.05     0.1    0.01   0.001    0.15     0.2       2   0.005       0    0.25 
## 0.88200 0.04340 0.02730 0.01050 0.00991 0.00805 0.00682 0.00248 0.00186 0.00186

Alpha diversity

Overall distribution:

apply(exps[,div.cols], 2, table)
##           Pielou Shannon Chao1 Simpson Inverse Simpson Richness
## decreased     18     243   143      77              17      124
## increased     14     176    83      47              18      121
## unchanged     42     575   340     225              40      294

Correspondence of Shannon diversity and Richness:

table(exps$Shannon, exps$Richness)
##            
##             decreased increased unchanged
##   decreased        61         5        20
##   increased         3        48        18
##   unchanged        21        31       222

Conditions with consistently increased or decreased alpha diversity:

tabDiv(exps, "Shannon", "Condition")
##                                              increased decreased unchanged
## COVID-19                                             5        15        33
## gastric cancer                                       3        12        14
## Aging                                                0         7         0
## human papilloma virus infection                      7         0        24
## HIV infection                                        1         7        10
## lung cancer                                          2         8         5
## urinary tract infection                              0         6         3
## cesarean section                                     5         0        14
## Gut microbiome measurement                           0         5         0
## Parkinson's disease                                 11         6        15
## smoking behavior                                    10         5        17
## acute lymphoblastic leukemia                         0         4         4
## antimicrobial agent                                  5         9        22
## cervical cancer                                      4         0         4
## hypertension                                         4         0         2
## obesity                                              3         7        36
## age                                                  2         5         2
## alcohol drinking                                     3         0         2
## Alzheimer's disease                                  0         3         2
## atopic asthma                                        4         1         7
## Crohn's disease                                      0         3         2
## periodontitis                                        3         0         3
## squamous cell carcinoma                              3         0         3
## cervical glandular intraepithelial neoplasia         2         0         9
## colorectal cancer                                    6         8        24
## Eczema                                               0         2        10
## ethnic group                                         4         6         7
## food allergy                                         0         2        10
## schizophrenia                                        1         3         7
## socioeconomic status                                 4         2         4
## acute myeloid leukemia                               2         3         2
## air pollution                                        7         6         3
## asthma                                               1         0        11
## atopic eczema                                        2         3        17
## chronic fatigue syndrome                             0         1         4
## chronic hepatitis B virus infection                  0         1         5
## diet                                                 6         5        14
## endometriosis                                        2         3        17
## esophageal cancer                                    1         2         2
## irritable bowel syndrome                             1         0         6
## milk allergic reaction                               1         0         5
## multiple sclerosis                                   0         1         7
## pancreatic carcinoma                                 0         1         4
## Response to immunochemotherapy                       2         1         3
## arthritis                                            1         1         5
## colorectal adenoma                                   2         2         5
## gestational diabetes                                 0         0         5
## hepatocellular carcinoma                             0         0         6
## HIV mother to child transmission                     0         0         5
## obsessive-compulsive disorder                        0         0         5
## rheumatoid arthritis                                 3         3         4
## type II diabetes mellitus                            2         2         9
tabDiv(exps, "Shannon", "Condition", perc = TRUE)
##                                              increased decreased unchanged
## COVID-19                                         0.094     0.280      0.62
## gastric cancer                                   0.100     0.410      0.48
## Aging                                            0.000     1.000      0.00
## human papilloma virus infection                  0.230     0.000      0.77
## HIV infection                                    0.056     0.390      0.56
## lung cancer                                      0.130     0.530      0.33
## urinary tract infection                          0.000     0.670      0.33
## cesarean section                                 0.260     0.000      0.74
## Gut microbiome measurement                       0.000     1.000      0.00
## Parkinson's disease                              0.340     0.190      0.47
## smoking behavior                                 0.310     0.160      0.53
## acute lymphoblastic leukemia                     0.000     0.500      0.50
## antimicrobial agent                              0.140     0.250      0.61
## cervical cancer                                  0.500     0.000      0.50
## hypertension                                     0.670     0.000      0.33
## obesity                                          0.065     0.150      0.78
## age                                              0.220     0.560      0.22
## alcohol drinking                                 0.600     0.000      0.40
## Alzheimer's disease                              0.000     0.600      0.40
## atopic asthma                                    0.330     0.083      0.58
## Crohn's disease                                  0.000     0.600      0.40
## periodontitis                                    0.500     0.000      0.50
## squamous cell carcinoma                          0.500     0.000      0.50
## cervical glandular intraepithelial neoplasia     0.180     0.000      0.82
## colorectal cancer                                0.160     0.210      0.63
## Eczema                                           0.000     0.170      0.83
## ethnic group                                     0.240     0.350      0.41
## food allergy                                     0.000     0.170      0.83
## schizophrenia                                    0.091     0.270      0.64
## socioeconomic status                             0.400     0.200      0.40
## acute myeloid leukemia                           0.290     0.430      0.29
## air pollution                                    0.440     0.380      0.19
## asthma                                           0.083     0.000      0.92
## atopic eczema                                    0.091     0.140      0.77
## chronic fatigue syndrome                         0.000     0.200      0.80
## chronic hepatitis B virus infection              0.000     0.170      0.83
## diet                                             0.240     0.200      0.56
## endometriosis                                    0.091     0.140      0.77
## esophageal cancer                                0.200     0.400      0.40
## irritable bowel syndrome                         0.140     0.000      0.86
## milk allergic reaction                           0.170     0.000      0.83
## multiple sclerosis                               0.000     0.120      0.88
## pancreatic carcinoma                             0.000     0.200      0.80
## Response to immunochemotherapy                   0.330     0.170      0.50
## arthritis                                        0.140     0.140      0.71
## colorectal adenoma                               0.220     0.220      0.56
## gestational diabetes                             0.000     0.000      1.00
## hepatocellular carcinoma                         0.000     0.000      1.00
## HIV mother to child transmission                 0.000     0.000      1.00
## obsessive-compulsive disorder                    0.000     0.000      1.00
## rheumatoid arthritis                             0.300     0.300      0.40
## type II diabetes mellitus                        0.150     0.150      0.69
tabDiv(exps, "Richness", "Condition")
##                                              increased decreased unchanged
## COVID-19                                             4        15        17
## air pollution                                       14         5         4
## antimicrobial agent                                  1        10         5
## Parkinson's disease                                 13         4         3
## alcohol drinking                                     5         0         0
## acute lymphoblastic leukemia                         5         1         0
## cervical glandular intraepithelial neoplasia         4         0         2
## atopic asthma                                        4         1         7
## food allergy                                         0         3         9
## gastric cancer                                       3         6        13
## HIV infection                                        0         3         7
## human papilloma virus infection                      4         1        13
## smoking behavior                                     1         4         8
## colorectal adenoma                                   0         2         8
## endometriosis                                        3         1        15
## asthma                                               1         0        10
## atopic eczema                                        2         1         7
## colorectal cancer                                    6         5         9
## diet                                                 3         2         6
## ethnic group                                         3         2         1
## irritable bowel syndrome                             0         1         7
## lung cancer                                          0         1         6
## obesity                                              6         7        18
## obsessive-compulsive disorder                        0         1         4
## schizophrenia                                        1         2         2
## cesarean section                                     2         2         7
## HIV mother to child transmission                     0         0         5
## multiple sclerosis                                   0         0         9
## socioeconomic status                                 2         2         1
tabDiv(exps, "Richness", "Condition", perc = TRUE)
##                                              increased decreased unchanged
## COVID-19                                         0.110     0.420      0.47
## air pollution                                    0.610     0.220      0.17
## antimicrobial agent                              0.062     0.620      0.31
## Parkinson's disease                              0.650     0.200      0.15
## alcohol drinking                                 1.000     0.000      0.00
## acute lymphoblastic leukemia                     0.830     0.170      0.00
## cervical glandular intraepithelial neoplasia     0.670     0.000      0.33
## atopic asthma                                    0.330     0.083      0.58
## food allergy                                     0.000     0.250      0.75
## gastric cancer                                   0.140     0.270      0.59
## HIV infection                                    0.000     0.300      0.70
## human papilloma virus infection                  0.220     0.056      0.72
## smoking behavior                                 0.077     0.310      0.62
## colorectal adenoma                               0.000     0.200      0.80
## endometriosis                                    0.160     0.053      0.79
## asthma                                           0.091     0.000      0.91
## atopic eczema                                    0.200     0.100      0.70
## colorectal cancer                                0.300     0.250      0.45
## diet                                             0.270     0.180      0.55
## ethnic group                                     0.500     0.330      0.17
## irritable bowel syndrome                         0.000     0.120      0.88
## lung cancer                                      0.000     0.140      0.86
## obesity                                          0.190     0.230      0.58
## obsessive-compulsive disorder                    0.000     0.200      0.80
## schizophrenia                                    0.200     0.400      0.40
## cesarean section                                 0.180     0.180      0.64
## HIV mother to child transmission                 0.000     0.000      1.00
## multiple sclerosis                               0.000     0.000      1.00
## socioeconomic status                             0.400     0.400      0.20

Body sites with consistently increased or decreased alpha diversity:

tabDiv(exps, "Shannon", "Body site")
##                            increased decreased unchanged
## Feces                             75       147       343
## Mouth                              9         2        11
## Duodenum                           0         6         0
## Stomach                            3         9         5
## Meconium                           5         0         7
## Posterior fornix of vagina         5         0         3
## Subgingival dental plaque          7         2         7
## Tongue                             0         5         8
## Buccal mucosa                      4         0         1
## Vagina                             9         5        10
## Caecum                             1         4         1
## Dental plaque                      0         3         3
## Oral cavity                        3         0         2
## Skin of body                       4         7         6
## Uterine cervix                     3         0        23
## Nasopharynx                        1         3        18
## Breast                             3         2         0
## Lung                               1         2         7
## Oropharynx                         1         2         3
## Saliva                            19        20        40
## Uterus                             0         1         7
## Blood                              0         0         5
## Bronchus                           0         0         6
## Intestine                          1         1         6
## Rectum                             0         0        12
tabDiv(exps, "Shannon", "Body site", perc = TRUE)
##                            increased decreased unchanged
## Feces                          0.130     0.260      0.61
## Mouth                          0.410     0.091      0.50
## Duodenum                       0.000     1.000      0.00
## Stomach                        0.180     0.530      0.29
## Meconium                       0.420     0.000      0.58
## Posterior fornix of vagina     0.620     0.000      0.38
## Subgingival dental plaque      0.440     0.120      0.44
## Tongue                         0.000     0.380      0.62
## Buccal mucosa                  0.800     0.000      0.20
## Vagina                         0.380     0.210      0.42
## Caecum                         0.170     0.670      0.17
## Dental plaque                  0.000     0.500      0.50
## Oral cavity                    0.600     0.000      0.40
## Skin of body                   0.240     0.410      0.35
## Uterine cervix                 0.120     0.000      0.88
## Nasopharynx                    0.045     0.140      0.82
## Breast                         0.600     0.400      0.00
## Lung                           0.100     0.200      0.70
## Oropharynx                     0.170     0.330      0.50
## Saliva                         0.240     0.250      0.51
## Uterus                         0.000     0.120      0.88
## Blood                          0.000     0.000      1.00
## Bronchus                       0.000     0.000      1.00
## Intestine                      0.120     0.120      0.75
## Rectum                         0.000     0.000      1.00
tabDiv(exps, "Richness", "Body site")
##                            increased decreased unchanged
## Feces                             57        70       157
## Colon                              8         0         2
## Mouth                              7         1         5
## Oropharynx                         0         6         3
## Posterior fornix of vagina         6         0         2
## Uterine cervix                     6         0        16
## Stomach                            2         7         3
## Subgingival dental plaque          4         0         4
## Nasopharynx                        3         5        10
## Rectum                             0         2         7
## Saliva                             5         7        19
## Tongue                             0         2         5
## Vagina                             3         1        11
## Caecum                             2         3         0
## Meconium                           1         2         4
## Bronchus                           0         0         6
## Intestine                          0         0         7
tabDiv(exps, "Richness", "Body site", perc = TRUE)
##                            increased decreased unchanged
## Feces                           0.20     0.250      0.55
## Colon                           0.80     0.000      0.20
## Mouth                           0.54     0.077      0.38
## Oropharynx                      0.00     0.670      0.33
## Posterior fornix of vagina      0.75     0.000      0.25
## Uterine cervix                  0.27     0.000      0.73
## Stomach                         0.17     0.580      0.25
## Subgingival dental plaque       0.50     0.000      0.50
## Nasopharynx                     0.17     0.280      0.56
## Rectum                          0.00     0.220      0.78
## Saliva                          0.16     0.230      0.61
## Tongue                          0.00     0.290      0.71
## Vagina                          0.20     0.067      0.73
## Caecum                          0.40     0.600      0.00
## Meconium                        0.14     0.290      0.57
## Bronchus                        0.00     0.000      1.00
## Intestine                       0.00     0.000      1.00

Signature stats

sigs <- bugsigdbr::getSignatures(dat, tax.id.type = "metaphlan")

Unique microbes

Number unique microbes contained in the signatures:

(nuniq <- length(unique(unlist(sigs))))
## [1] 2406

Development of unique microbes captured over time:

Microbe set size distribution

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   4.000   7.477   9.000 100.000
gghistogram(lengths(sigs), bins = 30, ylab = "number of signatures",
    xlab = "signature size", fill = "#00AFBB", ggtheme = theme_bw())

sum(lengths(sigs) > 4)
## [1] 1440

Microbe co-occurrence

dat.feces <- subset(dat, `Body site` == "Feces")
cooc.mat <- microbeHeatmap(dat.feces, tax.level = "genus")
## Loading required namespace: safe

antag.mat <- microbeHeatmap(dat.feces, tax.level = "genus", antagonistic = TRUE)

Get the top 20 genera most frequently reported as differentially abundant:

sigs.feces <- getSignatures(dat.feces, tax.id.type = "taxname", 
                            tax.level = "genus", exact.tax.level = FALSE) 
top20 <- sort(table(unlist(sigs.feces)), decreasing = TRUE)[1:20]
top20
## 
##      Bacteroides  Bifidobacterium Faecalibacterium       Prevotella 
##              313              216              202              197 
##    Streptococcus          Blautia      Clostridium        Roseburia 
##              196              173              171              170 
##     Ruminococcus    Lactobacillus  Parabacteroides        Alistipes 
##              168              146              137              126 
##      Coprococcus            Dorea      Eubacterium      Veillonella 
##              123              121              121              116 
##     Enterococcus     Anaerostipes      Lachnospira      Akkermansia 
##              111              104              104              103

Subset heatmaps to the top 20 genera most frequently reported as differentially abundant:

all(names(top20) %in% rownames(cooc.mat))
## [1] TRUE
cooc.mat <- cooc.mat[names(top20), names(top20)]
all(names(top20) %in% rownames(antag.mat))
## [1] TRUE
antag.mat <- antag.mat[names(top20), names(top20)]

Distinguish by direction of abundance change (increased / decreased):

# increased
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "increased")
sigs.feces.up <- getSignatures(sub.dat.feces, tax.id.type = "taxname", 
                               tax.level = "genus", exact.tax.level = FALSE) 
top20.up <- table(unlist(sigs.feces.up))[names(top20)]
top20.up
## 
##      Bacteroides  Bifidobacterium Faecalibacterium       Prevotella 
##              126               90               68              102 
##    Streptococcus          Blautia      Clostridium        Roseburia 
##              122               71               86               50 
##     Ruminococcus    Lactobacillus  Parabacteroides        Alistipes 
##               66              101               67               46 
##      Coprococcus            Dorea      Eubacterium      Veillonella 
##               37               48               49               76 
##     Enterococcus     Anaerostipes      Lachnospira      Akkermansia 
##               92               37               23               64
# decreased
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "decreased")
sigs.feces.down <- getSignatures(sub.dat.feces, tax.id.type = "taxname", 
                                 tax.level = "genus", exact.tax.level = FALSE) 
top20.down <- table(unlist(sigs.feces.down))[names(top20)]
top20.down
## 
##      Bacteroides  Bifidobacterium Faecalibacterium       Prevotella 
##              179              117              129               92 
##    Streptococcus          Blautia      Clostridium        Roseburia 
##               65               97               81              115 
##     Ruminococcus    Lactobacillus  Parabacteroides        Alistipes 
##               98               44               65               75 
##      Coprococcus            Dorea      Eubacterium      Veillonella 
##               81               68               67               36 
##     Enterococcus     Anaerostipes      Lachnospira      Akkermansia 
##               16               62               76               34

Plot the heatmap

# annotation
mat <- matrix(nc = 2, cbind(top20.up, top20.down))
bp <- ComplexHeatmap::anno_barplot(mat, gp = gpar(fill = c("#D55E00", "#0072B2"),
                                                  col = c("#D55E00", "#0072B2")),
                                   height = unit(2, "cm"))
banno <- ComplexHeatmap::HeatmapAnnotation(`Abundance in Group 1` = bp)

lgd_list <- list(
    Legend(labels = c("increased", "decreased"), 
           title = "Abundance in Group 1", 
           type = "grid",
           legend_gp = gpar(col = c("#D55E00", "#0072B2"), fill = c("#D55E00", "#0072B2"))))
                                            
# same direction
# lcm <- sweep(cooc.mat, 2, matrixStats::colMaxs(cooc.mat), FUN = "/")
# we need to dampen the maximum here a bit down,
# otherwise 100% self co-occurrence takes up a large fraction of the colorscale,
sec <- apply(cooc.mat, 2, function(x) sort(x, decreasing = TRUE)[2])
cooc.mat2 <- cooc.mat
for(i in 1:ncol(cooc.mat2)) cooc.mat2[i,i] <- min(cooc.mat2[i,i], 1.4 * sec[i])
lcm <- sweep(cooc.mat2, 2, matrixStats::colMaxs(cooc.mat2), FUN = "/")
col <- circlize::colorRamp2(c(0,1), c("#EEEEEE", "red"))
ht1 <- ComplexHeatmap::Heatmap(lcm,
                               col = col, 
                               name = "Relative frequency (top)",
                               cluster_columns = FALSE, 
                               row_km = 3, 
                               row_title = "same direction", 
                               column_names_rot = 45,
                               row_names_gp = gpar(fontsize = 8),
                               column_names_gp = gpar(fontsize = 8))

# opposite direction
acm <- sweep(antag.mat, 2, matrixStats::colMaxs(antag.mat), FUN = "/")
col <- circlize::colorRamp2(c(0,1), c("#EEEEEE", "blue"))
ht2 <- ComplexHeatmap::Heatmap(acm,
                               col = col, 
                               name = "Relative frequency (bottom)",
                               cluster_columns = FALSE, 
                               row_title = "opposite direction", 
                               row_km = 3, 
                               column_names_rot = 45,
                               row_names_gp = gpar(fontsize = 8),
                               column_names_gp = gpar(fontsize = 8))

# phylum
sfp <- bugsigdbr::getSignatures(dat.feces, tax.id.type = "metaphlan", 
                                tax.level = "genus", exact.tax.level = FALSE) 
sfp20 <- sort(table(unlist(sfp)), decreasing = TRUE)[1:20]
uanno <- bugsigdbr::extractTaxLevel(names(sfp20),
                                    tax.id.type = "taxname",
                                    tax.level = "phylum",
                                    exact.tax.level = FALSE) 
phyla.grid <- seq_along(unique(uanno))
panno <- ComplexHeatmap::HeatmapAnnotation(phylum = uanno)

uanno <- matrix(uanno, nrow = 1)
colnames(uanno) <- names(top20)
pcols <- c("#CC79A7", "#F0E442", "#009E73", "#56B4E9", "#E69F00")
uanno <- ComplexHeatmap::Heatmap(uanno, name = "Phylum",
                                 col = pcols[phyla.grid],
                                 cluster_columns = FALSE,
                                 column_names_rot = 45,
                                 column_names_gp = gpar(fontsize = 8))

# put everything together
ht_list <- ht1 %v% banno %v% ht2 %v% uanno
ComplexHeatmap::draw(ht_list, annotation_legend_list = lgd_list, merge_legend = TRUE)

decorate_annotation("Abundance in Group 1", {
    grid.text("# signatures", x = unit(-1, "cm"), rot = 90, just = "bottom", gp = gpar(fontsize = 8))
    grid.text("*", x = unit(2.45, "cm"), y = unit(1.2, "cm"))
    grid.text("*", x = unit(5.18, "cm"), y = unit(1, "cm"))
    grid.text("*", x = unit(6.55, "cm"), y = unit(0.95, "cm"))
    grid.text("*", x = unit(8.6, "cm"), y = unit(0.85, "cm"))
    grid.text("*", x = unit(10, "cm"), y = unit(0.7, "cm"))
    grid.text("*", x = unit(10.7, "cm"), y = unit(0.7, "cm"))
})

Signature similarity

Jaccard index

Inspect signature similarity for signatures from stomach samples based on Jaccard index:

stomachsub <- subset(dat, `Body site` == "Stomach")
sigsub <- bugsigdbr::getSignatures(stomachsub)
pair.jsim <- calcJaccardSimilarity(sigsub)

Create a dendrogram of Jaccard dissimilarities (1.0 has no overlap, 0.0 are identical signatures).

jdist <- as.dist(1 - pair.jsim)
plot(hclust(jdist))