• BugSigDB Stats and Analysis

Setup

library(bugsigdbr)
library(BugSigDBStats)
library(ComplexHeatmap)
library(ggpubr)

Reading data

Get bulk export from bugsigdb.org:

# Download the bulk export; version = "devel" and cache = FALSE are passed to
# request the development version and (presumably) bypass any cached copy
full.dat <- bugsigdbr::importBugSigDB(version = "devel", cache = FALSE)
# Number of rows and columns of the export
dim(full.dat)

## [1] 9079   50

# Names of all columns available in the export
colnames(full.dat)

##  [1] "BSDB ID"                    "Study"                     
##  [3] "Study design"               "PMID"                      
##  [5] "DOI"                        "URL"                       
##  [7] "Authors list"               "Title"                     
##  [9] "Journal"                    "Year"                      
## [11] "Keywords"                   "Experiment"                
## [13] "Location of subjects"       "Host species"              
## [15] "Body site"                  "UBERON ID"                 
## [17] "Condition"                  "EFO ID"                    
## [19] "Group 0 name"               "Group 1 name"              
## [21] "Group 1 definition"         "Group 0 sample size"       
## [23] "Group 1 sample size"        "Antibiotics exclusion"     
## [25] "Sequencing type"            "16S variable region"       
## [27] "Sequencing platform"        "Statistical test"          
## [29] "Significance threshold"     "MHT correction"            
## [31] "LDA Score above"            "Matched on"                
## [33] "Confounders controlled for" "Pielou"                    
## [35] "Shannon"                    "Chao1"                     
## [37] "Simpson"                    "Inverse Simpson"           
## [39] "Richness"                   "Signature page name"       
## [41] "Source"                     "Curated date"              
## [43] "Curator"                    "Revision editor"           
## [45] "Description"                "Abundance in Group 1"      
## [47] "MetaPhlAn taxon names"      "NCBI Taxonomy IDs"         
## [49] "State"                      "Reviewer"

Stripping ill-formed entries:

# Flag rows that have a non-missing study and experiment identifier
is.study <- !is.na(full.dat[["Study"]])
is.exp <- !is.na(full.dat[["Experiment"]])
# Keep only rows for which both identifiers are present
full.dat <- full.dat[is.study & is.exp, ]

Curation output

Number of papers and signatures curated:

# Distinct PubMed IDs among all rows
# NOTE(review): unique() keeps NA as a value, so a missing PMID (if any)
# is counted as one entry in the length below
pmids <- unique(full.dat[,"PMID"])
length(pmids)

## [1] 1465

# Number of rows remaining in the full dataset
nrow(full.dat)

## [1] 9079

Publication date of the curated papers:

# Drop missing PMIDs before looking up publication years
pmids <- pmids[!is.na(pmids)]
# pmid2pubyear presumably returns one publication year per PMID -- confirm
pubyear <- pmid2pubyear(pmids)
# Preview the PMID / publication year pairs
head(cbind(pmids, pubyear))

# Count papers per publication year and order the counts chronologically
tab <- table(pubyear)
tab <- tab[order(as.integer(names(tab)))]
# Bar plot of the number of papers per year
df <- data.frame(year = names(tab), papers = as.integer(tab))
ggbarplot(df, x = "year", y = "papers", 
          label = TRUE, fill = "steelblue",
          ggtheme = theme_bw())

Stripping empty signatures:

# Rows with at least one taxon name and at least one NCBI taxonomy ID
# (lengths() is applied per entry; presumably these are list columns -- confirm)
ind1 <- lengths(full.dat[["MetaPhlAn taxon names"]]) > 0
ind2 <- lengths(full.dat[["NCBI Taxonomy IDs"]]) > 0
# Keep signatures that are non-empty in both columns
dat <- full.dat[ind1 & ind2,]
nrow(dat)

## [1] 9079

Papers containing only empty UP and DOWN signatures (under curation?):

# PMIDs present in the full dataset but absent after removing empty signatures
setdiff(pmids, unique(dat[,"PMID"]))

## numeric(0)

Progress over time:

# Parse the curation dates as day-month-year and store them as character
dat[,"Curated date"] <- as.character(lubridate::dmy(dat[,"Curated date"]))
# Plot curation progress over time
plotProgressOverTime(dat)

# Same plot with diff = TRUE
plotProgressOverTime(dat, diff = TRUE)

Stratified by curator:

# Per-curator statistics (stratifyByCurator) and the corresponding plot
npc <- stratifyByCurator(dat)
plotCuratorStats(dat, npc)

Number of complete and revised signatures (evaluation is turned off because the output has become too long):

# Tabulate signatures by curation state and by revision editor
table(dat[["State"]])
table(dat[["Revision editor"]])

Study stats

Study design

# Group study identifiers by study design
spl <- split(dat[["Study"]], dat[["Study design"]])
# Reduce each group to its distinct studies
sds <- lapply(spl, unique)
# Number of distinct studies per study design, in ascending order
sort(lengths(sds), decreasing = FALSE)

##                                       case-control,prospective cohort 
##                                                                     1 
##    cross-sectional observational, not case-control,prospective cohort 
##                                                                     2 
##                                   laboratory experiment,meta-analysis 
##                                                                     2 
##           prospective cohort,time series / longitudinal observational 
##                                                                     2 
##                 case-control,time series / longitudinal observational 
##                                                                     3 
## cross-sectional observational, not case-control,laboratory experiment 
##                                                                     4 
##                                            case-control,meta-analysis 
##                                                                     5 
##        laboratory experiment,time series / longitudinal observational 
##                                                                     5 
##                                    case-control,laboratory experiment 
##                                                                     6 
##                                                         meta-analysis 
##                                                                    18 
##                                           randomized controlled trial 
##                                                                    67 
##                                                    prospective cohort 
##                                                                   115 
##                              time series / longitudinal observational 
##                                                                   131 
##                                                 laboratory experiment 
##                                                                   157 
##                       cross-sectional observational, not case-control 
##                                                                   393 
##                                                          case-control 
##                                                                   568

Experiment stats

Columns of the full dataset that describe experiments:

# Columns identifying an experiment
exp.cols <- c("Study", "Experiment")

# Columns describing the subjects
sub.cols <- c(
  "Host species",
  "Location of subjects",
  "Body site",
  "Condition",
  "Antibiotics exclusion",
  "Group 0 sample size",
  "Group 1 sample size"
)

# Columns describing the lab analysis
lab.cols <- c(
  "Sequencing type",
  "16S variable region",
  "Sequencing platform"
)

# Columns describing the statistical analysis
stat.cols <- c(
  "Statistical test",
  "MHT correction",
  "Significance threshold"
)

# Columns holding alpha diversity results
div.cols <- c(
  "Pielou",
  "Shannon",
  "Chao1",
  "Simpson",
  "Inverse Simpson",
  "Richness"
)

Restrict dataset to experiment information:

# Select the experiment-level columns and drop duplicated rows
exp.level.cols <- c(exp.cols, sub.cols, lab.cols, stat.cols, div.cols)
exps <- unique(dat[, exp.level.cols])

Subjects

Number of experiments for the top 10 categories for each subjects column:

# Tabulate the first five subject columns with tabCol (n = 10); naming the
# input vector makes lapply() return a list named by column
sub.tab <- lapply(setNames(nm = sub.cols[1:5]), tabCol, df = exps, n = 10)
sub.tab

## $`Host species`
## 
##           Homo sapiens           Mus musculus      Rattus norvegicus 
##                   4408                    583                    135 
##  Sus scrofa domesticus Canis lupus familiaris             Ovis aries 
##                     88                     74                     24 
##          Not specified             Bos taurus           Capra hircus 
##                     23                     18                     14 
##          Gallus gallus 
##                     14 
## 
## $`Location of subjects`
## 
##                    China United States of America                  Germany 
##                     1844                      956                      173 
##                    Japan                  Denmark                    Italy 
##                      168                      150                      130 
##                Australia              Netherlands              South Korea 
##                      112                      112                      101 
##                    Spain 
##                       96 
## 
## $`Body site`
## 
##                     Feces                    Saliva                    Vagina 
##                      3529                       292                       112 
##                     Mouth               Nasopharynx Subgingival dental plaque 
##                        67                        55                        49 
##              Skin of body            Uterine cervix                     Colon 
##                        47                        47                        46 
##                    Throat 
##                        41 
## 
## $Condition
## 
##           Parkinson's disease                          Diet 
##                           197                           148 
## Treatment outcome measurement                       Obesity 
##                           146                           141 
##             Colorectal cancer                      COVID-19 
##                           134                           116 
##                 Atopic eczema           Antimicrobial agent 
##                            90                            85 
##           Alzheimer's disease        Response to transplant 
##                            77                            75 
## 
## $`Antibiotics exclusion`
## 
##                 3 months                  1 month                 2 months 
##                      592                      505                      242 
##                 6 months                  2 weeks                 3 Months 
##                      180                      132                       50 
##                 6 Months                  1 Month currently on antibiotics 
##                       39                       28                       28 
##                3 Months. 
##                       25

Proportions instead:

# Same tabulation of the first five subject columns, with perc = TRUE;
# naming the input vector makes lapply() return a list named by column
sub.tab <- lapply(setNames(nm = sub.cols[1:5]), tabCol,
                  df = exps, n = 10, perc = TRUE)
sub.tab

## $`Host species`
## 
##           Homo sapiens           Mus musculus      Rattus norvegicus 
##                0.79900                0.10600                0.02450 
##  Sus scrofa domesticus Canis lupus familiaris             Ovis aries 
##                0.01590                0.01340                0.00435 
##          Not specified             Bos taurus           Capra hircus 
##                0.00417                0.00326                0.00254 
##          Gallus gallus 
##                0.00254 
## 
## $`Location of subjects`
## 
##                    China United States of America                  Germany 
##                   0.3340                   0.1730                   0.0314 
##                    Japan                  Denmark                    Italy 
##                   0.0305                   0.0272                   0.0236 
##                Australia              Netherlands              South Korea 
##                   0.0203                   0.0203                   0.0183 
##                    Spain 
##                   0.0174 
## 
## $`Body site`
## 
##                     Feces                    Saliva                    Vagina 
##                   0.64000                   0.05290                   0.02030 
##                     Mouth               Nasopharynx Subgingival dental plaque 
##                   0.01210                   0.00997                   0.00888 
##              Skin of body            Uterine cervix                     Colon 
##                   0.00852                   0.00852                   0.00834 
##                    Throat 
##                   0.00743 
## 
## $Condition
## 
##           Parkinson's disease                          Diet 
##                        0.0371                        0.0278 
## Treatment outcome measurement                       Obesity 
##                        0.0275                        0.0265 
##             Colorectal cancer                      COVID-19 
##                        0.0252                        0.0218 
##                 Atopic eczema           Antimicrobial agent 
##                        0.0169                        0.0160 
##           Alzheimer's disease        Response to transplant 
##                        0.0145                        0.0141 
## 
## $`Antibiotics exclusion`
## 
##                 3 months                  1 month                 2 months 
##                   0.2460                   0.2100                   0.1000 
##                 6 months                  2 weeks                 3 Months 
##                   0.0747                   0.0548                   0.0207 
##                 6 Months                  1 Month currently on antibiotics 
##                   0.0162                   0.0116                   0.0116 
##                3 Months. 
##                   0.0104

Sample size:

# Summary statistics of the two group sample-size columns (sub.cols[6:7])
# NOTE(review): apply() coerces the data frame to a matrix before summarizing;
# this presumably relies on both columns being numeric -- confirm
ssize <- apply(exps[,sub.cols[6:7]], 2, summary)
ssize

##         Group 0 sample size Group 1 sample size
## Min.                 0.0000             1.00000
## 1st Qu.             12.0000            10.00000
## Median              24.0000            21.00000
## Mean               552.9231            66.91259
## 3rd Qu.             50.0000            43.00000
## Max.            308633.0000         10413.00000
## NA's               798.0000           793.00000

Lab analysis

Number of experiments for the top 10 categories for each lab analysis column:

# Tabulate every lab analysis column with tabCol (n = 10); naming the
# input vector makes lapply() return a list named by column
lab.tab <- lapply(setNames(nm = lab.cols), tabCol, df = exps, n = 10)
lab.tab

## $`Sequencing type`
## 
##        16S        WMS        PCR ITS / ITS2        18S 
##       4365        799         57         29          5 
## 
## $`16S variable region`
## 
##   34    4   12  123   45  345    3   56  678   23 
## 1761 1214  290  214  153  140   62   51   31   21 
## 
## $`Sequencing platform`
## 
##                    Illumina                    Roche454 
##                        4389                         328 
##                 Ion Torrent                     RT-qPCR 
##                         203                         103 
##                 MGISEQ-2000                   PacBio RS 
##                          37                          25 
##       BGISEQ-500 Sequencing           Mass spectrometry 
##                          18                          18 
## Human Intestinal Tract Chip           Illumina,Roche454 
##                          16                          11

Proportions instead:

# Same tabulation of the lab analysis columns, with perc = TRUE;
# naming the input vector makes lapply() return a list named by column
lab.tab <- lapply(setNames(nm = lab.cols), tabCol,
                  df = exps, n = 10, perc = TRUE)
lab.tab

## $`Sequencing type`
## 
##        16S        WMS        PCR ITS / ITS2        18S 
##   0.831000   0.152000   0.010800   0.005520   0.000951 
## 
## $`16S variable region`
## 
##      34       4      12     123      45     345       3      56     678      23 
## 0.43500 0.30000 0.07160 0.05290 0.03780 0.03460 0.01530 0.01260 0.00766 0.00519 
## 
## $`Sequencing platform`
## 
##                    Illumina                    Roche454 
##                     0.84600                     0.06320 
##                 Ion Torrent                     RT-qPCR 
##                     0.03910                     0.01990 
##                 MGISEQ-2000                   PacBio RS 
##                     0.00713                     0.00482 
##       BGISEQ-500 Sequencing           Mass spectrometry 
##                     0.00347                     0.00347 
## Human Intestinal Tract Chip           Illumina,Roche454 
##                     0.00308                     0.00212

Statistical analysis

Number of experiments for the top 10 categories for each statistical analysis column:

# Tabulate every statistical analysis column with tabCol (n = 10); naming
# the input vector makes lapply() return a list named by column
stat.tab <- lapply(setNames(nm = stat.cols), tabCol, df = exps, n = 10)
stat.tab

## $`Statistical test`
## 
##                   LEfSe Mann-Whitney (Wilcoxon)                  DESeq2 
##                    1629                     841                     514 
##         Kruskall-Wallis       Linear Regression                   ANOVA 
##                     283                     257                     244 
##                  T-Test                MaAsLin2                   ANCOM 
##                     209                     182                     167 
##    Spearman Correlation 
##                      90 
## 
## $`MHT correction`
## 
##  TRUE FALSE 
##  2733  2133 
## 
## $`Significance threshold`
## 
##  0.05   0.1  0.01 0.001  0.25   0.2  0.15 0.005     2 1e-04 
##  4603   332   109    34    34    25    19    15    15     6

Proportions instead:

# Same tabulation of the statistical analysis columns, with perc = TRUE;
# naming the input vector makes lapply() return a list named by column
stat.tab <- lapply(setNames(nm = stat.cols), tabCol,
                   df = exps, n = 10, perc = TRUE)
stat.tab

## $`Statistical test`
## 
##                   LEfSe Mann-Whitney (Wilcoxon)                  DESeq2 
##                  0.3120                  0.1610                  0.0984 
##         Kruskall-Wallis       Linear Regression                   ANOVA 
##                  0.0542                  0.0492                  0.0467 
##                  T-Test                MaAsLin2                   ANCOM 
##                  0.0400                  0.0348                  0.0320 
##    Spearman Correlation 
##                  0.0172 
## 
## $`MHT correction`
## 
##  TRUE FALSE 
## 0.562 0.438 
## 
## $`Significance threshold`
## 
##    0.05     0.1    0.01   0.001    0.25     0.2    0.15   0.005       2   1e-04 
## 0.88200 0.06360 0.02090 0.00652 0.00652 0.00479 0.00364 0.00288 0.00288 0.00115

Alpha diversity

Overall distribution:

# Tabulate the reported direction of change for every alpha diversity column.
# All columns are tabulated against one shared set of categories, so the
# result is always a categories x measures integer matrix, even when a
# category is absent from some column. apply(..., 2, table) would return a
# ragged list in that case, and it also coerces the data frame to a matrix.
div.levels <- sort(unique(unlist(exps[div.cols], use.names = FALSE)))
vapply(exps[div.cols],
       function(x) c(table(factor(x, levels = div.levels))),
       FUN.VALUE = integer(length(div.levels)))

##           Pielou Shannon Chao1 Simpson Inverse Simpson Richness
## decreased     48     698   448     224              64      431
## increased     37     542   312     162              36      326
## unchanged    174    1950   875     696             202      983

Correspondence of Shannon diversity and Richness:

# Cross-tabulate Shannon (rows) against Richness (columns); exact-name
# extraction with [[ avoids the partial matching that $ allows
table(exps[["Shannon"]], exps[["Richness"]])

##            
##             decreased increased unchanged
##   decreased       231        15        54
##   increased         9       156        52
##   unchanged        97        83       794

Conditions with consistently increased or decreased alpha diversity:

# Tabulate the Shannon diversity results by condition
# (tabDiv comes from an attached package, presumably BugSigDBStats -- confirm)
tabDiv(exps, "Shannon", "Condition")

##                                                           increased decreased
## Treatment outcome measurement                                    11        28
## COVID-19                                                          9        24
## Obesity                                                           3        16
## Polycystic ovary syndrome                                         0        13
## Pulmonary tuberculosis                                            2        14
## HIV infection                                                     1        12
## Smoking behaviour measurement                                     2        13
## Clostridium difficile infection                                  10         0
## Dry eye syndrome                                                  1        11
## Systemic inflammatory response syndrome                           5        15
## Chronic constipation                                              9         0
## Human papilloma virus infection                                  10         1
## Ulcerative colitis                                                0         9
## Alzheimer's disease                                               2        10
## Gastric cancer                                                    6        14
## Age                                                               5        12
## Aging                                                             0         7
## Atopic eczema                                                     5        11
## Autism spectrum disorder                                          7         1
## Cesarean section                                                  6         0
## Constipation                                                      6         0
## Epilepsy                                                          6         0
## Lung cancer                                                       2         8
## Parkinson's disease                                              20        14
## Response to allogeneic hematopoietic stem cell transplant         0         6
## Urinary tract infection                                           0         6
## Cervical cancer                                                   5         0
## Diet                                                             14        19
## Helminthiasis                                                     5         0
## Population                                                        2         7
## Spontaneous preterm birth                                        12         7
## Acute lymphoblastic leukemia                                      0         4
## Acute pancreatitis                                                0         4
## Colitis                                                           4         0
## Colorectal cancer                                                10        14
## Crohn's disease                                                   0         4
## Ethnic group                                                      3         7
## Food allergy                                                      6         2
## Human immunodeficiency virus                                      0         4
## Hypertension                                                      7         3
## Periodontitis                                                     5         1
## Pregnancy                                                         4         0
## Response to antibiotic                                            0         4
## Alcohol drinking                                                  3         0
## Atopic asthma                                                     4         1
## Birth measurement                                                 3         0
## Delivery method                                                   1         4
## Extraction protocol                                              23        26
## Male homosexuality                                                3         0
## Oral lichen planus                                                3         0
## SARS-CoV-2-related disease                                        0         3
## Schizophrenia                                                     1         4
## Type II diabetes mellitus                                         2         5
## Age at assessment                                                 3         1
## Antimicrobial agent                                               8        10
## Breed                                                             0         2
## Cervical glandular intraepithelial neoplasia                      2         0
## Chronic kidney disease                                            2         4
## Cognitive impairment                                              1         3
## Depressive disorder                                               0         2
## Diarrhea                                                          6         4
## Eczema                                                            0         2
## Endometrial cancer                                                4         2
## Esophageal adenocarcinoma                                         0         2
## Iron biomarker measurement                                        1         3
## Milk allergic reaction                                            2         0
## Papillary thyroid carcinoma                                       2         0
## Phenylketonuria                                                   1         3
## Response to anti-tuberculosis drug                                8        10
## Response to antiviral drug                                        2         4
## Response to immunochemotherapy                                    3         1
## Sampling site                                                     3         1
## Smoking behavior                                                 10         8
## Smoking status measurement                                        2         0
## Squamous cell carcinoma                                           2         0
## Streptococcus pneumoniae                                          0         2
## Stroke                                                            2         0
## Traditional Chinese medicine type                                 2         4
## Acute respiratory failure                                         6         5
## Air pollution                                                     7         6
## Anxiety disorder                                                  0         1
## Breastfeeding duration                                            2         3
## Chlamydia trachomatis                                             1         2
## Chronic fatigue syndrome                                          0         1
## Chronic hepatitis B virus infection                               0         1
## Chronic obstructive pulmonary disease                             3         2
## Diabetes mellitus                                                 0         1
## Endometriosis                                                     2         3
## Esophageal cancer                                                 1         2
## Gestational diabetes                                              1         0
## Hepatocellular carcinoma                                          0         1
## Hypertrophy                                                       1         0
## Irritable bowel syndrome                                          5         6
## Lifestyle measurement                                             0         1
## Multiple sclerosis                                                0         1
## Oral cavity carcinoma                                             0         1
## Oral squamous cell carcinoma                                      3         2
## Oxalate measurement                                               8         7
## Pancreatic carcinoma                                              0         1
## Psoriasis                                                         1         0
## Respiratory Syncytial Virus Infection                             0         1
## Respiratory tract infectious disease                              0         1
## Response to diet                                                  3         4
## Response to transplant                                            9         8
## Response to vaccine                                               1         0
## Rheumatoid arthritis                                              5         4
## Sample treatment protocol                                         1         0
## Sampling time                                                     4         3
## Social interaction measurement                                    2         1
## Socioeconomic status                                              3         4
## Treatment                                                         1         0
## Type I diabetes mellitus                                          0         1
## Vesicle membrane                                                  3         2
## Vitiligo                                                          0         1
## Abnormal stool composition                                        0         0
## Acute myeloid leukemia                                            1         1
## Arthritis                                                         0         0
## Asthma                                                            1         1
## Biological sex                                                    1         1
## Bipolar disorder                                                  0         0
## Breast cancer                                                     4         4
## Celiac disease                                                    0         0
## Clinical treatment                                                1         1
## Colorectal adenoma                                                2         2
## Contraception                                                     0         0
## COVID-19 symptoms measurement                                     0         0
## Diet measurement                                                  0         0
## Disease progression measurement                                   0         0
## Functional abnormality of the gastrointestinal tract              0         0
## Gastric adenocarcinoma                                            0         0
## Head and neck squamous cell carcinoma                             0         0
## Health study participation                                        2         2
## HIV mother to child transmission                                  0         0
## Lactose intolerance                                               0         0
## Lung transplantation                                              2         2
## Obsessive-compulsive disorder                                     0         0
## Ovarian cancer                                                    3         3
## Phenotype                                                         2         2
## Psoriasis vulgaris                                                0         0
## Response to ketogenic diet                                        2         2
## Sample collection protocol                                        0         0
## SARS coronavirus                                                  0         0
## Simian immunodeficiency virus infection                           0         0
## Timepoint                                                         0         0
## Transplant outcome measurement                                    0         0
## Viral load                                                        0         0
## Waist circumference                                               0         0
##                                                           unchanged
## Treatment outcome measurement                                    68
## COVID-19                                                         42
## Obesity                                                          58
## Polycystic ovary syndrome                                        13
## Pulmonary tuberculosis                                            0
## HIV infection                                                    26
## Smoking behaviour measurement                                     0
## Clostridium difficile infection                                   1
## Dry eye syndrome                                                 11
## Systemic inflammatory response syndrome                           4
## Chronic constipation                                             10
## Human papilloma virus infection                                  28
## Ulcerative colitis                                                3
## Alzheimer's disease                                              24
## Gastric cancer                                                   26
## Age                                                               9
## Aging                                                             0
## Atopic eczema                                                    72
## Autism spectrum disorder                                          8
## Cesarean section                                                 16
## Constipation                                                      2
## Epilepsy                                                          5
## Lung cancer                                                       7
## Parkinson's disease                                              86
## Response to allogeneic hematopoietic stem cell transplant         0
## Urinary tract infection                                           8
## Cervical cancer                                                   5
## Diet                                                             63
## Helminthiasis                                                     8
## Population                                                       25
## Spontaneous preterm birth                                         5
## Acute lymphoblastic leukemia                                      5
## Acute pancreatitis                                                2
## Colitis                                                           1
## Colorectal cancer                                                44
## Crohn's disease                                                   5
## Ethnic group                                                      6
## Food allergy                                                     19
## Human immunodeficiency virus                                      6
## Hypertension                                                      6
## Periodontitis                                                    10
## Pregnancy                                                         2
## Response to antibiotic                                            8
## Alcohol drinking                                                  2
## Atopic asthma                                                     7
## Birth measurement                                                 4
## Delivery method                                                   2
## Extraction protocol                                              20
## Male homosexuality                                                6
## Oral lichen planus                                                4
## SARS-CoV-2-related disease                                        4
## Schizophrenia                                                    14
## Type II diabetes mellitus                                        24
## Age at assessment                                                 1
## Antimicrobial agent                                              25
## Breed                                                             7
## Cervical glandular intraepithelial neoplasia                      9
## Chronic kidney disease                                            5
## Cognitive impairment                                             10
## Depressive disorder                                               4
## Diarrhea                                                          8
## Eczema                                                           10
## Endometrial cancer                                                3
## Esophageal adenocarcinoma                                         4
## Iron biomarker measurement                                        2
## Milk allergic reaction                                            5
## Papillary thyroid carcinoma                                      10
## Phenylketonuria                                                   4
## Response to anti-tuberculosis drug                               13
## Response to antiviral drug                                        5
## Response to immunochemotherapy                                    3
## Sampling site                                                     7
## Smoking behavior                                                 20
## Smoking status measurement                                        4
## Squamous cell carcinoma                                           4
## Streptococcus pneumoniae                                          4
## Stroke                                                           16
## Traditional Chinese medicine type                                 6
## Acute respiratory failure                                         0
## Air pollution                                                     3
## Anxiety disorder                                                  7
## Breastfeeding duration                                            9
## Chlamydia trachomatis                                             2
## Chronic fatigue syndrome                                          4
## Chronic hepatitis B virus infection                               5
## Chronic obstructive pulmonary disease                             2
## Diabetes mellitus                                                 5
## Endometriosis                                                    14
## Esophageal cancer                                                 2
## Gestational diabetes                                             35
## Hepatocellular carcinoma                                          6
## Hypertrophy                                                       4
## Irritable bowel syndrome                                         22
## Lifestyle measurement                                             8
## Multiple sclerosis                                               17
## Oral cavity carcinoma                                             7
## Oral squamous cell carcinoma                                      3
## Oxalate measurement                                               1
## Pancreatic carcinoma                                              6
## Psoriasis                                                        12
## Respiratory Syncytial Virus Infection                             5
## Respiratory tract infectious disease                              5
## Response to diet                                                 37
## Response to transplant                                           23
## Response to vaccine                                               5
## Rheumatoid arthritis                                              9
## Sample treatment protocol                                         4
## Sampling time                                                     5
## Social interaction measurement                                    6
## Socioeconomic status                                              8
## Treatment                                                         7
## Type I diabetes mellitus                                          6
## Vesicle membrane                                                  1
## Vitiligo                                                          4
## Abnormal stool composition                                        6
## Acute myeloid leukemia                                            4
## Arthritis                                                         6
## Asthma                                                           14
## Biological sex                                                    6
## Bipolar disorder                                                  5
## Breast cancer                                                    20
## Celiac disease                                                    6
## Clinical treatment                                                5
## Colorectal adenoma                                               10
## Contraception                                                     5
## COVID-19 symptoms measurement                                     5
## Diet measurement                                                  8
## Disease progression measurement                                   5
## Functional abnormality of the gastrointestinal tract             27
## Gastric adenocarcinoma                                            8
## Head and neck squamous cell carcinoma                             8
## Health study participation                                       35
## HIV mother to child transmission                                  8
## Lactose intolerance                                               5
## Lung transplantation                                              2
## Obsessive-compulsive disorder                                     5
## Ovarian cancer                                                   27
## Phenotype                                                        19
## Psoriasis vulgaris                                               14
## Response to ketogenic diet                                        3
## Sample collection protocol                                        9
## SARS coronavirus                                                  6
## Simian immunodeficiency virus infection                           5
## Timepoint                                                         5
## Transplant outcome measurement                                   11
## Viral load                                                        6
## Waist circumference                                               5

# Tabulate the "Shannon" column of `exps` against "Condition". With
# perc = TRUE the printed table has one row per condition and the columns
# increased / decreased / unchanged hold fractions that sum to ~1 per row.
# NOTE(review): `exps` and `tabDiv` are defined outside this view (tabDiv
# presumably comes from BugSigDBStats) -- confirm.
tabDiv(exps, "Shannon", "Condition", perc = TRUE)

##                                                           increased decreased
## Treatment outcome measurement                                 0.100     0.260
## COVID-19                                                      0.120     0.320
## Obesity                                                       0.039     0.210
## Polycystic ovary syndrome                                     0.000     0.500
## Pulmonary tuberculosis                                        0.120     0.880
## HIV infection                                                 0.026     0.310
## Smoking behaviour measurement                                 0.130     0.870
## Clostridium difficile infection                               0.910     0.000
## Dry eye syndrome                                              0.043     0.480
## Systemic inflammatory response syndrome                       0.210     0.620
## Chronic constipation                                          0.470     0.000
## Human papilloma virus infection                               0.260     0.026
## Ulcerative colitis                                            0.000     0.750
## Alzheimer's disease                                           0.056     0.280
## Gastric cancer                                                0.130     0.300
## Age                                                           0.190     0.460
## Aging                                                         0.000     1.000
## Atopic eczema                                                 0.057     0.120
## Autism spectrum disorder                                      0.440     0.062
## Cesarean section                                              0.270     0.000
## Constipation                                                  0.750     0.000
## Epilepsy                                                      0.550     0.000
## Lung cancer                                                   0.120     0.470
## Parkinson's disease                                           0.170     0.120
## Response to allogeneic hematopoietic stem cell transplant     0.000     1.000
## Urinary tract infection                                       0.000     0.430
## Cervical cancer                                               0.500     0.000
## Diet                                                          0.150     0.200
## Helminthiasis                                                 0.380     0.000
## Population                                                    0.059     0.210
## Spontaneous preterm birth                                     0.500     0.290
## Acute lymphoblastic leukemia                                  0.000     0.440
## Acute pancreatitis                                            0.000     0.670
## Colitis                                                       0.800     0.000
## Colorectal cancer                                             0.150     0.210
## Crohn's disease                                               0.000     0.440
## Ethnic group                                                  0.190     0.440
## Food allergy                                                  0.220     0.074
## Human immunodeficiency virus                                  0.000     0.400
## Hypertension                                                  0.440     0.190
## Periodontitis                                                 0.310     0.062
## Pregnancy                                                     0.670     0.000
## Response to antibiotic                                        0.000     0.330
## Alcohol drinking                                              0.600     0.000
## Atopic asthma                                                 0.330     0.083
## Birth measurement                                             0.430     0.000
## Delivery method                                               0.140     0.570
## Extraction protocol                                           0.330     0.380
## Male homosexuality                                            0.330     0.000
## Oral lichen planus                                            0.430     0.000
## SARS-CoV-2-related disease                                    0.000     0.430
## Schizophrenia                                                 0.053     0.210
## Type II diabetes mellitus                                     0.065     0.160
## Age at assessment                                             0.600     0.200
## Antimicrobial agent                                           0.190     0.230
## Breed                                                         0.000     0.220
## Cervical glandular intraepithelial neoplasia                  0.180     0.000
## Chronic kidney disease                                        0.180     0.360
## Cognitive impairment                                          0.071     0.210
## Depressive disorder                                           0.000     0.330
## Diarrhea                                                      0.330     0.220
## Eczema                                                        0.000     0.170
## Endometrial cancer                                            0.440     0.220
## Esophageal adenocarcinoma                                     0.000     0.330
## Iron biomarker measurement                                    0.170     0.500
## Milk allergic reaction                                        0.290     0.000
## Papillary thyroid carcinoma                                   0.170     0.000
## Phenylketonuria                                               0.120     0.380
## Response to anti-tuberculosis drug                            0.260     0.320
## Response to antiviral drug                                    0.180     0.360
## Response to immunochemotherapy                                0.430     0.140
## Sampling site                                                 0.270     0.091
## Smoking behavior                                              0.260     0.210
## Smoking status measurement                                    0.330     0.000
## Squamous cell carcinoma                                       0.330     0.000
## Streptococcus pneumoniae                                      0.000     0.330
## Stroke                                                        0.110     0.000
## Traditional Chinese medicine type                             0.170     0.330
## Acute respiratory failure                                     0.550     0.450
## Air pollution                                                 0.440     0.380
## Anxiety disorder                                              0.000     0.120
## Breastfeeding duration                                        0.140     0.210
## Chlamydia trachomatis                                         0.200     0.400
## Chronic fatigue syndrome                                      0.000     0.200
## Chronic hepatitis B virus infection                           0.000     0.170
## Chronic obstructive pulmonary disease                         0.430     0.290
## Diabetes mellitus                                             0.000     0.170
## Endometriosis                                                 0.110     0.160
## Esophageal cancer                                             0.200     0.400
## Gestational diabetes                                          0.028     0.000
## Hepatocellular carcinoma                                      0.000     0.140
## Hypertrophy                                                   0.200     0.000
## Irritable bowel syndrome                                      0.150     0.180
## Lifestyle measurement                                         0.000     0.110
## Multiple sclerosis                                            0.000     0.056
## Oral cavity carcinoma                                         0.000     0.120
## Oral squamous cell carcinoma                                  0.380     0.250
## Oxalate measurement                                           0.500     0.440
## Pancreatic carcinoma                                          0.000     0.140
## Psoriasis                                                     0.077     0.000
## Respiratory Syncytial Virus Infection                         0.000     0.170
## Respiratory tract infectious disease                          0.000     0.170
## Response to diet                                              0.068     0.091
## Response to transplant                                        0.220     0.200
## Response to vaccine                                           0.170     0.000
## Rheumatoid arthritis                                          0.280     0.220
## Sample treatment protocol                                     0.200     0.000
## Sampling time                                                 0.330     0.250
## Social interaction measurement                                0.220     0.110
## Socioeconomic status                                          0.200     0.270
## Treatment                                                     0.120     0.000
## Type I diabetes mellitus                                      0.000     0.140
## Vesicle membrane                                              0.500     0.330
## Vitiligo                                                      0.000     0.200
## Abnormal stool composition                                    0.000     0.000
## Acute myeloid leukemia                                        0.170     0.170
## Arthritis                                                     0.000     0.000
## Asthma                                                        0.062     0.062
## Biological sex                                                0.120     0.120
## Bipolar disorder                                              0.000     0.000
## Breast cancer                                                 0.140     0.140
## Celiac disease                                                0.000     0.000
## Clinical treatment                                            0.140     0.140
## Colorectal adenoma                                            0.140     0.140
## Contraception                                                 0.000     0.000
## COVID-19 symptoms measurement                                 0.000     0.000
## Diet measurement                                              0.000     0.000
## Disease progression measurement                               0.000     0.000
## Functional abnormality of the gastrointestinal tract          0.000     0.000
## Gastric adenocarcinoma                                        0.000     0.000
## Head and neck squamous cell carcinoma                         0.000     0.000
## Health study participation                                    0.051     0.051
## HIV mother to child transmission                              0.000     0.000
## Lactose intolerance                                           0.000     0.000
## Lung transplantation                                          0.330     0.330
## Obsessive-compulsive disorder                                 0.000     0.000
## Ovarian cancer                                                0.091     0.091
## Phenotype                                                     0.087     0.087
## Psoriasis vulgaris                                            0.000     0.000
## Response to ketogenic diet                                    0.290     0.290
## Sample collection protocol                                    0.000     0.000
## SARS coronavirus                                              0.000     0.000
## Simian immunodeficiency virus infection                       0.000     0.000
## Timepoint                                                     0.000     0.000
## Transplant outcome measurement                                0.000     0.000
## Viral load                                                    0.000     0.000
## Waist circumference                                           0.000     0.000
##                                                           unchanged
## Treatment outcome measurement                                 0.640
## COVID-19                                                      0.560
## Obesity                                                       0.750
## Polycystic ovary syndrome                                     0.500
## Pulmonary tuberculosis                                        0.000
## HIV infection                                                 0.670
## Smoking behaviour measurement                                 0.000
## Clostridium difficile infection                               0.091
## Dry eye syndrome                                              0.480
## Systemic inflammatory response syndrome                       0.170
## Chronic constipation                                          0.530
## Human papilloma virus infection                               0.720
## Ulcerative colitis                                            0.250
## Alzheimer's disease                                           0.670
## Gastric cancer                                                0.570
## Age                                                           0.350
## Aging                                                         0.000
## Atopic eczema                                                 0.820
## Autism spectrum disorder                                      0.500
## Cesarean section                                              0.730
## Constipation                                                  0.250
## Epilepsy                                                      0.450
## Lung cancer                                                   0.410
## Parkinson's disease                                           0.720
## Response to allogeneic hematopoietic stem cell transplant     0.000
## Urinary tract infection                                       0.570
## Cervical cancer                                               0.500
## Diet                                                          0.660
## Helminthiasis                                                 0.620
## Population                                                    0.740
## Spontaneous preterm birth                                     0.210
## Acute lymphoblastic leukemia                                  0.560
## Acute pancreatitis                                            0.330
## Colitis                                                       0.200
## Colorectal cancer                                             0.650
## Crohn's disease                                               0.560
## Ethnic group                                                  0.380
## Food allergy                                                  0.700
## Human immunodeficiency virus                                  0.600
## Hypertension                                                  0.380
## Periodontitis                                                 0.620
## Pregnancy                                                     0.330
## Response to antibiotic                                        0.670
## Alcohol drinking                                              0.400
## Atopic asthma                                                 0.580
## Birth measurement                                             0.570
## Delivery method                                               0.290
## Extraction protocol                                           0.290
## Male homosexuality                                            0.670
## Oral lichen planus                                            0.570
## SARS-CoV-2-related disease                                    0.570
## Schizophrenia                                                 0.740
## Type II diabetes mellitus                                     0.770
## Age at assessment                                             0.200
## Antimicrobial agent                                           0.580
## Breed                                                         0.780
## Cervical glandular intraepithelial neoplasia                  0.820
## Chronic kidney disease                                        0.450
## Cognitive impairment                                          0.710
## Depressive disorder                                           0.670
## Diarrhea                                                      0.440
## Eczema                                                        0.830
## Endometrial cancer                                            0.330
## Esophageal adenocarcinoma                                     0.670
## Iron biomarker measurement                                    0.330
## Milk allergic reaction                                        0.710
## Papillary thyroid carcinoma                                   0.830
## Phenylketonuria                                               0.500
## Response to anti-tuberculosis drug                            0.420
## Response to antiviral drug                                    0.450
## Response to immunochemotherapy                                0.430
## Sampling site                                                 0.640
## Smoking behavior                                              0.530
## Smoking status measurement                                    0.670
## Squamous cell carcinoma                                       0.670
## Streptococcus pneumoniae                                      0.670
## Stroke                                                        0.890
## Traditional Chinese medicine type                             0.500
## Acute respiratory failure                                     0.000
## Air pollution                                                 0.190
## Anxiety disorder                                              0.880
## Breastfeeding duration                                        0.640
## Chlamydia trachomatis                                         0.400
## Chronic fatigue syndrome                                      0.800
## Chronic hepatitis B virus infection                           0.830
## Chronic obstructive pulmonary disease                         0.290
## Diabetes mellitus                                             0.830
## Endometriosis                                                 0.740
## Esophageal cancer                                             0.400
## Gestational diabetes                                          0.970
## Hepatocellular carcinoma                                      0.860
## Hypertrophy                                                   0.800
## Irritable bowel syndrome                                      0.670
## Lifestyle measurement                                         0.890
## Multiple sclerosis                                            0.940
## Oral cavity carcinoma                                         0.880
## Oral squamous cell carcinoma                                  0.380
## Oxalate measurement                                           0.062
## Pancreatic carcinoma                                          0.860
## Psoriasis                                                     0.920
## Respiratory Syncytial Virus Infection                         0.830
## Respiratory tract infectious disease                          0.830
## Response to diet                                              0.840
## Response to transplant                                        0.570
## Response to vaccine                                           0.830
## Rheumatoid arthritis                                          0.500
## Sample treatment protocol                                     0.800
## Sampling time                                                 0.420
## Social interaction measurement                                0.670
## Socioeconomic status                                          0.530
## Treatment                                                     0.880
## Type I diabetes mellitus                                      0.860
## Vesicle membrane                                              0.170
## Vitiligo                                                      0.800
## Abnormal stool composition                                    1.000
## Acute myeloid leukemia                                        0.670
## Arthritis                                                     1.000
## Asthma                                                        0.880
## Biological sex                                                0.750
## Bipolar disorder                                              1.000
## Breast cancer                                                 0.710
## Celiac disease                                                1.000
## Clinical treatment                                            0.710
## Colorectal adenoma                                            0.710
## Contraception                                                 1.000
## COVID-19 symptoms measurement                                 1.000
## Diet measurement                                              1.000
## Disease progression measurement                               1.000
## Functional abnormality of the gastrointestinal tract          1.000
## Gastric adenocarcinoma                                        1.000
## Head and neck squamous cell carcinoma                         1.000
## Health study participation                                    0.900
## HIV mother to child transmission                              1.000
## Lactose intolerance                                           1.000
## Lung transplantation                                          0.330
## Obsessive-compulsive disorder                                 1.000
## Ovarian cancer                                                0.820
## Phenotype                                                     0.830
## Psoriasis vulgaris                                            1.000
## Response to ketogenic diet                                    0.430
## Sample collection protocol                                    1.000
## SARS coronavirus                                              1.000
## Simian immunodeficiency virus infection                       1.000
## Timepoint                                                     1.000
## Transplant outcome measurement                                1.000
## Viral load                                                    1.000
## Waist circumference                                           1.000

# Tabulate the "Richness" column of `exps` against "Condition". `perc` is not
# passed here, and the printed table holds whole-number counts per condition
# in the increased / decreased columns.
# NOTE(review): `exps` and `tabDiv` are defined outside this view (tabDiv
# presumably comes from BugSigDBStats) -- confirm.
tabDiv(exps, "Richness", "Condition")

##                                                           increased decreased
## Treatment outcome measurement                                     7        25
## Diet                                                              4        19
## Helminthiasis                                                    13         0
## HIV infection                                                     3        15
## COVID-19                                                          9        20
## Pulmonary tuberculosis                                            1        11
## Chronic constipation                                              8         0
## Parkinson's disease                                              18        26
## Phenotype                                                         9         1
## Diarrhea                                                          8         1
## Head and neck squamous cell carcinoma                             0         7
## Polycystic ovary syndrome                                         0         7
## Increased intestinal transit time                                 6         0
## Response to allogeneic hematopoietic stem cell transplant         0         6
## Alcohol drinking                                                  5         0
## Antimicrobial agent                                               2         7
## Human immunodeficiency virus                                      1         6
## Human papilloma virus infection                                   7         2
## Response to diet                                                  3         8
## Acute lymphoblastic leukemia                                      5         1
## Age                                                               1         5
## Air pollution                                                     9         5
## Cervical glandular intraepithelial neoplasia                      4         0
## Dry eye syndrome                                                  0         4
## Endometriosis                                                     4         0
## Epilepsy                                                          4         0
## Periodontitis                                                     5         1
## Response to transplant                                            3         7
## Schizophrenia                                                     1         5
## Vesicle membrane                                                  5         1
## Atopic asthma                                                     4         1
## Delivery method                                                   4         1
## Food allergy                                                      0         3
## Gastric cancer                                                    5         8
## Gestational diabetes                                              3         6
## Hypertrophy                                                       3         0
## Iron biomarker measurement                                        1         4
## Oral squamous cell carcinoma                                      1         4
## Ulcerative colitis                                                0         3
## Asthma                                                            2         0
## Autism spectrum disorder                                          4         6
## Breast cancer                                                     2         0
## Colorectal cancer                                                 8        10
## Esophageal adenocarcinoma                                         0         2
## Hypertension                                                      1         3
## Phenylketonuria                                                   1         3
## Smoking behavior                                                  6         8
## Smoking status measurement                                        2         0
## Streptococcus pneumoniae                                          0         2
## Traditional Chinese medicine type                                 1         3
## Transplant outcome measurement                                    0         2
## Treatment                                                         1         3
## Alzheimer's disease                                               6         5
## Atopic eczema                                                     2         1
## Breastfeeding duration                                            1         0
## Cesarean section                                                  3         2
## Colorectal adenoma                                                1         2
## Constipation                                                      4         5
## Endometrial cancer                                                1         2
## Health study participation                                        1         0
## Inflammatory bowel disease                                        2         3
## Irritable bowel syndrome                                          4         5
## Lung cancer                                                       0         1
## Obesity                                                           8         7
## Obsessive-compulsive disorder                                     0         1
## Ovarian cancer                                                    1         0
## Psoriasis                                                         0         1
## Rheumatoid arthritis                                              3         4
## Sampling site                                                     1         2
## Socioeconomic status                                              2         1
## Transport                                                         1         2
## Type II diabetes mellitus                                         2         3
## Urinary tract infection                                           0         1
## Abnormal stool composition                                        0         0
## Chlamydia trachomatis                                             1         1
## Crohn's disease                                                   2         2
## Diet measurement                                                  0         0
## Ethnic group                                                      2         2
## Functional abnormality of the gastrointestinal tract              0         0
## HIV mother to child transmission                                  0         0
## Male homosexuality                                                0         0
## Multiple sclerosis                                                0         0
## Papillary thyroid carcinoma                                       0         0
## Physical activity                                                 2         2
## Psoriasis vulgaris                                                0         0
## Sample collection protocol                                        0         0
## Stroke                                                            2         2
## Viral load                                                        0         0
##                                                           unchanged
## Treatment outcome measurement                                    51
## Diet                                                             30
## Helminthiasis                                                     0
## HIV infection                                                    10
## COVID-19                                                         24
## Pulmonary tuberculosis                                            1
## Chronic constipation                                              4
## Parkinson's disease                                              28
## Phenotype                                                        11
## Diarrhea                                                          4
## Head and neck squamous cell carcinoma                             4
## Polycystic ovary syndrome                                         1
## Increased intestinal transit time                                 0
## Response to allogeneic hematopoietic stem cell transplant         0
## Alcohol drinking                                                  0
## Antimicrobial agent                                              10
## Human immunodeficiency virus                                      2
## Human papilloma virus infection                                  12
## Response to diet                                                 10
## Acute lymphoblastic leukemia                                      0
## Age                                                               1
## Air pollution                                                     6
## Cervical glandular intraepithelial neoplasia                      2
## Dry eye syndrome                                                  3
## Endometriosis                                                     8
## Epilepsy                                                          1
## Periodontitis                                                     6
## Response to transplant                                           13
## Schizophrenia                                                     8
## Vesicle membrane                                                  0
## Atopic asthma                                                     7
## Delivery method                                                   1
## Food allergy                                                      9
## Gastric cancer                                                   14
## Gestational diabetes                                             25
## Hypertrophy                                                       2
## Iron biomarker measurement                                        1
## Oral squamous cell carcinoma                                      0
## Ulcerative colitis                                                2
## Asthma                                                           10
## Autism spectrum disorder                                          0
## Breast cancer                                                     9
## Colorectal cancer                                                21
## Esophageal adenocarcinoma                                         4
## Hypertension                                                      6
## Phenylketonuria                                                   4
## Smoking behavior                                                  8
## Smoking status measurement                                        5
## Streptococcus pneumoniae                                          3
## Traditional Chinese medicine type                                 4
## Transplant outcome measurement                                    5
## Treatment                                                         6
## Alzheimer's disease                                              23
## Atopic eczema                                                     6
## Breastfeeding duration                                            9
## Cesarean section                                                 10
## Colorectal adenoma                                               11
## Constipation                                                      8
## Endometrial cancer                                                3
## Health study participation                                       28
## Inflammatory bowel disease                                        0
## Irritable bowel syndrome                                         14
## Lung cancer                                                      10
## Obesity                                                          19
## Obsessive-compulsive disorder                                     4
## Ovarian cancer                                                   30
## Psoriasis                                                         8
## Rheumatoid arthritis                                              1
## Sampling site                                                     2
## Socioeconomic status                                              2
## Transport                                                         3
## Type II diabetes mellitus                                        10
## Urinary tract infection                                           6
## Abnormal stool composition                                        6
## Chlamydia trachomatis                                             3
## Crohn's disease                                                   2
## Diet measurement                                                  7
## Ethnic group                                                      1
## Functional abnormality of the gastrointestinal tract             27
## HIV mother to child transmission                                  8
## Male homosexuality                                                9
## Multiple sclerosis                                               17
## Papillary thyroid carcinoma                                      12
## Physical activity                                                 1
## Psoriasis vulgaris                                               14
## Sample collection protocol                                        9
## Stroke                                                           17
## Viral load                                                        5

tabDiv(exps, "Richness", "Condition", perc = TRUE)

##                                                           increased decreased
## Treatment outcome measurement                                 0.084     0.300
## Diet                                                          0.075     0.360
## Helminthiasis                                                 1.000     0.000
## HIV infection                                                 0.110     0.540
## COVID-19                                                      0.170     0.380
## Pulmonary tuberculosis                                        0.077     0.850
## Chronic constipation                                          0.670     0.000
## Parkinson's disease                                           0.250     0.360
## Phenotype                                                     0.430     0.048
## Diarrhea                                                      0.620     0.077
## Head and neck squamous cell carcinoma                         0.000     0.640
## Polycystic ovary syndrome                                     0.000     0.880
## Increased intestinal transit time                             1.000     0.000
## Response to allogeneic hematopoietic stem cell transplant     0.000     1.000
## Alcohol drinking                                              1.000     0.000
## Antimicrobial agent                                           0.110     0.370
## Human immunodeficiency virus                                  0.110     0.670
## Human papilloma virus infection                               0.330     0.095
## Response to diet                                              0.140     0.380
## Acute lymphoblastic leukemia                                  0.830     0.170
## Age                                                           0.140     0.710
## Air pollution                                                 0.450     0.250
## Cervical glandular intraepithelial neoplasia                  0.670     0.000
## Dry eye syndrome                                              0.000     0.570
## Endometriosis                                                 0.330     0.000
## Epilepsy                                                      0.800     0.000
## Periodontitis                                                 0.420     0.083
## Response to transplant                                        0.130     0.300
## Schizophrenia                                                 0.071     0.360
## Vesicle membrane                                              0.830     0.170
## Atopic asthma                                                 0.330     0.083
## Delivery method                                               0.670     0.170
## Food allergy                                                  0.000     0.250
## Gastric cancer                                                0.190     0.300
## Gestational diabetes                                          0.088     0.180
## Hypertrophy                                                   0.600     0.000
## Iron biomarker measurement                                    0.170     0.670
## Oral squamous cell carcinoma                                  0.200     0.800
## Ulcerative colitis                                            0.000     0.600
## Asthma                                                        0.170     0.000
## Autism spectrum disorder                                      0.400     0.600
## Breast cancer                                                 0.180     0.000
## Colorectal cancer                                             0.210     0.260
## Esophageal adenocarcinoma                                     0.000     0.330
## Hypertension                                                  0.100     0.300
## Phenylketonuria                                               0.120     0.380
## Smoking behavior                                              0.270     0.360
## Smoking status measurement                                    0.290     0.000
## Streptococcus pneumoniae                                      0.000     0.400
## Traditional Chinese medicine type                             0.120     0.380
## Transplant outcome measurement                                0.000     0.290
## Treatment                                                     0.100     0.300
## Alzheimer's disease                                           0.180     0.150
## Atopic eczema                                                 0.220     0.110
## Breastfeeding duration                                        0.100     0.000
## Cesarean section                                              0.200     0.130
## Colorectal adenoma                                            0.071     0.140
## Constipation                                                  0.240     0.290
## Endometrial cancer                                            0.170     0.330
## Health study participation                                    0.034     0.000
## Inflammatory bowel disease                                    0.400     0.600
## Irritable bowel syndrome                                      0.170     0.220
## Lung cancer                                                   0.000     0.091
## Obesity                                                       0.240     0.210
## Obsessive-compulsive disorder                                 0.000     0.200
## Ovarian cancer                                                0.032     0.000
## Psoriasis                                                     0.000     0.110
## Rheumatoid arthritis                                          0.380     0.500
## Sampling site                                                 0.200     0.400
## Socioeconomic status                                          0.400     0.200
## Transport                                                     0.170     0.330
## Type II diabetes mellitus                                     0.130     0.200
## Urinary tract infection                                       0.000     0.140
## Abnormal stool composition                                    0.000     0.000
## Chlamydia trachomatis                                         0.200     0.200
## Crohn's disease                                               0.330     0.330
## Diet measurement                                              0.000     0.000
## Ethnic group                                                  0.400     0.400
## Functional abnormality of the gastrointestinal tract          0.000     0.000
## HIV mother to child transmission                              0.000     0.000
## Male homosexuality                                            0.000     0.000
## Multiple sclerosis                                            0.000     0.000
## Papillary thyroid carcinoma                                   0.000     0.000
## Physical activity                                             0.400     0.400
## Psoriasis vulgaris                                            0.000     0.000
## Sample collection protocol                                    0.000     0.000
## Stroke                                                        0.095     0.095
## Viral load                                                    0.000     0.000
##                                                           unchanged
## Treatment outcome measurement                                 0.610
## Diet                                                          0.570
## Helminthiasis                                                 0.000
## HIV infection                                                 0.360
## COVID-19                                                      0.450
## Pulmonary tuberculosis                                        0.077
## Chronic constipation                                          0.330
## Parkinson's disease                                           0.390
## Phenotype                                                     0.520
## Diarrhea                                                      0.310
## Head and neck squamous cell carcinoma                         0.360
## Polycystic ovary syndrome                                     0.120
## Increased intestinal transit time                             0.000
## Response to allogeneic hematopoietic stem cell transplant     0.000
## Alcohol drinking                                              0.000
## Antimicrobial agent                                           0.530
## Human immunodeficiency virus                                  0.220
## Human papilloma virus infection                               0.570
## Response to diet                                              0.480
## Acute lymphoblastic leukemia                                  0.000
## Age                                                           0.140
## Air pollution                                                 0.300
## Cervical glandular intraepithelial neoplasia                  0.330
## Dry eye syndrome                                              0.430
## Endometriosis                                                 0.670
## Epilepsy                                                      0.200
## Periodontitis                                                 0.500
## Response to transplant                                        0.570
## Schizophrenia                                                 0.570
## Vesicle membrane                                              0.000
## Atopic asthma                                                 0.580
## Delivery method                                               0.170
## Food allergy                                                  0.750
## Gastric cancer                                                0.520
## Gestational diabetes                                          0.740
## Hypertrophy                                                   0.400
## Iron biomarker measurement                                    0.170
## Oral squamous cell carcinoma                                  0.000
## Ulcerative colitis                                            0.400
## Asthma                                                        0.830
## Autism spectrum disorder                                      0.000
## Breast cancer                                                 0.820
## Colorectal cancer                                             0.540
## Esophageal adenocarcinoma                                     0.670
## Hypertension                                                  0.600
## Phenylketonuria                                               0.500
## Smoking behavior                                              0.360
## Smoking status measurement                                    0.710
## Streptococcus pneumoniae                                      0.600
## Traditional Chinese medicine type                             0.500
## Transplant outcome measurement                                0.710
## Treatment                                                     0.600
## Alzheimer's disease                                           0.680
## Atopic eczema                                                 0.670
## Breastfeeding duration                                        0.900
## Cesarean section                                              0.670
## Colorectal adenoma                                            0.790
## Constipation                                                  0.470
## Endometrial cancer                                            0.500
## Health study participation                                    0.970
## Inflammatory bowel disease                                    0.000
## Irritable bowel syndrome                                      0.610
## Lung cancer                                                   0.910
## Obesity                                                       0.560
## Obsessive-compulsive disorder                                 0.800
## Ovarian cancer                                                0.970
## Psoriasis                                                     0.890
## Rheumatoid arthritis                                          0.120
## Sampling site                                                 0.400
## Socioeconomic status                                          0.400
## Transport                                                     0.500
## Type II diabetes mellitus                                     0.670
## Urinary tract infection                                       0.860
## Abnormal stool composition                                    1.000
## Chlamydia trachomatis                                         0.600
## Crohn's disease                                               0.330
## Diet measurement                                              1.000
## Ethnic group                                                  0.200
## Functional abnormality of the gastrointestinal tract          1.000
## HIV mother to child transmission                              1.000
## Male homosexuality                                            1.000
## Multiple sclerosis                                            1.000
## Papillary thyroid carcinoma                                   1.000
## Physical activity                                             0.200
## Psoriasis vulgaris                                            1.000
## Sample collection protocol                                    1.000
## Stroke                                                        0.810
## Viral load                                                    1.000

Body sites with consistently increased or decreased alpha diversity (counts per body site first, followed by the corresponding row-wise proportions):

tabDiv(exps, "Shannon", "Body site")

##                                increased decreased unchanged
## Feces                                294       437      1189
## Vagina                                16         6        27
## Posterior fornix of vagina             9         0         7
## Skin of body                           7        15         8
## Uterine cervix                         9         1        20
## Uterine cervix,Vaginal fluid           9         1         0
## Buccal epithelium                      0         7         0
## Saliva                                36        43       122
## Subgingival dental plaque              9         3        20
## Buccal mucosa                          5         0         2
## Meconium                               5         0        10
## Space surrounding organism             2         7        13
## Sputum                                 6        11         8
## Stomach                                5        10         5
## Tongue                                 0         5        12
## Axilla skin                            5         1        11
## Tear film                              0         4         1
## Throat                                 0         4        11
## Caecum                                 1         4        22
## Cecum mucosa                           1         4         6
## Colorectal mucosa                      0         3         8
## Dental plaque                          0         3         3
## Duodenum                               0         3         6
## Nasopharynx                            3         6        32
## Rectum                                 0         3        12
## Skin of forearm                        3         0         3
## Bile                                   2         0         3
## Brachialis muscle                      0         2         3
## Conjunctiva                            1         3         6
## Conjunctival sac                       1         3         1
## Esophagus                              0         2         4
## Forelimb skin                          2         0         4
## Lung                                   2         4         7
## Mouth                                  8         6        28
## Oral cavity                            7         5         7
## Oropharynx                             1         3         5
## Rumen                                  2         0         4
## Supragingival dental plaque            1         3         1
## Thyroid gland                          2         0        10
## Urine                                  3         1        16
## Uterus                                 3         1        11
## Blood                                  0         1         6
## Breast                                 3         4         8
## Breast,Milk                            1         0         4
## Bulbar conjunctiva                     3         2         5
## Colon                                  3         2        19
## Ileum                                  1         0        11
## Nasal cavity                           0         1         5
## Small intestine                        3         4         1
## Vagina,Uterine cervix                  3         2         7
## Vaginal fluid                          1         0         8
## Bronchus                               0         0         6
## Endothelium of trachea                 3         3         0
## Internal cheek pouch                   0         0        11
## Intestine                              1         1        16
## Jejunum                                1         1         8
## Milk                                   0         0         9
## Ovary                                  0         0         7
## Peritoneal fluid                       0         0         6
## Posterior wall of oropharynx           2         2         1
## Skin of abdomen                        0         0         5
## Surface of tongue                      2         2         3
## Ventral side of post-anal tail         0         0         6

tabDiv(exps, "Shannon", "Body site", perc = TRUE)

##                                increased decreased unchanged
## Feces                              0.150     0.230      0.62
## Vagina                             0.330     0.120      0.55
## Posterior fornix of vagina         0.560     0.000      0.44
## Skin of body                       0.230     0.500      0.27
## Uterine cervix                     0.300     0.033      0.67
## Uterine cervix,Vaginal fluid       0.900     0.100      0.00
## Buccal epithelium                  0.000     1.000      0.00
## Saliva                             0.180     0.210      0.61
## Subgingival dental plaque          0.280     0.094      0.62
## Buccal mucosa                      0.710     0.000      0.29
## Meconium                           0.330     0.000      0.67
## Space surrounding organism         0.091     0.320      0.59
## Sputum                             0.240     0.440      0.32
## Stomach                            0.250     0.500      0.25
## Tongue                             0.000     0.290      0.71
## Axilla skin                        0.290     0.059      0.65
## Tear film                          0.000     0.800      0.20
## Throat                             0.000     0.270      0.73
## Caecum                             0.037     0.150      0.81
## Cecum mucosa                       0.091     0.360      0.55
## Colorectal mucosa                  0.000     0.270      0.73
## Dental plaque                      0.000     0.500      0.50
## Duodenum                           0.000     0.330      0.67
## Nasopharynx                        0.073     0.150      0.78
## Rectum                             0.000     0.200      0.80
## Skin of forearm                    0.500     0.000      0.50
## Bile                               0.400     0.000      0.60
## Brachialis muscle                  0.000     0.400      0.60
## Conjunctiva                        0.100     0.300      0.60
## Conjunctival sac                   0.200     0.600      0.20
## Esophagus                          0.000     0.330      0.67
## Forelimb skin                      0.330     0.000      0.67
## Lung                               0.150     0.310      0.54
## Mouth                              0.190     0.140      0.67
## Oral cavity                        0.370     0.260      0.37
## Oropharynx                         0.110     0.330      0.56
## Rumen                              0.330     0.000      0.67
## Supragingival dental plaque        0.200     0.600      0.20
## Thyroid gland                      0.170     0.000      0.83
## Urine                              0.150     0.050      0.80
## Uterus                             0.200     0.067      0.73
## Blood                              0.000     0.140      0.86
## Breast                             0.200     0.270      0.53
## Breast,Milk                        0.200     0.000      0.80
## Bulbar conjunctiva                 0.300     0.200      0.50
## Colon                              0.120     0.083      0.79
## Ileum                              0.083     0.000      0.92
## Nasal cavity                       0.000     0.170      0.83
## Small intestine                    0.380     0.500      0.12
## Vagina,Uterine cervix              0.250     0.170      0.58
## Vaginal fluid                      0.110     0.000      0.89
## Bronchus                           0.000     0.000      1.00
## Endothelium of trachea             0.500     0.500      0.00
## Internal cheek pouch               0.000     0.000      1.00
## Intestine                          0.056     0.056      0.89
## Jejunum                            0.100     0.100      0.80
## Milk                               0.000     0.000      1.00
## Ovary                              0.000     0.000      1.00
## Peritoneal fluid                   0.000     0.000      1.00
## Posterior wall of oropharynx       0.400     0.400      0.20
## Skin of abdomen                    0.000     0.000      1.00
## Surface of tongue                  0.290     0.290      0.43
## Ventral side of post-anal tail     0.000     0.000      1.00

# Richness outcome (increased / decreased / unchanged) by body site, as counts
tabDiv(exps, "Richness", "Body site")

##                              increased decreased unchanged
## Feces                              177       255       619
## Mouth                               10         3         9
## Posterior fornix of vagina           8         1         2
## Uterine cervix                       8         1        11
## Oropharynx                           0         6         6
## Skin of body                         3         9         6
## Sputum                               0         6         2
## Rectum                               0         5         7
## Subgingival dental plaque            7         2        17
## Uterine cervix,Vaginal fluid         7         2         1
## Nasopharynx                          5         9        19
## Stomach                              4         8         3
## Throat                               1         5         5
## Cecum mucosa                         3         6         3
## Small intestine                      1         4         0
## Colon                                6         4        11
## Ear                                  2         0         3
## Esophagus                            0         2         4
## Saliva                              20        22        43
## Surface of tongue                    4         2         1
## Caecum                               2         3         1
## Ileum                                2         1         9
## Meconium                             2         3         7
## Milk                                 2         1         5
## Nasal cavity                         1         2        10
## Oral cavity                          4         5         1
## Urine                                3         2        13
## Vagina                               3         2        11
## Vagina,Uterine cervix                1         0        11
## Breast                               1         1         7
## Bronchus                             0         0         6
## Conjunctiva                          1         1         5
## Internal cheek pouch                 0         0         7
## Intestine                            0         0        15
## Ovary                                0         0         7
## Peritoneal fluid                     0         0         6
## Thyroid gland                        0         0        12
## Tongue                               2         2         7

# Same Richness table, reported as proportions (each row sums to ~1)
tabDiv(exps, "Richness", "Body site", perc = TRUE)

##                              increased decreased unchanged
## Feces                            0.170     0.240      0.59
## Mouth                            0.450     0.140      0.41
## Posterior fornix of vagina       0.730     0.091      0.18
## Uterine cervix                   0.400     0.050      0.55
## Oropharynx                       0.000     0.500      0.50
## Skin of body                     0.170     0.500      0.33
## Sputum                           0.000     0.750      0.25
## Rectum                           0.000     0.420      0.58
## Subgingival dental plaque        0.270     0.077      0.65
## Uterine cervix,Vaginal fluid     0.700     0.200      0.10
## Nasopharynx                      0.150     0.270      0.58
## Stomach                          0.270     0.530      0.20
## Throat                           0.091     0.450      0.45
## Cecum mucosa                     0.250     0.500      0.25
## Small intestine                  0.200     0.800      0.00
## Colon                            0.290     0.190      0.52
## Ear                              0.400     0.000      0.60
## Esophagus                        0.000     0.330      0.67
## Saliva                           0.240     0.260      0.51
## Surface of tongue                0.570     0.290      0.14
## Caecum                           0.330     0.500      0.17
## Ileum                            0.170     0.083      0.75
## Meconium                         0.170     0.250      0.58
## Milk                             0.250     0.120      0.62
## Nasal cavity                     0.077     0.150      0.77
## Oral cavity                      0.400     0.500      0.10
## Urine                            0.170     0.110      0.72
## Vagina                           0.190     0.120      0.69
## Vagina,Uterine cervix            0.083     0.000      0.92
## Breast                           0.110     0.110      0.78
## Bronchus                         0.000     0.000      1.00
## Conjunctiva                      0.140     0.140      0.71
## Internal cheek pouch             0.000     0.000      1.00
## Intestine                        0.000     0.000      1.00
## Ovary                            0.000     0.000      1.00
## Peritoneal fluid                 0.000     0.000      1.00
## Thyroid gland                    0.000     0.000      1.00
## Tongue                           0.180     0.180      0.64

Signature stats

# Extract the microbe signatures from `dat`, with taxa given as MetaPhlAn-style names
sigs <- bugsigdbr::getSignatures(dat, tax.id.type = "metaphlan")

Unique microbes

Number of unique microbes contained in the signatures:

# Count the distinct taxa across all signatures, then print the count
nuniq <- length(unique(unlist(sigs)))
nuniq

## [1] 7909

Development of unique microbes captured over time:

# Plot how the number of unique microbes captured in `dat` develops over time
plotUniqueMicrobesOverTime(dat)

Microbe set size distribution

# Summary statistics of signature size (number of taxa per signature)
summary(lengths(sigs))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   5.000   8.287  10.000 470.000

# Histogram of signature sizes (number of taxa per signature), using 30 bins
gghistogram(lengths(sigs), bins = 30, ylab = "number of signatures",
    xlab = "signature size", fill = "#00AFBB", ggtheme = theme_bw())

# Number of signatures containing more than 4 taxa (i.e. at least 5)
sum(lengths(sigs) > 4)

## [1] 4345

Microbe co-occurrence

# Restrict the data to entries whose body site is exactly "Feces"
dat.feces <- subset(dat, `Body site` == "Feces")
# Genus-level microbe heatmap for fecal samples; the returned matrix is indexed
# by genus name on rows and columns further below.
# NOTE(review): presumably counts how often two genera are reported with the
# same direction of abundance change -- confirm against the microbeHeatmap docs
cooc.mat <- microbeHeatmap(dat.feces, tax.level = "genus", anno = "genus")

## Loading required namespace: safe

# Same heatmap with antagonistic = TRUE.
# NOTE(review): presumably counts genus pairs reported with opposite directions
# of abundance change -- confirm against the microbeHeatmap docs
antag.mat <- microbeHeatmap(dat.feces, tax.level = "genus", anno = "genus", antagonistic = TRUE)

Get the top 20 genera most frequently reported as differentially abundant:

# Genus-level signatures for fecal samples, with taxa given as plain taxon names.
# NOTE(review): exact.tax.level = FALSE presumably also rolls more specific taxa
# up to their genus -- confirm against the bugsigdbr docs
sigs.feces <- getSignatures(dat.feces, tax.id.type = "taxname",
                            tax.level = "genus", exact.tax.level = FALSE)
# Frequency of each genus across all fecal signatures, most frequent first.
# head() rather than [1:20]: no NA padding if fewer than 20 genera are present
top20 <- head(sort(table(unlist(sigs.feces)), decreasing = TRUE), 20)
top20

## 
##      Bacteroides  Bifidobacterium Faecalibacterium      Clostridium 
##              910              605              604              537 
##          Blautia     Ruminococcus    Streptococcus        Roseburia 
##              530              516              479              474 
##  Parabacteroides       Prevotella    Lactobacillus        Alistipes 
##              470              457              436              407 
##      Akkermansia            Dorea      Coprococcus      Eubacterium 
##              354              333              331              308 
##      Veillonella      Lachnospira     Enterococcus      Escherichia 
##              283              280              279              264

Subset heatmaps to the top 20 genera most frequently reported as differentially abundant:

# Sanity check: every top-20 genus must be a row of the co-occurrence matrix
all(names(top20) %in% rownames(cooc.mat))

## [1] TRUE

# Keep only the top-20 genera, in top20 order, on both rows and columns
cooc.mat <- cooc.mat[names(top20), names(top20)]
# Same sanity check for the matrix computed with antagonistic = TRUE
all(names(top20) %in% rownames(antag.mat))

## [1] TRUE

# Keep only the top-20 genera, in top20 order, on both rows and columns
antag.mat <- antag.mat[names(top20), names(top20)]

Distinguish by direction of abundance change (increased / decreased):

# increased
# Fecal signatures reported with increased abundance in Group 1
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "increased")
sigs.feces.up <- getSignatures(sub.dat.feces, tax.id.type = "taxname",
                               tax.level = "genus", exact.tax.level = FALSE)
# Tabulate with the top-20 genera as factor levels: the result is in top20
# order, and a genus that never occurs in this direction gets a count of 0
# instead of triggering a "subscript out of bounds" error
top20.up <- table(factor(unlist(sigs.feces.up), levels = names(top20)))
top20.up

## 
##      Bacteroides  Bifidobacterium Faecalibacterium      Clostridium 
##              432              293              219              275 
##          Blautia     Ruminococcus    Streptococcus        Roseburia 
##              238              211              309              155 
##  Parabacteroides       Prevotella    Lactobacillus        Alistipes 
##              248              217              271              172 
##      Akkermansia            Dorea      Coprococcus      Eubacterium 
##              215              137              119              131 
##      Veillonella      Lachnospira     Enterococcus      Escherichia 
##              177               96              201              182

# decreased
# Fecal signatures reported with decreased abundance in Group 1
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "decreased")
sigs.feces.down <- getSignatures(sub.dat.feces, tax.id.type = "taxname",
                                 tax.level = "genus", exact.tax.level = FALSE)
# Tabulate with the top-20 genera as factor levels: the result is in top20
# order, and a genus that never occurs in this direction gets a count of 0
# instead of triggering a "subscript out of bounds" error
top20.down <- table(factor(unlist(sigs.feces.down), levels = names(top20)))
top20.down

## 
##      Bacteroides  Bifidobacterium Faecalibacterium      Clostridium 
##              471              304              379              257 
##          Blautia     Ruminococcus    Streptococcus        Roseburia 
##              286              300              162              313 
##  Parabacteroides       Prevotella    Lactobacillus        Alistipes 
##              216              237              163              229 
##      Akkermansia            Dorea      Coprococcus      Eubacterium 
##              135              190              206              171 
##      Veillonella      Lachnospira     Enterococcus      Escherichia 
##              103              178               76               76

Plot the heatmap

# annotation
# Two-column count matrix with one row per top-20 genus
# (column 1: increased, column 2: decreased); `ncol` is spelled out
# instead of relying on partial matching of `nc`
mat <- matrix(cbind(top20.up, top20.down), ncol = 2)
# Barplot annotation built from the two count columns, one colour per column
bp <- ComplexHeatmap::anno_barplot(mat, gp = gpar(fill = c("#D55E00", "#0072B2"),
                                                  col = c("#D55E00", "#0072B2")),
                                   height = unit(2, "cm"))
# Wrap the barplot as a named annotation so it can be stacked with the heatmaps
banno <- ComplexHeatmap::HeatmapAnnotation(`Abundance in Group 1` = bp)

# Manually built legend for the direction of abundance change:
# one grid cell per label, coloured "#D55E00" (increased) and "#0072B2" (decreased)
lgd_list <- list(
    Legend(labels = c("increased", "decreased"), 
           title = "Abundance in Group 1", 
           type = "grid",
           legend_gp = gpar(col = c("#D55E00", "#0072B2"), fill = c("#D55E00", "#0072B2"))))
                                            
# same direction
# lcm <- sweep(cooc.mat, 2, matrixStats::colMaxs(cooc.mat), FUN = "/")
# The diagonal (self co-occurrence) has to be dampened; otherwise it would
# take up a large fraction of the colour scale. Each diagonal entry is capped
# at 1.4 times the second-largest value of its column.
sec <- apply(cooc.mat, 2, function(x) sort(x, decreasing = TRUE)[2])
cooc.mat2 <- cooc.mat
# Vectorised over the diagonal; avoids looping over 1:ncol(), which would
# iterate over c(1, 0) for a matrix without columns
diag(cooc.mat2) <- pmin(diag(cooc.mat2), 1.4 * sec)
# Scale every column by its (dampened) maximum so values lie in [0, 1]
lcm <- sweep(cooc.mat2, 2, matrixStats::colMaxs(cooc.mat2), FUN = "/")
# Colour scale from light grey (0) to red (1)
col <- circlize::colorRamp2(c(0, 1), c("#EEEEEE", "red"))
# Columns keep the top20 order; rows are split into 3 k-means groups
ht1 <- ComplexHeatmap::Heatmap(lcm,
                               col = col,
                               name = "Relative frequency (top)",
                               cluster_columns = FALSE,
                               row_km = 3,
                               row_title = "same direction",
                               column_names_rot = 45,
                               row_names_gp = gpar(fontsize = 8),
                               column_names_gp = gpar(fontsize = 8))

# opposite direction
# Scale every column of the antagonistic matrix by its column maximum
acm <- sweep(antag.mat, 2, matrixStats::colMaxs(antag.mat), FUN = "/")
# Colour scale from light grey (0) to blue (1); overwrites the `col` used for ht1
col <- circlize::colorRamp2(c(0,1), c("#EEEEEE", "blue"))
# Columns are not clustered; rows are split via row_km = 3
ht2 <- ComplexHeatmap::Heatmap(acm,
                               col = col, 
                               name = "Relative frequency (bottom)",
                               cluster_columns = FALSE, 
                               row_title = "opposite direction", 
                               row_km = 3, 
                               column_names_rot = 45,
                               row_names_gp = gpar(fontsize = 8),
                               column_names_gp = gpar(fontsize = 8))

# phylum
# Genus-level fecal signatures again, this time with MetaPhlAn-style names
sfp <- bugsigdbr::getSignatures(dat.feces, tax.id.type = "metaphlan", 
                                tax.level = "genus", exact.tax.level = FALSE) 
# The 20 most frequent entries among the MetaPhlAn-style names
sfp20 <- sort(table(unlist(sfp)), decreasing = TRUE)[1:20]
# Phylum (as plain taxon name) for each of these 20 entries
uanno <- bugsigdbr::extractTaxLevel(names(sfp20),
                                    tax.id.type = "taxname",
                                    tax.level = "phylum",
                                    exact.tax.level = FALSE) 
# One colour index per distinct phylum
phyla.grid <- seq_along(unique(uanno))
# NOTE(review): `panno` is not referenced again in the code visible here
panno <- ComplexHeatmap::HeatmapAnnotation(phylum = uanno)

# One-row character matrix so the phyla can be drawn as a heatmap strip
uanno <- matrix(uanno, nrow = 1)
# NOTE(review): the columns are labelled with names(top20), which assumes that
# sfp20 lists the same genera in the same order as top20. The two rankings are
# computed separately (MetaPhlAn-style names vs. taxon names) -- confirm they
# always agree, otherwise phyla are attached to the wrong genera
colnames(uanno) <- names(top20)
# NOTE(review): only five colours are defined; more than five distinct phyla
# would index past the end of this palette
pcols <- c("#CC79A7", "#F0E442", "#009E73", "#56B4E9", "#E69F00")
uanno <- ComplexHeatmap::Heatmap(uanno, name = "Phylum",
                                 col = pcols[phyla.grid],
                                 cluster_columns = FALSE,
                                 column_names_rot = 45,
                                 column_names_gp = gpar(fontsize = 8))

# put everything together
# Stack the components with %v%: same-direction heatmap, barplot annotation,
# opposite-direction heatmap, phylum strip
ht_list <- ht1 %v% banno %v% ht2 %v% uanno
# Draw the stack, adding the manual legend and merging all legends
ComplexHeatmap::draw(ht_list, annotation_legend_list = lgd_list, merge_legend = TRUE)

# Add text to the "Abundance in Group 1" annotation: a rotated axis label
# and six asterisks.
# NOTE(review): the asterisk coordinates are hard-coded in cm and presumably
# mark individual bars -- they need re-tuning whenever the figure size or the
# order of the genera changes
decorate_annotation("Abundance in Group 1", {
    grid.text("# signatures", x = unit(-1, "cm"), rot = 90, just = "bottom", gp = gpar(fontsize = 8))
    grid.text("*", x = unit(2.45, "cm"), y = unit(1.2, "cm"))
    grid.text("*", x = unit(5.18, "cm"), y = unit(1, "cm"))
    grid.text("*", x = unit(6.55, "cm"), y = unit(0.95, "cm"))
    grid.text("*", x = unit(8.6, "cm"), y = unit(0.85, "cm"))
    grid.text("*", x = unit(10, "cm"), y = unit(0.7, "cm"))
    grid.text("*", x = unit(10.7, "cm"), y = unit(0.7, "cm"))
})

Signature similarity

Jaccard index

Inspect signature similarity for signatures from stomach samples based on Jaccard index:

# Restrict the data to entries whose body site is exactly "Stomach"
stomachsub <- subset(dat, `Body site` == "Stomach")
# Extract the signatures with the default getSignatures() settings
sigsub <- bugsigdbr::getSignatures(stomachsub)
# Pairwise Jaccard similarity between the stomach signatures
pair.jsim <- calcJaccardSimilarity(sigsub)

Create a dendrogram of Jaccard dissimilarities (1.0 means no overlap between two signatures, 0.0 means the signatures are identical).

# Convert similarity to dissimilarity (1 - Jaccard index) as a dist object
jdist <- as.dist(1 - pair.jsim)
# Hierarchical clustering with the hclust() default settings, drawn as a dendrogram
plot(hclust(jdist))

June 30, 2025

Setup

Reading data

Curation output

Publication date of the curated papers:

Progress over time:

Study stats

Study design

Experiment stats

Subjects

Lab analysis

Statistical analysis

Alpha diversity

Signature stats

Unique microbes

Microbe set size distribution

Microbe co-occurrence

Signature similarity

Jaccard index