Skip to contents

Reading data

Get bulk export from bugsigdb.org:

# Import the "devel" version of the BugSigDB bulk export as a table.
# NOTE(review): cache = FALSE presumably forces a fresh download instead of
# reusing a locally cached copy -- confirm against the bugsigdbr documentation.
full.dat <- bugsigdbr::importBugSigDB(version = "devel", cache = FALSE)
# Number of rows and columns of the imported table
dim(full.dat)
## [1] 8972   50
# Names of all columns available in the export
colnames(full.dat)
##  [1] "BSDB ID"                    "Study"                     
##  [3] "Study design"               "PMID"                      
##  [5] "DOI"                        "URL"                       
##  [7] "Authors list"               "Title"                     
##  [9] "Journal"                    "Year"                      
## [11] "Keywords"                   "Experiment"                
## [13] "Location of subjects"       "Host species"              
## [15] "Body site"                  "UBERON ID"                 
## [17] "Condition"                  "EFO ID"                    
## [19] "Group 0 name"               "Group 1 name"              
## [21] "Group 1 definition"         "Group 0 sample size"       
## [23] "Group 1 sample size"        "Antibiotics exclusion"     
## [25] "Sequencing type"            "16S variable region"       
## [27] "Sequencing platform"        "Statistical test"          
## [29] "Significance threshold"     "MHT correction"            
## [31] "LDA Score above"            "Matched on"                
## [33] "Confounders controlled for" "Pielou"                    
## [35] "Shannon"                    "Chao1"                     
## [37] "Simpson"                    "Inverse Simpson"           
## [39] "Richness"                   "Signature page name"       
## [41] "Source"                     "Curated date"              
## [43] "Curator"                    "Revision editor"           
## [45] "Description"                "Abundance in Group 1"      
## [47] "MetaPhlAn taxon names"      "NCBI Taxonomy IDs"         
## [49] "State"                      "Reviewer"

Stripping ill-formed entries:

# Flag rows with a non-missing experiment and a non-missing study entry,
# then keep only the rows for which both are present
is.exp <- !is.na(full.dat[, "Experiment"])
is.study <- !is.na(full.dat[, "Study"])
full.dat <- full.dat[which(is.exp & is.study), ]

Curation output

Number of papers and signatures curated:

# Distinct PMIDs: the number of curated papers
pmids <- unique(full.dat[["PMID"]])
length(pmids)
## [1] 1441
# Number of rows of the table: the number of curated signatures
dim(full.dat)[1]
## [1] 8972

Publication date of the curated papers:

# Publication years are looked up by PMID, so drop missing PMIDs first
pmids <- pmids[which(!is.na(pmids))]
pubyear <- pmid2pubyear(pmids)
head(cbind(pmids, pubyear))
# Papers per publication year, ordered numerically by year
tab <- table(pubyear)
tab <- tab[order(as.integer(names(tab)))]
df <- data.frame(year = names(tab), papers = as.vector(tab))
ggbarplot(
  df,
  x = "year",
  y = "papers",
  fill = "steelblue",
  label = TRUE,
  ggtheme = theme_bw()
)

Stripping empty signatures:

# Keep a signature only if it has at least one taxon name and at least one
# NCBI taxonomy ID.
# NOTE(review): this assumes both columns are list-columns (one vector per
# row); for plain character columns lengths() is always 1 and no row would
# ever be dropped -- confirm against the import format.
ind2 <- lengths(full.dat[["NCBI Taxonomy IDs"]]) >= 1L
ind1 <- lengths(full.dat[["MetaPhlAn taxon names"]]) >= 1L
dat <- full.dat[which(ind1 & ind2), ]
dim(dat)[1]
## [1] 8972

Papers containing only empty UP and DOWN signatures (under curation?):

# PMIDs present before, but absent after, removing empty signatures
# (setdiff() de-duplicates its arguments itself)
setdiff(pmids, dat[["PMID"]])
## numeric(0)

Progress over time:

# Parse the day-month-year curation dates and store them back as text
dat[["Curated date"]] <- as.character(lubridate::dmy(dat[["Curated date"]]))
# Curation progress over time
plotProgressOverTime(dat)

# Same plot with diff = TRUE
# (presumably per-period increments rather than totals -- TODO confirm)
plotProgressOverTime(dat, diff = TRUE)

Stratified by curator:

# Per-curator summary computed by the project helper stratifyByCurator()
# (presumably contribution counts per curator -- TODO confirm in its definition)
npc <- stratifyByCurator(dat)
# Plot curator statistics from the signature table and the per-curator summary
plotCuratorStats(dat, npc)

Number of complete and revised signatures (output turned off because it has become far too long):

# Signature counts per review state
table(dat[, "State"])
# Signature counts per revision editor
table(dat[["Revision editor"]])

Study stats

Study design

# Group study identifiers by study design, de-duplicate within each design,
# and report the number of distinct studies per design in ascending order
spl <- split(dat[, "Study"], dat[, "Study design"])
sds <- lapply(spl, unique)
sort(lengths(sds))
##                                       case-control,prospective cohort 
##                                                                     1 
##    cross-sectional observational, not case-control,prospective cohort 
##                                                                     2 
##                                   laboratory experiment,meta-analysis 
##                                                                     2 
##           prospective cohort,time series / longitudinal observational 
##                                                                     2 
##                 case-control,time series / longitudinal observational 
##                                                                     3 
## cross-sectional observational, not case-control,laboratory experiment 
##                                                                     4 
##                                            case-control,meta-analysis 
##                                                                     5 
##        laboratory experiment,time series / longitudinal observational 
##                                                                     5 
##                                    case-control,laboratory experiment 
##                                                                     6 
##                                                         meta-analysis 
##                                                                    18 
##                                           randomized controlled trial 
##                                                                    66 
##                                                    prospective cohort 
##                                                                   113 
##                              time series / longitudinal observational 
##                                                                   130 
##                                                 laboratory experiment 
##                                                                   156 
##                       cross-sectional observational, not case-control 
##                                                                   387 
##                                                          case-control 
##                                                                   555

Experiment stats

Columns of the full dataset that describe experiments:

# Columns that identify an experiment
exp.cols <- c("Study", "Experiment")

# Columns describing the subjects.
# Order matters: positions 1:5 and 6:7 are indexed separately further below.
sub.cols <- c(
  "Host species",
  "Location of subjects",
  "Body site",
  "Condition",
  "Antibiotics exclusion",
  "Group 0 sample size",
  "Group 1 sample size"
)

# Columns describing the lab analysis
lab.cols <- c(
  "Sequencing type",
  "16S variable region",
  "Sequencing platform"
)

# Columns describing the statistical analysis
stat.cols <- c(
  "Statistical test",
  "MHT correction",
  "Significance threshold"
)

# Columns reporting alpha diversity measures
div.cols <- c(
  "Pielou",
  "Shannon",
  "Chao1",
  "Simpson",
  "Inverse Simpson",
  "Richness"
)

Restrict dataset to experiment information:

# One row per distinct combination of the experiment-level columns
exps <- unique(
  dat[, c(exp.cols, sub.cols, lab.cols, stat.cols, div.cols)]
)

Subjects

Number of experiments for the top 10 categories for each subjects column:

# Tabulate the first five subject columns with tabCol (n = 10),
# returning a list named by column
sub.tab <- setNames(
  lapply(sub.cols[1:5], tabCol, df = exps, n = 10),
  sub.cols[1:5]
)
sub.tab
## $`Host species`
## 
##           Homo sapiens           Mus musculus      Rattus norvegicus 
##                   4350                    585                    112 
##  Sus scrofa domesticus Canis lupus familiaris             Ovis aries 
##                     88                     73                     24 
##          Not specified             Bos taurus           Capra hircus 
##                     23                     18                     14 
##          Gallus gallus 
##                     14 
## 
## $`Location of subjects`
## 
##                    China United States of America                  Germany 
##                     1799                      947                      173 
##                    Japan                  Denmark                    Italy 
##                      166                      150                      129 
##                Australia              Netherlands              South Korea 
##                      112                      112                      101 
##                    Spain 
##                       96 
## 
## $`Body site`
## 
##                     Feces                    Saliva                    Vagina 
##                      3477                       292                       103 
##                     Mouth               Nasopharynx Subgingival dental plaque 
##                        67                        55                        49 
##              Skin of body            Uterine cervix                     Colon 
##                        47                        47                        46 
##                    Caecum 
##                        41 
## 
## $Condition
## 
##           Parkinson's disease                          Diet 
##                           197                           149 
##                       Obesity Treatment outcome measurement 
##                           141                           136 
##             Colorectal cancer                      COVID-19 
##                           134                           116 
##                 Atopic eczema           Antimicrobial agent 
##                            90                            85 
##           Alzheimer's disease        Response to transplant 
##                            77                            73 
## 
## $`Antibiotics exclusion`
## 
##                 3 months                  1 month                 2 months 
##                      589                      497                      241 
##                 6 months                  2 weeks                 3 Months 
##                      180                      130                       44 
##                 6 Months                  1 Month currently on antibiotics 
##                       40                       28                       28 
##                3 Months. 
##                       25

Proportions instead:

# Same tabulation of the first five subject columns, with perc = TRUE
sub.tab <- setNames(
  lapply(sub.cols[1:5], tabCol, df = exps, n = 10, perc = TRUE),
  sub.cols[1:5]
)
sub.tab
## $`Host species`
## 
##           Homo sapiens           Mus musculus      Rattus norvegicus 
##                0.80000                0.10800                0.02060 
##  Sus scrofa domesticus Canis lupus familiaris             Ovis aries 
##                0.01620                0.01340                0.00441 
##          Not specified             Bos taurus           Capra hircus 
##                0.00423                0.00331                0.00257 
##          Gallus gallus 
##                0.00257 
## 
## $`Location of subjects`
## 
##                    China United States of America                  Germany 
##                   0.3310                   0.1740                   0.0318 
##                    Japan                  Denmark                    Italy 
##                   0.0305                   0.0276                   0.0237 
##                Australia              Netherlands              South Korea 
##                   0.0206                   0.0206                   0.0186 
##                    Spain 
##                   0.0177 
## 
## $`Body site`
## 
##                     Feces                    Saliva                    Vagina 
##                   0.63900                   0.05370                   0.01890 
##                     Mouth               Nasopharynx Subgingival dental plaque 
##                   0.01230                   0.01010                   0.00901 
##              Skin of body            Uterine cervix                     Colon 
##                   0.00864                   0.00864                   0.00846 
##                    Caecum 
##                   0.00754 
## 
## $Condition
## 
##           Parkinson's disease                          Diet 
##                        0.0376                        0.0284 
##                       Obesity Treatment outcome measurement 
##                        0.0269                        0.0260 
##             Colorectal cancer                      COVID-19 
##                        0.0256                        0.0221 
##                 Atopic eczema           Antimicrobial agent 
##                        0.0172                        0.0162 
##           Alzheimer's disease        Response to transplant 
##                        0.0147                        0.0139 
## 
## $`Antibiotics exclusion`
## 
##                 3 months                  1 month                 2 months 
##                   0.2470                   0.2090                   0.1010 
##                 6 months                  2 weeks                 3 Months 
##                   0.0756                   0.0546                   0.0185 
##                 6 Months                  1 Month currently on antibiotics 
##                   0.0168                   0.0118                   0.0118 
##                3 Months. 
##                   0.0105

Sample size:

# Summary statistics of the two sample size columns, one column each.
# apply() coerces a data frame to a matrix; the coercion is made explicit here.
ssize <- apply(as.matrix(exps[, sub.cols[6:7]]), MARGIN = 2, FUN = summary)
ssize
##         Group 0 sample size Group 1 sample size
## Min.                  0.000             1.00000
## 1st Qu.              12.000            10.00000
## Median               24.000            22.00000
## Mean                559.801            68.17235
## 3rd Qu.              50.000            43.00000
## Max.             308633.000         10413.00000
## NA's                775.000           773.00000

Lab analysis

Number of experiments for the top 10 categories for each lab analysis column:

# Tabulate each lab analysis column with tabCol (n = 10),
# returning a list named by column
lab.tab <- setNames(
  lapply(lab.cols, tabCol, df = exps, n = 10),
  lab.cols
)
lab.tab
## $`Sequencing type`
## 
##        16S        WMS        PCR ITS / ITS2        18S 
##       4297        787         68         25          5 
## 
## $`16S variable region`
## 
##   34    4   12  123   45  345    3   56  678   23 
## 1730 1205  279  214  153  140   61   45   31   21 
## 
## $`Sequencing platform`
## 
##                    Illumina                    Roche454 
##                        4321                         328 
##                 Ion Torrent                     RT-qPCR 
##                         200                         105 
##                 MGISEQ-2000                   PacBio RS 
##                          37                          24 
##       BGISEQ-500 Sequencing           Mass spectrometry 
##                          18                          18 
## Human Intestinal Tract Chip           Illumina,Roche454 
##                          16                          11

Proportions instead:

# Same tabulation of the lab analysis columns, with perc = TRUE
lab.tab <- setNames(
  lapply(lab.cols, tabCol, df = exps, n = 10, perc = TRUE),
  lab.cols
)
lab.tab
## $`Sequencing type`
## 
##        16S        WMS        PCR ITS / ITS2        18S 
##   0.829000   0.152000   0.013100   0.004820   0.000965 
## 
## $`16S variable region`
## 
##      34       4      12     123      45     345       3      56     678      23 
## 0.43500 0.30300 0.07010 0.05380 0.03840 0.03520 0.01530 0.01130 0.00779 0.00528 
## 
## $`Sequencing platform`
## 
##                    Illumina                    Roche454 
##                     0.84400                     0.06410 
##                 Ion Torrent                     RT-qPCR 
##                     0.03910                     0.02050 
##                 MGISEQ-2000                   PacBio RS 
##                     0.00723                     0.00469 
##       BGISEQ-500 Sequencing           Mass spectrometry 
##                     0.00352                     0.00352 
## Human Intestinal Tract Chip           Illumina,Roche454 
##                     0.00313                     0.00215

Statistical analysis

Number of experiments for the top 10 categories for each statistical analysis column:

# Tabulate each statistical analysis column with tabCol (n = 10),
# returning a list named by column
stat.tab <- setNames(
  lapply(stat.cols, tabCol, df = exps, n = 10),
  stat.cols
)
stat.tab
## $`Statistical test`
## 
##                   LEfSe Mann-Whitney (Wilcoxon)                  DESeq2 
##                    1612                     832                     509 
##         Kruskall-Wallis       Linear Regression                   ANOVA 
##                     282                     253                     238 
##                  T-Test                MaAsLin2                   ANCOM 
##                     202                     182                     167 
##    Spearman Correlation 
##                      90 
## 
## $`MHT correction`
## 
##  TRUE FALSE 
##  2693  2080 
## 
## $`Significance threshold`
## 
##  0.05   0.1  0.01 0.001  0.25   0.2  0.15 0.005     2 1e-04 
##  4520   332   108    34    34    25    19    15    15     6

Proportions instead:

# Same tabulation of the statistical analysis columns, with perc = TRUE
stat.tab <- setNames(
  lapply(stat.cols, tabCol, df = exps, n = 10, perc = TRUE),
  stat.cols
)
stat.tab
## $`Statistical test`
## 
##                   LEfSe Mann-Whitney (Wilcoxon)                  DESeq2 
##                  0.3130                  0.1620                  0.0989 
##         Kruskall-Wallis       Linear Regression                   ANOVA 
##                  0.0548                  0.0492                  0.0462 
##                  T-Test                MaAsLin2                   ANCOM 
##                  0.0393                  0.0354                  0.0325 
##    Spearman Correlation 
##                  0.0175 
## 
## $`MHT correction`
## 
##  TRUE FALSE 
## 0.564 0.436 
## 
## $`Significance threshold`
## 
##    0.05     0.1    0.01   0.001    0.25     0.2    0.15   0.005       2   1e-04 
## 0.88100 0.06470 0.02100 0.00662 0.00662 0.00487 0.00370 0.00292 0.00292 0.00117

Alpha diversity

Overall distribution:

# Count the values of each alpha diversity column, one column per measure.
# apply() coerces a data frame to a matrix; the coercion is made explicit here.
apply(as.matrix(exps[, div.cols]), MARGIN = 2, FUN = table)
##           Pielou Shannon Chao1 Simpson Inverse Simpson Richness
## decreased     46     701   450     207              62      421
## increased     37     546   312     166              34      337
## unchanged    172    1924   851     682             204      964

Correspondence of Shannon diversity and Richness:

# Cross-tabulate Shannon (rows) against Richness (columns)
table(exps[["Shannon"]], exps[["Richness"]])
##            
##             decreased increased unchanged
##   decreased       221        13        51
##   increased         9       167        52
##   unchanged        95        87       781

Conditions with consistently increased or decreased alpha diversity:

# Tabulate the Shannon outcome (increased / decreased / unchanged) per
# condition using the project helper tabDiv().
# NOTE(review): the row ordering is determined inside tabDiv -- confirm there.
tabDiv(exps, "Shannon", "Condition")
##                                                           increased decreased
## Oxalate measurement                                               0        17
## COVID-19                                                          9        24
## Obesity                                                           3        16
## Polycystic ovary syndrome                                         0        13
## HIV infection                                                     1        12
## Smoking behaviour measurement                                     2        13
## Clostridium difficile infection                                  10         0
## Dry eye syndrome                                                  1        11
## Systemic inflammatory response syndrome                           5        15
## Treatment outcome measurement                                    11        21
## Chronic constipation                                              9         0
## Human papilloma virus infection                                  10         1
## Alzheimer's disease                                               2        10
## Gastric cancer                                                    6        14
## Ulcerative colitis                                                1         9
## Age                                                               5        12
## Aging                                                             0         7
## Balanced reciprocal translocation                                 7         0
## Atopic eczema                                                     5        11
## Autism spectrum disorder                                          7         1
## Cesarean section                                                  6         0
## Epilepsy                                                          6         0
## Lung cancer                                                       2         8
## Parkinson's disease                                              20        14
## Response to allogeneic hematopoietic stem cell transplant         0         6
## Response to transplant                                            9        15
## Urinary tract infection                                           0         6
## Cervical cancer                                                   5         0
## Diet                                                             14        19
## Helminthiasis                                                     5         0
## Population                                                        2         7
## Spontaneous preterm birth                                        12         7
## Acute lymphoblastic leukemia                                      0         4
## Acute pancreatitis                                                0         4
## Colitis                                                           4         0
## Colorectal cancer                                                10        14
## Ethnic group                                                      3         7
## Food allergy                                                      6         2
## Human immunodeficiency virus                                      0         4
## Hypertension                                                      7         3
## Periodontitis                                                     5         1
## Pregnancy                                                         4         0
## Response to antibiotic                                            0         4
## Alcohol drinking                                                  3         0
## Atopic asthma                                                     4         1
## Birth measurement                                                 3         0
## Constipation                                                      6         3
## Delivery method                                                   1         4
## Extraction protocol                                              23        26
## Male homosexuality                                                3         0
## Oral lichen planus                                                3         0
## SARS-CoV-2-related disease                                        0         3
## Schizophrenia                                                     1         4
## Type II diabetes mellitus                                         2         5
## Age at assessment                                                 3         1
## Antimicrobial agent                                               8        10
## Breed                                                             0         2
## Cervical glandular intraepithelial neoplasia                      2         0
## Chronic kidney disease                                            2         4
## Cognitive impairment                                              1         3
## Crohn's disease                                                   2         4
## Depressive disorder                                               0         2
## Diarrhea                                                          6         4
## Eczema                                                            0         2
## Endometrial cancer                                                4         2
## Esophageal adenocarcinoma                                         0         2
## Iron biomarker measurement                                        1         3
## Milk allergic reaction                                            2         0
## Papillary thyroid carcinoma                                       2         0
## Phenylketonuria                                                   1         3
## Response to anti-tuberculosis drug                                8        10
## Response to antiviral drug                                        2         4
## Response to immunochemotherapy                                    3         1
## Sampling site                                                     3         1
## Smoking behavior                                                 10         8
## Squamous cell carcinoma                                           2         0
## Streptococcus pneumoniae                                          0         2
## Stroke                                                            2         0
## Acute respiratory failure                                         6         5
## Air pollution                                                     7         6
## Anxiety disorder                                                  0         1
## Breast cancer                                                     3         4
## Breastfeeding duration                                            2         3
## Chlamydia trachomatis                                             1         2
## Chronic fatigue syndrome                                          0         1
## Chronic hepatitis B virus infection                               0         1
## Chronic obstructive pulmonary disease                             3         2
## Diabetes mellitus                                                 0         1
## Endometriosis                                                     2         3
## Esophageal cancer                                                 1         2
## Gestational diabetes                                              1         0
## Hepatocellular carcinoma                                          0         1
## Hypertrophy                                                       1         0
## Irritable bowel syndrome                                          5         6
## Multiple sclerosis                                                0         1
## Oral cavity carcinoma                                             0         1
## Oral squamous cell carcinoma                                      3         2
## Pancreatic carcinoma                                              0         1
## Psoriasis                                                         1         0
## Respiratory Syncytial Virus Infection                             0         1
## Respiratory tract infectious disease                              1         2
## Response to vaccine                                               1         0
## Rheumatoid arthritis                                              5         4
## Sample treatment protocol                                         1         0
## Sampling time                                                     4         3
## Social interaction measurement                                    2         1
## Socioeconomic status                                              3         4
## Traditional Chinese medicine type                                 5         4
## Treatment                                                         1         0
## Type I diabetes mellitus                                          0         1
## Vesicle membrane                                                  3         2
## Vitiligo                                                          0         1
## Abnormal stool composition                                        0         0
## Acute myeloid leukemia                                            1         1
## Arthritis                                                         0         0
## Asthma                                                            1         1
## Biological sex                                                    1         1
## Bipolar disorder                                                  0         0
## Celiac disease                                                    0         0
## Clinical treatment                                                1         1
## Colorectal adenoma                                                2         2
## Contraception                                                     0         0
## COVID-19 symptoms measurement                                     0         0
## Disease progression measurement                                   0         0
## Functional abnormality of the gastrointestinal tract              0         0
## Gastric adenocarcinoma                                            0         0
## Head and neck squamous cell carcinoma                             0         0
## Health study participation                                        2         2
## HIV mother to child transmission                                  0         0
## Lactose intolerance                                               0         0
## Lifestyle measurement                                             2         2
## Lung transplantation                                              2         2
## Obsessive-compulsive disorder                                     0         0
## Ovarian cancer                                                    3         3
## Phenotype                                                         2         2
## Psoriasis vulgaris                                                0         0
## Response to diet                                                  3         3
## Response to ketogenic diet                                        2         2
## Sample collection protocol                                        0         0
## SARS coronavirus                                                  0         0
## Simian immunodeficiency virus infection                           0         0
## Smoking cessation                                                 0         0
## Transplant outcome measurement                                    0         0
## Viral load                                                        0         0
## Waist circumference                                               0         0
##                                                           unchanged
## Oxalate measurement                                               0
## COVID-19                                                         42
## Obesity                                                          58
## Polycystic ovary syndrome                                        12
## HIV infection                                                    26
## Smoking behaviour measurement                                     0
## Clostridium difficile infection                                   1
## Dry eye syndrome                                                 11
## Systemic inflammatory response syndrome                           4
## Treatment outcome measurement                                    66
## Chronic constipation                                             10
## Human papilloma virus infection                                  28
## Alzheimer's disease                                              24
## Gastric cancer                                                   26
## Ulcerative colitis                                                3
## Age                                                               9
## Aging                                                             0
## Balanced reciprocal translocation                                 0
## Atopic eczema                                                    72
## Autism spectrum disorder                                          8
## Cesarean section                                                 16
## Epilepsy                                                          5
## Lung cancer                                                       7
## Parkinson's disease                                              86
## Response to allogeneic hematopoietic stem cell transplant         0
## Response to transplant                                           23
## Urinary tract infection                                           8
## Cervical cancer                                                   5
## Diet                                                             63
## Helminthiasis                                                     8
## Population                                                       25
## Spontaneous preterm birth                                         5
## Acute lymphoblastic leukemia                                      5
## Acute pancreatitis                                                2
## Colitis                                                           1
## Colorectal cancer                                                44
## Ethnic group                                                      6
## Food allergy                                                     19
## Human immunodeficiency virus                                      6
## Hypertension                                                      6
## Periodontitis                                                    10
## Pregnancy                                                         2
## Response to antibiotic                                            8
## Alcohol drinking                                                  2
## Atopic asthma                                                     7
## Birth measurement                                                 4
## Constipation                                                      2
## Delivery method                                                   2
## Extraction protocol                                              20
## Male homosexuality                                                6
## Oral lichen planus                                                4
## SARS-CoV-2-related disease                                        4
## Schizophrenia                                                    14
## Type II diabetes mellitus                                        24
## Age at assessment                                                 1
## Antimicrobial agent                                              25
## Breed                                                             7
## Cervical glandular intraepithelial neoplasia                      9
## Chronic kidney disease                                            5
## Cognitive impairment                                              9
## Crohn's disease                                                   5
## Depressive disorder                                               4
## Diarrhea                                                          8
## Eczema                                                           10
## Endometrial cancer                                                3
## Esophageal adenocarcinoma                                         4
## Iron biomarker measurement                                        2
## Milk allergic reaction                                            5
## Papillary thyroid carcinoma                                      10
## Phenylketonuria                                                   4
## Response to anti-tuberculosis drug                               13
## Response to antiviral drug                                        5
## Response to immunochemotherapy                                    3
## Sampling site                                                     7
## Smoking behavior                                                 20
## Squamous cell carcinoma                                           4
## Streptococcus pneumoniae                                          4
## Stroke                                                           16
## Acute respiratory failure                                         0
## Air pollution                                                     3
## Anxiety disorder                                                  7
## Breast cancer                                                    16
## Breastfeeding duration                                            9
## Chlamydia trachomatis                                             2
## Chronic fatigue syndrome                                          4
## Chronic hepatitis B virus infection                               5
## Chronic obstructive pulmonary disease                             2
## Diabetes mellitus                                                 5
## Endometriosis                                                    14
## Esophageal cancer                                                 2
## Gestational diabetes                                             35
## Hepatocellular carcinoma                                          6
## Hypertrophy                                                       4
## Irritable bowel syndrome                                         22
## Multiple sclerosis                                               17
## Oral cavity carcinoma                                             7
## Oral squamous cell carcinoma                                      3
## Pancreatic carcinoma                                              4
## Psoriasis                                                        12
## Respiratory Syncytial Virus Infection                             5
## Respiratory tract infectious disease                              6
## Response to vaccine                                               5
## Rheumatoid arthritis                                              9
## Sample treatment protocol                                         4
## Sampling time                                                     5
## Social interaction measurement                                    6
## Socioeconomic status                                              8
## Traditional Chinese medicine type                                 6
## Treatment                                                         7
## Type I diabetes mellitus                                          6
## Vesicle membrane                                                  1
## Vitiligo                                                          4
## Abnormal stool composition                                        6
## Acute myeloid leukemia                                            4
## Arthritis                                                         6
## Asthma                                                           14
## Biological sex                                                    6
## Bipolar disorder                                                  5
## Celiac disease                                                    6
## Clinical treatment                                                5
## Colorectal adenoma                                               10
## Contraception                                                     5
## COVID-19 symptoms measurement                                     5
## Disease progression measurement                                   5
## Functional abnormality of the gastrointestinal tract             27
## Gastric adenocarcinoma                                            8
## Head and neck squamous cell carcinoma                             8
## Health study participation                                       35
## HIV mother to child transmission                                  8
## Lactose intolerance                                               5
## Lifestyle measurement                                             8
## Lung transplantation                                              2
## Obsessive-compulsive disorder                                     5
## Ovarian cancer                                                   27
## Phenotype                                                        19
## Psoriasis vulgaris                                               14
## Response to diet                                                 31
## Response to ketogenic diet                                        3
## Sample collection protocol                                        9
## SARS coronavirus                                                  6
## Simian immunodeficiency virus infection                           5
## Smoking cessation                                                 6
## Transplant outcome measurement                                   13
## Viral load                                                        6
## Waist circumference                                               5
# Cross-tabulate the "Shannon" column against "Condition" using tabDiv()
# (tabDiv is not defined in this part of the document). With perc = TRUE the
# printed cells are fractions rather than counts: for each condition the
# increased / decreased / unchanged values add up to about 1 after rounding.
# The rows appear in the same order as in the table printed just before
# this call.
# NOTE(review): presumably each fraction is a share of the experiments in
# `exps` for that condition -- confirm against the tabDiv() definition.
tabDiv(exps, "Shannon", "Condition", perc = TRUE)
##                                                           increased decreased
## Oxalate measurement                                           0.000     1.000
## COVID-19                                                      0.120     0.320
## Obesity                                                       0.039     0.210
## Polycystic ovary syndrome                                     0.000     0.520
## HIV infection                                                 0.026     0.310
## Smoking behaviour measurement                                 0.130     0.870
## Clostridium difficile infection                               0.910     0.000
## Dry eye syndrome                                              0.043     0.480
## Systemic inflammatory response syndrome                       0.210     0.620
## Treatment outcome measurement                                 0.110     0.210
## Chronic constipation                                          0.470     0.000
## Human papilloma virus infection                               0.260     0.026
## Alzheimer's disease                                           0.056     0.280
## Gastric cancer                                                0.130     0.300
## Ulcerative colitis                                            0.077     0.690
## Age                                                           0.190     0.460
## Aging                                                         0.000     1.000
## Balanced reciprocal translocation                             1.000     0.000
## Atopic eczema                                                 0.057     0.120
## Autism spectrum disorder                                      0.440     0.062
## Cesarean section                                              0.270     0.000
## Epilepsy                                                      0.550     0.000
## Lung cancer                                                   0.120     0.470
## Parkinson's disease                                           0.170     0.120
## Response to allogeneic hematopoietic stem cell transplant     0.000     1.000
## Response to transplant                                        0.190     0.320
## Urinary tract infection                                       0.000     0.430
## Cervical cancer                                               0.500     0.000
## Diet                                                          0.150     0.200
## Helminthiasis                                                 0.380     0.000
## Population                                                    0.059     0.210
## Spontaneous preterm birth                                     0.500     0.290
## Acute lymphoblastic leukemia                                  0.000     0.440
## Acute pancreatitis                                            0.000     0.670
## Colitis                                                       0.800     0.000
## Colorectal cancer                                             0.150     0.210
## Ethnic group                                                  0.190     0.440
## Food allergy                                                  0.220     0.074
## Human immunodeficiency virus                                  0.000     0.400
## Hypertension                                                  0.440     0.190
## Periodontitis                                                 0.310     0.062
## Pregnancy                                                     0.670     0.000
## Response to antibiotic                                        0.000     0.330
## Alcohol drinking                                              0.600     0.000
## Atopic asthma                                                 0.330     0.083
## Birth measurement                                             0.430     0.000
## Constipation                                                  0.550     0.270
## Delivery method                                               0.140     0.570
## Extraction protocol                                           0.330     0.380
## Male homosexuality                                            0.330     0.000
## Oral lichen planus                                            0.430     0.000
## SARS-CoV-2-related disease                                    0.000     0.430
## Schizophrenia                                                 0.053     0.210
## Type II diabetes mellitus                                     0.065     0.160
## Age at assessment                                             0.600     0.200
## Antimicrobial agent                                           0.190     0.230
## Breed                                                         0.000     0.220
## Cervical glandular intraepithelial neoplasia                  0.180     0.000
## Chronic kidney disease                                        0.180     0.360
## Cognitive impairment                                          0.077     0.230
## Crohn's disease                                               0.180     0.360
## Depressive disorder                                           0.000     0.330
## Diarrhea                                                      0.330     0.220
## Eczema                                                        0.000     0.170
## Endometrial cancer                                            0.440     0.220
## Esophageal adenocarcinoma                                     0.000     0.330
## Iron biomarker measurement                                    0.170     0.500
## Milk allergic reaction                                        0.290     0.000
## Papillary thyroid carcinoma                                   0.170     0.000
## Phenylketonuria                                               0.120     0.380
## Response to anti-tuberculosis drug                            0.260     0.320
## Response to antiviral drug                                    0.180     0.360
## Response to immunochemotherapy                                0.430     0.140
## Sampling site                                                 0.270     0.091
## Smoking behavior                                              0.260     0.210
## Squamous cell carcinoma                                       0.330     0.000
## Streptococcus pneumoniae                                      0.000     0.330
## Stroke                                                        0.110     0.000
## Acute respiratory failure                                     0.550     0.450
## Air pollution                                                 0.440     0.380
## Anxiety disorder                                              0.000     0.120
## Breast cancer                                                 0.130     0.170
## Breastfeeding duration                                        0.140     0.210
## Chlamydia trachomatis                                         0.200     0.400
## Chronic fatigue syndrome                                      0.000     0.200
## Chronic hepatitis B virus infection                           0.000     0.170
## Chronic obstructive pulmonary disease                         0.430     0.290
## Diabetes mellitus                                             0.000     0.170
## Endometriosis                                                 0.110     0.160
## Esophageal cancer                                             0.200     0.400
## Gestational diabetes                                          0.028     0.000
## Hepatocellular carcinoma                                      0.000     0.140
## Hypertrophy                                                   0.200     0.000
## Irritable bowel syndrome                                      0.150     0.180
## Multiple sclerosis                                            0.000     0.056
## Oral cavity carcinoma                                         0.000     0.120
## Oral squamous cell carcinoma                                  0.380     0.250
## Pancreatic carcinoma                                          0.000     0.200
## Psoriasis                                                     0.077     0.000
## Respiratory Syncytial Virus Infection                         0.000     0.170
## Respiratory tract infectious disease                          0.110     0.220
## Response to vaccine                                           0.170     0.000
## Rheumatoid arthritis                                          0.280     0.220
## Sample treatment protocol                                     0.200     0.000
## Sampling time                                                 0.330     0.250
## Social interaction measurement                                0.220     0.110
## Socioeconomic status                                          0.200     0.270
## Traditional Chinese medicine type                             0.330     0.270
## Treatment                                                     0.120     0.000
## Type I diabetes mellitus                                      0.000     0.140
## Vesicle membrane                                              0.500     0.330
## Vitiligo                                                      0.000     0.200
## Abnormal stool composition                                    0.000     0.000
## Acute myeloid leukemia                                        0.170     0.170
## Arthritis                                                     0.000     0.000
## Asthma                                                        0.062     0.062
## Biological sex                                                0.120     0.120
## Bipolar disorder                                              0.000     0.000
## Celiac disease                                                0.000     0.000
## Clinical treatment                                            0.140     0.140
## Colorectal adenoma                                            0.140     0.140
## Contraception                                                 0.000     0.000
## COVID-19 symptoms measurement                                 0.000     0.000
## Disease progression measurement                               0.000     0.000
## Functional abnormality of the gastrointestinal tract          0.000     0.000
## Gastric adenocarcinoma                                        0.000     0.000
## Head and neck squamous cell carcinoma                         0.000     0.000
## Health study participation                                    0.051     0.051
## HIV mother to child transmission                              0.000     0.000
## Lactose intolerance                                           0.000     0.000
## Lifestyle measurement                                         0.170     0.170
## Lung transplantation                                          0.330     0.330
## Obsessive-compulsive disorder                                 0.000     0.000
## Ovarian cancer                                                0.091     0.091
## Phenotype                                                     0.087     0.087
## Psoriasis vulgaris                                            0.000     0.000
## Response to diet                                              0.081     0.081
## Response to ketogenic diet                                    0.290     0.290
## Sample collection protocol                                    0.000     0.000
## SARS coronavirus                                              0.000     0.000
## Simian immunodeficiency virus infection                       0.000     0.000
## Smoking cessation                                             0.000     0.000
## Transplant outcome measurement                                0.000     0.000
## Viral load                                                    0.000     0.000
## Waist circumference                                           0.000     0.000
##                                                           unchanged
## Oxalate measurement                                           0.000
## COVID-19                                                      0.560
## Obesity                                                       0.750
## Polycystic ovary syndrome                                     0.480
## HIV infection                                                 0.670
## Smoking behaviour measurement                                 0.000
## Clostridium difficile infection                               0.091
## Dry eye syndrome                                              0.480
## Systemic inflammatory response syndrome                       0.170
## Treatment outcome measurement                                 0.670
## Chronic constipation                                          0.530
## Human papilloma virus infection                               0.720
## Alzheimer's disease                                           0.670
## Gastric cancer                                                0.570
## Ulcerative colitis                                            0.230
## Age                                                           0.350
## Aging                                                         0.000
## Balanced reciprocal translocation                             0.000
## Atopic eczema                                                 0.820
## Autism spectrum disorder                                      0.500
## Cesarean section                                              0.730
## Epilepsy                                                      0.450
## Lung cancer                                                   0.410
## Parkinson's disease                                           0.720
## Response to allogeneic hematopoietic stem cell transplant     0.000
## Response to transplant                                        0.490
## Urinary tract infection                                       0.570
## Cervical cancer                                               0.500
## Diet                                                          0.660
## Helminthiasis                                                 0.620
## Population                                                    0.740
## Spontaneous preterm birth                                     0.210
## Acute lymphoblastic leukemia                                  0.560
## Acute pancreatitis                                            0.330
## Colitis                                                       0.200
## Colorectal cancer                                             0.650
## Ethnic group                                                  0.380
## Food allergy                                                  0.700
## Human immunodeficiency virus                                  0.600
## Hypertension                                                  0.380
## Periodontitis                                                 0.620
## Pregnancy                                                     0.330
## Response to antibiotic                                        0.670
## Alcohol drinking                                              0.400
## Atopic asthma                                                 0.580
## Birth measurement                                             0.570
## Constipation                                                  0.180
## Delivery method                                               0.290
## Extraction protocol                                           0.290
## Male homosexuality                                            0.670
## Oral lichen planus                                            0.570
## SARS-CoV-2-related disease                                    0.570
## Schizophrenia                                                 0.740
## Type II diabetes mellitus                                     0.770
## Age at assessment                                             0.200
## Antimicrobial agent                                           0.580
## Breed                                                         0.780
## Cervical glandular intraepithelial neoplasia                  0.820
## Chronic kidney disease                                        0.450
## Cognitive impairment                                          0.690
## Crohn's disease                                               0.450
## Depressive disorder                                           0.670
## Diarrhea                                                      0.440
## Eczema                                                        0.830
## Endometrial cancer                                            0.330
## Esophageal adenocarcinoma                                     0.670
## Iron biomarker measurement                                    0.330
## Milk allergic reaction                                        0.710
## Papillary thyroid carcinoma                                   0.830
## Phenylketonuria                                               0.500
## Response to anti-tuberculosis drug                            0.420
## Response to antiviral drug                                    0.450
## Response to immunochemotherapy                                0.430
## Sampling site                                                 0.640
## Smoking behavior                                              0.530
## Squamous cell carcinoma                                       0.670
## Streptococcus pneumoniae                                      0.670
## Stroke                                                        0.890
## Acute respiratory failure                                     0.000
## Air pollution                                                 0.190
## Anxiety disorder                                              0.880
## Breast cancer                                                 0.700
## Breastfeeding duration                                        0.640
## Chlamydia trachomatis                                         0.400
## Chronic fatigue syndrome                                      0.800
## Chronic hepatitis B virus infection                           0.830
## Chronic obstructive pulmonary disease                         0.290
## Diabetes mellitus                                             0.830
## Endometriosis                                                 0.740
## Esophageal cancer                                             0.400
## Gestational diabetes                                          0.970
## Hepatocellular carcinoma                                      0.860
## Hypertrophy                                                   0.800
## Irritable bowel syndrome                                      0.670
## Multiple sclerosis                                            0.940
## Oral cavity carcinoma                                         0.880
## Oral squamous cell carcinoma                                  0.380
## Pancreatic carcinoma                                          0.800
## Psoriasis                                                     0.920
## Respiratory Syncytial Virus Infection                         0.830
## Respiratory tract infectious disease                          0.670
## Response to vaccine                                           0.830
## Rheumatoid arthritis                                          0.500
## Sample treatment protocol                                     0.800
## Sampling time                                                 0.420
## Social interaction measurement                                0.670
## Socioeconomic status                                          0.530
## Traditional Chinese medicine type                             0.400
## Treatment                                                     0.880
## Type I diabetes mellitus                                      0.860
## Vesicle membrane                                              0.170
## Vitiligo                                                      0.800
## Abnormal stool composition                                    1.000
## Acute myeloid leukemia                                        0.670
## Arthritis                                                     1.000
## Asthma                                                        0.880
## Biological sex                                                0.750
## Bipolar disorder                                              1.000
## Celiac disease                                                1.000
## Clinical treatment                                            0.710
## Colorectal adenoma                                            0.710
## Contraception                                                 1.000
## COVID-19 symptoms measurement                                 1.000
## Disease progression measurement                               1.000
## Functional abnormality of the gastrointestinal tract          1.000
## Gastric adenocarcinoma                                        1.000
## Head and neck squamous cell carcinoma                         1.000
## Health study participation                                    0.900
## HIV mother to child transmission                              1.000
## Lactose intolerance                                           1.000
## Lifestyle measurement                                         0.670
## Lung transplantation                                          0.330
## Obsessive-compulsive disorder                                 1.000
## Ovarian cancer                                                0.820
## Phenotype                                                     0.830
## Psoriasis vulgaris                                            1.000
## Response to diet                                              0.840
## Response to ketogenic diet                                    0.430
## Sample collection protocol                                    1.000
## SARS coronavirus                                              1.000
## Simian immunodeficiency virus infection                       1.000
## Smoking cessation                                             1.000
## Transplant outcome measurement                                1.000
## Viral load                                                    1.000
## Waist circumference                                           1.000
# Cross-tabulate the "Richness" column against "Condition" using tabDiv()
# (tabDiv is not defined in this part of the document). perc is not passed
# here, and the printed cells are whole-number counts per condition.
# NOTE(review): presumably these are counts of experiments in `exps` --
# confirm against the tabDiv() definition.
tabDiv(exps, "Richness", "Condition")
##                                                           increased decreased
## Treatment outcome measurement                                     5        22
## Diet                                                              4        19
## Helminthiasis                                                    13         0
## HIV infection                                                     3        15
## COVID-19                                                          9        20
## Chronic constipation                                              8         0
## Parkinson's disease                                              18        26
## Phenotype                                                         9         1
## Balanced reciprocal translocation                                 7         0
## Diarrhea                                                          8         1
## Head and neck squamous cell carcinoma                             0         7
## Polycystic ovary syndrome                                         0         7
## Increased intestinal transit time                                 6         0
## Response to allogeneic hematopoietic stem cell transplant         0         6
## Alcohol drinking                                                  5         0
## Antimicrobial agent                                               2         7
## Human immunodeficiency virus                                      1         6
## Human papilloma virus infection                                   7         2
## Acute lymphoblastic leukemia                                      5         1
## Age                                                               1         5
## Air pollution                                                     9         5
## Cervical glandular intraepithelial neoplasia                      4         0
## Dry eye syndrome                                                  0         4
## Endometriosis                                                     4         0
## Epilepsy                                                          4         0
## Periodontitis                                                     5         1
## Schizophrenia                                                     1         5
## Vesicle membrane                                                  5         1
## Atopic asthma                                                     4         1
## Delivery method                                                   4         1
## Food allergy                                                      0         3
## Gastric cancer                                                    5         8
## Gestational diabetes                                              3         6
## Hypertrophy                                                       3         0
## Iron biomarker measurement                                        1         4
## Oral squamous cell carcinoma                                      1         4
## Response to transplant                                            5         8
## Asthma                                                            2         0
## Autism spectrum disorder                                          4         6
## Breast cancer                                                     2         0
## Colorectal cancer                                                 8        10
## Esophageal adenocarcinoma                                         0         2
## Hypertension                                                      1         3
## Phenylketonuria                                                   1         3
## Respiratory tract infectious disease                              3         1
## Smoking behavior                                                  6         8
## Smoking status measurement                                        2         0
## Streptococcus pneumoniae                                          0         2
## Traditional Chinese medicine type                                 1         3
## Transplant outcome measurement                                    0         2
## Treatment                                                         1         3
## Ulcerative colitis                                                1         3
## Alzheimer's disease                                               6         5
## Atopic eczema                                                     2         1
## Breastfeeding duration                                            1         0
## Cesarean section                                                  3         2
## Colorectal adenoma                                                1         2
## Constipation                                                      4         5
## Endometrial cancer                                                1         2
## Health study participation                                        1         0
## Inflammatory bowel disease                                        2         3
## Irritable bowel syndrome                                          4         5
## Lung cancer                                                       0         1
## Obesity                                                           8         9
## Obsessive-compulsive disorder                                     0         1
## Ovarian cancer                                                    1         0
## Psoriasis                                                         0         1
## Response to diet                                                  3         4
## Rheumatoid arthritis                                              3         4
## Sampling site                                                     1         2
## Socioeconomic status                                              2         1
## Transport                                                         1         2
## Type II diabetes mellitus                                         2         3
## Urinary tract infection                                           0         1
## Abnormal stool composition                                        0         0
## Chlamydia trachomatis                                             1         1
## Crohn's disease                                                   2         2
## Ethnic group                                                      2         2
## Functional abnormality of the gastrointestinal tract              0         0
## HIV mother to child transmission                                  0         0
## Male homosexuality                                                0         0
## Multiple sclerosis                                                0         0
## Papillary thyroid carcinoma                                       0         0
## Physical activity                                                 2         2
## Psoriasis vulgaris                                                0         0
## Sample collection protocol                                        0         0
## Smoking cessation                                                 0         0
## Stroke                                                            2         2
## Viral load                                                        0         0
##                                                           unchanged
## Treatment outcome measurement                                    47
## Diet                                                             30
## Helminthiasis                                                     0
## HIV infection                                                    10
## COVID-19                                                         24
## Chronic constipation                                              4
## Parkinson's disease                                              28
## Phenotype                                                        11
## Balanced reciprocal translocation                                 0
## Diarrhea                                                          4
## Head and neck squamous cell carcinoma                             4
## Polycystic ovary syndrome                                         1
## Increased intestinal transit time                                 0
## Response to allogeneic hematopoietic stem cell transplant         0
## Alcohol drinking                                                  0
## Antimicrobial agent                                              10
## Human immunodeficiency virus                                      2
## Human papilloma virus infection                                  12
## Acute lymphoblastic leukemia                                      0
## Age                                                               1
## Air pollution                                                     6
## Cervical glandular intraepithelial neoplasia                      2
## Dry eye syndrome                                                  3
## Endometriosis                                                     8
## Epilepsy                                                          1
## Periodontitis                                                     6
## Schizophrenia                                                     8
## Vesicle membrane                                                  0
## Atopic asthma                                                     7
## Delivery method                                                   1
## Food allergy                                                      9
## Gastric cancer                                                   14
## Gestational diabetes                                             25
## Hypertrophy                                                       2
## Iron biomarker measurement                                        1
## Oral squamous cell carcinoma                                      0
## Response to transplant                                           11
## Asthma                                                           10
## Autism spectrum disorder                                          0
## Breast cancer                                                     7
## Colorectal cancer                                                21
## Esophageal adenocarcinoma                                         4
## Hypertension                                                      6
## Phenylketonuria                                                   4
## Respiratory tract infectious disease                              1
## Smoking behavior                                                  8
## Smoking status measurement                                        3
## Streptococcus pneumoniae                                          3
## Traditional Chinese medicine type                                 4
## Transplant outcome measurement                                    5
## Treatment                                                         6
## Ulcerative colitis                                                1
## Alzheimer's disease                                              23
## Atopic eczema                                                     6
## Breastfeeding duration                                            9
## Cesarean section                                                 10
## Colorectal adenoma                                               11
## Constipation                                                      8
## Endometrial cancer                                                3
## Health study participation                                       28
## Inflammatory bowel disease                                        0
## Irritable bowel syndrome                                         14
## Lung cancer                                                      10
## Obesity                                                          19
## Obsessive-compulsive disorder                                     4
## Ovarian cancer                                                   30
## Psoriasis                                                         8
## Response to diet                                                  8
## Rheumatoid arthritis                                              1
## Sampling site                                                     2
## Socioeconomic status                                              2
## Transport                                                         3
## Type II diabetes mellitus                                        10
## Urinary tract infection                                           6
## Abnormal stool composition                                        6
## Chlamydia trachomatis                                             3
## Crohn's disease                                                   2
## Ethnic group                                                      1
## Functional abnormality of the gastrointestinal tract             27
## HIV mother to child transmission                                  8
## Male homosexuality                                                9
## Multiple sclerosis                                               17
## Papillary thyroid carcinoma                                      12
## Physical activity                                                 1
## Psoriasis vulgaris                                               14
## Sample collection protocol                                        9
## Smoking cessation                                                 6
## Stroke                                                           17
## Viral load                                                        5
tabDiv(exps, "Richness", "Condition", perc = TRUE)
##                                                           increased decreased
## Treatment outcome measurement                                 0.068     0.300
## Diet                                                          0.075     0.360
## Helminthiasis                                                 1.000     0.000
## HIV infection                                                 0.110     0.540
## COVID-19                                                      0.170     0.380
## Chronic constipation                                          0.670     0.000
## Parkinson's disease                                           0.250     0.360
## Phenotype                                                     0.430     0.048
## Balanced reciprocal translocation                             1.000     0.000
## Diarrhea                                                      0.620     0.077
## Head and neck squamous cell carcinoma                         0.000     0.640
## Polycystic ovary syndrome                                     0.000     0.880
## Increased intestinal transit time                             1.000     0.000
## Response to allogeneic hematopoietic stem cell transplant     0.000     1.000
## Alcohol drinking                                              1.000     0.000
## Antimicrobial agent                                           0.110     0.370
## Human immunodeficiency virus                                  0.110     0.670
## Human papilloma virus infection                               0.330     0.095
## Acute lymphoblastic leukemia                                  0.830     0.170
## Age                                                           0.140     0.710
## Air pollution                                                 0.450     0.250
## Cervical glandular intraepithelial neoplasia                  0.670     0.000
## Dry eye syndrome                                              0.000     0.570
## Endometriosis                                                 0.330     0.000
## Epilepsy                                                      0.800     0.000
## Periodontitis                                                 0.420     0.083
## Schizophrenia                                                 0.071     0.360
## Vesicle membrane                                              0.830     0.170
## Atopic asthma                                                 0.330     0.083
## Delivery method                                               0.670     0.170
## Food allergy                                                  0.000     0.250
## Gastric cancer                                                0.190     0.300
## Gestational diabetes                                          0.088     0.180
## Hypertrophy                                                   0.600     0.000
## Iron biomarker measurement                                    0.170     0.670
## Oral squamous cell carcinoma                                  0.200     0.800
## Response to transplant                                        0.210     0.330
## Asthma                                                        0.170     0.000
## Autism spectrum disorder                                      0.400     0.600
## Breast cancer                                                 0.220     0.000
## Colorectal cancer                                             0.210     0.260
## Esophageal adenocarcinoma                                     0.000     0.330
## Hypertension                                                  0.100     0.300
## Phenylketonuria                                               0.120     0.380
## Respiratory tract infectious disease                          0.600     0.200
## Smoking behavior                                              0.270     0.360
## Smoking status measurement                                    0.400     0.000
## Streptococcus pneumoniae                                      0.000     0.400
## Traditional Chinese medicine type                             0.120     0.380
## Transplant outcome measurement                                0.000     0.290
## Treatment                                                     0.100     0.300
## Ulcerative colitis                                            0.200     0.600
## Alzheimer's disease                                           0.180     0.150
## Atopic eczema                                                 0.220     0.110
## Breastfeeding duration                                        0.100     0.000
## Cesarean section                                              0.200     0.130
## Colorectal adenoma                                            0.071     0.140
## Constipation                                                  0.240     0.290
## Endometrial cancer                                            0.170     0.330
## Health study participation                                    0.034     0.000
## Inflammatory bowel disease                                    0.400     0.600
## Irritable bowel syndrome                                      0.170     0.220
## Lung cancer                                                   0.000     0.091
## Obesity                                                       0.220     0.250
## Obsessive-compulsive disorder                                 0.000     0.200
## Ovarian cancer                                                0.032     0.000
## Psoriasis                                                     0.000     0.110
## Response to diet                                              0.200     0.270
## Rheumatoid arthritis                                          0.380     0.500
## Sampling site                                                 0.200     0.400
## Socioeconomic status                                          0.400     0.200
## Transport                                                     0.170     0.330
## Type II diabetes mellitus                                     0.130     0.200
## Urinary tract infection                                       0.000     0.140
## Abnormal stool composition                                    0.000     0.000
## Chlamydia trachomatis                                         0.200     0.200
## Crohn's disease                                               0.330     0.330
## Ethnic group                                                  0.400     0.400
## Functional abnormality of the gastrointestinal tract          0.000     0.000
## HIV mother to child transmission                              0.000     0.000
## Male homosexuality                                            0.000     0.000
## Multiple sclerosis                                            0.000     0.000
## Papillary thyroid carcinoma                                   0.000     0.000
## Physical activity                                             0.400     0.400
## Psoriasis vulgaris                                            0.000     0.000
## Sample collection protocol                                    0.000     0.000
## Smoking cessation                                             0.000     0.000
## Stroke                                                        0.095     0.095
## Viral load                                                    0.000     0.000
##                                                           unchanged
## Treatment outcome measurement                                  0.64
## Diet                                                           0.57
## Helminthiasis                                                  0.00
## HIV infection                                                  0.36
## COVID-19                                                       0.45
## Chronic constipation                                           0.33
## Parkinson's disease                                            0.39
## Phenotype                                                      0.52
## Balanced reciprocal translocation                              0.00
## Diarrhea                                                       0.31
## Head and neck squamous cell carcinoma                          0.36
## Polycystic ovary syndrome                                      0.12
## Increased intestinal transit time                              0.00
## Response to allogeneic hematopoietic stem cell transplant      0.00
## Alcohol drinking                                               0.00
## Antimicrobial agent                                            0.53
## Human immunodeficiency virus                                   0.22
## Human papilloma virus infection                                0.57
## Acute lymphoblastic leukemia                                   0.00
## Age                                                            0.14
## Air pollution                                                  0.30
## Cervical glandular intraepithelial neoplasia                   0.33
## Dry eye syndrome                                               0.43
## Endometriosis                                                  0.67
## Epilepsy                                                       0.20
## Periodontitis                                                  0.50
## Schizophrenia                                                  0.57
## Vesicle membrane                                               0.00
## Atopic asthma                                                  0.58
## Delivery method                                                0.17
## Food allergy                                                   0.75
## Gastric cancer                                                 0.52
## Gestational diabetes                                           0.74
## Hypertrophy                                                    0.40
## Iron biomarker measurement                                     0.17
## Oral squamous cell carcinoma                                   0.00
## Response to transplant                                         0.46
## Asthma                                                         0.83
## Autism spectrum disorder                                       0.00
## Breast cancer                                                  0.78
## Colorectal cancer                                              0.54
## Esophageal adenocarcinoma                                      0.67
## Hypertension                                                   0.60
## Phenylketonuria                                                0.50
## Respiratory tract infectious disease                           0.20
## Smoking behavior                                               0.36
## Smoking status measurement                                     0.60
## Streptococcus pneumoniae                                       0.60
## Traditional Chinese medicine type                              0.50
## Transplant outcome measurement                                 0.71
## Treatment                                                      0.60
## Ulcerative colitis                                             0.20
## Alzheimer's disease                                            0.68
## Atopic eczema                                                  0.67
## Breastfeeding duration                                         0.90
## Cesarean section                                               0.67
## Colorectal adenoma                                             0.79
## Constipation                                                   0.47
## Endometrial cancer                                             0.50
## Health study participation                                     0.97
## Inflammatory bowel disease                                     0.00
## Irritable bowel syndrome                                       0.61
## Lung cancer                                                    0.91
## Obesity                                                        0.53
## Obsessive-compulsive disorder                                  0.80
## Ovarian cancer                                                 0.97
## Psoriasis                                                      0.89
## Response to diet                                               0.53
## Rheumatoid arthritis                                           0.12
## Sampling site                                                  0.40
## Socioeconomic status                                           0.40
## Transport                                                      0.50
## Type II diabetes mellitus                                      0.67
## Urinary tract infection                                        0.86
## Abnormal stool composition                                     1.00
## Chlamydia trachomatis                                          0.60
## Crohn's disease                                                0.33
## Ethnic group                                                   0.20
## Functional abnormality of the gastrointestinal tract           1.00
## HIV mother to child transmission                               1.00
## Male homosexuality                                             1.00
## Multiple sclerosis                                             1.00
## Papillary thyroid carcinoma                                    1.00
## Physical activity                                              0.20
## Psoriasis vulgaris                                             1.00
## Sample collection protocol                                     1.00
## Smoking cessation                                              1.00
## Stroke                                                         0.81
## Viral load                                                     1.00

Body sites with consistently increased or decreased alpha diversity:

tabDiv(exps, "Shannon", "Body site")
##                                increased decreased unchanged
## Feces                                297       441      1169
## Vagina                                16         6        27
## Posterior fornix of vagina             9         0         7
## Skin of body                           7        15         8
## Uterine cervix                         9         1        20
## Uterine cervix,Vaginal fluid           9         1         0
## Buccal epithelium                      0         7         0
## Saliva                                36        43       122
## Subgingival dental plaque              9         3        20
## Buccal mucosa                          5         0         2
## Meconium                               5         0        10
## Space surrounding organism             2         7        13
## Stomach                                5        10         5
## Tongue                                 0         5        12
## Axilla skin                            5         1        11
## Tear film                              0         4         1
## Throat                                 0         4         8
## Caecum                                 4         7        22
## Cecum mucosa                           1         4         6
## Colorectal mucosa                      0         3         8
## Dental plaque                          0         3         3
## Duodenum                               0         3         6
## Nasopharynx                            3         6        32
## Rectum                                 0         3        12
## Skin of forearm                        3         0         3
## Bile                                   2         0         3
## Brachialis muscle                      0         2         3
## Conjunctiva                            1         3         6
## Conjunctival sac                       1         3         1
## Esophagus                              0         2         4
## Forelimb skin                          2         0         4
## Lung                                   2         4         7
## Mouth                                  8         6        28
## Oropharynx                             2         4         6
## Rumen                                  2         0         4
## Supragingival dental plaque            1         3         1
## Thyroid gland                          2         0        10
## Uterus                                 3         1        11
## Blood                                  0         1         6
## Breast                                 3         4         4
## Breast,Milk                            1         0         4
## Bulbar conjunctiva                     3         2         5
## Colon                                  3         2        19
## Ileum                                  1         0        11
## Nasal cavity                           0         1         5
## Small intestine                        3         4         1
## Vagina,Uterine cervix                  3         2         7
## Vaginal fluid                          1         0         8
## Bronchus                               0         0         6
## Endothelium of trachea                 3         3         0
## Internal cheek pouch                   0         0        11
## Intestine                              1         1        16
## Jejunum                                1         1         8
## Milk                                   0         0         9
## Oral cavity                            5         5         7
## Ovary                                  0         0         7
## Peritoneal fluid                       0         0         6
## Posterior wall of oropharynx           2         2         1
## Skin of abdomen                        0         0         5
## Sputum                                 6         6         8
## Surface of tongue                      2         2         3
## Urine                                  1         1        16
## Ventral side of post-anal tail         0         0         6
tabDiv(exps, "Shannon", "Body site", perc = TRUE)
##                                increased decreased unchanged
## Feces                              0.160     0.230      0.61
## Vagina                             0.330     0.120      0.55
## Posterior fornix of vagina         0.560     0.000      0.44
## Skin of body                       0.230     0.500      0.27
## Uterine cervix                     0.300     0.033      0.67
## Uterine cervix,Vaginal fluid       0.900     0.100      0.00
## Buccal epithelium                  0.000     1.000      0.00
## Saliva                             0.180     0.210      0.61
## Subgingival dental plaque          0.280     0.094      0.62
## Buccal mucosa                      0.710     0.000      0.29
## Meconium                           0.330     0.000      0.67
## Space surrounding organism         0.091     0.320      0.59
## Stomach                            0.250     0.500      0.25
## Tongue                             0.000     0.290      0.71
## Axilla skin                        0.290     0.059      0.65
## Tear film                          0.000     0.800      0.20
## Throat                             0.000     0.330      0.67
## Caecum                             0.120     0.210      0.67
## Cecum mucosa                       0.091     0.360      0.55
## Colorectal mucosa                  0.000     0.270      0.73
## Dental plaque                      0.000     0.500      0.50
## Duodenum                           0.000     0.330      0.67
## Nasopharynx                        0.073     0.150      0.78
## Rectum                             0.000     0.200      0.80
## Skin of forearm                    0.500     0.000      0.50
## Bile                               0.400     0.000      0.60
## Brachialis muscle                  0.000     0.400      0.60
## Conjunctiva                        0.100     0.300      0.60
## Conjunctival sac                   0.200     0.600      0.20
## Esophagus                          0.000     0.330      0.67
## Forelimb skin                      0.330     0.000      0.67
## Lung                               0.150     0.310      0.54
## Mouth                              0.190     0.140      0.67
## Oropharynx                         0.170     0.330      0.50
## Rumen                              0.330     0.000      0.67
## Supragingival dental plaque        0.200     0.600      0.20
## Thyroid gland                      0.170     0.000      0.83
## Uterus                             0.200     0.067      0.73
## Blood                              0.000     0.140      0.86
## Breast                             0.270     0.360      0.36
## Breast,Milk                        0.200     0.000      0.80
## Bulbar conjunctiva                 0.300     0.200      0.50
## Colon                              0.120     0.083      0.79
## Ileum                              0.083     0.000      0.92
## Nasal cavity                       0.000     0.170      0.83
## Small intestine                    0.380     0.500      0.12
## Vagina,Uterine cervix              0.250     0.170      0.58
## Vaginal fluid                      0.110     0.000      0.89
## Bronchus                           0.000     0.000      1.00
## Endothelium of trachea             0.500     0.500      0.00
## Internal cheek pouch               0.000     0.000      1.00
## Intestine                          0.056     0.056      0.89
## Jejunum                            0.100     0.100      0.80
## Milk                               0.000     0.000      1.00
## Oral cavity                        0.290     0.290      0.41
## Ovary                              0.000     0.000      1.00
## Peritoneal fluid                   0.000     0.000      1.00
## Posterior wall of oropharynx       0.400     0.400      0.20
## Skin of abdomen                    0.000     0.000      1.00
## Sputum                             0.300     0.300      0.40
## Surface of tongue                  0.290     0.290      0.43
## Urine                              0.056     0.056      0.89
## Ventral side of post-anal tail     0.000     0.000      1.00
# Richness by body site: number of increased / decreased / unchanged results
tabDiv(exps, "Richness", "Body site")
##                              increased decreased unchanged
## Feces                              187       248       604
## Mouth                               10         3         9
## Posterior fornix of vagina           8         1         2
## Uterine cervix                       8         1        11
## Skin of body                         3         9         6
## Rectum                               0         5         7
## Subgingival dental plaque            7         2        17
## Uterine cervix,Vaginal fluid         7         2         1
## Nasopharynx                          5         9        19
## Oropharynx                           3         7         5
## Stomach                              4         8         3
## Throat                               1         5         5
## Cecum mucosa                         3         6         3
## Oral cavity                          2         5         0
## Small intestine                      1         4         0
## Colon                                6         4        11
## Ear                                  2         0         3
## Esophagus                            0         2         4
## Saliva                              20        22        43
## Surface of tongue                    4         2         1
## Caecum                               2         3         1
## Ileum                                2         1         9
## Meconium                             2         3         7
## Milk                                 2         1         5
## Nasal cavity                         1         2        10
## Urine                                3         2        12
## Vagina                               3         2        11
## Vagina,Uterine cervix                1         0        11
## Breast                               1         1         7
## Bronchus                             0         0         6
## Conjunctiva                          1         1         5
## Internal cheek pouch                 0         0         7
## Intestine                            0         0        15
## Ovary                                0         0         7
## Peritoneal fluid                     0         0         6
## Thyroid gland                        0         0        12
## Tongue                               2         2         7
# Richness by body site: share of increased / decreased / unchanged results
# per body site (perc = TRUE gives proportions instead of counts)
tabDiv(exps, "Richness", "Body site", perc = TRUE)
##                              increased decreased unchanged
## Feces                            0.180     0.240      0.58
## Mouth                            0.450     0.140      0.41
## Posterior fornix of vagina       0.730     0.091      0.18
## Uterine cervix                   0.400     0.050      0.55
## Skin of body                     0.170     0.500      0.33
## Rectum                           0.000     0.420      0.58
## Subgingival dental plaque        0.270     0.077      0.65
## Uterine cervix,Vaginal fluid     0.700     0.200      0.10
## Nasopharynx                      0.150     0.270      0.58
## Oropharynx                       0.200     0.470      0.33
## Stomach                          0.270     0.530      0.20
## Throat                           0.091     0.450      0.45
## Cecum mucosa                     0.250     0.500      0.25
## Oral cavity                      0.290     0.710      0.00
## Small intestine                  0.200     0.800      0.00
## Colon                            0.290     0.190      0.52
## Ear                              0.400     0.000      0.60
## Esophagus                        0.000     0.330      0.67
## Saliva                           0.240     0.260      0.51
## Surface of tongue                0.570     0.290      0.14
## Caecum                           0.330     0.500      0.17
## Ileum                            0.170     0.083      0.75
## Meconium                         0.170     0.250      0.58
## Milk                             0.250     0.120      0.62
## Nasal cavity                     0.077     0.150      0.77
## Urine                            0.180     0.120      0.71
## Vagina                           0.190     0.120      0.69
## Vagina,Uterine cervix            0.083     0.000      0.92
## Breast                           0.110     0.110      0.78
## Bronchus                         0.000     0.000      1.00
## Conjunctiva                      0.140     0.140      0.71
## Internal cheek pouch             0.000     0.000      1.00
## Intestine                        0.000     0.000      1.00
## Ovary                            0.000     0.000      1.00
## Peritoneal fluid                 0.000     0.000      1.00
## Thyroid gland                    0.000     0.000      1.00
## Tongue                           0.180     0.180      0.64

Signature stats

# Extract the microbe signatures from `dat`, requesting MetaPhlAn-style taxon
# identifiers; the result is used below as a list with one entry per signature
sigs <- bugsigdbr::getSignatures(dat, tax.id.type = "metaphlan")

Unique microbes

Number of unique microbes contained in the signatures:

# Pool the taxa of all signatures, count the distinct ones, and print the count
all.taxa <- unlist(sigs)
nuniq <- length(unique(all.taxa))
nuniq
## [1] 7792

Development of unique microbes captured over time:

Microbe set size distribution

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   5.000   8.277  10.000 470.000
# Histogram of signature sizes (number of taxa per signature)
gghistogram(
    lengths(sigs),
    bins = 30,
    xlab = "signature size",
    ylab = "number of signatures",
    fill = "#00AFBB",
    ggtheme = theme_bw()
)

# Number of signatures that contain more than four taxa
sum(lengths(sigs) > 4)
## [1] 4297

Microbe co-occurrence

# Keep only the rows recorded for body site "Feces" (rows with a missing body
# site are dropped), then build the genus-level heatmap; the returned matrix
# is subset to the top 20 genera further below
in.feces <- dat[["Body site"]] %in% "Feces"
dat.feces <- dat[in.feces, , drop = FALSE]
cooc.mat <- microbeHeatmap(dat.feces, tax.level = "genus", anno = "genus")
## Loading required namespace: safe

# Same genus-level call with antagonistic = TRUE; the resulting matrix feeds
# the "opposite direction" heatmap further below
antag.mat <- microbeHeatmap(dat.feces, tax.level = "genus", anno = "genus", antagonistic = TRUE)

Get the top 20 genera most frequently reported as differentially abundant:

# Genus-level signatures (taxon names) for the fecal subset
sigs.feces <- getSignatures(
    dat.feces,
    tax.id.type = "taxname",
    tax.level = "genus",
    exact.tax.level = FALSE
)
# Count how often each genus occurs across all signatures and keep the 20
# most frequent ones
genus.freq <- table(unlist(sigs.feces))
top20 <- sort(genus.freq, decreasing = TRUE)[seq_len(20)]
top20
## 
##      Bacteroides  Bifidobacterium Faecalibacterium      Clostridium 
##              902              605              600              536 
##          Blautia     Ruminococcus    Streptococcus        Roseburia 
##              526              504              475              471 
##  Parabacteroides       Prevotella    Lactobacillus        Alistipes 
##              468              450              435              406 
##      Akkermansia            Dorea      Coprococcus      Eubacterium 
##              353              334              327              308 
##      Veillonella      Lachnospira     Enterococcus      Escherichia 
##              283              278              277              263

Subset heatmaps to the top 20 genera most frequently reported as differentially abundant:

# Check that every top-20 genus is a row of each matrix, then restrict both
# matrices to these genera (rows and columns, in the order of `top20`)
top.genera <- names(top20)
all(top.genera %in% rownames(cooc.mat))
## [1] TRUE
cooc.mat <- cooc.mat[top.genera, top.genera]
all(top.genera %in% rownames(antag.mat))
## [1] TRUE
antag.mat <- antag.mat[top.genera, top.genera]

Distinguish by direction of abundance change (increased / decreased):

# increased
# Signatures whose taxa are recorded as increased in Group 1
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "increased")
sigs.feces.up <- getSignatures(sub.dat.feces, tax.id.type = "taxname", 
                               tax.level = "genus", exact.tax.level = FALSE) 
# Tabulate against the fixed set of top-20 genera: this yields exactly one
# count per genus, in the order of `top20`, and a count of 0 (instead of a
# missing entry) for a genus that never occurs in this direction
top20.up <- table(factor(unlist(sigs.feces.up, use.names = FALSE),
                         levels = names(top20)))
top20.up
## 
##      Bacteroides  Bifidobacterium Faecalibacterium      Clostridium 
##              429              294              218              276 
##          Blautia     Ruminococcus    Streptococcus        Roseburia 
##              237              205              307              155 
##  Parabacteroides       Prevotella    Lactobacillus        Alistipes 
##              247              211              274              172 
##      Akkermansia            Dorea      Coprococcus      Eubacterium 
##              216              138              120              131 
##      Veillonella      Lachnospira     Enterococcus      Escherichia 
##              178               96              198              182
# decreased
# Signatures whose taxa are recorded as decreased in Group 1
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "decreased")
sigs.feces.down <- getSignatures(sub.dat.feces, tax.id.type = "taxname", 
                                 tax.level = "genus", exact.tax.level = FALSE) 
# Tabulate against the fixed set of top-20 genera: this yields exactly one
# count per genus, in the order of `top20`, and a count of 0 (instead of a
# missing entry) for a genus that never occurs in this direction
top20.down <- table(factor(unlist(sigs.feces.down, use.names = FALSE),
                           levels = names(top20)))
top20.down
## 
##      Bacteroides  Bifidobacterium Faecalibacterium      Clostridium 
##              466              303              376              255 
##          Blautia     Ruminococcus    Streptococcus        Roseburia 
##              283              294              160              310 
##  Parabacteroides       Prevotella    Lactobacillus        Alistipes 
##              215              236              159              228 
##      Akkermansia            Dorea      Coprococcus      Eubacterium 
##              133              190              201              171 
##      Veillonella      Lachnospira     Enterococcus      Escherichia 
##              102              176               77               75

Plot the heatmap

# annotation
# Colours for the two directions of change: increased, decreased
dir.cols <- c("#D55E00", "#0072B2")

# Two-column count matrix (column 1: increased, column 2: decreased), one row
# per top-20 genus; `ncol` is spelled out rather than relying on partial
# matching of the abbreviated argument name `nc`
mat <- matrix(cbind(top20.up, top20.down), ncol = 2)
bp <- ComplexHeatmap::anno_barplot(mat, gp = gpar(fill = dir.cols, col = dir.cols),
                                   height = unit(2, "cm"))
banno <- ComplexHeatmap::HeatmapAnnotation(`Abundance in Group 1` = bp)

# Legend that maps the two bar colours to the direction of change
lgd_list <- list(
    Legend(labels = c("increased", "decreased"), 
           title = "Abundance in Group 1", 
           type = "grid",
           legend_gp = gpar(col = dir.cols, fill = dir.cols)))
                                            
# same direction
# Plain column-wise scaling, kept for reference:
# lcm <- sweep(cooc.mat, 2, matrixStats::colMaxs(cooc.mat), FUN = "/")
# The maximum needs to be dampened a bit here; otherwise 100% self
# co-occurrence takes up a large fraction of the color scale.
# Second-largest value of each column
sec <- apply(cooc.mat, 2, function(x) sort(x, decreasing = TRUE)[2])
cooc.mat2 <- cooc.mat
# Cap each diagonal entry at 1.4 times the second-largest value of its column
# (vectorized replacement for an index loop over `1:ncol(cooc.mat2)`, which
# would iterate over c(1, 0) for a matrix without columns)
diag(cooc.mat2) <- pmin(diag(cooc.mat2), 1.4 * sec)
# Scale every column by its (dampened) maximum
lcm <- sweep(cooc.mat2, 2, matrixStats::colMaxs(cooc.mat2), FUN = "/")
# Colour scale from light grey (0) to red (1) for the column-scaled matrix
col <- circlize::colorRamp2(c(0,1), c("#EEEEEE", "red"))
# Upper heatmap ("same direction"); columns keep their given order
ht1 <- ComplexHeatmap::Heatmap(
    lcm,
    name = "Relative frequency (top)",
    col = col,
    row_title = "same direction",
    row_km = 3,
    cluster_columns = FALSE,
    column_names_rot = 45,
    row_names_gp = gpar(fontsize = 8),
    column_names_gp = gpar(fontsize = 8)
)

# opposite direction
# Scale every column of the antagonistic matrix by its column maximum
antag.col.max <- matrixStats::colMaxs(antag.mat)
acm <- sweep(antag.mat, 2, antag.col.max, FUN = "/")
# Colour scale from light grey (0) to blue (1)
col <- circlize::colorRamp2(c(0,1), c("#EEEEEE", "blue"))
# Lower heatmap ("opposite direction"); columns keep their given order
ht2 <- ComplexHeatmap::Heatmap(
    acm,
    name = "Relative frequency (bottom)",
    col = col,
    row_title = "opposite direction",
    row_km = 3,
    cluster_columns = FALSE,
    column_names_rot = 45,
    row_names_gp = gpar(fontsize = 8),
    column_names_gp = gpar(fontsize = 8)
)

# phylum
# Genus-level signatures for the fecal subset again, this time requesting
# MetaPhlAn-style identifiers, and the 20 most frequent entries among them
sfp <- bugsigdbr::getSignatures(dat.feces, tax.id.type = "metaphlan", 
                                tax.level = "genus", exact.tax.level = FALSE) 
sfp20 <- sort(table(unlist(sfp)), decreasing = TRUE)[1:20]
# Phylum of each of these 20 entries, returned as taxon names
uanno <- bugsigdbr::extractTaxLevel(names(sfp20),
                                    tax.id.type = "taxname",
                                    tax.level = "phylum",
                                    exact.tax.level = FALSE) 
# One index per distinct phylum; used below to pick that many colours
phyla.grid <- seq_along(unique(uanno))
panno <- ComplexHeatmap::HeatmapAnnotation(phylum = uanno)

# Reshape the phylum labels into a one-row matrix so they can be drawn as an
# additional heatmap row
uanno <- matrix(uanno, nrow = 1)
# NOTE(review): the labels are in `sfp20` order but are given the names of
# `top20`; this presumably relies on both rankings listing the genera in the
# same order -- confirm
colnames(uanno) <- names(top20)
# NOTE(review): only five colours are defined; more than five distinct phyla
# would index past the end of `pcols` -- confirm this cannot happen
pcols <- c("#CC79A7", "#F0E442", "#009E73", "#56B4E9", "#E69F00")
uanno <- ComplexHeatmap::Heatmap(uanno, name = "Phylum",
                                 col = pcols[phyla.grid],
                                 cluster_columns = FALSE,
                                 column_names_rot = 45,
                                 column_names_gp = gpar(fontsize = 8))

# put everything together
# Stack the panels vertically: same-direction heatmap, barplot annotation,
# opposite-direction heatmap, phylum row
ht_list <- ht1 %v% banno %v% ht2 %v% uanno
ComplexHeatmap::draw(ht_list, annotation_legend_list = lgd_list, merge_legend = TRUE)

# Add an axis title and six "*" marks to the barplot annotation; the mark
# positions are hard-coded in cm
decorate_annotation("Abundance in Group 1", {
    grid.text("# signatures", x = unit(-1, "cm"), rot = 90, just = "bottom", gp = gpar(fontsize = 8))
    star.x <- c(2.45, 5.18, 6.55, 8.6, 10, 10.7)
    star.y <- c(1.2, 1, 0.95, 0.85, 0.7, 0.7)
    for (k in seq_along(star.x)) {
        grid.text("*", x = unit(star.x[k], "cm"), y = unit(star.y[k], "cm"))
    }
})

Signature similarity

Jaccard index

Inspect signature similarity for signatures from stomach samples based on Jaccard index:

# Keep only the rows recorded for body site "Stomach" (rows with a missing
# body site are dropped), extract their signatures, and compute the pairwise
# Jaccard similarities between them
is.stomach <- dat[["Body site"]] %in% "Stomach"
stomachsub <- dat[is.stomach, , drop = FALSE]
sigsub <- bugsigdbr::getSignatures(stomachsub)
pair.jsim <- calcJaccardSimilarity(sigsub)

Create a dendrogram of Jaccard dissimilarities (a dissimilarity of 1.0 means two signatures share no taxa; 0.0 means they are identical).

# Turn similarities into dissimilarities, cluster hierarchically, and plot
# the resulting dendrogram
jdist <- as.dist(1 - pair.jsim)
jtree <- hclust(jdist)
plot(jtree)