Skip to contents

Reading data

Get bulk export from bugsigdb.org:

# Import the "devel" bulk export with caching switched off
full.dat <- bugsigdbr::importBugSigDB(cache = FALSE, version = "devel")
# Rows x columns of the export
dim(full.dat)
## [1] 8760   50
# Names of the exported columns
names(full.dat)
##  [1] "BSDB ID"                    "Study"                     
##  [3] "Study design"               "PMID"                      
##  [5] "DOI"                        "URL"                       
##  [7] "Authors list"               "Title"                     
##  [9] "Journal"                    "Year"                      
## [11] "Keywords"                   "Experiment"                
## [13] "Location of subjects"       "Host species"              
## [15] "Body site"                  "UBERON ID"                 
## [17] "Condition"                  "EFO ID"                    
## [19] "Group 0 name"               "Group 1 name"              
## [21] "Group 1 definition"         "Group 0 sample size"       
## [23] "Group 1 sample size"        "Antibiotics exclusion"     
## [25] "Sequencing type"            "16S variable region"       
## [27] "Sequencing platform"        "Statistical test"          
## [29] "Significance threshold"     "MHT correction"            
## [31] "LDA Score above"            "Matched on"                
## [33] "Confounders controlled for" "Pielou"                    
## [35] "Shannon"                    "Chao1"                     
## [37] "Simpson"                    "Inverse Simpson"           
## [39] "Richness"                   "Signature page name"       
## [41] "Source"                     "Curated date"              
## [43] "Curator"                    "Revision editor"           
## [45] "Description"                "Abundance in Group 1"      
## [47] "MetaPhlAn taxon names"      "NCBI Taxonomy IDs"         
## [49] "State"                      "Reviewer"

Stripping ill-formed entries (rows without a study or experiment identifier):

# A row is kept only if both its study and its experiment identifier are set
is.study <- !is.na(full.dat[, "Study"])
is.exp <- !is.na(full.dat[, "Experiment"])
keep <- is.study & is.exp
full.dat <- full.dat[keep, ]

Curation output

Number of papers and signatures curated:

# Distinct PubMed IDs: one per curated paper
pmids <- unique(full.dat[["PMID"]])
length(pmids)
## [1] 1422
# One row per curated signature
dim(full.dat)[1L]
## [1] 8760

Publication date of the curated papers:

# Only papers with a known PMID can be looked up
known <- !is.na(pmids)
pmids <- pmids[known]
pubyear <- pmid2pubyear(pmids)
head(cbind(pmids, pubyear))

# Papers per publication year, arranged chronologically
tab <- table(pubyear)
chrono <- order(as.integer(names(tab)))
tab <- tab[chrono]
df <- data.frame(
  year = names(tab),
  papers = as.integer(tab)
)
ggbarplot(
  df,
  x = "year",
  y = "papers",
  label = TRUE,
  fill = "steelblue",
  ggtheme = theme_bw()
)

Stripping empty signatures:

# A signature is non-empty when both of its taxon columns hold at least one entry
sig.cols <- c("MetaPhlAn taxon names", "NCBI Taxonomy IDs")
non.empty <- lapply(sig.cols, function(col) lengths(full.dat[[col]]) > 0)
ind1 <- non.empty[[1]]
ind2 <- non.empty[[2]]
dat <- full.dat[ind1 & ind2, ]
nrow(dat)
## [1] 8760

Papers containing only empty UP and DOWN signatures (under curation?):

# PMIDs present in the full export but absent once empty signatures are removed
setdiff(pmids, dat[["PMID"]])
## numeric(0)

Progress over time:

# Parse the day-month-year curation dates and store them back as character
curated <- lubridate::dmy(dat[["Curated date"]])
dat[["Curated date"]] <- as.character(curated)
plotProgressOverTime(dat)

# Same plot with diff = TRUE
plotProgressOverTime(dat, diff = TRUE)

Stratified by curator:

# Stratify the non-empty signatures in `dat` by curator and plot the result.
# NOTE(review): the structure of `npc` is determined by stratifyByCurator(),
# which is defined outside this section -- confirm before reusing it elsewhere.
npc <- stratifyByCurator(dat)
plotCuratorStats(dat, npc)

Number of complete and revised signatures (output turned off because it has become too long to display):

# Signatures per review state
table(dat[["State"]])
# Signatures per revision editor
table(dat[["Revision editor"]])

Study stats

Study design

# Group the study identifiers by study design label
spl <- split(dat[, "Study"], dat[, "Study design"])
# Keep each study once per design
sds <- lapply(spl, function(studies) unique(studies))
# Number of distinct studies per design, smallest first
n.studies <- vapply(sds, length, integer(1))
sort(n.studies)
##                                       case-control,prospective cohort 
##                                                                     1 
##                              laboratory experiment,prospective cohort 
##                                                                     1 
## cross-sectional observational, not case-control,laboratory experiment 
##                                                                     2 
##    cross-sectional observational, not case-control,prospective cohort 
##                                                                     2 
##                                   laboratory experiment,meta-analysis 
##                                                                     2 
##           prospective cohort,time series / longitudinal observational 
##                                                                     2 
##                 case-control,time series / longitudinal observational 
##                                                                     3 
##                                            case-control,meta-analysis 
##                                                                     5 
##        laboratory experiment,time series / longitudinal observational 
##                                                                     5 
##                                    case-control,laboratory experiment 
##                                                                     6 
##                                                         meta-analysis 
##                                                                    18 
##                                           randomized controlled trial 
##                                                                    66 
##                                                    prospective cohort 
##                                                                   114 
##                              time series / longitudinal observational 
##                                                                   127 
##                                                 laboratory experiment 
##                                                                   148 
##                       cross-sectional observational, not case-control 
##                                                                   384 
##                                                          case-control 
##                                                                   550

Experiment stats

Columns of the full dataset that describe experiments:

# Columns identifying an experiment
exp.cols <- c("Study", "Experiment")

# Columns describing the subjects.
# Order matters: the code below addresses the first five (categorical) and the
# last two (sample sizes) by position.
sub.cols <- c(
  "Host species",
  "Location of subjects",
  "Body site",
  "Condition",
  "Antibiotics exclusion",
  "Group 0 sample size",
  "Group 1 sample size"
)

# Columns describing the lab analysis
lab.cols <- c(
  "Sequencing type",
  "16S variable region",
  "Sequencing platform"
)

# Columns describing the statistical analysis
stat.cols <- c(
  "Statistical test",
  "MHT correction",
  "Significance threshold"
)

# Columns holding alpha diversity results
div.cols <- c(
  "Pielou",
  "Shannon",
  "Chao1",
  "Simpson",
  "Inverse Simpson",
  "Richness"
)

Restrict dataset to experiment information:

# Keep only the experiment-level columns, then collapse duplicated rows so
# that each distinct experiment description appears once
exp.info <- c(exp.cols, sub.cols, lab.cols, stat.cols, div.cols)
exps <- unique(dat[, exp.info])

Subjects

Number of experiments for the top 10 categories of each subject column:

# Tabulate the five categorical subject columns (the last two hold sample sizes)
cat.cols <- sub.cols[1:5]
sub.tab <- setNames(
  lapply(cat.cols, function(col) tabCol(col, df = exps, n = 10)),
  cat.cols
)
sub.tab
## $`Host species`
## 
##           Homo sapiens           Mus musculus      Rattus norvegicus 
##                   4227                    570                    109 
##  Sus scrofa domesticus Canis lupus familiaris          Not specified 
##                     78                     73                     33 
##             Ovis aries             Bos taurus           Capra hircus 
##                     24                     18                     14 
##          Gallus gallus 
##                     14 
## 
## $`Location of subjects`
## 
##                    China United States of America                  Germany 
##                     1697                      939                      168 
##                    Japan                  Denmark                    Italy 
##                      164                      150                      128 
##                Australia              Netherlands              South Korea 
##                      115                      111                       96 
##                    Spain 
##                       96 
## 
## $`Body site`
## 
##                     Feces                    Saliva                    Vagina 
##                      3351                       292                       103 
##                     Mouth               Nasopharynx Subgingival dental plaque 
##                        67                        55                        49 
##              Skin of body            Uterine cervix                     Colon 
##                        47                        47                        39 
##                    Throat 
##                        37 
## 
## $Condition
## 
##           Parkinson's disease                          Diet 
##                           192                           149 
##                       Obesity             Colorectal cancer 
##                           140                           138 
## Treatment outcome measurement                      COVID-19 
##                           132                           116 
##                 Atopic eczema           Antimicrobial agent 
##                            90                            85 
##           Alzheimer's disease           Extraction protocol 
##                            79                            69 
## 
## $`Antibiotics exclusion`
## 
##                 3 months                  1 month                 2 months 
##                      577                      485                      241 
##                 6 months                  2 weeks                 3 Months 
##                      177                      130                       43 
##                 6 Months currently on antibiotics                  1 Month 
##                       40                       28                       27 
##                3 Months. 
##                       25

Proportions instead:

# Same tabulation of the five categorical subject columns, with perc = TRUE
cat.cols <- sub.cols[1:5]
sub.tab <- setNames(
  lapply(cat.cols, function(col) tabCol(col, df = exps, n = 10, perc = TRUE)),
  cat.cols
)
sub.tab
## $`Host species`
## 
##           Homo sapiens           Mus musculus      Rattus norvegicus 
##                0.79900                0.10800                0.02060 
##  Sus scrofa domesticus Canis lupus familiaris          Not specified 
##                0.01470                0.01380                0.00624 
##             Ovis aries             Bos taurus           Capra hircus 
##                0.00454                0.00340                0.00265 
##          Gallus gallus 
##                0.00265 
## 
## $`Location of subjects`
## 
##                    China United States of America                  Germany 
##                   0.3210                   0.1780                   0.0318 
##                    Japan                  Denmark                    Italy 
##                   0.0311                   0.0284                   0.0242 
##                Australia              Netherlands              South Korea 
##                   0.0218                   0.0210                   0.0182 
##                    Spain 
##                   0.0182 
## 
## $`Body site`
## 
##                     Feces                    Saliva                    Vagina 
##                   0.63400                   0.05530                   0.01950 
##                     Mouth               Nasopharynx Subgingival dental plaque 
##                   0.01270                   0.01040                   0.00927 
##              Skin of body            Uterine cervix                     Colon 
##                   0.00889                   0.00889                   0.00738 
##                    Throat 
##                   0.00700 
## 
## $Condition
## 
##           Parkinson's disease                          Diet 
##                        0.0374                        0.0290 
##                       Obesity             Colorectal cancer 
##                        0.0273                        0.0269 
## Treatment outcome measurement                      COVID-19 
##                        0.0257                        0.0226 
##                 Atopic eczema           Antimicrobial agent 
##                        0.0175                        0.0166 
##           Alzheimer's disease           Extraction protocol 
##                        0.0154                        0.0134 
## 
## $`Antibiotics exclusion`
## 
##                 3 months                  1 month                 2 months 
##                   0.2460                   0.2060                   0.1030 
##                 6 months                  2 weeks                 3 Months 
##                   0.0753                   0.0553                   0.0183 
##                 6 Months currently on antibiotics                  1 Month 
##                   0.0170                   0.0119                   0.0115 
##                3 Months. 
##                   0.0106

Sample size:

# Fixed-length numeric summary of one sample-size column.
# summary() only appends the "NA's" entry when NAs are present, so applying it
# column-wise yields a ragged list (instead of a matrix) as soon as one column
# has missing values and the other does not. Here the NA count is always
# reported, which keeps the result rectangular.
sizeSummary <- function(x) {
  x <- as.numeric(x)
  qq <- stats::quantile(x, na.rm = TRUE, names = FALSE)
  c("Min." = qq[1],
    "1st Qu." = qq[2],
    "Median" = qq[3],
    "Mean" = mean(x, na.rm = TRUE),
    "3rd Qu." = qq[4],
    "Max." = qq[5],
    "NA's" = sum(is.na(x)))
}

# Summarize the two sample-size columns (positions 6 and 7 of sub.cols);
# iterating over the data frame columns avoids the matrix coercion of apply()
ssize <- vapply(exps[, sub.cols[6:7]], sizeSummary, numeric(7))
ssize
##         Group 0 sample size Group 1 sample size
## Min.                 0.0000             1.00000
## 1st Qu.             12.0000            10.00000
## Median              24.0000            22.00000
## Mean               567.8895            68.43638
## 3rd Qu.             50.0000            43.00000
## Max.            308633.0000         10413.00000
## NA's               703.0000           700.00000

Lab analysis

Number of experiments for the top 10 categories for each lab analysis column:

# Tabulate each lab analysis column, one named list entry per column
lab.tab <- setNames(
  lapply(lab.cols, function(col) tabCol(col, df = exps, n = 10)),
  lab.cols
)
lab.tab
## $`Sequencing type`
## 
##        16S        WMS        PCR ITS / ITS2        18S 
##       4234        763         58         25          5 
## 
## $`16S variable region`
## 
##   34    4   12  123   45  345    3   56  678   23 
## 1693 1199  279  213  150  139   61   43   31   21 
## 
## $`Sequencing platform`
## 
##                    Illumina                    Roche454 
##                        4221                         328 
##                 Ion Torrent                     RT-qPCR 
##                         200                         105 
##                 MGISEQ-2000                   PacBio RS 
##                          37                          24 
##       BGISEQ-500 Sequencing           Mass spectrometry 
##                          18                          18 
## Human Intestinal Tract Chip           Illumina,Roche454 
##                          16                          11

Proportions instead:

# Same tabulation of the lab analysis columns, with perc = TRUE
lab.tab <- setNames(
  lapply(lab.cols, function(col) tabCol(col, df = exps, n = 10, perc = TRUE)),
  lab.cols
)
lab.tab
## $`Sequencing type`
## 
##        16S        WMS        PCR ITS / ITS2        18S 
##   0.833000   0.150000   0.011400   0.004920   0.000983 
## 
## $`16S variable region`
## 
##      34       4      12     123      45     345       3      56     678      23 
## 0.43100 0.30500 0.07100 0.05420 0.03820 0.03540 0.01550 0.01090 0.00789 0.00534 
## 
## $`Sequencing platform`
## 
##                    Illumina                    Roche454 
##                     0.84100                     0.06540 
##                 Ion Torrent                     RT-qPCR 
##                     0.03990                     0.02090 
##                 MGISEQ-2000                   PacBio RS 
##                     0.00737                     0.00478 
##       BGISEQ-500 Sequencing           Mass spectrometry 
##                     0.00359                     0.00359 
## Human Intestinal Tract Chip           Illumina,Roche454 
##                     0.00319                     0.00219

Statistical analysis

Number of experiments for the top 10 categories for each statistical analysis column:

# Tabulate each statistical analysis column, one named list entry per column
stat.tab <- setNames(
  lapply(stat.cols, function(col) tabCol(col, df = exps, n = 10)),
  stat.cols
)
stat.tab
## $`Statistical test`
## 
##                   LEfSe Mann-Whitney (Wilcoxon)                  DESeq2 
##                    1567                     825                     508 
##         Kruskall-Wallis                   ANOVA       Linear Regression 
##                     278                     238                     230 
##                  T-Test                MaAsLin2                   ANCOM 
##                     201                     182                     167 
##     Logistic Regression 
##                      85 
## 
## $`MHT correction`
## 
##  TRUE FALSE 
##  2661  2010 
## 
## $`Significance threshold`
## 
##  0.05   0.1  0.01 0.001  0.25   0.2  0.15 0.005     2 1e-04 
##  4426   330   108    34    34    25    19    15    15     6

Proportions instead:

# Same tabulation of the statistical analysis columns, with perc = TRUE
stat.tab <- setNames(
  lapply(stat.cols, function(col) tabCol(col, df = exps, n = 10, perc = TRUE)),
  stat.cols
)
stat.tab
## $`Statistical test`
## 
##                   LEfSe Mann-Whitney (Wilcoxon)                  DESeq2 
##                  0.3110                  0.1640                  0.1010 
##         Kruskall-Wallis                   ANOVA       Linear Regression 
##                  0.0551                  0.0472                  0.0456 
##                  T-Test                MaAsLin2                   ANCOM 
##                  0.0399                  0.0361                  0.0331 
##     Logistic Regression 
##                  0.0169 
## 
## $`MHT correction`
## 
##  TRUE FALSE 
##  0.57  0.43 
## 
## $`Significance threshold`
## 
##    0.05     0.1    0.01   0.001    0.25     0.2    0.15   0.005       2   1e-04 
## 0.87900 0.06560 0.02150 0.00675 0.00675 0.00497 0.00377 0.00298 0.00298 0.00119

Alpha diversity

Overall distribution:

# Count, for every alpha diversity column, how many experiments fall in each
# category. All columns are tabulated against the same set of categories:
# with a plain per-column table(), a category missing from a single column
# would make the per-column results differ in length and the overall result
# would degrade from a matrix to a list.
div.vals <- unlist(lapply(exps[, div.cols], as.character), use.names = FALSE)
# sort() drops NAs and matches the level ordering table() would use
div.levels <- sort(unique(div.vals))
countLevels <- function(x) {
  vapply(div.levels, function(lv) sum(x == lv, na.rm = TRUE), integer(1))
}
vapply(exps[, div.cols], countLevels, integer(length(div.levels)))
##           Pielou Shannon Chao1 Simpson Inverse Simpson Richness
## decreased     46     685   443     199              59      415
## increased     37     540   309     161              34      331
## unchanged    171    1903   835     678             200      954

Correspondence of Shannon diversity and Richness:

# Cross-tabulate Shannon (rows) against Richness (columns)
table(exps[["Shannon"]], exps[["Richness"]])
##            
##             decreased increased unchanged
##   decreased       219        13        48
##   increased         9       162        51
##   unchanged        93        88       775

Conditions with consistently increased or decreased alpha diversity:

# Per condition, count the experiments reporting increased / decreased /
# unchanged Shannon diversity.
# NOTE(review): the printed rows look ordered by the absolute difference
# between the increased and decreased counts -- confirm in tabDiv().
tabDiv(exps, "Shannon", "Condition")
##                                                           increased decreased
## Oxalate measurement                                               0        17
## COVID-19                                                          9        24
## Obesity                                                           3        16
## HIV infection                                                     1        12
## Smoking behaviour measurement                                     2        13
## Clostridium difficile infection                                  10         0
## Dry eye syndrome                                                  1        11
## Systemic inflammatory response syndrome                           5        15
## Treatment outcome measurement                                    11        21
## Cesarean section                                                  9         0
## Chronic constipation                                              9         0
## Human papilloma virus infection                                  10         1
## Gastric cancer                                                    6        14
## Polycystic ovary syndrome                                         0         8
## Ulcerative colitis                                                1         9
## Age                                                               5        12
## Aging                                                             0         7
## Alzheimer's disease                                               2         9
## Balanced reciprocal translocation                                 7         0
## Atopic eczema                                                     5        11
## Autism spectrum disorder                                          7         1
## Constipation                                                      6         0
## Epilepsy                                                          6         0
## Lung cancer                                                       2         8
## Parkinson's disease                                              20        14
## Response to allogeneic hematopoietic stem cell transplant         0         6
## Urinary tract infection                                           0         6
## Cervical cancer                                                   5         0
## Diet                                                             14        19
## Helminthiasis                                                     5         0
## Population                                                        2         7
## Response to transplant                                            8        13
## Spontaneous preterm birth                                        12         7
## Acute lymphoblastic leukemia                                      0         4
## Acute pancreatitis                                                0         4
## Colitis                                                           4         0
## Colorectal cancer                                                10        14
## Ethnic group                                                      3         7
## Food allergy                                                      6         2
## Human immunodeficiency virus                                      0         4
## Hypertension                                                      7         3
## Periodontitis                                                     5         1
## Pregnancy                                                         4         0
## Response to antibiotic                                            0         4
## Alcohol drinking                                                  3         0
## Atopic asthma                                                     4         1
## Birth measurement                                                 3         0
## Crohn's disease                                                   2         5
## Delivery method                                                   1         4
## Extraction protocol                                              23        26
## Irritable bowel syndrome                                          3         6
## Male homosexuality                                                3         0
## Oral lichen planus                                                3         0
## SARS-CoV-2-related disease                                        0         3
## Schizophrenia                                                     1         4
## Type II diabetes mellitus                                         2         5
## Age at assessment                                                 3         1
## Antimicrobial agent                                               8        10
## Breed                                                             0         2
## Cervical glandular intraepithelial neoplasia                      2         0
## Chronic kidney disease                                            2         4
## Cognitive impairment                                              1         3
## Depressive disorder                                               0         2
## Diarrhea                                                          6         4
## Eczema                                                            0         2
## Endometrial cancer                                                4         2
## Esophageal adenocarcinoma                                         0         2
## Iron biomarker measurement                                        1         3
## Milk allergic reaction                                            2         0
## Papillary thyroid carcinoma                                       2         0
## Phenylketonuria                                                   1         3
## Response to anti-tuberculosis drug                                8        10
## Response to antiviral drug                                        2         4
## Response to immunochemotherapy                                    3         1
## Sampling site                                                     3         1
## Smoking behavior                                                 10         8
## Squamous cell carcinoma                                           2         0
## Streptococcus pneumoniae                                          0         2
## Stroke                                                            2         0
## Traditional Chinese medicine type                                 2         4
## Acute respiratory failure                                         6         5
## Air pollution                                                     7         6
## Anxiety disorder                                                  0         1
## Breast cancer                                                     3         4
## Breastfeeding duration                                            2         3
## Chlamydia trachomatis                                             1         2
## Chronic fatigue syndrome                                          0         1
## Chronic hepatitis B virus infection                               0         1
## Chronic obstructive pulmonary disease                             3         2
## Diabetes mellitus                                                 0         1
## Endometriosis                                                     2         3
## Esophageal cancer                                                 1         2
## Gestational diabetes                                              1         0
## Hepatocellular carcinoma                                          0         1
## Hypertrophy                                                       1         0
## Multiple sclerosis                                                0         1
## Oral cavity carcinoma                                             0         1
## Oral squamous cell carcinoma                                      3         2
## Pancreatic carcinoma                                              0         1
## Psoriasis                                                         1         0
## Respiratory Syncytial Virus Infection                             0         1
## Response to diet                                                  3         2
## Response to vaccine                                               1         0
## Rheumatoid arthritis                                              5         4
## Sample treatment protocol                                         1         0
## Sampling time                                                     4         3
## Social interaction measurement                                    2         1
## Socioeconomic status                                              3         4
## Treatment                                                         1         0
## Type I diabetes mellitus                                          0         1
## Vesicle membrane                                                  3         2
## Vitiligo                                                          0         1
## Abnormal stool composition                                        0         0
## Acute myeloid leukemia                                            1         1
## Arthritis                                                         0         0
## Asthma                                                            1         1
## Biological sex                                                    1         1
## Bipolar disorder                                                  0         0
## Celiac disease                                                    0         0
## Clinical treatment                                                1         1
## Colorectal adenoma                                                2         2
## Contraception                                                     0         0
## COVID-19 symptoms measurement                                     0         0
## Diarrhea, Infantile                                               0         0
## Disease progression measurement                                   0         0
## Gastric adenocarcinoma                                            0         0
## Head and neck squamous cell carcinoma                             0         0
## Health study participation                                        2         2
## HIV mother to child transmission                                  0         0
## Lactose intolerance                                               0         0
## Lifestyle measurement                                             2         2
## Lung transplantation                                              2         2
## Obsessive-compulsive disorder                                     0         0
## Ovarian cancer                                                    3         3
## Phenotype                                                         2         2
## Psoriasis vulgaris                                                0         0
## Response to ketogenic diet                                        2         2
## Sample collection protocol                                        0         0
## SARS coronavirus                                                  0         0
## Simian immunodeficiency virus infection                           0         0
## Smoking cessation                                                 0         0
## Transplant outcome measurement                                    0         0
## Viral load                                                        0         0
## Waist circumference                                               0         0
##                                                           unchanged
## Oxalate measurement                                               0
## COVID-19                                                         42
## Obesity                                                          59
## HIV infection                                                    26
## Smoking behaviour measurement                                     0
## Clostridium difficile infection                                   1
## Dry eye syndrome                                                 11
## Systemic inflammatory response syndrome                           4
## Treatment outcome measurement                                    64
## Cesarean section                                                 16
## Chronic constipation                                             12
## Human papilloma virus infection                                  28
## Gastric cancer                                                   26
## Polycystic ovary syndrome                                        10
## Ulcerative colitis                                                3
## Age                                                               9
## Aging                                                             0
## Alzheimer's disease                                              26
## Balanced reciprocal translocation                                 0
## Atopic eczema                                                    72
## Autism spectrum disorder                                          8
## Constipation                                                      2
## Epilepsy                                                          5
## Lung cancer                                                       7
## Parkinson's disease                                              81
## Response to allogeneic hematopoietic stem cell transplant         0
## Urinary tract infection                                           8
## Cervical cancer                                                   5
## Diet                                                             63
## Helminthiasis                                                     8
## Population                                                       25
## Response to transplant                                           25
## Spontaneous preterm birth                                         5
## Acute lymphoblastic leukemia                                      5
## Acute pancreatitis                                                2
## Colitis                                                           1
## Colorectal cancer                                                48
## Ethnic group                                                      6
## Food allergy                                                     19
## Human immunodeficiency virus                                      6
## Hypertension                                                      6
## Periodontitis                                                    10
## Pregnancy                                                         2
## Response to antibiotic                                            8
## Alcohol drinking                                                  2
## Atopic asthma                                                     7
## Birth measurement                                                 4
## Crohn's disease                                                   5
## Delivery method                                                   2
## Extraction protocol                                              20
## Irritable bowel syndrome                                         20
## Male homosexuality                                                6
## Oral lichen planus                                                4
## SARS-CoV-2-related disease                                        4
## Schizophrenia                                                    14
## Type II diabetes mellitus                                        24
## Age at assessment                                                 1
## Antimicrobial agent                                              25
## Breed                                                             7
## Cervical glandular intraepithelial neoplasia                      9
## Chronic kidney disease                                            5
## Cognitive impairment                                              8
## Depressive disorder                                               4
## Diarrhea                                                          8
## Eczema                                                           10
## Endometrial cancer                                                3
## Esophageal adenocarcinoma                                         4
## Iron biomarker measurement                                        2
## Milk allergic reaction                                            5
## Papillary thyroid carcinoma                                      10
## Phenylketonuria                                                   4
## Response to anti-tuberculosis drug                               13
## Response to antiviral drug                                        5
## Response to immunochemotherapy                                    3
## Sampling site                                                     7
## Smoking behavior                                                 20
## Squamous cell carcinoma                                           4
## Streptococcus pneumoniae                                          4
## Stroke                                                           16
## Traditional Chinese medicine type                                 6
## Acute respiratory failure                                         0
## Air pollution                                                     3
## Anxiety disorder                                                  7
## Breast cancer                                                    16
## Breastfeeding duration                                            9
## Chlamydia trachomatis                                             2
## Chronic fatigue syndrome                                          4
## Chronic hepatitis B virus infection                               5
## Chronic obstructive pulmonary disease                             2
## Diabetes mellitus                                                 5
## Endometriosis                                                    14
## Esophageal cancer                                                 2
## Gestational diabetes                                             35
## Hepatocellular carcinoma                                          6
## Hypertrophy                                                       4
## Multiple sclerosis                                               17
## Oral cavity carcinoma                                             7
## Oral squamous cell carcinoma                                      3
## Pancreatic carcinoma                                              4
## Psoriasis                                                        12
## Respiratory Syncytial Virus Infection                             5
## Response to diet                                                 27
## Response to vaccine                                               5
## Rheumatoid arthritis                                              9
## Sample treatment protocol                                         4
## Sampling time                                                     5
## Social interaction measurement                                    6
## Socioeconomic status                                              8
## Treatment                                                         7
## Type I diabetes mellitus                                          6
## Vesicle membrane                                                  1
## Vitiligo                                                          4
## Abnormal stool composition                                        6
## Acute myeloid leukemia                                            4
## Arthritis                                                         6
## Asthma                                                           14
## Biological sex                                                    6
## Bipolar disorder                                                  5
## Celiac disease                                                    6
## Clinical treatment                                                5
## Colorectal adenoma                                               10
## Contraception                                                     5
## COVID-19 symptoms measurement                                     5
## Diarrhea, Infantile                                              27
## Disease progression measurement                                   5
## Gastric adenocarcinoma                                            8
## Head and neck squamous cell carcinoma                             8
## Health study participation                                       35
## HIV mother to child transmission                                  8
## Lactose intolerance                                               5
## Lifestyle measurement                                             8
## Lung transplantation                                              2
## Obsessive-compulsive disorder                                     5
## Ovarian cancer                                                   27
## Phenotype                                                        19
## Psoriasis vulgaris                                               14
## Response to ketogenic diet                                        3
## Sample collection protocol                                        9
## SARS coronavirus                                                  6
## Simian immunodeficiency virus infection                           5
## Smoking cessation                                                 6
## Transplant outcome measurement                                    5
## Viral load                                                        6
## Waist circumference                                               5
tabDiv(exps, "Shannon", "Condition", perc = TRUE)
##                                                           increased decreased
## Oxalate measurement                                           0.000     1.000
## COVID-19                                                      0.120     0.320
## Obesity                                                       0.038     0.210
## HIV infection                                                 0.026     0.310
## Smoking behaviour measurement                                 0.130     0.870
## Clostridium difficile infection                               0.910     0.000
## Dry eye syndrome                                              0.043     0.480
## Systemic inflammatory response syndrome                       0.210     0.620
## Treatment outcome measurement                                 0.110     0.220
## Cesarean section                                              0.360     0.000
## Chronic constipation                                          0.430     0.000
## Human papilloma virus infection                               0.260     0.026
## Gastric cancer                                                0.130     0.300
## Polycystic ovary syndrome                                     0.000     0.440
## Ulcerative colitis                                            0.077     0.690
## Age                                                           0.190     0.460
## Aging                                                         0.000     1.000
## Alzheimer's disease                                           0.054     0.240
## Balanced reciprocal translocation                             1.000     0.000
## Atopic eczema                                                 0.057     0.120
## Autism spectrum disorder                                      0.440     0.062
## Constipation                                                  0.750     0.000
## Epilepsy                                                      0.550     0.000
## Lung cancer                                                   0.120     0.470
## Parkinson's disease                                           0.170     0.120
## Response to allogeneic hematopoietic stem cell transplant     0.000     1.000
## Urinary tract infection                                       0.000     0.430
## Cervical cancer                                               0.500     0.000
## Diet                                                          0.150     0.200
## Helminthiasis                                                 0.380     0.000
## Population                                                    0.059     0.210
## Response to transplant                                        0.170     0.280
## Spontaneous preterm birth                                     0.500     0.290
## Acute lymphoblastic leukemia                                  0.000     0.440
## Acute pancreatitis                                            0.000     0.670
## Colitis                                                       0.800     0.000
## Colorectal cancer                                             0.140     0.190
## Ethnic group                                                  0.190     0.440
## Food allergy                                                  0.220     0.074
## Human immunodeficiency virus                                  0.000     0.400
## Hypertension                                                  0.440     0.190
## Periodontitis                                                 0.310     0.062
## Pregnancy                                                     0.670     0.000
## Response to antibiotic                                        0.000     0.330
## Alcohol drinking                                              0.600     0.000
## Atopic asthma                                                 0.330     0.083
## Birth measurement                                             0.430     0.000
## Crohn's disease                                               0.170     0.420
## Delivery method                                               0.140     0.570
## Extraction protocol                                           0.330     0.380
## Irritable bowel syndrome                                      0.100     0.210
## Male homosexuality                                            0.330     0.000
## Oral lichen planus                                            0.430     0.000
## SARS-CoV-2-related disease                                    0.000     0.430
## Schizophrenia                                                 0.053     0.210
## Type II diabetes mellitus                                     0.065     0.160
## Age at assessment                                             0.600     0.200
## Antimicrobial agent                                           0.190     0.230
## Breed                                                         0.000     0.220
## Cervical glandular intraepithelial neoplasia                  0.180     0.000
## Chronic kidney disease                                        0.180     0.360
## Cognitive impairment                                          0.083     0.250
## Depressive disorder                                           0.000     0.330
## Diarrhea                                                      0.330     0.220
## Eczema                                                        0.000     0.170
## Endometrial cancer                                            0.440     0.220
## Esophageal adenocarcinoma                                     0.000     0.330
## Iron biomarker measurement                                    0.170     0.500
## Milk allergic reaction                                        0.290     0.000
## Papillary thyroid carcinoma                                   0.170     0.000
## Phenylketonuria                                               0.120     0.380
## Response to anti-tuberculosis drug                            0.260     0.320
## Response to antiviral drug                                    0.180     0.360
## Response to immunochemotherapy                                0.430     0.140
## Sampling site                                                 0.270     0.091
## Smoking behavior                                              0.260     0.210
## Squamous cell carcinoma                                       0.330     0.000
## Streptococcus pneumoniae                                      0.000     0.330
## Stroke                                                        0.110     0.000
## Traditional Chinese medicine type                             0.170     0.330
## Acute respiratory failure                                     0.550     0.450
## Air pollution                                                 0.440     0.380
## Anxiety disorder                                              0.000     0.120
## Breast cancer                                                 0.130     0.170
## Breastfeeding duration                                        0.140     0.210
## Chlamydia trachomatis                                         0.200     0.400
## Chronic fatigue syndrome                                      0.000     0.200
## Chronic hepatitis B virus infection                           0.000     0.170
## Chronic obstructive pulmonary disease                         0.430     0.290
## Diabetes mellitus                                             0.000     0.170
## Endometriosis                                                 0.110     0.160
## Esophageal cancer                                             0.200     0.400
## Gestational diabetes                                          0.028     0.000
## Hepatocellular carcinoma                                      0.000     0.140
## Hypertrophy                                                   0.200     0.000
## Multiple sclerosis                                            0.000     0.056
## Oral cavity carcinoma                                         0.000     0.120
## Oral squamous cell carcinoma                                  0.380     0.250
## Pancreatic carcinoma                                          0.000     0.200
## Psoriasis                                                     0.077     0.000
## Respiratory Syncytial Virus Infection                         0.000     0.170
## Response to diet                                              0.094     0.062
## Response to vaccine                                           0.170     0.000
## Rheumatoid arthritis                                          0.280     0.220
## Sample treatment protocol                                     0.200     0.000
## Sampling time                                                 0.330     0.250
## Social interaction measurement                                0.220     0.110
## Socioeconomic status                                          0.200     0.270
## Treatment                                                     0.120     0.000
## Type I diabetes mellitus                                      0.000     0.140
## Vesicle membrane                                              0.500     0.330
## Vitiligo                                                      0.000     0.200
## Abnormal stool composition                                    0.000     0.000
## Acute myeloid leukemia                                        0.170     0.170
## Arthritis                                                     0.000     0.000
## Asthma                                                        0.062     0.062
## Biological sex                                                0.120     0.120
## Bipolar disorder                                              0.000     0.000
## Celiac disease                                                0.000     0.000
## Clinical treatment                                            0.140     0.140
## Colorectal adenoma                                            0.140     0.140
## Contraception                                                 0.000     0.000
## COVID-19 symptoms measurement                                 0.000     0.000
## Diarrhea, Infantile                                           0.000     0.000
## Disease progression measurement                               0.000     0.000
## Gastric adenocarcinoma                                        0.000     0.000
## Head and neck squamous cell carcinoma                         0.000     0.000
## Health study participation                                    0.051     0.051
## HIV mother to child transmission                              0.000     0.000
## Lactose intolerance                                           0.000     0.000
## Lifestyle measurement                                         0.170     0.170
## Lung transplantation                                          0.330     0.330
## Obsessive-compulsive disorder                                 0.000     0.000
## Ovarian cancer                                                0.091     0.091
## Phenotype                                                     0.087     0.087
## Psoriasis vulgaris                                            0.000     0.000
## Response to ketogenic diet                                    0.290     0.290
## Sample collection protocol                                    0.000     0.000
## SARS coronavirus                                              0.000     0.000
## Simian immunodeficiency virus infection                       0.000     0.000
## Smoking cessation                                             0.000     0.000
## Transplant outcome measurement                                0.000     0.000
## Viral load                                                    0.000     0.000
## Waist circumference                                           0.000     0.000
##                                                           unchanged
## Oxalate measurement                                           0.000
## COVID-19                                                      0.560
## Obesity                                                       0.760
## HIV infection                                                 0.670
## Smoking behaviour measurement                                 0.000
## Clostridium difficile infection                               0.091
## Dry eye syndrome                                              0.480
## Systemic inflammatory response syndrome                       0.170
## Treatment outcome measurement                                 0.670
## Cesarean section                                              0.640
## Chronic constipation                                          0.570
## Human papilloma virus infection                               0.720
## Gastric cancer                                                0.570
## Polycystic ovary syndrome                                     0.560
## Ulcerative colitis                                            0.230
## Age                                                           0.350
## Aging                                                         0.000
## Alzheimer's disease                                           0.700
## Balanced reciprocal translocation                             0.000
## Atopic eczema                                                 0.820
## Autism spectrum disorder                                      0.500
## Constipation                                                  0.250
## Epilepsy                                                      0.450
## Lung cancer                                                   0.410
## Parkinson's disease                                           0.700
## Response to allogeneic hematopoietic stem cell transplant     0.000
## Urinary tract infection                                       0.570
## Cervical cancer                                               0.500
## Diet                                                          0.660
## Helminthiasis                                                 0.620
## Population                                                    0.740
## Response to transplant                                        0.540
## Spontaneous preterm birth                                     0.210
## Acute lymphoblastic leukemia                                  0.560
## Acute pancreatitis                                            0.330
## Colitis                                                       0.200
## Colorectal cancer                                             0.670
## Ethnic group                                                  0.380
## Food allergy                                                  0.700
## Human immunodeficiency virus                                  0.600
## Hypertension                                                  0.380
## Periodontitis                                                 0.620
## Pregnancy                                                     0.330
## Response to antibiotic                                        0.670
## Alcohol drinking                                              0.400
## Atopic asthma                                                 0.580
## Birth measurement                                             0.570
## Crohn's disease                                               0.420
## Delivery method                                               0.290
## Extraction protocol                                           0.290
## Irritable bowel syndrome                                      0.690
## Male homosexuality                                            0.670
## Oral lichen planus                                            0.570
## SARS-CoV-2-related disease                                    0.570
## Schizophrenia                                                 0.740
## Type II diabetes mellitus                                     0.770
## Age at assessment                                             0.200
## Antimicrobial agent                                           0.580
## Breed                                                         0.780
## Cervical glandular intraepithelial neoplasia                  0.820
## Chronic kidney disease                                        0.450
## Cognitive impairment                                          0.670
## Depressive disorder                                           0.670
## Diarrhea                                                      0.440
## Eczema                                                        0.830
## Endometrial cancer                                            0.330
## Esophageal adenocarcinoma                                     0.670
## Iron biomarker measurement                                    0.330
## Milk allergic reaction                                        0.710
## Papillary thyroid carcinoma                                   0.830
## Phenylketonuria                                               0.500
## Response to anti-tuberculosis drug                            0.420
## Response to antiviral drug                                    0.450
## Response to immunochemotherapy                                0.430
## Sampling site                                                 0.640
## Smoking behavior                                              0.530
## Squamous cell carcinoma                                       0.670
## Streptococcus pneumoniae                                      0.670
## Stroke                                                        0.890
## Traditional Chinese medicine type                             0.500
## Acute respiratory failure                                     0.000
## Air pollution                                                 0.190
## Anxiety disorder                                              0.880
## Breast cancer                                                 0.700
## Breastfeeding duration                                        0.640
## Chlamydia trachomatis                                         0.400
## Chronic fatigue syndrome                                      0.800
## Chronic hepatitis B virus infection                           0.830
## Chronic obstructive pulmonary disease                         0.290
## Diabetes mellitus                                             0.830
## Endometriosis                                                 0.740
## Esophageal cancer                                             0.400
## Gestational diabetes                                          0.970
## Hepatocellular carcinoma                                      0.860
## Hypertrophy                                                   0.800
## Multiple sclerosis                                            0.940
## Oral cavity carcinoma                                         0.880
## Oral squamous cell carcinoma                                  0.380
## Pancreatic carcinoma                                          0.800
## Psoriasis                                                     0.920
## Respiratory Syncytial Virus Infection                         0.830
## Response to diet                                              0.840
## Response to vaccine                                           0.830
## Rheumatoid arthritis                                          0.500
## Sample treatment protocol                                     0.800
## Sampling time                                                 0.420
## Social interaction measurement                                0.670
## Socioeconomic status                                          0.530
## Treatment                                                     0.880
## Type I diabetes mellitus                                      0.860
## Vesicle membrane                                              0.170
## Vitiligo                                                      0.800
## Abnormal stool composition                                    1.000
## Acute myeloid leukemia                                        0.670
## Arthritis                                                     1.000
## Asthma                                                        0.880
## Biological sex                                                0.750
## Bipolar disorder                                              1.000
## Celiac disease                                                1.000
## Clinical treatment                                            0.710
## Colorectal adenoma                                            0.710
## Contraception                                                 1.000
## COVID-19 symptoms measurement                                 1.000
## Diarrhea, Infantile                                           1.000
## Disease progression measurement                               1.000
## Gastric adenocarcinoma                                        1.000
## Head and neck squamous cell carcinoma                         1.000
## Health study participation                                    0.900
## HIV mother to child transmission                              1.000
## Lactose intolerance                                           1.000
## Lifestyle measurement                                         0.670
## Lung transplantation                                          0.330
## Obsessive-compulsive disorder                                 1.000
## Ovarian cancer                                                0.820
## Phenotype                                                     0.830
## Psoriasis vulgaris                                            1.000
## Response to ketogenic diet                                    0.430
## Sample collection protocol                                    1.000
## SARS coronavirus                                              1.000
## Simian immunodeficiency virus infection                       1.000
## Smoking cessation                                             1.000
## Transplant outcome measurement                                1.000
## Viral load                                                    1.000
## Waist circumference                                           1.000
tabDiv(exps, "Richness", "Condition")
##                                                           increased decreased
## Treatment outcome measurement                                     5        21
## Diet                                                              4        19
## Helminthiasis                                                    13         0
## HIV infection                                                     3        15
## COVID-19                                                          9        20
## Chronic constipation                                              8         0
## Parkinson's disease                                              18        26
## Phenotype                                                         9         1
## Balanced reciprocal translocation                                 7         0
## Diarrhea                                                          8         1
## Head and neck squamous cell carcinoma                             0         7
## Polycystic ovary syndrome                                         0         7
## Increased intestinal transit time                                 6         0
## Response to allogeneic hematopoietic stem cell transplant         0         6
## Alcohol drinking                                                  5         0
## Antimicrobial agent                                               2         7
## Human immunodeficiency virus                                      1         6
## Human papilloma virus infection                                   7         2
## Acute lymphoblastic leukemia                                      5         1
## Age                                                               1         5
## Air pollution                                                     9         5
## Cervical glandular intraepithelial neoplasia                      4         0
## Dry eye syndrome                                                  0         4
## Endometriosis                                                     4         0
## Epilepsy                                                          4         0
## Periodontitis                                                     5         1
## Schizophrenia                                                     1         5
## Vesicle membrane                                                  5         1
## Atopic asthma                                                     4         1
## Delivery method                                                   4         1
## Food allergy                                                      0         3
## Gastric cancer                                                    5         8
## Gestational diabetes                                              3         6
## Hypertrophy                                                       3         0
## Iron biomarker measurement                                        1         4
## Irritable bowel syndrome                                          2         5
## Oral squamous cell carcinoma                                      1         4
## Asthma                                                            2         0
## Autism spectrum disorder                                          4         6
## Breast cancer                                                     2         0
## Colorectal cancer                                                 8        10
## Esophageal adenocarcinoma                                         0         2
## Hypertension                                                      1         3
## Phenylketonuria                                                   1         3
## Smoking behavior                                                  6         8
## Smoking status measurement                                        2         0
## Streptococcus pneumoniae                                          0         2
## Traditional Chinese medicine type                                 1         3
## Transplant outcome measurement                                    0         2
## Treatment                                                         1         3
## Ulcerative colitis                                                1         3
## Alzheimer's disease                                               6         5
## Atopic eczema                                                     2         1
## Breastfeeding duration                                            1         0
## Cesarean section                                                  3         2
## Colorectal adenoma                                                1         2
## Constipation                                                      4         5
## Crohn's disease                                                   2         3
## Endometrial cancer                                                1         2
## Health study participation                                        1         0
## Inflammatory bowel disease                                        2         3
## Lung cancer                                                       0         1
## Obesity                                                           8         7
## Obsessive-compulsive disorder                                     0         1
## Ovarian cancer                                                    1         0
## Psoriasis                                                         0         1
## Response to transplant                                            6         7
## Rheumatoid arthritis                                              3         4
## Sampling site                                                     1         2
## Socioeconomic status                                              2         1
## Transport                                                         1         2
## Type II diabetes mellitus                                         2         3
## Urinary tract infection                                           0         1
## Abnormal stool composition                                        0         0
## Chlamydia trachomatis                                             1         1
## Diarrhea, Infantile                                               0         0
## Ethnic group                                                      2         2
## HIV mother to child transmission                                  0         0
## Male homosexuality                                                0         0
## Multiple sclerosis                                                0         0
## Papillary thyroid carcinoma                                       0         0
## Physical activity                                                 2         2
## Psoriasis vulgaris                                                0         0
## Response to diet                                                  3         3
## Sample collection protocol                                        0         0
## Smoking cessation                                                 0         0
## Stroke                                                            2         2
## Viral load                                                        0         0
##                                                           unchanged
## Treatment outcome measurement                                    45
## Diet                                                             30
## Helminthiasis                                                     0
## HIV infection                                                    10
## COVID-19                                                         24
## Chronic constipation                                              6
## Parkinson's disease                                              28
## Phenotype                                                        11
## Balanced reciprocal translocation                                 0
## Diarrhea                                                          4
## Head and neck squamous cell carcinoma                             4
## Polycystic ovary syndrome                                         3
## Increased intestinal transit time                                 0
## Response to allogeneic hematopoietic stem cell transplant         0
## Alcohol drinking                                                  0
## Antimicrobial agent                                              10
## Human immunodeficiency virus                                      2
## Human papilloma virus infection                                  12
## Acute lymphoblastic leukemia                                      0
## Age                                                               1
## Air pollution                                                     6
## Cervical glandular intraepithelial neoplasia                      2
## Dry eye syndrome                                                  3
## Endometriosis                                                     8
## Epilepsy                                                          1
## Periodontitis                                                     6
## Schizophrenia                                                     8
## Vesicle membrane                                                  0
## Atopic asthma                                                     7
## Delivery method                                                   1
## Food allergy                                                      9
## Gastric cancer                                                   14
## Gestational diabetes                                             25
## Hypertrophy                                                       2
## Iron biomarker measurement                                        1
## Irritable bowel syndrome                                         13
## Oral squamous cell carcinoma                                      0
## Asthma                                                           10
## Autism spectrum disorder                                          0
## Breast cancer                                                     7
## Colorectal cancer                                                21
## Esophageal adenocarcinoma                                         4
## Hypertension                                                      6
## Phenylketonuria                                                   4
## Smoking behavior                                                  8
## Smoking status measurement                                        3
## Streptococcus pneumoniae                                          3
## Traditional Chinese medicine type                                 4
## Transplant outcome measurement                                    3
## Treatment                                                         6
## Ulcerative colitis                                                1
## Alzheimer's disease                                              24
## Atopic eczema                                                     6
## Breastfeeding duration                                            9
## Cesarean section                                                 10
## Colorectal adenoma                                               11
## Constipation                                                      8
## Crohn's disease                                                   2
## Endometrial cancer                                                3
## Health study participation                                       28
## Inflammatory bowel disease                                        0
## Lung cancer                                                      10
## Obesity                                                          19
## Obsessive-compulsive disorder                                     4
## Ovarian cancer                                                   30
## Psoriasis                                                         8
## Response to transplant                                           11
## Rheumatoid arthritis                                              1
## Sampling site                                                     2
## Socioeconomic status                                              2
## Transport                                                         3
## Type II diabetes mellitus                                        10
## Urinary tract infection                                           6
## Abnormal stool composition                                        6
## Chlamydia trachomatis                                             3
## Diarrhea, Infantile                                              27
## Ethnic group                                                      1
## HIV mother to child transmission                                  8
## Male homosexuality                                                9
## Multiple sclerosis                                               17
## Papillary thyroid carcinoma                                      12
## Physical activity                                                 1
## Psoriasis vulgaris                                               14
## Response to diet                                                  4
## Sample collection protocol                                        9
## Smoking cessation                                                 6
## Stroke                                                           17
## Viral load                                                        5
tabDiv(exps, "Richness", "Condition", perc = TRUE)
##                                                           increased decreased
## Treatment outcome measurement                                 0.070     0.300
## Diet                                                          0.075     0.360
## Helminthiasis                                                 1.000     0.000
## HIV infection                                                 0.110     0.540
## COVID-19                                                      0.170     0.380
## Chronic constipation                                          0.570     0.000
## Parkinson's disease                                           0.250     0.360
## Phenotype                                                     0.430     0.048
## Balanced reciprocal translocation                             1.000     0.000
## Diarrhea                                                      0.620     0.077
## Head and neck squamous cell carcinoma                         0.000     0.640
## Polycystic ovary syndrome                                     0.000     0.700
## Increased intestinal transit time                             1.000     0.000
## Response to allogeneic hematopoietic stem cell transplant     0.000     1.000
## Alcohol drinking                                              1.000     0.000
## Antimicrobial agent                                           0.110     0.370
## Human immunodeficiency virus                                  0.110     0.670
## Human papilloma virus infection                               0.330     0.095
## Acute lymphoblastic leukemia                                  0.830     0.170
## Age                                                           0.140     0.710
## Air pollution                                                 0.450     0.250
## Cervical glandular intraepithelial neoplasia                  0.670     0.000
## Dry eye syndrome                                              0.000     0.570
## Endometriosis                                                 0.330     0.000
## Epilepsy                                                      0.800     0.000
## Periodontitis                                                 0.420     0.083
## Schizophrenia                                                 0.071     0.360
## Vesicle membrane                                              0.830     0.170
## Atopic asthma                                                 0.330     0.083
## Delivery method                                               0.670     0.170
## Food allergy                                                  0.000     0.250
## Gastric cancer                                                0.190     0.300
## Gestational diabetes                                          0.088     0.180
## Hypertrophy                                                   0.600     0.000
## Iron biomarker measurement                                    0.170     0.670
## Irritable bowel syndrome                                      0.100     0.250
## Oral squamous cell carcinoma                                  0.200     0.800
## Asthma                                                        0.170     0.000
## Autism spectrum disorder                                      0.400     0.600
## Breast cancer                                                 0.220     0.000
## Colorectal cancer                                             0.210     0.260
## Esophageal adenocarcinoma                                     0.000     0.330
## Hypertension                                                  0.100     0.300
## Phenylketonuria                                               0.120     0.380
## Smoking behavior                                              0.270     0.360
## Smoking status measurement                                    0.400     0.000
## Streptococcus pneumoniae                                      0.000     0.400
## Traditional Chinese medicine type                             0.120     0.380
## Transplant outcome measurement                                0.000     0.400
## Treatment                                                     0.100     0.300
## Ulcerative colitis                                            0.200     0.600
## Alzheimer's disease                                           0.170     0.140
## Atopic eczema                                                 0.220     0.110
## Breastfeeding duration                                        0.100     0.000
## Cesarean section                                              0.200     0.130
## Colorectal adenoma                                            0.071     0.140
## Constipation                                                  0.240     0.290
## Crohn's disease                                               0.290     0.430
## Endometrial cancer                                            0.170     0.330
## Health study participation                                    0.034     0.000
## Inflammatory bowel disease                                    0.400     0.600
## Lung cancer                                                   0.000     0.091
## Obesity                                                       0.240     0.210
## Obsessive-compulsive disorder                                 0.000     0.200
## Ovarian cancer                                                0.032     0.000
## Psoriasis                                                     0.000     0.110
## Response to transplant                                        0.250     0.290
## Rheumatoid arthritis                                          0.380     0.500
## Sampling site                                                 0.200     0.400
## Socioeconomic status                                          0.400     0.200
## Transport                                                     0.170     0.330
## Type II diabetes mellitus                                     0.130     0.200
## Urinary tract infection                                       0.000     0.140
## Abnormal stool composition                                    0.000     0.000
## Chlamydia trachomatis                                         0.200     0.200
## Diarrhea, Infantile                                           0.000     0.000
## Ethnic group                                                  0.400     0.400
## HIV mother to child transmission                              0.000     0.000
## Male homosexuality                                            0.000     0.000
## Multiple sclerosis                                            0.000     0.000
## Papillary thyroid carcinoma                                   0.000     0.000
## Physical activity                                             0.400     0.400
## Psoriasis vulgaris                                            0.000     0.000
## Response to diet                                              0.300     0.300
## Sample collection protocol                                    0.000     0.000
## Smoking cessation                                             0.000     0.000
## Stroke                                                        0.095     0.095
## Viral load                                                    0.000     0.000
##                                                           unchanged
## Treatment outcome measurement                                  0.63
## Diet                                                           0.57
## Helminthiasis                                                  0.00
## HIV infection                                                  0.36
## COVID-19                                                       0.45
## Chronic constipation                                           0.43
## Parkinson's disease                                            0.39
## Phenotype                                                      0.52
## Balanced reciprocal translocation                              0.00
## Diarrhea                                                       0.31
## Head and neck squamous cell carcinoma                          0.36
## Polycystic ovary syndrome                                      0.30
## Increased intestinal transit time                              0.00
## Response to allogeneic hematopoietic stem cell transplant      0.00
## Alcohol drinking                                               0.00
## Antimicrobial agent                                            0.53
## Human immunodeficiency virus                                   0.22
## Human papilloma virus infection                                0.57
## Acute lymphoblastic leukemia                                   0.00
## Age                                                            0.14
## Air pollution                                                  0.30
## Cervical glandular intraepithelial neoplasia                   0.33
## Dry eye syndrome                                               0.43
## Endometriosis                                                  0.67
## Epilepsy                                                       0.20
## Periodontitis                                                  0.50
## Schizophrenia                                                  0.57
## Vesicle membrane                                               0.00
## Atopic asthma                                                  0.58
## Delivery method                                                0.17
## Food allergy                                                   0.75
## Gastric cancer                                                 0.52
## Gestational diabetes                                           0.74
## Hypertrophy                                                    0.40
## Iron biomarker measurement                                     0.17
## Irritable bowel syndrome                                       0.65
## Oral squamous cell carcinoma                                   0.00
## Asthma                                                         0.83
## Autism spectrum disorder                                       0.00
## Breast cancer                                                  0.78
## Colorectal cancer                                              0.54
## Esophageal adenocarcinoma                                      0.67
## Hypertension                                                   0.60
## Phenylketonuria                                                0.50
## Smoking behavior                                               0.36
## Smoking status measurement                                     0.60
## Streptococcus pneumoniae                                       0.60
## Traditional Chinese medicine type                              0.50
## Transplant outcome measurement                                 0.60
## Treatment                                                      0.60
## Ulcerative colitis                                             0.20
## Alzheimer's disease                                            0.69
## Atopic eczema                                                  0.67
## Breastfeeding duration                                         0.90
## Cesarean section                                               0.67
## Colorectal adenoma                                             0.79
## Constipation                                                   0.47
## Crohn's disease                                                0.29
## Endometrial cancer                                             0.50
## Health study participation                                     0.97
## Inflammatory bowel disease                                     0.00
## Lung cancer                                                    0.91
## Obesity                                                        0.56
## Obsessive-compulsive disorder                                  0.80
## Ovarian cancer                                                 0.97
## Psoriasis                                                      0.89
## Response to transplant                                         0.46
## Rheumatoid arthritis                                           0.12
## Sampling site                                                  0.40
## Socioeconomic status                                           0.40
## Transport                                                      0.50
## Type II diabetes mellitus                                      0.67
## Urinary tract infection                                        0.86
## Abnormal stool composition                                     1.00
## Chlamydia trachomatis                                          0.60
## Diarrhea, Infantile                                            1.00
## Ethnic group                                                   0.20
## HIV mother to child transmission                               1.00
## Male homosexuality                                             1.00
## Multiple sclerosis                                             1.00
## Papillary thyroid carcinoma                                    1.00
## Physical activity                                              0.20
## Psoriasis vulgaris                                             1.00
## Response to diet                                               0.40
## Sample collection protocol                                     1.00
## Smoking cessation                                              1.00
## Stroke                                                         0.81
## Viral load                                                     1.00

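Body sites with consistently increased or decreased alpha diversity (all body sites are listed; rows are ordered by the absolute difference between the increased and decreased counts, so the most consistent shifts appear first, and `perc = TRUE` reports row-wise proportions instead of counts):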

tabDiv(exps, "Shannon", "Body site")
##                                increased decreased unchanged
## Feces                                295       434      1153
## Vagina                                16         6        27
## Posterior fornix of vagina             9         0         7
## Skin of body                           7        15         8
## Uterine cervix                         9         1        20
## Uterine cervix,Vaginal fluid           9         1         0
## Buccal epithelium                      0         7         0
## Saliva                                36        43       122
## Subgingival dental plaque              9         3        20
## Buccal mucosa                          5         0         2
## Meconium                               5         0        10
## Space surrounding organism             2         7        13
## Stomach                                5        10         5
## Tongue                                 0         5        12
## Axilla skin                            5         1        11
## Tear film                              0         4         1
## Throat                                 0         4         9
## Caecum                                 1         4        22
## Cecum mucosa                           1         4         6
## Colorectal mucosa                      0         3         8
## Dental plaque                          0         3         3
## Duodenum                               0         3         5
## Nasopharynx                            3         6        32
## Skin of forearm                        3         0         3
## Bile                                   2         0         3
## Brachialis muscle                      0         2         3
## Conjunctiva                            1         3         6
## Conjunctival sac                       1         3         1
## Esophagus                              0         2         4
## Forelimb skin                          2         0         4
## Lung                                   2         4         7
## Mouth                                  8         6        28
## Rumen                                  2         0         4
## Supragingival dental plaque            1         3         1
## Thyroid gland                          2         0        10
## Uterus                                 3         1        11
## Blood                                  0         1         6
## Breast                                 3         4         4
## Breast,Milk                            1         0         4
## Bulbar conjunctiva                     3         2         5
## Colon                                  3         2        18
## Ileum                                  1         0        11
## Nasal cavity                           0         1         5
## Oropharynx                             1         2         3
## Small intestine                        3         4         1
## Vagina,Uterine cervix                  3         2         7
## Vaginal fluid                          1         0         8
## Bronchus                               0         0         6
## Endothelium of trachea                 3         3         0
## Internal cheek pouch                   0         0        11
## Intestine                              1         1        14
## Jejunum                                1         1         8
## Milk                                   0         0         9
## Oral cavity                            5         5         7
## Ovary                                  0         0         7
## Peritoneal fluid                       0         0         6
## Posterior wall of oropharynx           2         2         1
## Rectum                                 0         0        12
## Skin of abdomen                        0         0         5
## Sputum                                 6         6         8
## Surface of tongue                      2         2         3
## Urine                                  1         1        16
## Ventral side of post-anal tail         0         0         6
tabDiv(exps, "Shannon", "Body site", perc = TRUE)
##                                increased decreased unchanged
## Feces                              0.160     0.230      0.61
## Vagina                             0.330     0.120      0.55
## Posterior fornix of vagina         0.560     0.000      0.44
## Skin of body                       0.230     0.500      0.27
## Uterine cervix                     0.300     0.033      0.67
## Uterine cervix,Vaginal fluid       0.900     0.100      0.00
## Buccal epithelium                  0.000     1.000      0.00
## Saliva                             0.180     0.210      0.61
## Subgingival dental plaque          0.280     0.094      0.62
## Buccal mucosa                      0.710     0.000      0.29
## Meconium                           0.330     0.000      0.67
## Space surrounding organism         0.091     0.320      0.59
## Stomach                            0.250     0.500      0.25
## Tongue                             0.000     0.290      0.71
## Axilla skin                        0.290     0.059      0.65
## Tear film                          0.000     0.800      0.20
## Throat                             0.000     0.310      0.69
## Caecum                             0.037     0.150      0.81
## Cecum mucosa                       0.091     0.360      0.55
## Colorectal mucosa                  0.000     0.270      0.73
## Dental plaque                      0.000     0.500      0.50
## Duodenum                           0.000     0.380      0.62
## Nasopharynx                        0.073     0.150      0.78
## Skin of forearm                    0.500     0.000      0.50
## Bile                               0.400     0.000      0.60
## Brachialis muscle                  0.000     0.400      0.60
## Conjunctiva                        0.100     0.300      0.60
## Conjunctival sac                   0.200     0.600      0.20
## Esophagus                          0.000     0.330      0.67
## Forelimb skin                      0.330     0.000      0.67
## Lung                               0.150     0.310      0.54
## Mouth                              0.190     0.140      0.67
## Rumen                              0.330     0.000      0.67
## Supragingival dental plaque        0.200     0.600      0.20
## Thyroid gland                      0.170     0.000      0.83
## Uterus                             0.200     0.067      0.73
## Blood                              0.000     0.140      0.86
## Breast                             0.270     0.360      0.36
## Breast,Milk                        0.200     0.000      0.80
## Bulbar conjunctiva                 0.300     0.200      0.50
## Colon                              0.130     0.087      0.78
## Ileum                              0.083     0.000      0.92
## Nasal cavity                       0.000     0.170      0.83
## Oropharynx                         0.170     0.330      0.50
## Small intestine                    0.380     0.500      0.12
## Vagina,Uterine cervix              0.250     0.170      0.58
## Vaginal fluid                      0.110     0.000      0.89
## Bronchus                           0.000     0.000      1.00
## Endothelium of trachea             0.500     0.500      0.00
## Internal cheek pouch               0.000     0.000      1.00
## Intestine                          0.062     0.062      0.88
## Jejunum                            0.100     0.100      0.80
## Milk                               0.000     0.000      1.00
## Oral cavity                        0.290     0.290      0.41
## Ovary                              0.000     0.000      1.00
## Peritoneal fluid                   0.000     0.000      1.00
## Posterior wall of oropharynx       0.400     0.400      0.20
## Rectum                             0.000     0.000      1.00
## Skin of abdomen                    0.000     0.000      1.00
## Sputum                             0.300     0.300      0.40
## Surface of tongue                  0.290     0.290      0.43
## Urine                              0.056     0.056      0.89
## Ventral side of post-anal tail     0.000     0.000      1.00
# Tabulate, per body site, how often Richness was reported as increased,
# decreased or unchanged (counts). tabDiv is defined outside this section.
tabDiv(exps, "Richness", "Body site")
##                              increased decreased unchanged
## Feces                              183       246       598
## Mouth                               10         3         9
## Posterior fornix of vagina           8         1         2
## Uterine cervix                       8         1        11
## Oropharynx                           0         6         3
## Skin of body                         3         9         6
## Subgingival dental plaque            7         2        17
## Uterine cervix,Vaginal fluid         7         2         1
## Nasopharynx                          5         9        19
## Stomach                              4         8         3
## Cecum mucosa                         3         6         3
## Oral cavity                          2         5         0
## Small intestine                      1         4         0
## Throat                               2         5         5
## Colon                                6         4        10
## Ear                                  2         0         3
## Esophagus                            0         2         4
## Rectum                               0         2         7
## Saliva                              20        22        43
## Surface of tongue                    4         2         1
## Caecum                               2         3         1
## Ileum                                2         1         9
## Meconium                             2         3         7
## Milk                                 2         1         5
## Nasal cavity                         1         2        10
## Urine                                3         2        12
## Vagina                               3         2        11
## Vagina,Uterine cervix                1         0        11
## Breast                               1         1         7
## Bronchus                             0         0         6
## Conjunctiva                          1         1         5
## Internal cheek pouch                 0         0         7
## Intestine                            0         0        13
## Ovary                                0         0         7
## Peritoneal fluid                     0         0         6
## Thyroid gland                        0         0        12
## Tongue                               2         2         7
# Same tabulation with perc = TRUE: the printed values are per-body-site
# proportions instead of counts (each row adds up to roughly 1).
tabDiv(exps, "Richness", "Body site", perc = TRUE)
##                              increased decreased unchanged
## Feces                            0.180     0.240      0.58
## Mouth                            0.450     0.140      0.41
## Posterior fornix of vagina       0.730     0.091      0.18
## Uterine cervix                   0.400     0.050      0.55
## Oropharynx                       0.000     0.670      0.33
## Skin of body                     0.170     0.500      0.33
## Subgingival dental plaque        0.270     0.077      0.65
## Uterine cervix,Vaginal fluid     0.700     0.200      0.10
## Nasopharynx                      0.150     0.270      0.58
## Stomach                          0.270     0.530      0.20
## Cecum mucosa                     0.250     0.500      0.25
## Oral cavity                      0.290     0.710      0.00
## Small intestine                  0.200     0.800      0.00
## Throat                           0.170     0.420      0.42
## Colon                            0.300     0.200      0.50
## Ear                              0.400     0.000      0.60
## Esophagus                        0.000     0.330      0.67
## Rectum                           0.000     0.220      0.78
## Saliva                           0.240     0.260      0.51
## Surface of tongue                0.570     0.290      0.14
## Caecum                           0.330     0.500      0.17
## Ileum                            0.170     0.083      0.75
## Meconium                         0.170     0.250      0.58
## Milk                             0.250     0.120      0.62
## Nasal cavity                     0.077     0.150      0.77
## Urine                            0.180     0.120      0.71
## Vagina                           0.190     0.120      0.69
## Vagina,Uterine cervix            0.083     0.000      0.92
## Breast                           0.110     0.110      0.78
## Bronchus                         0.000     0.000      1.00
## Conjunctiva                      0.140     0.140      0.71
## Internal cheek pouch             0.000     0.000      1.00
## Intestine                        0.000     0.000      1.00
## Ovary                            0.000     0.000      1.00
## Peritoneal fluid                 0.000     0.000      1.00
## Thyroid gland                    0.000     0.000      1.00
## Tongue                           0.180     0.180      0.64

Signature stats

# Extract the microbe signatures from `dat`, requesting MetaPhlAn-style taxon
# identifiers. The result is used as a list-like object below (one element per
# signature, whose length is the signature size).
sigs <- bugsigdbr::getSignatures(dat, tax.id.type = "metaphlan")

Unique microbes

Number of unique microbes contained in the signatures:

# Pool the taxa of all signatures, count the distinct ones, and print the count.
nuniq <- length(unique(unlist(sigs)))
nuniq
## [1] 7521

Development of unique microbes captured over time:

Microbe set size distribution

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    2.00    5.00    8.27   10.00  470.00
# Histogram of signature sizes (number of microbes per signature).
gghistogram(
    lengths(sigs),
    bins = 30,
    xlab = "signature size",
    ylab = "number of signatures",
    fill = "#00AFBB",
    ggtheme = theme_bw()
)

# Number of signatures that contain more than four microbes.
sum(lengths(sigs) > 4)
## [1] 4187

Microbe co-occurrence

# Keep only the rows of `dat` whose body site is exactly "Feces".
dat.feces <- subset(dat, `Body site` == "Feces")
# Genus-level matrix for the fecal signatures; its rows and columns are indexed
# by genus name later on.
# NOTE(review): presumably counts how often two genera are reported in the same
# direction; confirm against the microbeHeatmap documentation.
cooc.mat <- microbeHeatmap(dat.feces, tax.level = "genus", anno = "genus")
## Loading required namespace: safe

# Same call with antagonistic = TRUE.
# NOTE(review): presumably counts genus pairs reported in opposite directions; confirm.
antag.mat <- microbeHeatmap(dat.feces, tax.level = "genus", anno = "genus", antagonistic = TRUE)

Get the top 20 genera most frequently reported as differentially abundant:

# Genus-level signatures for the fecal subset, identified by taxonomic name.
sigs.feces <- getSignatures(dat.feces, tax.id.type = "taxname",
                            tax.level = "genus", exact.tax.level = FALSE)
# Rank genera by how often they occur across all fecal signatures.
genus.counts <- sort(table(unlist(sigs.feces)), decreasing = TRUE)
# Keep the (at most) 20 most frequent genera. The index is clamped because a
# plain `[1:20]` pads the result with NA entries when fewer than 20 genera exist.
top20 <- genus.counts[seq_len(min(20L, length(genus.counts)))]
top20
## 
##      Bacteroides  Bifidobacterium Faecalibacterium      Clostridium 
##              899              594              588              542 
##          Blautia     Ruminococcus    Streptococcus        Roseburia 
##              517              493              469              464 
##  Parabacteroides       Prevotella    Lactobacillus        Alistipes 
##              460              445              429              402 
##      Akkermansia            Dorea      Coprococcus      Eubacterium 
##              348              329              320              310 
##      Veillonella     Enterococcus      Lachnospira      Escherichia 
##              281              275              275              261

Subset heatmaps to the top 20 genera most frequently reported as differentially abundant:

# Sanity check (printed, not enforced): every top-20 genus must be a row of the
# same-direction matrix before it is indexed by name.
all(names(top20) %in% rownames(cooc.mat))
## [1] TRUE
# Restrict rows and columns to the top-20 genera, in the order of `top20`.
cooc.mat <- cooc.mat[names(top20), names(top20)]
# Same check and subsetting for the opposite-direction matrix.
all(names(top20) %in% rownames(antag.mat))
## [1] TRUE
antag.mat <- antag.mat[names(top20), names(top20)]

Distinguish by direction of abundance change (increased / decreased):

# increased
# Fecal signatures whose taxa are reported as increased in Group 1.
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "increased")
sigs.feces.up <- getSignatures(sub.dat.feces, tax.id.type = "taxname",
                               tax.level = "genus", exact.tax.level = FALSE)
# Occurrences of the top-20 genera, in the order of `top20`. Tabulating a factor
# with fixed levels yields 0 for a genus that never occurs in this subset,
# whereas indexing a table by a missing name yields NA.
top20.up <- table(factor(unlist(sigs.feces.up), levels = names(top20)))
top20.up
## 
##      Bacteroides  Bifidobacterium Faecalibacterium      Clostridium 
##              425              291              215              280 
##          Blautia     Ruminococcus    Streptococcus        Roseburia 
##              230              199              302              151 
##  Parabacteroides       Prevotella    Lactobacillus        Alistipes 
##              243              208              271              170 
##      Akkermansia            Dorea      Coprococcus      Eubacterium 
##              211              136              117              131 
##      Veillonella     Enterococcus      Lachnospira      Escherichia 
##              176              198               94              180
# decreased
# Fecal signatures whose taxa are reported as decreased in Group 1.
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "decreased")
sigs.feces.down <- getSignatures(sub.dat.feces, tax.id.type = "taxname",
                                 tax.level = "genus", exact.tax.level = FALSE)
# Occurrences of the top-20 genera, in the order of `top20`. Tabulating a factor
# with fixed levels yields 0 for a genus that never occurs in this subset,
# whereas indexing a table by a missing name yields NA.
top20.down <- table(factor(unlist(sigs.feces.down), levels = names(top20)))
top20.down
## 
##      Bacteroides  Bifidobacterium Faecalibacterium      Clostridium 
##              467              295              367              257 
##          Blautia     Ruminococcus    Streptococcus        Roseburia 
##              281              289              159              307 
##  Parabacteroides       Prevotella    Lactobacillus        Alistipes 
##              211              234              156              226 
##      Akkermansia            Dorea      Coprococcus      Eubacterium 
##              133              187              197              173 
##      Veillonella     Enterococcus      Lachnospira      Escherichia 
##              102               75              175               75

Plot the heatmap

# annotation
# Two-column count matrix for the barplot annotation: column 1 holds the
# "increased" counts, column 2 the "decreased" counts, one row per top-20 genus.
# `ncol` is written out in full instead of relying on partial matching of `nc`.
mat <- matrix(cbind(top20.up, top20.down), ncol = 2)
# Barplot annotation; the fill and border colours follow the column order of `mat`.
bp <- ComplexHeatmap::anno_barplot(mat, gp = gpar(fill = c("#D55E00", "#0072B2"),
                                                  col = c("#D55E00", "#0072B2")),
                                   height = unit(2, "cm"))
# The annotation name "Abundance in Group 1" is the handle used by
# decorate_annotation() once the heatmap list has been drawn.
banno <- ComplexHeatmap::HeatmapAnnotation(`Abundance in Group 1` = bp)

# Manual legend for the barplot colours, passed to draw() as an extra legend.
lgd_list <- list(
    Legend(labels = c("increased", "decreased"),
           title = "Abundance in Group 1",
           type = "grid",
           legend_gp = gpar(col = c("#D55E00", "#0072B2"), fill = c("#D55E00", "#0072B2"))))
                                            
# same direction
# lcm <- sweep(cooc.mat, 2, matrixStats::colMaxs(cooc.mat), FUN = "/")
# The column maximum has to be dampened a bit here; otherwise the 100% self
# co-occurrence on the diagonal takes up a large fraction of the colour scale.
# `sec` holds the second-largest value of each column.
sec <- apply(cooc.mat, 2, function(x) sort(x, decreasing = TRUE)[2])
cooc.mat2 <- cooc.mat
# Cap every diagonal entry at 1.4 times the second-largest value of its column.
# This is vectorised over the diagonal; unlike `for (i in 1:ncol(...))` it does
# not iterate over c(1, 0) when the matrix has no columns.
diag(cooc.mat2) <- pmin(diag(cooc.mat2), 1.4 * sec)
# Rescale each column by its (dampened) maximum so that values lie in [0, 1].
lcm <- sweep(cooc.mat2, 2, matrixStats::colMaxs(cooc.mat2), FUN = "/")
col <- circlize::colorRamp2(c(0,1), c("#EEEEEE", "red"))
ht1 <- ComplexHeatmap::Heatmap(lcm,
                               col = col,
                               name = "Relative frequency (top)",
                               cluster_columns = FALSE,
                               row_km = 3,
                               row_title = "same direction",
                               column_names_rot = 45,
                               row_names_gp = gpar(fontsize = 8),
                               column_names_gp = gpar(fontsize = 8))

# opposite direction
# Divide every column of the opposite-direction matrix by its column maximum.
acm <- sweep(antag.mat,
             MARGIN = 2,
             STATS = matrixStats::colMaxs(antag.mat),
             FUN = "/")
# Colour ramp running from light grey at 0 to blue at 1.
col <- circlize::colorRamp2(breaks = c(0, 1), colors = c("#EEEEEE", "blue"))
# Lower heatmap: rows split into 3 k-means groups, columns kept in input order.
ht2 <- ComplexHeatmap::Heatmap(
    acm,
    name = "Relative frequency (bottom)",
    col = col,
    row_title = "opposite direction",
    row_km = 3,
    cluster_columns = FALSE,
    column_names_rot = 45,
    column_names_gp = gpar(fontsize = 8),
    row_names_gp = gpar(fontsize = 8)
)

# phylum
# Genus-level signatures as MetaPhlAn lineage strings, so that the phylum of
# each genus can be extracted from the lineage.
sfp <- bugsigdbr::getSignatures(dat.feces, tax.id.type = "metaphlan",
                                tax.level = "genus", exact.tax.level = FALSE)
lineage.counts <- sort(table(unlist(sfp)), decreasing = TRUE)
# At most 20 lineages (a plain `[1:20]` would pad with NA if fewer exist).
sfp20 <- lineage.counts[seq_len(min(20L, length(lineage.counts)))]
uanno <- bugsigdbr::extractTaxLevel(names(sfp20),
                                    tax.id.type = "taxname",
                                    tax.level = "phylum",
                                    exact.tax.level = FALSE)
# `sfp20` is ranked independently of `top20` (ties may be ordered differently),
# so the phyla are aligned to `top20` by genus name rather than by position.
sfp20.genus <- bugsigdbr::extractTaxLevel(names(sfp20),
                                          tax.id.type = "taxname",
                                          tax.level = "genus",
                                          exact.tax.level = FALSE)
top20.idx <- match(names(top20), sfp20.genus)
if (length(uanno) != length(sfp20.genus) || anyNA(top20.idx)) {
    stop("phylum annotation cannot be aligned with the top 20 genera",
         call. = FALSE)
}
uanno <- uanno[top20.idx]
phyla.grid <- seq_along(unique(uanno))
# Not referenced again in this section; kept from the original code.
panno <- ComplexHeatmap::HeatmapAnnotation(phylum = uanno)

# One-row character matrix: one phylum label per top-20 genus (column).
uanno <- matrix(uanno, nrow = 1)
colnames(uanno) <- names(top20)
# One colour per distinct phylum; the palette provides five colours.
pcols <- c("#CC79A7", "#F0E442", "#009E73", "#56B4E9", "#E69F00")
uanno <- ComplexHeatmap::Heatmap(uanno, name = "Phylum",
                                 col = pcols[phyla.grid],
                                 cluster_columns = FALSE,
                                 column_names_rot = 45,
                                 column_names_gp = gpar(fontsize = 8))

# put everything together
# Vertical stack: same-direction heatmap, barplot annotation,
# opposite-direction heatmap, phylum strip.
ht_list <- ht1 %v% banno %v% ht2 %v% uanno
ComplexHeatmap::draw(ht_list,
                     merge_legend = TRUE,
                     annotation_legend_list = lgd_list)

# Draw into the viewport of the barplot annotation after the plot exists.
decorate_annotation("Abundance in Group 1", {
    # Rotated axis title to the left of the barplot.
    grid.text("# signatures",
              x = unit(-1, "cm"),
              rot = 90,
              just = "bottom",
              gp = gpar(fontsize = 8))
    # Six asterisks at fixed, hand-placed (x, y) positions given in cm.
    grid.text(rep("*", 6),
              x = unit(c(2.45, 5.18, 6.55, 8.6, 10, 10.7), "cm"),
              y = unit(c(1.2, 1, 0.95, 0.85, 0.7, 0.7), "cm"))
})

Signature similarity

Jaccard index

Inspect signature similarity for signatures from stomach samples based on Jaccard index:

# Keep only the rows of `dat` whose body site is exactly "Stomach".
stomachsub <- subset(dat, `Body site` == "Stomach")
# Signatures of the stomach subset, using getSignatures' default arguments.
sigsub <- bugsigdbr::getSignatures(stomachsub)
# Pairwise similarity between the signatures; the result is turned into a
# distance with `1 - pair.jsim` below, so it is treated as a similarity matrix.
pair.jsim <- calcJaccardSimilarity(sigsub)

Create a dendrogram of Jaccard dissimilarities (1.0 means no overlap, 0.0 means identical signatures).

# Convert the similarities into dissimilarities (1 - similarity) and store
# them as a `dist` object.
jdist <- as.dist(1 - pair.jsim)
# Hierarchical clustering with hclust's default settings, plotted as a dendrogram.
plot(hclust(jdist))