Reading data
Get bulk export from bugsigdb.org:
# Download the BugSigDB export into `full.dat` and report its dimensions.
# version = "devel" and cache = FALSE are passed straight to bugsigdbr;
# presumably this requests the development export and bypasses any locally
# cached copy -- confirm against the importBugSigDB documentation.
full.dat <- bugsigdbr::importBugSigDB(version = "devel", cache = FALSE)
dim(full.dat)
## [1] 8972 50
# List the column names of the export; later chunks select columns by
# these exact strings.
colnames(full.dat)
## [1] "BSDB ID" "Study"
## [3] "Study design" "PMID"
## [5] "DOI" "URL"
## [7] "Authors list" "Title"
## [9] "Journal" "Year"
## [11] "Keywords" "Experiment"
## [13] "Location of subjects" "Host species"
## [15] "Body site" "UBERON ID"
## [17] "Condition" "EFO ID"
## [19] "Group 0 name" "Group 1 name"
## [21] "Group 1 definition" "Group 0 sample size"
## [23] "Group 1 sample size" "Antibiotics exclusion"
## [25] "Sequencing type" "16S variable region"
## [27] "Sequencing platform" "Statistical test"
## [29] "Significance threshold" "MHT correction"
## [31] "LDA Score above" "Matched on"
## [33] "Confounders controlled for" "Pielou"
## [35] "Shannon" "Chao1"
## [37] "Simpson" "Inverse Simpson"
## [39] "Richness" "Signature page name"
## [41] "Source" "Curated date"
## [43] "Curator" "Revision editor"
## [45] "Description" "Abundance in Group 1"
## [47] "MetaPhlAn taxon names" "NCBI Taxonomy IDs"
## [49] "State" "Reviewer"
Stripping ill-formed entries:
Curation output
Number of papers and signatures curated:
## [1] 1441
# Number of rows in the export (reported here as the number of signatures).
nrow(full.dat)
## [1] 8972
Publication date of the curated papers:
# Drop missing entries from `pmids` before the lookup. `pmids` itself is
# created outside this section.
pmids <- pmids[!is.na(pmids)]
# pmid2pubyear() is defined outside this section; presumably it returns one
# publication year per PMID -- confirm against its definition.
pubyear <- pmid2pubyear(pmids)
head(cbind(pmids, pubyear))
# Count papers per year, then reorder the counts numerically by year
# (the table names are strings, hence the as.integer() conversion).
tab <- table(pubyear)
tab <- tab[order(as.integer(names(tab)))]
df <- data.frame(year = names(tab), papers = as.integer(tab))
# Bar plot of the number of papers per publication year.
ggbarplot(df, x = "year", y = "papers",
label = TRUE, fill = "steelblue",
ggtheme = theme_bw())
Stripping empty signatures:
# Flag rows whose taxon-name entry / NCBI-ID entry holds at least one element.
ind1 <- lengths(full.dat[["MetaPhlAn taxon names"]]) >= 1L
ind2 <- lengths(full.dat[["NCBI Taxonomy IDs"]]) >= 1L
# Keep only the rows that pass both checks.
# NOTE(review): the rendered output reports the same row count before and
# after this filter, so no row was removed -- confirm that empty signatures
# are encoded as zero-length entries rather than as NA.
dat <- full.dat[ind1 & ind2, , drop = FALSE]
nrow(dat)
## [1] 8972
Papers containing only empty UP and DOWN signatures (under curation?):
## numeric(0)
Progress over time:
# Parse the curated dates as day-month-year and store them back as text
# (as.character() on a Date yields "YYYY-MM-DD").
dat[["Curated date"]] <- as.character(lubridate::dmy(dat[["Curated date"]]))
# plotProgressOverTime() is defined outside this section; it is called once
# with its defaults and once with diff = TRUE.
plotProgressOverTime(dat)
plotProgressOverTime(dat, diff = TRUE)
Stratified by curator:
# stratifyByCurator() and plotCuratorStats() are defined outside this section.
# `npc` holds the per-curator breakdown that the plot call consumes
# (presumably counts per curator -- confirm against the helper definitions).
npc <- stratifyByCurator(dat)
plotCuratorStats(dat, npc)
Number of complete and revised signatures: this section is turned off because its output has become too long.
Study stats
Study design
# Group study identifiers by their study-design string, reduce each group to
# its distinct studies, and show the number of studies per design in
# ascending order.
spl <- split(x = dat[["Study"]], f = dat[["Study design"]])
sds <- lapply(X = spl, FUN = unique)
sort(lengths(sds))
## case-control,prospective cohort
## 1
## cross-sectional observational, not case-control,prospective cohort
## 2
## laboratory experiment,meta-analysis
## 2
## prospective cohort,time series / longitudinal observational
## 2
## case-control,time series / longitudinal observational
## 3
## cross-sectional observational, not case-control,laboratory experiment
## 4
## case-control,meta-analysis
## 5
## laboratory experiment,time series / longitudinal observational
## 5
## case-control,laboratory experiment
## 6
## meta-analysis
## 18
## randomized controlled trial
## 66
## prospective cohort
## 113
## time series / longitudinal observational
## 130
## laboratory experiment
## 156
## cross-sectional observational, not case-control
## 387
## case-control
## 555
Experiment stats
Columns of the full dataset that describe experiments:
# Columns that identify an experiment
exp.cols <- c("Study", "Experiment")

# Columns describing the subjects; positions 6 and 7 are the two
# sample-size columns, which later chunks pick out by index
sub.cols <- c(
  "Host species", "Location of subjects", "Body site", "Condition",
  "Antibiotics exclusion", "Group 0 sample size", "Group 1 sample size"
)

# Columns describing the lab analysis
lab.cols <- c("Sequencing type", "16S variable region", "Sequencing platform")

# Columns describing the statistical analysis
stat.cols <- c("Statistical test", "MHT correction", "Significance threshold")

# Columns holding the alpha diversity measures
div.cols <- c(
  "Pielou", "Shannon", "Chao1", "Simpson", "Inverse Simpson", "Richness"
)
Restrict dataset to experiment information:
Subjects
Number of experiments for the top 10 categories for each subjects column:
## $`Host species`
##
## Homo sapiens Mus musculus Rattus norvegicus
## 4350 585 112
## Sus scrofa domesticus Canis lupus familiaris Ovis aries
## 88 73 24
## Not specified Bos taurus Capra hircus
## 23 18 14
## Gallus gallus
## 14
##
## $`Location of subjects`
##
## China United States of America Germany
## 1799 947 173
## Japan Denmark Italy
## 166 150 129
## Australia Netherlands South Korea
## 112 112 101
## Spain
## 96
##
## $`Body site`
##
## Feces Saliva Vagina
## 3477 292 103
## Mouth Nasopharynx Subgingival dental plaque
## 67 55 49
## Skin of body Uterine cervix Colon
## 47 47 46
## Caecum
## 41
##
## $Condition
##
## Parkinson's disease Diet
## 197 149
## Obesity Treatment outcome measurement
## 141 136
## Colorectal cancer COVID-19
## 134 116
## Atopic eczema Antimicrobial agent
## 90 85
## Alzheimer's disease Response to transplant
## 77 73
##
## $`Antibiotics exclusion`
##
## 3 months 1 month 2 months
## 589 497 241
## 6 months 2 weeks 3 Months
## 180 130 44
## 6 Months 1 Month currently on antibiotics
## 40 28 28
## 3 Months.
## 25
Proportions instead:
# Tabulate the first five subject columns with perc = TRUE. Iterating over a
# self-named vector makes lapply() return the list already named by column.
# tabCol() is defined outside this section.
sub.tab <- lapply(
  setNames(nm = sub.cols[1:5]),
  tabCol,
  df = exps, n = 10, perc = TRUE
)
sub.tab
## $`Host species`
##
## Homo sapiens Mus musculus Rattus norvegicus
## 0.80000 0.10800 0.02060
## Sus scrofa domesticus Canis lupus familiaris Ovis aries
## 0.01620 0.01340 0.00441
## Not specified Bos taurus Capra hircus
## 0.00423 0.00331 0.00257
## Gallus gallus
## 0.00257
##
## $`Location of subjects`
##
## China United States of America Germany
## 0.3310 0.1740 0.0318
## Japan Denmark Italy
## 0.0305 0.0276 0.0237
## Australia Netherlands South Korea
## 0.0206 0.0206 0.0186
## Spain
## 0.0177
##
## $`Body site`
##
## Feces Saliva Vagina
## 0.63900 0.05370 0.01890
## Mouth Nasopharynx Subgingival dental plaque
## 0.01230 0.01010 0.00901
## Skin of body Uterine cervix Colon
## 0.00864 0.00864 0.00846
## Caecum
## 0.00754
##
## $Condition
##
## Parkinson's disease Diet
## 0.0376 0.0284
## Obesity Treatment outcome measurement
## 0.0269 0.0260
## Colorectal cancer COVID-19
## 0.0256 0.0221
## Atopic eczema Antimicrobial agent
## 0.0172 0.0162
## Alzheimer's disease Response to transplant
## 0.0147 0.0139
##
## $`Antibiotics exclusion`
##
## 3 months 1 month 2 months
## 0.2470 0.2090 0.1010
## 6 months 2 weeks 3 Months
## 0.0756 0.0546 0.0185
## 6 Months 1 Month currently on antibiotics
## 0.0168 0.0118 0.0118
## 3 Months.
## 0.0105
Sample size:
# Summarise the two sample-size columns (sub.cols[6:7]). The data frame
# subset is converted to a matrix explicitly, which is the coercion apply()
# would otherwise perform itself.
ssize <- apply(as.matrix(exps[, sub.cols[6:7]]), MARGIN = 2, FUN = summary)
ssize
## Group 0 sample size Group 1 sample size
## Min. 0.000 1.00000
## 1st Qu. 12.000 10.00000
## Median 24.000 22.00000
## Mean 559.801 68.17235
## 3rd Qu. 50.000 43.00000
## Max. 308633.000 10413.00000
## NA's 775.000 773.00000
Lab analysis
Number of experiments for the top 10 categories for each lab analysis column:
## $`Sequencing type`
##
## 16S WMS PCR ITS / ITS2 18S
## 4297 787 68 25 5
##
## $`16S variable region`
##
## 34 4 12 123 45 345 3 56 678 23
## 1730 1205 279 214 153 140 61 45 31 21
##
## $`Sequencing platform`
##
## Illumina Roche454
## 4321 328
## Ion Torrent RT-qPCR
## 200 105
## MGISEQ-2000 PacBio RS
## 37 24
## BGISEQ-500 Sequencing Mass spectrometry
## 18 18
## Human Intestinal Tract Chip Illumina,Roche454
## 16 11
Proportions instead:
# Tabulate every lab-analysis column with perc = TRUE. Iterating over a
# self-named vector makes lapply() return the list already named by column.
# tabCol() is defined outside this section.
lab.tab <- lapply(
  setNames(nm = lab.cols),
  tabCol,
  df = exps, n = 10, perc = TRUE
)
lab.tab
## $`Sequencing type`
##
## 16S WMS PCR ITS / ITS2 18S
## 0.829000 0.152000 0.013100 0.004820 0.000965
##
## $`16S variable region`
##
## 34 4 12 123 45 345 3 56 678 23
## 0.43500 0.30300 0.07010 0.05380 0.03840 0.03520 0.01530 0.01130 0.00779 0.00528
##
## $`Sequencing platform`
##
## Illumina Roche454
## 0.84400 0.06410
## Ion Torrent RT-qPCR
## 0.03910 0.02050
## MGISEQ-2000 PacBio RS
## 0.00723 0.00469
## BGISEQ-500 Sequencing Mass spectrometry
## 0.00352 0.00352
## Human Intestinal Tract Chip Illumina,Roche454
## 0.00313 0.00215
Statistical analysis
Number of experiments for the top 10 categories for each statistical analysis column:
## $`Statistical test`
##
## LEfSe Mann-Whitney (Wilcoxon) DESeq2
## 1612 832 509
## Kruskall-Wallis Linear Regression ANOVA
## 282 253 238
## T-Test MaAsLin2 ANCOM
## 202 182 167
## Spearman Correlation
## 90
##
## $`MHT correction`
##
## TRUE FALSE
## 2693 2080
##
## $`Significance threshold`
##
## 0.05 0.1 0.01 0.001 0.25 0.2 0.15 0.005 2 1e-04
## 4520 332 108 34 34 25 19 15 15 6
Proportions instead:
# Tabulate every statistical-analysis column with perc = TRUE. Iterating over
# a self-named vector makes lapply() return the list already named by column.
# tabCol() is defined outside this section.
stat.tab <- lapply(
  setNames(nm = stat.cols),
  tabCol,
  df = exps, n = 10, perc = TRUE
)
stat.tab
## $`Statistical test`
##
## LEfSe Mann-Whitney (Wilcoxon) DESeq2
## 0.3130 0.1620 0.0989
## Kruskall-Wallis Linear Regression ANOVA
## 0.0548 0.0492 0.0462
## T-Test MaAsLin2 ANCOM
## 0.0393 0.0354 0.0325
## Spearman Correlation
## 0.0175
##
## $`MHT correction`
##
## TRUE FALSE
## 0.564 0.436
##
## $`Significance threshold`
##
## 0.05 0.1 0.01 0.001 0.25 0.2 0.15 0.005 2 1e-04
## 0.88100 0.06470 0.02100 0.00662 0.00662 0.00487 0.00370 0.00292 0.00292 0.00117
Alpha diversity
Overall distribution:
# Frequency table of each alpha diversity column. The data frame subset is
# converted to a matrix explicitly, which is the coercion apply() would
# otherwise perform itself.
apply(as.matrix(exps[, div.cols]), MARGIN = 2, FUN = table)
## Pielou Shannon Chao1 Simpson Inverse Simpson Richness
## decreased 46 701 450 207 62 421
## increased 37 546 312 166 34 337
## unchanged 172 1924 851 682 204 964
Correspondence of Shannon diversity and Richness:
# Cross-tabulate the Shannon outcome (rows) against the Richness outcome
# (columns) over all experiments.
table(exps[["Shannon"]], exps[["Richness"]])
##
## decreased increased unchanged
## decreased 221 13 51
## increased 9 167 52
## unchanged 95 87 781
Conditions with consistently increased or decreased alpha diversity:
# Per-condition counts of experiments with increased / decreased / unchanged
# Shannon diversity. tabDiv() is defined outside this section; in the printed
# output the rows appear ordered by the absolute difference between the
# increased and decreased counts.
tabDiv(exps, "Shannon", "Condition")
## increased decreased
## Oxalate measurement 0 17
## COVID-19 9 24
## Obesity 3 16
## Polycystic ovary syndrome 0 13
## HIV infection 1 12
## Smoking behaviour measurement 2 13
## Clostridium difficile infection 10 0
## Dry eye syndrome 1 11
## Systemic inflammatory response syndrome 5 15
## Treatment outcome measurement 11 21
## Chronic constipation 9 0
## Human papilloma virus infection 10 1
## Alzheimer's disease 2 10
## Gastric cancer 6 14
## Ulcerative colitis 1 9
## Age 5 12
## Aging 0 7
## Balanced reciprocal translocation 7 0
## Atopic eczema 5 11
## Autism spectrum disorder 7 1
## Cesarean section 6 0
## Epilepsy 6 0
## Lung cancer 2 8
## Parkinson's disease 20 14
## Response to allogeneic hematopoietic stem cell transplant 0 6
## Response to transplant 9 15
## Urinary tract infection 0 6
## Cervical cancer 5 0
## Diet 14 19
## Helminthiasis 5 0
## Population 2 7
## Spontaneous preterm birth 12 7
## Acute lymphoblastic leukemia 0 4
## Acute pancreatitis 0 4
## Colitis 4 0
## Colorectal cancer 10 14
## Ethnic group 3 7
## Food allergy 6 2
## Human immunodeficiency virus 0 4
## Hypertension 7 3
## Periodontitis 5 1
## Pregnancy 4 0
## Response to antibiotic 0 4
## Alcohol drinking 3 0
## Atopic asthma 4 1
## Birth measurement 3 0
## Constipation 6 3
## Delivery method 1 4
## Extraction protocol 23 26
## Male homosexuality 3 0
## Oral lichen planus 3 0
## SARS-CoV-2-related disease 0 3
## Schizophrenia 1 4
## Type II diabetes mellitus 2 5
## Age at assessment 3 1
## Antimicrobial agent 8 10
## Breed 0 2
## Cervical glandular intraepithelial neoplasia 2 0
## Chronic kidney disease 2 4
## Cognitive impairment 1 3
## Crohn's disease 2 4
## Depressive disorder 0 2
## Diarrhea 6 4
## Eczema 0 2
## Endometrial cancer 4 2
## Esophageal adenocarcinoma 0 2
## Iron biomarker measurement 1 3
## Milk allergic reaction 2 0
## Papillary thyroid carcinoma 2 0
## Phenylketonuria 1 3
## Response to anti-tuberculosis drug 8 10
## Response to antiviral drug 2 4
## Response to immunochemotherapy 3 1
## Sampling site 3 1
## Smoking behavior 10 8
## Squamous cell carcinoma 2 0
## Streptococcus pneumoniae 0 2
## Stroke 2 0
## Acute respiratory failure 6 5
## Air pollution 7 6
## Anxiety disorder 0 1
## Breast cancer 3 4
## Breastfeeding duration 2 3
## Chlamydia trachomatis 1 2
## Chronic fatigue syndrome 0 1
## Chronic hepatitis B virus infection 0 1
## Chronic obstructive pulmonary disease 3 2
## Diabetes mellitus 0 1
## Endometriosis 2 3
## Esophageal cancer 1 2
## Gestational diabetes 1 0
## Hepatocellular carcinoma 0 1
## Hypertrophy 1 0
## Irritable bowel syndrome 5 6
## Multiple sclerosis 0 1
## Oral cavity carcinoma 0 1
## Oral squamous cell carcinoma 3 2
## Pancreatic carcinoma 0 1
## Psoriasis 1 0
## Respiratory Syncytial Virus Infection 0 1
## Respiratory tract infectious disease 1 2
## Response to vaccine 1 0
## Rheumatoid arthritis 5 4
## Sample treatment protocol 1 0
## Sampling time 4 3
## Social interaction measurement 2 1
## Socioeconomic status 3 4
## Traditional Chinese medicine type 5 4
## Treatment 1 0
## Type I diabetes mellitus 0 1
## Vesicle membrane 3 2
## Vitiligo 0 1
## Abnormal stool composition 0 0
## Acute myeloid leukemia 1 1
## Arthritis 0 0
## Asthma 1 1
## Biological sex 1 1
## Bipolar disorder 0 0
## Celiac disease 0 0
## Clinical treatment 1 1
## Colorectal adenoma 2 2
## Contraception 0 0
## COVID-19 symptoms measurement 0 0
## Disease progression measurement 0 0
## Functional abnormality of the gastrointestinal tract 0 0
## Gastric adenocarcinoma 0 0
## Head and neck squamous cell carcinoma 0 0
## Health study participation 2 2
## HIV mother to child transmission 0 0
## Lactose intolerance 0 0
## Lifestyle measurement 2 2
## Lung transplantation 2 2
## Obsessive-compulsive disorder 0 0
## Ovarian cancer 3 3
## Phenotype 2 2
## Psoriasis vulgaris 0 0
## Response to diet 3 3
## Response to ketogenic diet 2 2
## Sample collection protocol 0 0
## SARS coronavirus 0 0
## Simian immunodeficiency virus infection 0 0
## Smoking cessation 0 0
## Transplant outcome measurement 0 0
## Viral load 0 0
## Waist circumference 0 0
## unchanged
## Oxalate measurement 0
## COVID-19 42
## Obesity 58
## Polycystic ovary syndrome 12
## HIV infection 26
## Smoking behaviour measurement 0
## Clostridium difficile infection 1
## Dry eye syndrome 11
## Systemic inflammatory response syndrome 4
## Treatment outcome measurement 66
## Chronic constipation 10
## Human papilloma virus infection 28
## Alzheimer's disease 24
## Gastric cancer 26
## Ulcerative colitis 3
## Age 9
## Aging 0
## Balanced reciprocal translocation 0
## Atopic eczema 72
## Autism spectrum disorder 8
## Cesarean section 16
## Epilepsy 5
## Lung cancer 7
## Parkinson's disease 86
## Response to allogeneic hematopoietic stem cell transplant 0
## Response to transplant 23
## Urinary tract infection 8
## Cervical cancer 5
## Diet 63
## Helminthiasis 8
## Population 25
## Spontaneous preterm birth 5
## Acute lymphoblastic leukemia 5
## Acute pancreatitis 2
## Colitis 1
## Colorectal cancer 44
## Ethnic group 6
## Food allergy 19
## Human immunodeficiency virus 6
## Hypertension 6
## Periodontitis 10
## Pregnancy 2
## Response to antibiotic 8
## Alcohol drinking 2
## Atopic asthma 7
## Birth measurement 4
## Constipation 2
## Delivery method 2
## Extraction protocol 20
## Male homosexuality 6
## Oral lichen planus 4
## SARS-CoV-2-related disease 4
## Schizophrenia 14
## Type II diabetes mellitus 24
## Age at assessment 1
## Antimicrobial agent 25
## Breed 7
## Cervical glandular intraepithelial neoplasia 9
## Chronic kidney disease 5
## Cognitive impairment 9
## Crohn's disease 5
## Depressive disorder 4
## Diarrhea 8
## Eczema 10
## Endometrial cancer 3
## Esophageal adenocarcinoma 4
## Iron biomarker measurement 2
## Milk allergic reaction 5
## Papillary thyroid carcinoma 10
## Phenylketonuria 4
## Response to anti-tuberculosis drug 13
## Response to antiviral drug 5
## Response to immunochemotherapy 3
## Sampling site 7
## Smoking behavior 20
## Squamous cell carcinoma 4
## Streptococcus pneumoniae 4
## Stroke 16
## Acute respiratory failure 0
## Air pollution 3
## Anxiety disorder 7
## Breast cancer 16
## Breastfeeding duration 9
## Chlamydia trachomatis 2
## Chronic fatigue syndrome 4
## Chronic hepatitis B virus infection 5
## Chronic obstructive pulmonary disease 2
## Diabetes mellitus 5
## Endometriosis 14
## Esophageal cancer 2
## Gestational diabetes 35
## Hepatocellular carcinoma 6
## Hypertrophy 4
## Irritable bowel syndrome 22
## Multiple sclerosis 17
## Oral cavity carcinoma 7
## Oral squamous cell carcinoma 3
## Pancreatic carcinoma 4
## Psoriasis 12
## Respiratory Syncytial Virus Infection 5
## Respiratory tract infectious disease 6
## Response to vaccine 5
## Rheumatoid arthritis 9
## Sample treatment protocol 4
## Sampling time 5
## Social interaction measurement 6
## Socioeconomic status 8
## Traditional Chinese medicine type 6
## Treatment 7
## Type I diabetes mellitus 6
## Vesicle membrane 1
## Vitiligo 4
## Abnormal stool composition 6
## Acute myeloid leukemia 4
## Arthritis 6
## Asthma 14
## Biological sex 6
## Bipolar disorder 5
## Celiac disease 6
## Clinical treatment 5
## Colorectal adenoma 10
## Contraception 5
## COVID-19 symptoms measurement 5
## Disease progression measurement 5
## Functional abnormality of the gastrointestinal tract 27
## Gastric adenocarcinoma 8
## Head and neck squamous cell carcinoma 8
## Health study participation 35
## HIV mother to child transmission 8
## Lactose intolerance 5
## Lifestyle measurement 8
## Lung transplantation 2
## Obsessive-compulsive disorder 5
## Ovarian cancer 27
## Phenotype 19
## Psoriasis vulgaris 14
## Response to diet 31
## Response to ketogenic diet 3
## Sample collection protocol 9
## SARS coronavirus 6
## Simian immunodeficiency virus infection 5
## Smoking cessation 6
## Transplant outcome measurement 13
## Viral load 6
## Waist circumference 5
# Shannon diversity by condition with perc = TRUE: in the printed output each
# row holds the proportions of increased / decreased / unchanged within that
# condition. tabDiv() is defined outside this section.
tabDiv(exps, "Shannon", "Condition", perc = TRUE)
## increased decreased
## Oxalate measurement 0.000 1.000
## COVID-19 0.120 0.320
## Obesity 0.039 0.210
## Polycystic ovary syndrome 0.000 0.520
## HIV infection 0.026 0.310
## Smoking behaviour measurement 0.130 0.870
## Clostridium difficile infection 0.910 0.000
## Dry eye syndrome 0.043 0.480
## Systemic inflammatory response syndrome 0.210 0.620
## Treatment outcome measurement 0.110 0.210
## Chronic constipation 0.470 0.000
## Human papilloma virus infection 0.260 0.026
## Alzheimer's disease 0.056 0.280
## Gastric cancer 0.130 0.300
## Ulcerative colitis 0.077 0.690
## Age 0.190 0.460
## Aging 0.000 1.000
## Balanced reciprocal translocation 1.000 0.000
## Atopic eczema 0.057 0.120
## Autism spectrum disorder 0.440 0.062
## Cesarean section 0.270 0.000
## Epilepsy 0.550 0.000
## Lung cancer 0.120 0.470
## Parkinson's disease 0.170 0.120
## Response to allogeneic hematopoietic stem cell transplant 0.000 1.000
## Response to transplant 0.190 0.320
## Urinary tract infection 0.000 0.430
## Cervical cancer 0.500 0.000
## Diet 0.150 0.200
## Helminthiasis 0.380 0.000
## Population 0.059 0.210
## Spontaneous preterm birth 0.500 0.290
## Acute lymphoblastic leukemia 0.000 0.440
## Acute pancreatitis 0.000 0.670
## Colitis 0.800 0.000
## Colorectal cancer 0.150 0.210
## Ethnic group 0.190 0.440
## Food allergy 0.220 0.074
## Human immunodeficiency virus 0.000 0.400
## Hypertension 0.440 0.190
## Periodontitis 0.310 0.062
## Pregnancy 0.670 0.000
## Response to antibiotic 0.000 0.330
## Alcohol drinking 0.600 0.000
## Atopic asthma 0.330 0.083
## Birth measurement 0.430 0.000
## Constipation 0.550 0.270
## Delivery method 0.140 0.570
## Extraction protocol 0.330 0.380
## Male homosexuality 0.330 0.000
## Oral lichen planus 0.430 0.000
## SARS-CoV-2-related disease 0.000 0.430
## Schizophrenia 0.053 0.210
## Type II diabetes mellitus 0.065 0.160
## Age at assessment 0.600 0.200
## Antimicrobial agent 0.190 0.230
## Breed 0.000 0.220
## Cervical glandular intraepithelial neoplasia 0.180 0.000
## Chronic kidney disease 0.180 0.360
## Cognitive impairment 0.077 0.230
## Crohn's disease 0.180 0.360
## Depressive disorder 0.000 0.330
## Diarrhea 0.330 0.220
## Eczema 0.000 0.170
## Endometrial cancer 0.440 0.220
## Esophageal adenocarcinoma 0.000 0.330
## Iron biomarker measurement 0.170 0.500
## Milk allergic reaction 0.290 0.000
## Papillary thyroid carcinoma 0.170 0.000
## Phenylketonuria 0.120 0.380
## Response to anti-tuberculosis drug 0.260 0.320
## Response to antiviral drug 0.180 0.360
## Response to immunochemotherapy 0.430 0.140
## Sampling site 0.270 0.091
## Smoking behavior 0.260 0.210
## Squamous cell carcinoma 0.330 0.000
## Streptococcus pneumoniae 0.000 0.330
## Stroke 0.110 0.000
## Acute respiratory failure 0.550 0.450
## Air pollution 0.440 0.380
## Anxiety disorder 0.000 0.120
## Breast cancer 0.130 0.170
## Breastfeeding duration 0.140 0.210
## Chlamydia trachomatis 0.200 0.400
## Chronic fatigue syndrome 0.000 0.200
## Chronic hepatitis B virus infection 0.000 0.170
## Chronic obstructive pulmonary disease 0.430 0.290
## Diabetes mellitus 0.000 0.170
## Endometriosis 0.110 0.160
## Esophageal cancer 0.200 0.400
## Gestational diabetes 0.028 0.000
## Hepatocellular carcinoma 0.000 0.140
## Hypertrophy 0.200 0.000
## Irritable bowel syndrome 0.150 0.180
## Multiple sclerosis 0.000 0.056
## Oral cavity carcinoma 0.000 0.120
## Oral squamous cell carcinoma 0.380 0.250
## Pancreatic carcinoma 0.000 0.200
## Psoriasis 0.077 0.000
## Respiratory Syncytial Virus Infection 0.000 0.170
## Respiratory tract infectious disease 0.110 0.220
## Response to vaccine 0.170 0.000
## Rheumatoid arthritis 0.280 0.220
## Sample treatment protocol 0.200 0.000
## Sampling time 0.330 0.250
## Social interaction measurement 0.220 0.110
## Socioeconomic status 0.200 0.270
## Traditional Chinese medicine type 0.330 0.270
## Treatment 0.120 0.000
## Type I diabetes mellitus 0.000 0.140
## Vesicle membrane 0.500 0.330
## Vitiligo 0.000 0.200
## Abnormal stool composition 0.000 0.000
## Acute myeloid leukemia 0.170 0.170
## Arthritis 0.000 0.000
## Asthma 0.062 0.062
## Biological sex 0.120 0.120
## Bipolar disorder 0.000 0.000
## Celiac disease 0.000 0.000
## Clinical treatment 0.140 0.140
## Colorectal adenoma 0.140 0.140
## Contraception 0.000 0.000
## COVID-19 symptoms measurement 0.000 0.000
## Disease progression measurement 0.000 0.000
## Functional abnormality of the gastrointestinal tract 0.000 0.000
## Gastric adenocarcinoma 0.000 0.000
## Head and neck squamous cell carcinoma 0.000 0.000
## Health study participation 0.051 0.051
## HIV mother to child transmission 0.000 0.000
## Lactose intolerance 0.000 0.000
## Lifestyle measurement 0.170 0.170
## Lung transplantation 0.330 0.330
## Obsessive-compulsive disorder 0.000 0.000
## Ovarian cancer 0.091 0.091
## Phenotype 0.087 0.087
## Psoriasis vulgaris 0.000 0.000
## Response to diet 0.081 0.081
## Response to ketogenic diet 0.290 0.290
## Sample collection protocol 0.000 0.000
## SARS coronavirus 0.000 0.000
## Simian immunodeficiency virus infection 0.000 0.000
## Smoking cessation 0.000 0.000
## Transplant outcome measurement 0.000 0.000
## Viral load 0.000 0.000
## Waist circumference 0.000 0.000
## unchanged
## Oxalate measurement 0.000
## COVID-19 0.560
## Obesity 0.750
## Polycystic ovary syndrome 0.480
## HIV infection 0.670
## Smoking behaviour measurement 0.000
## Clostridium difficile infection 0.091
## Dry eye syndrome 0.480
## Systemic inflammatory response syndrome 0.170
## Treatment outcome measurement 0.670
## Chronic constipation 0.530
## Human papilloma virus infection 0.720
## Alzheimer's disease 0.670
## Gastric cancer 0.570
## Ulcerative colitis 0.230
## Age 0.350
## Aging 0.000
## Balanced reciprocal translocation 0.000
## Atopic eczema 0.820
## Autism spectrum disorder 0.500
## Cesarean section 0.730
## Epilepsy 0.450
## Lung cancer 0.410
## Parkinson's disease 0.720
## Response to allogeneic hematopoietic stem cell transplant 0.000
## Response to transplant 0.490
## Urinary tract infection 0.570
## Cervical cancer 0.500
## Diet 0.660
## Helminthiasis 0.620
## Population 0.740
## Spontaneous preterm birth 0.210
## Acute lymphoblastic leukemia 0.560
## Acute pancreatitis 0.330
## Colitis 0.200
## Colorectal cancer 0.650
## Ethnic group 0.380
## Food allergy 0.700
## Human immunodeficiency virus 0.600
## Hypertension 0.380
## Periodontitis 0.620
## Pregnancy 0.330
## Response to antibiotic 0.670
## Alcohol drinking 0.400
## Atopic asthma 0.580
## Birth measurement 0.570
## Constipation 0.180
## Delivery method 0.290
## Extraction protocol 0.290
## Male homosexuality 0.670
## Oral lichen planus 0.570
## SARS-CoV-2-related disease 0.570
## Schizophrenia 0.740
## Type II diabetes mellitus 0.770
## Age at assessment 0.200
## Antimicrobial agent 0.580
## Breed 0.780
## Cervical glandular intraepithelial neoplasia 0.820
## Chronic kidney disease 0.450
## Cognitive impairment 0.690
## Crohn's disease 0.450
## Depressive disorder 0.670
## Diarrhea 0.440
## Eczema 0.830
## Endometrial cancer 0.330
## Esophageal adenocarcinoma 0.670
## Iron biomarker measurement 0.330
## Milk allergic reaction 0.710
## Papillary thyroid carcinoma 0.830
## Phenylketonuria 0.500
## Response to anti-tuberculosis drug 0.420
## Response to antiviral drug 0.450
## Response to immunochemotherapy 0.430
## Sampling site 0.640
## Smoking behavior 0.530
## Squamous cell carcinoma 0.670
## Streptococcus pneumoniae 0.670
## Stroke 0.890
## Acute respiratory failure 0.000
## Air pollution 0.190
## Anxiety disorder 0.880
## Breast cancer 0.700
## Breastfeeding duration 0.640
## Chlamydia trachomatis 0.400
## Chronic fatigue syndrome 0.800
## Chronic hepatitis B virus infection 0.830
## Chronic obstructive pulmonary disease 0.290
## Diabetes mellitus 0.830
## Endometriosis 0.740
## Esophageal cancer 0.400
## Gestational diabetes 0.970
## Hepatocellular carcinoma 0.860
## Hypertrophy 0.800
## Irritable bowel syndrome 0.670
## Multiple sclerosis 0.940
## Oral cavity carcinoma 0.880
## Oral squamous cell carcinoma 0.380
## Pancreatic carcinoma 0.800
## Psoriasis 0.920
## Respiratory Syncytial Virus Infection 0.830
## Respiratory tract infectious disease 0.670
## Response to vaccine 0.830
## Rheumatoid arthritis 0.500
## Sample treatment protocol 0.800
## Sampling time 0.420
## Social interaction measurement 0.670
## Socioeconomic status 0.530
## Traditional Chinese medicine type 0.400
## Treatment 0.880
## Type I diabetes mellitus 0.860
## Vesicle membrane 0.170
## Vitiligo 0.800
## Abnormal stool composition 1.000
## Acute myeloid leukemia 0.670
## Arthritis 1.000
## Asthma 0.880
## Biological sex 0.750
## Bipolar disorder 1.000
## Celiac disease 1.000
## Clinical treatment 0.710
## Colorectal adenoma 0.710
## Contraception 1.000
## COVID-19 symptoms measurement 1.000
## Disease progression measurement 1.000
## Functional abnormality of the gastrointestinal tract 1.000
## Gastric adenocarcinoma 1.000
## Head and neck squamous cell carcinoma 1.000
## Health study participation 0.900
## HIV mother to child transmission 1.000
## Lactose intolerance 1.000
## Lifestyle measurement 0.670
## Lung transplantation 0.330
## Obsessive-compulsive disorder 1.000
## Ovarian cancer 0.820
## Phenotype 0.830
## Psoriasis vulgaris 1.000
## Response to diet 0.840
## Response to ketogenic diet 0.430
## Sample collection protocol 1.000
## SARS coronavirus 1.000
## Simian immunodeficiency virus infection 1.000
## Smoking cessation 1.000
## Transplant outcome measurement 1.000
## Viral load 1.000
## Waist circumference 1.000
# Per-condition counts of experiments with increased / decreased / unchanged
# Richness. tabDiv() is defined outside this section.
tabDiv(exps, "Richness", "Condition")
## increased decreased
## Treatment outcome measurement 5 22
## Diet 4 19
## Helminthiasis 13 0
## HIV infection 3 15
## COVID-19 9 20
## Chronic constipation 8 0
## Parkinson's disease 18 26
## Phenotype 9 1
## Balanced reciprocal translocation 7 0
## Diarrhea 8 1
## Head and neck squamous cell carcinoma 0 7
## Polycystic ovary syndrome 0 7
## Increased intestinal transit time 6 0
## Response to allogeneic hematopoietic stem cell transplant 0 6
## Alcohol drinking 5 0
## Antimicrobial agent 2 7
## Human immunodeficiency virus 1 6
## Human papilloma virus infection 7 2
## Acute lymphoblastic leukemia 5 1
## Age 1 5
## Air pollution 9 5
## Cervical glandular intraepithelial neoplasia 4 0
## Dry eye syndrome 0 4
## Endometriosis 4 0
## Epilepsy 4 0
## Periodontitis 5 1
## Schizophrenia 1 5
## Vesicle membrane 5 1
## Atopic asthma 4 1
## Delivery method 4 1
## Food allergy 0 3
## Gastric cancer 5 8
## Gestational diabetes 3 6
## Hypertrophy 3 0
## Iron biomarker measurement 1 4
## Oral squamous cell carcinoma 1 4
## Response to transplant 5 8
## Asthma 2 0
## Autism spectrum disorder 4 6
## Breast cancer 2 0
## Colorectal cancer 8 10
## Esophageal adenocarcinoma 0 2
## Hypertension 1 3
## Phenylketonuria 1 3
## Respiratory tract infectious disease 3 1
## Smoking behavior 6 8
## Smoking status measurement 2 0
## Streptococcus pneumoniae 0 2
## Traditional Chinese medicine type 1 3
## Transplant outcome measurement 0 2
## Treatment 1 3
## Ulcerative colitis 1 3
## Alzheimer's disease 6 5
## Atopic eczema 2 1
## Breastfeeding duration 1 0
## Cesarean section 3 2
## Colorectal adenoma 1 2
## Constipation 4 5
## Endometrial cancer 1 2
## Health study participation 1 0
## Inflammatory bowel disease 2 3
## Irritable bowel syndrome 4 5
## Lung cancer 0 1
## Obesity 8 9
## Obsessive-compulsive disorder 0 1
## Ovarian cancer 1 0
## Psoriasis 0 1
## Response to diet 3 4
## Rheumatoid arthritis 3 4
## Sampling site 1 2
## Socioeconomic status 2 1
## Transport 1 2
## Type II diabetes mellitus 2 3
## Urinary tract infection 0 1
## Abnormal stool composition 0 0
## Chlamydia trachomatis 1 1
## Crohn's disease 2 2
## Ethnic group 2 2
## Functional abnormality of the gastrointestinal tract 0 0
## HIV mother to child transmission 0 0
## Male homosexuality 0 0
## Multiple sclerosis 0 0
## Papillary thyroid carcinoma 0 0
## Physical activity 2 2
## Psoriasis vulgaris 0 0
## Sample collection protocol 0 0
## Smoking cessation 0 0
## Stroke 2 2
## Viral load 0 0
## unchanged
## Treatment outcome measurement 47
## Diet 30
## Helminthiasis 0
## HIV infection 10
## COVID-19 24
## Chronic constipation 4
## Parkinson's disease 28
## Phenotype 11
## Balanced reciprocal translocation 0
## Diarrhea 4
## Head and neck squamous cell carcinoma 4
## Polycystic ovary syndrome 1
## Increased intestinal transit time 0
## Response to allogeneic hematopoietic stem cell transplant 0
## Alcohol drinking 0
## Antimicrobial agent 10
## Human immunodeficiency virus 2
## Human papilloma virus infection 12
## Acute lymphoblastic leukemia 0
## Age 1
## Air pollution 6
## Cervical glandular intraepithelial neoplasia 2
## Dry eye syndrome 3
## Endometriosis 8
## Epilepsy 1
## Periodontitis 6
## Schizophrenia 8
## Vesicle membrane 0
## Atopic asthma 7
## Delivery method 1
## Food allergy 9
## Gastric cancer 14
## Gestational diabetes 25
## Hypertrophy 2
## Iron biomarker measurement 1
## Oral squamous cell carcinoma 0
## Response to transplant 11
## Asthma 10
## Autism spectrum disorder 0
## Breast cancer 7
## Colorectal cancer 21
## Esophageal adenocarcinoma 4
## Hypertension 6
## Phenylketonuria 4
## Respiratory tract infectious disease 1
## Smoking behavior 8
## Smoking status measurement 3
## Streptococcus pneumoniae 3
## Traditional Chinese medicine type 4
## Transplant outcome measurement 5
## Treatment 6
## Ulcerative colitis 1
## Alzheimer's disease 23
## Atopic eczema 6
## Breastfeeding duration 9
## Cesarean section 10
## Colorectal adenoma 11
## Constipation 8
## Endometrial cancer 3
## Health study participation 28
## Inflammatory bowel disease 0
## Irritable bowel syndrome 14
## Lung cancer 10
## Obesity 19
## Obsessive-compulsive disorder 4
## Ovarian cancer 30
## Psoriasis 8
## Response to diet 8
## Rheumatoid arthritis 1
## Sampling site 2
## Socioeconomic status 2
## Transport 3
## Type II diabetes mellitus 10
## Urinary tract infection 6
## Abnormal stool composition 6
## Chlamydia trachomatis 3
## Crohn's disease 2
## Ethnic group 1
## Functional abnormality of the gastrointestinal tract 27
## HIV mother to child transmission 8
## Male homosexuality 9
## Multiple sclerosis 17
## Papillary thyroid carcinoma 12
## Physical activity 1
## Psoriasis vulgaris 14
## Sample collection protocol 9
## Smoking cessation 6
## Stroke 17
## Viral load 5
# Richness by condition with perc = TRUE: in the printed output each row
# holds the proportions of increased / decreased / unchanged within that
# condition. tabDiv() is defined outside this section.
tabDiv(exps, "Richness", "Condition", perc = TRUE)
## increased decreased
## Treatment outcome measurement 0.068 0.300
## Diet 0.075 0.360
## Helminthiasis 1.000 0.000
## HIV infection 0.110 0.540
## COVID-19 0.170 0.380
## Chronic constipation 0.670 0.000
## Parkinson's disease 0.250 0.360
## Phenotype 0.430 0.048
## Balanced reciprocal translocation 1.000 0.000
## Diarrhea 0.620 0.077
## Head and neck squamous cell carcinoma 0.000 0.640
## Polycystic ovary syndrome 0.000 0.880
## Increased intestinal transit time 1.000 0.000
## Response to allogeneic hematopoietic stem cell transplant 0.000 1.000
## Alcohol drinking 1.000 0.000
## Antimicrobial agent 0.110 0.370
## Human immunodeficiency virus 0.110 0.670
## Human papilloma virus infection 0.330 0.095
## Acute lymphoblastic leukemia 0.830 0.170
## Age 0.140 0.710
## Air pollution 0.450 0.250
## Cervical glandular intraepithelial neoplasia 0.670 0.000
## Dry eye syndrome 0.000 0.570
## Endometriosis 0.330 0.000
## Epilepsy 0.800 0.000
## Periodontitis 0.420 0.083
## Schizophrenia 0.071 0.360
## Vesicle membrane 0.830 0.170
## Atopic asthma 0.330 0.083
## Delivery method 0.670 0.170
## Food allergy 0.000 0.250
## Gastric cancer 0.190 0.300
## Gestational diabetes 0.088 0.180
## Hypertrophy 0.600 0.000
## Iron biomarker measurement 0.170 0.670
## Oral squamous cell carcinoma 0.200 0.800
## Response to transplant 0.210 0.330
## Asthma 0.170 0.000
## Autism spectrum disorder 0.400 0.600
## Breast cancer 0.220 0.000
## Colorectal cancer 0.210 0.260
## Esophageal adenocarcinoma 0.000 0.330
## Hypertension 0.100 0.300
## Phenylketonuria 0.120 0.380
## Respiratory tract infectious disease 0.600 0.200
## Smoking behavior 0.270 0.360
## Smoking status measurement 0.400 0.000
## Streptococcus pneumoniae 0.000 0.400
## Traditional Chinese medicine type 0.120 0.380
## Transplant outcome measurement 0.000 0.290
## Treatment 0.100 0.300
## Ulcerative colitis 0.200 0.600
## Alzheimer's disease 0.180 0.150
## Atopic eczema 0.220 0.110
## Breastfeeding duration 0.100 0.000
## Cesarean section 0.200 0.130
## Colorectal adenoma 0.071 0.140
## Constipation 0.240 0.290
## Endometrial cancer 0.170 0.330
## Health study participation 0.034 0.000
## Inflammatory bowel disease 0.400 0.600
## Irritable bowel syndrome 0.170 0.220
## Lung cancer 0.000 0.091
## Obesity 0.220 0.250
## Obsessive-compulsive disorder 0.000 0.200
## Ovarian cancer 0.032 0.000
## Psoriasis 0.000 0.110
## Response to diet 0.200 0.270
## Rheumatoid arthritis 0.380 0.500
## Sampling site 0.200 0.400
## Socioeconomic status 0.400 0.200
## Transport 0.170 0.330
## Type II diabetes mellitus 0.130 0.200
## Urinary tract infection 0.000 0.140
## Abnormal stool composition 0.000 0.000
## Chlamydia trachomatis 0.200 0.200
## Crohn's disease 0.330 0.330
## Ethnic group 0.400 0.400
## Functional abnormality of the gastrointestinal tract 0.000 0.000
## HIV mother to child transmission 0.000 0.000
## Male homosexuality 0.000 0.000
## Multiple sclerosis 0.000 0.000
## Papillary thyroid carcinoma 0.000 0.000
## Physical activity 0.400 0.400
## Psoriasis vulgaris 0.000 0.000
## Sample collection protocol 0.000 0.000
## Smoking cessation 0.000 0.000
## Stroke 0.095 0.095
## Viral load 0.000 0.000
## unchanged
## Treatment outcome measurement 0.64
## Diet 0.57
## Helminthiasis 0.00
## HIV infection 0.36
## COVID-19 0.45
## Chronic constipation 0.33
## Parkinson's disease 0.39
## Phenotype 0.52
## Balanced reciprocal translocation 0.00
## Diarrhea 0.31
## Head and neck squamous cell carcinoma 0.36
## Polycystic ovary syndrome 0.12
## Increased intestinal transit time 0.00
## Response to allogeneic hematopoietic stem cell transplant 0.00
## Alcohol drinking 0.00
## Antimicrobial agent 0.53
## Human immunodeficiency virus 0.22
## Human papilloma virus infection 0.57
## Acute lymphoblastic leukemia 0.00
## Age 0.14
## Air pollution 0.30
## Cervical glandular intraepithelial neoplasia 0.33
## Dry eye syndrome 0.43
## Endometriosis 0.67
## Epilepsy 0.20
## Periodontitis 0.50
## Schizophrenia 0.57
## Vesicle membrane 0.00
## Atopic asthma 0.58
## Delivery method 0.17
## Food allergy 0.75
## Gastric cancer 0.52
## Gestational diabetes 0.74
## Hypertrophy 0.40
## Iron biomarker measurement 0.17
## Oral squamous cell carcinoma 0.00
## Response to transplant 0.46
## Asthma 0.83
## Autism spectrum disorder 0.00
## Breast cancer 0.78
## Colorectal cancer 0.54
## Esophageal adenocarcinoma 0.67
## Hypertension 0.60
## Phenylketonuria 0.50
## Respiratory tract infectious disease 0.20
## Smoking behavior 0.36
## Smoking status measurement 0.60
## Streptococcus pneumoniae 0.60
## Traditional Chinese medicine type 0.50
## Transplant outcome measurement 0.71
## Treatment 0.60
## Ulcerative colitis 0.20
## Alzheimer's disease 0.68
## Atopic eczema 0.67
## Breastfeeding duration 0.90
## Cesarean section 0.67
## Colorectal adenoma 0.79
## Constipation 0.47
## Endometrial cancer 0.50
## Health study participation 0.97
## Inflammatory bowel disease 0.00
## Irritable bowel syndrome 0.61
## Lung cancer 0.91
## Obesity 0.53
## Obsessive-compulsive disorder 0.80
## Ovarian cancer 0.97
## Psoriasis 0.89
## Response to diet 0.53
## Rheumatoid arthritis 0.12
## Sampling site 0.40
## Socioeconomic status 0.40
## Transport 0.50
## Type II diabetes mellitus 0.67
## Urinary tract infection 0.86
## Abnormal stool composition 1.00
## Chlamydia trachomatis 0.60
## Crohn's disease 0.33
## Ethnic group 0.20
## Functional abnormality of the gastrointestinal tract 1.00
## HIV mother to child transmission 1.00
## Male homosexuality 1.00
## Multiple sclerosis 1.00
## Papillary thyroid carcinoma 1.00
## Physical activity 0.20
## Psoriasis vulgaris 1.00
## Sample collection protocol 1.00
## Smoking cessation 1.00
## Stroke 0.81
## Viral load 1.00
Body sites with consistently increased or decreased alpha diversity:
# Shannon diversity by body site: counts of increased / decreased / unchanged
# entries in `exps`, one row per body site.
tabDiv(exps, "Shannon", "Body site")
## increased decreased unchanged
## Feces 297 441 1169
## Vagina 16 6 27
## Posterior fornix of vagina 9 0 7
## Skin of body 7 15 8
## Uterine cervix 9 1 20
## Uterine cervix,Vaginal fluid 9 1 0
## Buccal epithelium 0 7 0
## Saliva 36 43 122
## Subgingival dental plaque 9 3 20
## Buccal mucosa 5 0 2
## Meconium 5 0 10
## Space surrounding organism 2 7 13
## Stomach 5 10 5
## Tongue 0 5 12
## Axilla skin 5 1 11
## Tear film 0 4 1
## Throat 0 4 8
## Caecum 4 7 22
## Cecum mucosa 1 4 6
## Colorectal mucosa 0 3 8
## Dental plaque 0 3 3
## Duodenum 0 3 6
## Nasopharynx 3 6 32
## Rectum 0 3 12
## Skin of forearm 3 0 3
## Bile 2 0 3
## Brachialis muscle 0 2 3
## Conjunctiva 1 3 6
## Conjunctival sac 1 3 1
## Esophagus 0 2 4
## Forelimb skin 2 0 4
## Lung 2 4 7
## Mouth 8 6 28
## Oropharynx 2 4 6
## Rumen 2 0 4
## Supragingival dental plaque 1 3 1
## Thyroid gland 2 0 10
## Uterus 3 1 11
## Blood 0 1 6
## Breast 3 4 4
## Breast,Milk 1 0 4
## Bulbar conjunctiva 3 2 5
## Colon 3 2 19
## Ileum 1 0 11
## Nasal cavity 0 1 5
## Small intestine 3 4 1
## Vagina,Uterine cervix 3 2 7
## Vaginal fluid 1 0 8
## Bronchus 0 0 6
## Endothelium of trachea 3 3 0
## Internal cheek pouch 0 0 11
## Intestine 1 1 16
## Jejunum 1 1 8
## Milk 0 0 9
## Oral cavity 5 5 7
## Ovary 0 0 7
## Peritoneal fluid 0 0 6
## Posterior wall of oropharynx 2 2 1
## Skin of abdomen 0 0 5
## Sputum 6 6 8
## Surface of tongue 2 2 3
## Urine 1 1 16
## Ventral side of post-anal tail 0 0 6
# Shannon diversity by body site as fractions (perc = TRUE): each row of the
# printed table sums to roughly 1.
tabDiv(exps, "Shannon", "Body site", perc = TRUE)
## increased decreased unchanged
## Feces 0.160 0.230 0.61
## Vagina 0.330 0.120 0.55
## Posterior fornix of vagina 0.560 0.000 0.44
## Skin of body 0.230 0.500 0.27
## Uterine cervix 0.300 0.033 0.67
## Uterine cervix,Vaginal fluid 0.900 0.100 0.00
## Buccal epithelium 0.000 1.000 0.00
## Saliva 0.180 0.210 0.61
## Subgingival dental plaque 0.280 0.094 0.62
## Buccal mucosa 0.710 0.000 0.29
## Meconium 0.330 0.000 0.67
## Space surrounding organism 0.091 0.320 0.59
## Stomach 0.250 0.500 0.25
## Tongue 0.000 0.290 0.71
## Axilla skin 0.290 0.059 0.65
## Tear film 0.000 0.800 0.20
## Throat 0.000 0.330 0.67
## Caecum 0.120 0.210 0.67
## Cecum mucosa 0.091 0.360 0.55
## Colorectal mucosa 0.000 0.270 0.73
## Dental plaque 0.000 0.500 0.50
## Duodenum 0.000 0.330 0.67
## Nasopharynx 0.073 0.150 0.78
## Rectum 0.000 0.200 0.80
## Skin of forearm 0.500 0.000 0.50
## Bile 0.400 0.000 0.60
## Brachialis muscle 0.000 0.400 0.60
## Conjunctiva 0.100 0.300 0.60
## Conjunctival sac 0.200 0.600 0.20
## Esophagus 0.000 0.330 0.67
## Forelimb skin 0.330 0.000 0.67
## Lung 0.150 0.310 0.54
## Mouth 0.190 0.140 0.67
## Oropharynx 0.170 0.330 0.50
## Rumen 0.330 0.000 0.67
## Supragingival dental plaque 0.200 0.600 0.20
## Thyroid gland 0.170 0.000 0.83
## Uterus 0.200 0.067 0.73
## Blood 0.000 0.140 0.86
## Breast 0.270 0.360 0.36
## Breast,Milk 0.200 0.000 0.80
## Bulbar conjunctiva 0.300 0.200 0.50
## Colon 0.120 0.083 0.79
## Ileum 0.083 0.000 0.92
## Nasal cavity 0.000 0.170 0.83
## Small intestine 0.380 0.500 0.12
## Vagina,Uterine cervix 0.250 0.170 0.58
## Vaginal fluid 0.110 0.000 0.89
## Bronchus 0.000 0.000 1.00
## Endothelium of trachea 0.500 0.500 0.00
## Internal cheek pouch 0.000 0.000 1.00
## Intestine 0.056 0.056 0.89
## Jejunum 0.100 0.100 0.80
## Milk 0.000 0.000 1.00
## Oral cavity 0.290 0.290 0.41
## Ovary 0.000 0.000 1.00
## Peritoneal fluid 0.000 0.000 1.00
## Posterior wall of oropharynx 0.400 0.400 0.20
## Skin of abdomen 0.000 0.000 1.00
## Sputum 0.300 0.300 0.40
## Surface of tongue 0.290 0.290 0.43
## Urine 0.056 0.056 0.89
## Ventral side of post-anal tail 0.000 0.000 1.00
# Richness by body site: counts of increased / decreased / unchanged entries
# in `exps`, one row per body site.
tabDiv(exps, "Richness", "Body site")
## increased decreased unchanged
## Feces 187 248 604
## Mouth 10 3 9
## Posterior fornix of vagina 8 1 2
## Uterine cervix 8 1 11
## Skin of body 3 9 6
## Rectum 0 5 7
## Subgingival dental plaque 7 2 17
## Uterine cervix,Vaginal fluid 7 2 1
## Nasopharynx 5 9 19
## Oropharynx 3 7 5
## Stomach 4 8 3
## Throat 1 5 5
## Cecum mucosa 3 6 3
## Oral cavity 2 5 0
## Small intestine 1 4 0
## Colon 6 4 11
## Ear 2 0 3
## Esophagus 0 2 4
## Saliva 20 22 43
## Surface of tongue 4 2 1
## Caecum 2 3 1
## Ileum 2 1 9
## Meconium 2 3 7
## Milk 2 1 5
## Nasal cavity 1 2 10
## Urine 3 2 12
## Vagina 3 2 11
## Vagina,Uterine cervix 1 0 11
## Breast 1 1 7
## Bronchus 0 0 6
## Conjunctiva 1 1 5
## Internal cheek pouch 0 0 7
## Intestine 0 0 15
## Ovary 0 0 7
## Peritoneal fluid 0 0 6
## Thyroid gland 0 0 12
## Tongue 2 2 7
# Richness by body site as fractions (perc = TRUE): each row of the printed
# table sums to roughly 1.
tabDiv(exps, "Richness", "Body site", perc = TRUE)
## increased decreased unchanged
## Feces 0.180 0.240 0.58
## Mouth 0.450 0.140 0.41
## Posterior fornix of vagina 0.730 0.091 0.18
## Uterine cervix 0.400 0.050 0.55
## Skin of body 0.170 0.500 0.33
## Rectum 0.000 0.420 0.58
## Subgingival dental plaque 0.270 0.077 0.65
## Uterine cervix,Vaginal fluid 0.700 0.200 0.10
## Nasopharynx 0.150 0.270 0.58
## Oropharynx 0.200 0.470 0.33
## Stomach 0.270 0.530 0.20
## Throat 0.091 0.450 0.45
## Cecum mucosa 0.250 0.500 0.25
## Oral cavity 0.290 0.710 0.00
## Small intestine 0.200 0.800 0.00
## Colon 0.290 0.190 0.52
## Ear 0.400 0.000 0.60
## Esophagus 0.000 0.330 0.67
## Saliva 0.240 0.260 0.51
## Surface of tongue 0.570 0.290 0.14
## Caecum 0.330 0.500 0.17
## Ileum 0.170 0.083 0.75
## Meconium 0.170 0.250 0.58
## Milk 0.250 0.120 0.62
## Nasal cavity 0.077 0.150 0.77
## Urine 0.180 0.120 0.71
## Vagina 0.190 0.120 0.69
## Vagina,Uterine cervix 0.083 0.000 0.92
## Breast 0.110 0.110 0.78
## Bronchus 0.000 0.000 1.00
## Conjunctiva 0.140 0.140 0.71
## Internal cheek pouch 0.000 0.000 1.00
## Intestine 0.000 0.000 1.00
## Ovary 0.000 0.000 1.00
## Peritoneal fluid 0.000 0.000 1.00
## Thyroid gland 0.000 0.000 1.00
## Tongue 0.180 0.180 0.64
Signature stats
# Extract the microbe signatures from `dat`, requesting taxa in the
# "metaphlan" ID type. `sigs` is used below for the signature-size summary
# (lengths(sigs)).
sigs <- bugsigdbr::getSignatures(dat, tax.id.type = "metaphlan")
Unique microbes
Number of unique microbes contained in the signatures:
## [1] 7792
Development of unique microbes captured over time:
Microbe set size distribution
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 5.000 8.277 10.000 470.000
# Histogram of signature sizes, i.e. the number of elements in each entry of
# `sigs`, using 30 bins.
gghistogram(
  lengths(sigs),
  bins = 30,
  xlab = "signature size",
  ylab = "number of signatures",
  fill = "#00AFBB",
  ggtheme = theme_bw()
)
## [1] 4297
Microbe co-occurrence
# Keep only the rows whose body site is exactly "Feces" (rows with a missing
# body site are dropped), then build the genus-level heatmap matrix for them.
is.feces <- dat[["Body site"]] %in% "Feces"
dat.feces <- dat[is.feces, , drop = FALSE]
cooc.mat <- microbeHeatmap(dat.feces,
                           tax.level = "genus",
                           anno = "genus")
## Loading required namespace: safe
# Same call as for `cooc.mat`, but with antagonistic = TRUE; the result is
# drawn further down as the "opposite direction" heatmap.
# NOTE(review): presumably this counts genus pairs reported with opposite
# directions of abundance change -- confirm against microbeHeatmap().
antag.mat <- microbeHeatmap(dat.feces, tax.level = "genus", anno = "genus", antagonistic = TRUE)
Get the top 20 genera most frequently reported as differentially abundant:
# Fecal signatures expressed as genus-level taxon names.
sigs.feces <- getSignatures(dat.feces,
                            tax.id.type = "taxname",
                            tax.level = "genus",
                            exact.tax.level = FALSE)
# Occurrence count per genus across all fecal signatures, most frequent
# first; keep the first 20 entries.
top20 <- sort(table(unlist(sigs.feces)), decreasing = TRUE)[seq_len(20)]
top20
##
## Bacteroides Bifidobacterium Faecalibacterium Clostridium
## 902 605 600 536
## Blautia Ruminococcus Streptococcus Roseburia
## 526 504 475 471
## Parabacteroides Prevotella Lactobacillus Alistipes
## 468 450 435 406
## Akkermansia Dorea Coprococcus Eubacterium
## 353 334 327 308
## Veillonella Lachnospira Enterococcus Escherichia
## 283 278 277 263
Subset heatmaps to the top 20 genera most frequently reported as differentially abundant:
## [1] TRUE
## [1] TRUE
Distinguish by direction of abundance change (increased / decreased):
# increased
# For each of the top-20 genera, count how often it occurs in fecal
# signatures whose abundance in Group 1 is "increased".
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "increased")
sigs.feces.up <- getSignatures(sub.dat.feces,
                               tax.id.type = "taxname",
                               tax.level = "genus",
                               exact.tax.level = FALSE)
# Tabulate over a factor whose levels are the top-20 genera (in `top20`
# order). A genus that never occurs in this direction is then counted as 0;
# indexing a plain table by a missing name would yield NA instead, and that
# NA would propagate into the bar plot annotation built from these counts.
top20.up <- table(factor(unlist(sigs.feces.up), levels = names(top20)))
top20.up
##
## Bacteroides Bifidobacterium Faecalibacterium Clostridium
## 429 294 218 276
## Blautia Ruminococcus Streptococcus Roseburia
## 237 205 307 155
## Parabacteroides Prevotella Lactobacillus Alistipes
## 247 211 274 172
## Akkermansia Dorea Coprococcus Eubacterium
## 216 138 120 131
## Veillonella Lachnospira Enterococcus Escherichia
## 178 96 198 182
# decreased
# For each of the top-20 genera, count how often it occurs in fecal
# signatures whose abundance in Group 1 is "decreased".
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "decreased")
sigs.feces.down <- getSignatures(sub.dat.feces,
                                 tax.id.type = "taxname",
                                 tax.level = "genus",
                                 exact.tax.level = FALSE)
# Tabulate over a factor whose levels are the top-20 genera (in `top20`
# order). A genus that never occurs in this direction is then counted as 0;
# indexing a plain table by a missing name would yield NA instead, and that
# NA would propagate into the bar plot annotation built from these counts.
top20.down <- table(factor(unlist(sigs.feces.down), levels = names(top20)))
top20.down
##
## Bacteroides Bifidobacterium Faecalibacterium Clostridium
## 466 303 376 255
## Blautia Ruminococcus Streptococcus Roseburia
## 283 294 160 310
## Parabacteroides Prevotella Lactobacillus Alistipes
## 215 236 159 228
## Akkermansia Dorea Coprococcus Eubacterium
## 133 190 201 171
## Veillonella Lachnospira Enterococcus Escherichia
## 102 176 77 75
Plot the heatmap
# annotation
# Bar plot annotation with, per top-20 genus, its count in the increased
# (first column) and decreased (second column) signatures.
# `ncol` is spelled out in full: the abbreviated `nc` only works through
# partial argument matching.
mat <- matrix(cbind(top20.up, top20.down), ncol = 2)
bp <- ComplexHeatmap::anno_barplot(
  mat,
  gp = gpar(fill = c("#D55E00", "#0072B2"),
            col = c("#D55E00", "#0072B2")),
  height = unit(2, "cm")
)
banno <- ComplexHeatmap::HeatmapAnnotation(`Abundance in Group 1` = bp)
# Legend for the two bar colours; it is handed to draw() separately via
# `annotation_legend_list`.
lgd_list <- list(
  Legend(labels = c("increased", "decreased"),
         title = "Abundance in Group 1",
         type = "grid",
         legend_gp = gpar(col = c("#D55E00", "#0072B2"),
                          fill = c("#D55E00", "#0072B2")))
)
# same direction
# lcm <- sweep(cooc.mat, 2, matrixStats::colMaxs(cooc.mat), FUN = "/")
# The diagonal (self co-occurrence, 100%) has to be dampened first;
# otherwise it takes up a large fraction of the colour scale once every
# column is divided by its maximum. Each diagonal entry is therefore capped
# at 1.4 times the second-largest value of its column.
sec <- apply(cooc.mat, 2, function(x) sort(x, decreasing = TRUE)[2])
cooc.mat2 <- cooc.mat
# Vectorised cap of the diagonal; unlike a `1:ncol(...)` loop this is also
# well-behaved for a matrix without columns.
diag(cooc.mat2) <- pmin(diag(cooc.mat2), 1.4 * sec)
# Scale every column by its (dampened) maximum so that values lie in [0, 1].
lcm <- sweep(cooc.mat2, 2, matrixStats::colMaxs(cooc.mat2), FUN = "/")
col <- circlize::colorRamp2(c(0, 1), c("#EEEEEE", "red"))
ht1 <- ComplexHeatmap::Heatmap(lcm,
                               col = col,
                               name = "Relative frequency (top)",
                               cluster_columns = FALSE,
                               row_km = 3,
                               row_title = "same direction",
                               column_names_rot = 45,
                               row_names_gp = gpar(fontsize = 8),
                               column_names_gp = gpar(fontsize = 8))
# opposite direction
# Divide every column of the antagonistic matrix by its column maximum and
# draw the result on a grey-to-blue colour scale.
acm <- sweep(antag.mat,
             MARGIN = 2,
             STATS = matrixStats::colMaxs(antag.mat),
             FUN = "/")
col <- circlize::colorRamp2(c(0, 1), c("#EEEEEE", "blue"))
ht2 <- ComplexHeatmap::Heatmap(
  acm,
  name = "Relative frequency (bottom)",
  col = col,
  row_title = "opposite direction",
  row_km = 3,
  cluster_columns = FALSE,
  column_names_rot = 45,
  column_names_gp = gpar(fontsize = 8),
  row_names_gp = gpar(fontsize = 8)
)
# phylum
# Re-derive the 20 most frequent fecal genera, this time in the "metaphlan"
# ID type, and extract the phylum of each from those names.
sfp <- bugsigdbr::getSignatures(dat.feces,
                                tax.id.type = "metaphlan",
                                tax.level = "genus",
                                exact.tax.level = FALSE)
sfp20 <- sort(table(unlist(sfp)), decreasing = TRUE)[seq_len(20)]
uanno <- bugsigdbr::extractTaxLevel(names(sfp20),
                                    tax.id.type = "taxname",
                                    tax.level = "phylum",
                                    exact.tax.level = FALSE)
# One colour index per distinct phylum.
# NOTE(review): `pcols` below holds five colours, so this assumes at most
# five distinct phyla among the top 20 -- confirm.
phyla.grid <- seq_len(length(unique(uanno)))
panno <- ComplexHeatmap::HeatmapAnnotation(phylum = uanno)
# Single-row matrix of phyla, labelled with the genus names of `top20`.
# NOTE(review): this assumes `sfp20` lists the genera in the same order as
# `top20` -- confirm, otherwise phyla end up under the wrong genus label.
uanno <- matrix(uanno, nrow = 1, dimnames = list(NULL, names(top20)))
pcols <- c("#CC79A7", "#F0E442", "#009E73", "#56B4E9", "#E69F00")
uanno <- ComplexHeatmap::Heatmap(
  uanno,
  name = "Phylum",
  col = pcols[phyla.grid],
  cluster_columns = FALSE,
  column_names_rot = 45,
  column_names_gp = gpar(fontsize = 8)
)
# put everything together
# Stack vertically: same-direction heatmap, bar plot annotation,
# opposite-direction heatmap, phylum strip.
ht_list <- ht1 %v% banno %v% ht2 %v% uanno
ComplexHeatmap::draw(ht_list,
                     annotation_legend_list = lgd_list,
                     merge_legend = TRUE)
# Add the axis title and asterisks inside the bar plot annotation.
decorate_annotation("Abundance in Group 1", {
  grid.text("# signatures",
            x = unit(-1, "cm"),
            rot = 90,
            just = "bottom",
            gp = gpar(fontsize = 8))
  # Six asterisks at hand-tuned positions (in cm).
  # NOTE(review): what the asterisks mark is not stated here -- confirm.
  grid.text(rep("*", 6),
            x = unit(c(2.45, 5.18, 6.55, 8.6, 10, 10.7), "cm"),
            y = unit(c(1.2, 1, 0.95, 0.85, 0.7, 0.7), "cm"))
})
Signature similarity
Jaccard index
Inspect signature similarity for signatures from stomach samples based on Jaccard index:
# Keep only the rows whose body site is exactly "Stomach" (rows with a
# missing body site are dropped), extract their signatures with the default
# settings, and compute the Jaccard similarity between them.
is.stomach <- dat[["Body site"]] %in% "Stomach"
stomachsub <- dat[is.stomach, , drop = FALSE]
sigsub <- bugsigdbr::getSignatures(stomachsub)
pair.jsim <- calcJaccardSimilarity(sigsub)
Create a dendrogram of Jaccard dissimilarities (1.0 means no overlap; 0.0 means identical signatures).