Reading data
Get bulk export from bugsigdb.org:
# Pull the "devel" bulk export; cache = FALSE asks bugsigdbr not to reuse a
# locally cached copy, so the statistics below reflect the current export.
full.dat <- bugsigdbr::importBugSigDB(version = "devel", cache = FALSE)
# Dimensions of the export: rows x columns
dim(full.dat)
## [1] 8760 50
# Column names of the export (study, experiment and signature fields)
colnames(full.dat)
## [1] "BSDB ID" "Study"
## [3] "Study design" "PMID"
## [5] "DOI" "URL"
## [7] "Authors list" "Title"
## [9] "Journal" "Year"
## [11] "Keywords" "Experiment"
## [13] "Location of subjects" "Host species"
## [15] "Body site" "UBERON ID"
## [17] "Condition" "EFO ID"
## [19] "Group 0 name" "Group 1 name"
## [21] "Group 1 definition" "Group 0 sample size"
## [23] "Group 1 sample size" "Antibiotics exclusion"
## [25] "Sequencing type" "16S variable region"
## [27] "Sequencing platform" "Statistical test"
## [29] "Significance threshold" "MHT correction"
## [31] "LDA Score above" "Matched on"
## [33] "Confounders controlled for" "Pielou"
## [35] "Shannon" "Chao1"
## [37] "Simpson" "Inverse Simpson"
## [39] "Richness" "Signature page name"
## [41] "Source" "Curated date"
## [43] "Curator" "Revision editor"
## [45] "Description" "Abundance in Group 1"
## [47] "MetaPhlAn taxon names" "NCBI Taxonomy IDs"
## [49] "State" "Reviewer"
Stripping ill-formed entries:
Curation output
Number of papers and signatures curated:
## [1] 1422
# Row count of the full export (presumably one row per curated signature)
nrow(full.dat)
## [1] 8760
Publication date of the curated papers:
# Discard missing PubMed IDs before the lookup; pmid2pubyear() is a helper
# defined elsewhere (presumably maps each PMID to its publication year).
pmids <- pmids[!is.na(pmids)]
pubyear <- pmid2pubyear(pmids)
head(cbind(pmids, pubyear))

# Tabulate papers per year. table() names are character strings, so reorder
# by their integer value to get chronological order.
tab <- table(pubyear)
tab <- tab[order(as.integer(names(tab)))]
df <- data.frame(year = names(tab), papers = as.integer(tab))

# One labelled bar per publication year
ggbarplot(
    df,
    x = "year",
    y = "papers",
    fill = "steelblue",
    label = TRUE,
    ggtheme = theme_bw()
)
Stripping empty signatures:
# Keep only rows whose taxon-name entry AND taxonomy-ID entry both have
# length > 0.
# NOTE(review): this assumes both columns are list columns; on a plain
# character column lengths() is 1 for every row and nothing is dropped.
# The row count printed below equals nrow(full.dat) -- confirm that no
# empty signatures exist rather than that the filter is a no-op.
ind1 <- lengths(full.dat[["MetaPhlAn taxon names"]]) > 0
ind2 <- lengths(full.dat[["NCBI Taxonomy IDs"]]) > 0
dat <- full.dat[ind1 & ind2,]
nrow(dat)
## [1] 8760
Papers containing only empty UP and DOWN signatures (under curation?):
## numeric(0)
Progress over time:
# Parse the curation date as day-month-year and store it back as a character
# string (as.character() on a Date yields ISO "YYYY-MM-DD").
dat[,"Curated date"] <- as.character(lubridate::dmy(dat[,"Curated date"]))
# Plot curation progress; plotProgressOverTime() is defined elsewhere.
plotProgressOverTime(dat)
# Second view with diff = TRUE (presumably per-period increments rather than
# cumulative totals -- confirm against the helper's definition).
plotProgressOverTime(dat, diff = TRUE)
Stratified by curator:
# npc holds the result of stratifyByCurator() (helper defined elsewhere),
# which is then passed together with the data to the plotting helper.
npc <- stratifyByCurator(dat)
plotCuratorStats(dat, npc)
Number of complete and revised signatures: output omitted here because the listing has grown too long to display.
Study stats
Study design
# Bucket the study identifiers by study design, reduce each bucket to its
# distinct studies, and report the number of studies per design in
# ascending order (sort() is increasing by default).
spl <- split(dat[["Study"]], dat[["Study design"]])
sds <- lapply(spl, unique)
sort(lengths(sds))
## case-control,prospective cohort
## 1
## laboratory experiment,prospective cohort
## 1
## cross-sectional observational, not case-control,laboratory experiment
## 2
## cross-sectional observational, not case-control,prospective cohort
## 2
## laboratory experiment,meta-analysis
## 2
## prospective cohort,time series / longitudinal observational
## 2
## case-control,time series / longitudinal observational
## 3
## case-control,meta-analysis
## 5
## laboratory experiment,time series / longitudinal observational
## 5
## case-control,laboratory experiment
## 6
## meta-analysis
## 18
## randomized controlled trial
## 66
## prospective cohort
## 114
## time series / longitudinal observational
## 127
## laboratory experiment
## 148
## cross-sectional observational, not case-control
## 384
## case-control
## 550
Experiment stats
Columns of the full dataset that describe experiments:
# Column groups used to describe an experiment.

# Identifier: an experiment is addressed by its study plus experiment label
exp.cols <- c("Study", "Experiment")

# Subject characteristics; the last two entries are the group sample sizes
sub.cols <- c(
    "Host species",
    "Location of subjects",
    "Body site",
    "Condition",
    "Antibiotics exclusion",
    "Group 0 sample size",
    "Group 1 sample size"
)

# Laboratory / sequencing setup
lab.cols <- c(
    "Sequencing type",
    "16S variable region",
    "Sequencing platform"
)

# Statistical testing setup
stat.cols <- c(
    "Statistical test",
    "MHT correction",
    "Significance threshold"
)

# Alpha diversity measures
div.cols <- c(
    "Pielou",
    "Shannon",
    "Chao1",
    "Simpson",
    "Inverse Simpson",
    "Richness"
)
Restrict dataset to experiment information:
Subjects
Number of experiments for the top 10 categories for each subjects column:
## $`Host species`
##
## Homo sapiens Mus musculus Rattus norvegicus
## 4227 570 109
## Sus scrofa domesticus Canis lupus familiaris Not specified
## 78 73 33
## Ovis aries Bos taurus Capra hircus
## 24 18 14
## Gallus gallus
## 14
##
## $`Location of subjects`
##
## China United States of America Germany
## 1697 939 168
## Japan Denmark Italy
## 164 150 128
## Australia Netherlands South Korea
## 115 111 96
## Spain
## 96
##
## $`Body site`
##
## Feces Saliva Vagina
## 3351 292 103
## Mouth Nasopharynx Subgingival dental plaque
## 67 55 49
## Skin of body Uterine cervix Colon
## 47 47 39
## Throat
## 37
##
## $Condition
##
## Parkinson's disease Diet
## 192 149
## Obesity Colorectal cancer
## 140 138
## Treatment outcome measurement COVID-19
## 132 116
## Atopic eczema Antimicrobial agent
## 90 85
## Alzheimer's disease Extraction protocol
## 79 69
##
## $`Antibiotics exclusion`
##
## 3 months 1 month 2 months
## 577 485 241
## 6 months 2 weeks 3 Months
## 177 130 43
## 6 Months currently on antibiotics 1 Month
## 40 28 27
## 3 Months.
## 25
Proportions instead:
# Top-10 categories as proportions for the five categorical subject columns.
# Naming the input vector up front makes lapply() return a list that already
# carries the column names.
sub.tab <- lapply(
    setNames(nm = sub.cols[1:5]),
    tabCol,
    df = exps,
    n = 10,
    perc = TRUE
)
sub.tab
## $`Host species`
##
## Homo sapiens Mus musculus Rattus norvegicus
## 0.79900 0.10800 0.02060
## Sus scrofa domesticus Canis lupus familiaris Not specified
## 0.01470 0.01380 0.00624
## Ovis aries Bos taurus Capra hircus
## 0.00454 0.00340 0.00265
## Gallus gallus
## 0.00265
##
## $`Location of subjects`
##
## China United States of America Germany
## 0.3210 0.1780 0.0318
## Japan Denmark Italy
## 0.0311 0.0284 0.0242
## Australia Netherlands South Korea
## 0.0218 0.0210 0.0182
## Spain
## 0.0182
##
## $`Body site`
##
## Feces Saliva Vagina
## 0.63400 0.05530 0.01950
## Mouth Nasopharynx Subgingival dental plaque
## 0.01270 0.01040 0.00927
## Skin of body Uterine cervix Colon
## 0.00889 0.00889 0.00738
## Throat
## 0.00700
##
## $Condition
##
## Parkinson's disease Diet
## 0.0374 0.0290
## Obesity Colorectal cancer
## 0.0273 0.0269
## Treatment outcome measurement COVID-19
## 0.0257 0.0226
## Atopic eczema Antimicrobial agent
## 0.0175 0.0166
## Alzheimer's disease Extraction protocol
## 0.0154 0.0134
##
## $`Antibiotics exclusion`
##
## 3 months 1 month 2 months
## 0.2460 0.2060 0.1030
## 6 months 2 weeks 3 Months
## 0.0753 0.0553 0.0183
## 6 Months currently on antibiotics 1 Month
## 0.0170 0.0119 0.0115
## 3 Months.
## 0.0106
Sample size:
# Five-number summary (plus mean and NA count) of the two group sample-size
# columns, one summary per column.
# NOTE(review): apply() coerces the data frame to a matrix first and only
# returns a matrix when every column's summary has the same length (here
# both columns contain NAs) -- keep this in mind if the data changes.
ssize <- apply(exps[,sub.cols[6:7]], 2, summary)
ssize
## Group 0 sample size Group 1 sample size
## Min. 0.0000 1.00000
## 1st Qu. 12.0000 10.00000
## Median 24.0000 22.00000
## Mean 567.8895 68.43638
## 3rd Qu. 50.0000 43.00000
## Max. 308633.0000 10413.00000
## NA's 703.0000 700.00000
Lab analysis
Number of experiments for the top 10 categories for each lab analysis column:
## $`Sequencing type`
##
## 16S WMS PCR ITS / ITS2 18S
## 4234 763 58 25 5
##
## $`16S variable region`
##
## 34 4 12 123 45 345 3 56 678 23
## 1693 1199 279 213 150 139 61 43 31 21
##
## $`Sequencing platform`
##
## Illumina Roche454
## 4221 328
## Ion Torrent RT-qPCR
## 200 105
## MGISEQ-2000 PacBio RS
## 37 24
## BGISEQ-500 Sequencing Mass spectrometry
## 18 18
## Human Intestinal Tract Chip Illumina,Roche454
## 16 11
Proportions instead:
# Top-10 categories as proportions for every lab analysis column. The input
# vector is named by itself so the resulting list is keyed by column name.
lab.tab <- lapply(
    setNames(nm = lab.cols),
    tabCol,
    df = exps,
    n = 10,
    perc = TRUE
)
lab.tab
## $`Sequencing type`
##
## 16S WMS PCR ITS / ITS2 18S
## 0.833000 0.150000 0.011400 0.004920 0.000983
##
## $`16S variable region`
##
## 34 4 12 123 45 345 3 56 678 23
## 0.43100 0.30500 0.07100 0.05420 0.03820 0.03540 0.01550 0.01090 0.00789 0.00534
##
## $`Sequencing platform`
##
## Illumina Roche454
## 0.84100 0.06540
## Ion Torrent RT-qPCR
## 0.03990 0.02090
## MGISEQ-2000 PacBio RS
## 0.00737 0.00478
## BGISEQ-500 Sequencing Mass spectrometry
## 0.00359 0.00359
## Human Intestinal Tract Chip Illumina,Roche454
## 0.00319 0.00219
Statistical analysis
Number of experiments for the top 10 categories for each statistical analysis column:
## $`Statistical test`
##
## LEfSe Mann-Whitney (Wilcoxon) DESeq2
## 1567 825 508
## Kruskall-Wallis ANOVA Linear Regression
## 278 238 230
## T-Test MaAsLin2 ANCOM
## 201 182 167
## Logistic Regression
## 85
##
## $`MHT correction`
##
## TRUE FALSE
## 2661 2010
##
## $`Significance threshold`
##
## 0.05 0.1 0.01 0.001 0.25 0.2 0.15 0.005 2 1e-04
## 4426 330 108 34 34 25 19 15 15 6
Proportions instead:
# Top-10 categories as proportions for every statistical analysis column.
# The input vector is named by itself so the resulting list is keyed by
# column name.
stat.tab <- lapply(
    setNames(nm = stat.cols),
    tabCol,
    df = exps,
    n = 10,
    perc = TRUE
)
stat.tab
## $`Statistical test`
##
## LEfSe Mann-Whitney (Wilcoxon) DESeq2
## 0.3110 0.1640 0.1010
## Kruskall-Wallis ANOVA Linear Regression
## 0.0551 0.0472 0.0456
## T-Test MaAsLin2 ANCOM
## 0.0399 0.0361 0.0331
## Logistic Regression
## 0.0169
##
## $`MHT correction`
##
## TRUE FALSE
## 0.57 0.43
##
## $`Significance threshold`
##
## 0.05 0.1 0.01 0.001 0.25 0.2 0.15 0.005 2 1e-04
## 0.87900 0.06560 0.02150 0.00675 0.00675 0.00497 0.00377 0.00298 0.00298 0.00119
Alpha diversity
Overall distribution:
# Tabulate the reported direction of change for each alpha diversity measure.
# NOTE(review): the matrix result relies on every column showing the same
# set of categories; otherwise apply() returns a list instead.
apply(exps[,div.cols], 2, table)
## Pielou Shannon Chao1 Simpson Inverse Simpson Richness
## decreased 46 685 443 199 59 415
## increased 37 540 309 161 34 331
## unchanged 171 1903 835 678 200 954
Correspondence of Shannon diversity and Richness:
# Cross-tabulation: rows are the Shannon result, columns the Richness result
table(exps$Shannon, exps$Richness)
##
## decreased increased unchanged
## decreased 219 13 48
## increased 9 162 51
## unchanged 93 88 775
Conditions with consistently increased or decreased alpha diversity:
# Per-condition counts of experiments with increased / decreased / unchanged
# Shannon diversity; rows appear to be ordered by |increased - decreased|
# (tabDiv() is defined elsewhere -- confirm).
tabDiv(exps, "Shannon", "Condition")
## increased decreased
## Oxalate measurement 0 17
## COVID-19 9 24
## Obesity 3 16
## HIV infection 1 12
## Smoking behaviour measurement 2 13
## Clostridium difficile infection 10 0
## Dry eye syndrome 1 11
## Systemic inflammatory response syndrome 5 15
## Treatment outcome measurement 11 21
## Cesarean section 9 0
## Chronic constipation 9 0
## Human papilloma virus infection 10 1
## Gastric cancer 6 14
## Polycystic ovary syndrome 0 8
## Ulcerative colitis 1 9
## Age 5 12
## Aging 0 7
## Alzheimer's disease 2 9
## Balanced reciprocal translocation 7 0
## Atopic eczema 5 11
## Autism spectrum disorder 7 1
## Constipation 6 0
## Epilepsy 6 0
## Lung cancer 2 8
## Parkinson's disease 20 14
## Response to allogeneic hematopoietic stem cell transplant 0 6
## Urinary tract infection 0 6
## Cervical cancer 5 0
## Diet 14 19
## Helminthiasis 5 0
## Population 2 7
## Response to transplant 8 13
## Spontaneous preterm birth 12 7
## Acute lymphoblastic leukemia 0 4
## Acute pancreatitis 0 4
## Colitis 4 0
## Colorectal cancer 10 14
## Ethnic group 3 7
## Food allergy 6 2
## Human immunodeficiency virus 0 4
## Hypertension 7 3
## Periodontitis 5 1
## Pregnancy 4 0
## Response to antibiotic 0 4
## Alcohol drinking 3 0
## Atopic asthma 4 1
## Birth measurement 3 0
## Crohn's disease 2 5
## Delivery method 1 4
## Extraction protocol 23 26
## Irritable bowel syndrome 3 6
## Male homosexuality 3 0
## Oral lichen planus 3 0
## SARS-CoV-2-related disease 0 3
## Schizophrenia 1 4
## Type II diabetes mellitus 2 5
## Age at assessment 3 1
## Antimicrobial agent 8 10
## Breed 0 2
## Cervical glandular intraepithelial neoplasia 2 0
## Chronic kidney disease 2 4
## Cognitive impairment 1 3
## Depressive disorder 0 2
## Diarrhea 6 4
## Eczema 0 2
## Endometrial cancer 4 2
## Esophageal adenocarcinoma 0 2
## Iron biomarker measurement 1 3
## Milk allergic reaction 2 0
## Papillary thyroid carcinoma 2 0
## Phenylketonuria 1 3
## Response to anti-tuberculosis drug 8 10
## Response to antiviral drug 2 4
## Response to immunochemotherapy 3 1
## Sampling site 3 1
## Smoking behavior 10 8
## Squamous cell carcinoma 2 0
## Streptococcus pneumoniae 0 2
## Stroke 2 0
## Traditional Chinese medicine type 2 4
## Acute respiratory failure 6 5
## Air pollution 7 6
## Anxiety disorder 0 1
## Breast cancer 3 4
## Breastfeeding duration 2 3
## Chlamydia trachomatis 1 2
## Chronic fatigue syndrome 0 1
## Chronic hepatitis B virus infection 0 1
## Chronic obstructive pulmonary disease 3 2
## Diabetes mellitus 0 1
## Endometriosis 2 3
## Esophageal cancer 1 2
## Gestational diabetes 1 0
## Hepatocellular carcinoma 0 1
## Hypertrophy 1 0
## Multiple sclerosis 0 1
## Oral cavity carcinoma 0 1
## Oral squamous cell carcinoma 3 2
## Pancreatic carcinoma 0 1
## Psoriasis 1 0
## Respiratory Syncytial Virus Infection 0 1
## Response to diet 3 2
## Response to vaccine 1 0
## Rheumatoid arthritis 5 4
## Sample treatment protocol 1 0
## Sampling time 4 3
## Social interaction measurement 2 1
## Socioeconomic status 3 4
## Treatment 1 0
## Type I diabetes mellitus 0 1
## Vesicle membrane 3 2
## Vitiligo 0 1
## Abnormal stool composition 0 0
## Acute myeloid leukemia 1 1
## Arthritis 0 0
## Asthma 1 1
## Biological sex 1 1
## Bipolar disorder 0 0
## Celiac disease 0 0
## Clinical treatment 1 1
## Colorectal adenoma 2 2
## Contraception 0 0
## COVID-19 symptoms measurement 0 0
## Diarrhea, Infantile 0 0
## Disease progression measurement 0 0
## Gastric adenocarcinoma 0 0
## Head and neck squamous cell carcinoma 0 0
## Health study participation 2 2
## HIV mother to child transmission 0 0
## Lactose intolerance 0 0
## Lifestyle measurement 2 2
## Lung transplantation 2 2
## Obsessive-compulsive disorder 0 0
## Ovarian cancer 3 3
## Phenotype 2 2
## Psoriasis vulgaris 0 0
## Response to ketogenic diet 2 2
## Sample collection protocol 0 0
## SARS coronavirus 0 0
## Simian immunodeficiency virus infection 0 0
## Smoking cessation 0 0
## Transplant outcome measurement 0 0
## Viral load 0 0
## Waist circumference 0 0
## unchanged
## Oxalate measurement 0
## COVID-19 42
## Obesity 59
## HIV infection 26
## Smoking behaviour measurement 0
## Clostridium difficile infection 1
## Dry eye syndrome 11
## Systemic inflammatory response syndrome 4
## Treatment outcome measurement 64
## Cesarean section 16
## Chronic constipation 12
## Human papilloma virus infection 28
## Gastric cancer 26
## Polycystic ovary syndrome 10
## Ulcerative colitis 3
## Age 9
## Aging 0
## Alzheimer's disease 26
## Balanced reciprocal translocation 0
## Atopic eczema 72
## Autism spectrum disorder 8
## Constipation 2
## Epilepsy 5
## Lung cancer 7
## Parkinson's disease 81
## Response to allogeneic hematopoietic stem cell transplant 0
## Urinary tract infection 8
## Cervical cancer 5
## Diet 63
## Helminthiasis 8
## Population 25
## Response to transplant 25
## Spontaneous preterm birth 5
## Acute lymphoblastic leukemia 5
## Acute pancreatitis 2
## Colitis 1
## Colorectal cancer 48
## Ethnic group 6
## Food allergy 19
## Human immunodeficiency virus 6
## Hypertension 6
## Periodontitis 10
## Pregnancy 2
## Response to antibiotic 8
## Alcohol drinking 2
## Atopic asthma 7
## Birth measurement 4
## Crohn's disease 5
## Delivery method 2
## Extraction protocol 20
## Irritable bowel syndrome 20
## Male homosexuality 6
## Oral lichen planus 4
## SARS-CoV-2-related disease 4
## Schizophrenia 14
## Type II diabetes mellitus 24
## Age at assessment 1
## Antimicrobial agent 25
## Breed 7
## Cervical glandular intraepithelial neoplasia 9
## Chronic kidney disease 5
## Cognitive impairment 8
## Depressive disorder 4
## Diarrhea 8
## Eczema 10
## Endometrial cancer 3
## Esophageal adenocarcinoma 4
## Iron biomarker measurement 2
## Milk allergic reaction 5
## Papillary thyroid carcinoma 10
## Phenylketonuria 4
## Response to anti-tuberculosis drug 13
## Response to antiviral drug 5
## Response to immunochemotherapy 3
## Sampling site 7
## Smoking behavior 20
## Squamous cell carcinoma 4
## Streptococcus pneumoniae 4
## Stroke 16
## Traditional Chinese medicine type 6
## Acute respiratory failure 0
## Air pollution 3
## Anxiety disorder 7
## Breast cancer 16
## Breastfeeding duration 9
## Chlamydia trachomatis 2
## Chronic fatigue syndrome 4
## Chronic hepatitis B virus infection 5
## Chronic obstructive pulmonary disease 2
## Diabetes mellitus 5
## Endometriosis 14
## Esophageal cancer 2
## Gestational diabetes 35
## Hepatocellular carcinoma 6
## Hypertrophy 4
## Multiple sclerosis 17
## Oral cavity carcinoma 7
## Oral squamous cell carcinoma 3
## Pancreatic carcinoma 4
## Psoriasis 12
## Respiratory Syncytial Virus Infection 5
## Response to diet 27
## Response to vaccine 5
## Rheumatoid arthritis 9
## Sample treatment protocol 4
## Sampling time 5
## Social interaction measurement 6
## Socioeconomic status 8
## Treatment 7
## Type I diabetes mellitus 6
## Vesicle membrane 1
## Vitiligo 4
## Abnormal stool composition 6
## Acute myeloid leukemia 4
## Arthritis 6
## Asthma 14
## Biological sex 6
## Bipolar disorder 5
## Celiac disease 6
## Clinical treatment 5
## Colorectal adenoma 10
## Contraception 5
## COVID-19 symptoms measurement 5
## Diarrhea, Infantile 27
## Disease progression measurement 5
## Gastric adenocarcinoma 8
## Head and neck squamous cell carcinoma 8
## Health study participation 35
## HIV mother to child transmission 8
## Lactose intolerance 5
## Lifestyle measurement 8
## Lung transplantation 2
## Obsessive-compulsive disorder 5
## Ovarian cancer 27
## Phenotype 19
## Psoriasis vulgaris 14
## Response to ketogenic diet 3
## Sample collection protocol 9
## SARS coronavirus 6
## Simian immunodeficiency virus infection 5
## Smoking cessation 6
## Transplant outcome measurement 5
## Viral load 6
## Waist circumference 5
# Shannon diversity by condition as proportions: in the printed output the
# three categories of each condition sum to roughly 1.
tabDiv(exps, "Shannon", "Condition", perc = TRUE)
## increased decreased
## Oxalate measurement 0.000 1.000
## COVID-19 0.120 0.320
## Obesity 0.038 0.210
## HIV infection 0.026 0.310
## Smoking behaviour measurement 0.130 0.870
## Clostridium difficile infection 0.910 0.000
## Dry eye syndrome 0.043 0.480
## Systemic inflammatory response syndrome 0.210 0.620
## Treatment outcome measurement 0.110 0.220
## Cesarean section 0.360 0.000
## Chronic constipation 0.430 0.000
## Human papilloma virus infection 0.260 0.026
## Gastric cancer 0.130 0.300
## Polycystic ovary syndrome 0.000 0.440
## Ulcerative colitis 0.077 0.690
## Age 0.190 0.460
## Aging 0.000 1.000
## Alzheimer's disease 0.054 0.240
## Balanced reciprocal translocation 1.000 0.000
## Atopic eczema 0.057 0.120
## Autism spectrum disorder 0.440 0.062
## Constipation 0.750 0.000
## Epilepsy 0.550 0.000
## Lung cancer 0.120 0.470
## Parkinson's disease 0.170 0.120
## Response to allogeneic hematopoietic stem cell transplant 0.000 1.000
## Urinary tract infection 0.000 0.430
## Cervical cancer 0.500 0.000
## Diet 0.150 0.200
## Helminthiasis 0.380 0.000
## Population 0.059 0.210
## Response to transplant 0.170 0.280
## Spontaneous preterm birth 0.500 0.290
## Acute lymphoblastic leukemia 0.000 0.440
## Acute pancreatitis 0.000 0.670
## Colitis 0.800 0.000
## Colorectal cancer 0.140 0.190
## Ethnic group 0.190 0.440
## Food allergy 0.220 0.074
## Human immunodeficiency virus 0.000 0.400
## Hypertension 0.440 0.190
## Periodontitis 0.310 0.062
## Pregnancy 0.670 0.000
## Response to antibiotic 0.000 0.330
## Alcohol drinking 0.600 0.000
## Atopic asthma 0.330 0.083
## Birth measurement 0.430 0.000
## Crohn's disease 0.170 0.420
## Delivery method 0.140 0.570
## Extraction protocol 0.330 0.380
## Irritable bowel syndrome 0.100 0.210
## Male homosexuality 0.330 0.000
## Oral lichen planus 0.430 0.000
## SARS-CoV-2-related disease 0.000 0.430
## Schizophrenia 0.053 0.210
## Type II diabetes mellitus 0.065 0.160
## Age at assessment 0.600 0.200
## Antimicrobial agent 0.190 0.230
## Breed 0.000 0.220
## Cervical glandular intraepithelial neoplasia 0.180 0.000
## Chronic kidney disease 0.180 0.360
## Cognitive impairment 0.083 0.250
## Depressive disorder 0.000 0.330
## Diarrhea 0.330 0.220
## Eczema 0.000 0.170
## Endometrial cancer 0.440 0.220
## Esophageal adenocarcinoma 0.000 0.330
## Iron biomarker measurement 0.170 0.500
## Milk allergic reaction 0.290 0.000
## Papillary thyroid carcinoma 0.170 0.000
## Phenylketonuria 0.120 0.380
## Response to anti-tuberculosis drug 0.260 0.320
## Response to antiviral drug 0.180 0.360
## Response to immunochemotherapy 0.430 0.140
## Sampling site 0.270 0.091
## Smoking behavior 0.260 0.210
## Squamous cell carcinoma 0.330 0.000
## Streptococcus pneumoniae 0.000 0.330
## Stroke 0.110 0.000
## Traditional Chinese medicine type 0.170 0.330
## Acute respiratory failure 0.550 0.450
## Air pollution 0.440 0.380
## Anxiety disorder 0.000 0.120
## Breast cancer 0.130 0.170
## Breastfeeding duration 0.140 0.210
## Chlamydia trachomatis 0.200 0.400
## Chronic fatigue syndrome 0.000 0.200
## Chronic hepatitis B virus infection 0.000 0.170
## Chronic obstructive pulmonary disease 0.430 0.290
## Diabetes mellitus 0.000 0.170
## Endometriosis 0.110 0.160
## Esophageal cancer 0.200 0.400
## Gestational diabetes 0.028 0.000
## Hepatocellular carcinoma 0.000 0.140
## Hypertrophy 0.200 0.000
## Multiple sclerosis 0.000 0.056
## Oral cavity carcinoma 0.000 0.120
## Oral squamous cell carcinoma 0.380 0.250
## Pancreatic carcinoma 0.000 0.200
## Psoriasis 0.077 0.000
## Respiratory Syncytial Virus Infection 0.000 0.170
## Response to diet 0.094 0.062
## Response to vaccine 0.170 0.000
## Rheumatoid arthritis 0.280 0.220
## Sample treatment protocol 0.200 0.000
## Sampling time 0.330 0.250
## Social interaction measurement 0.220 0.110
## Socioeconomic status 0.200 0.270
## Treatment 0.120 0.000
## Type I diabetes mellitus 0.000 0.140
## Vesicle membrane 0.500 0.330
## Vitiligo 0.000 0.200
## Abnormal stool composition 0.000 0.000
## Acute myeloid leukemia 0.170 0.170
## Arthritis 0.000 0.000
## Asthma 0.062 0.062
## Biological sex 0.120 0.120
## Bipolar disorder 0.000 0.000
## Celiac disease 0.000 0.000
## Clinical treatment 0.140 0.140
## Colorectal adenoma 0.140 0.140
## Contraception 0.000 0.000
## COVID-19 symptoms measurement 0.000 0.000
## Diarrhea, Infantile 0.000 0.000
## Disease progression measurement 0.000 0.000
## Gastric adenocarcinoma 0.000 0.000
## Head and neck squamous cell carcinoma 0.000 0.000
## Health study participation 0.051 0.051
## HIV mother to child transmission 0.000 0.000
## Lactose intolerance 0.000 0.000
## Lifestyle measurement 0.170 0.170
## Lung transplantation 0.330 0.330
## Obsessive-compulsive disorder 0.000 0.000
## Ovarian cancer 0.091 0.091
## Phenotype 0.087 0.087
## Psoriasis vulgaris 0.000 0.000
## Response to ketogenic diet 0.290 0.290
## Sample collection protocol 0.000 0.000
## SARS coronavirus 0.000 0.000
## Simian immunodeficiency virus infection 0.000 0.000
## Smoking cessation 0.000 0.000
## Transplant outcome measurement 0.000 0.000
## Viral load 0.000 0.000
## Waist circumference 0.000 0.000
## unchanged
## Oxalate measurement 0.000
## COVID-19 0.560
## Obesity 0.760
## HIV infection 0.670
## Smoking behaviour measurement 0.000
## Clostridium difficile infection 0.091
## Dry eye syndrome 0.480
## Systemic inflammatory response syndrome 0.170
## Treatment outcome measurement 0.670
## Cesarean section 0.640
## Chronic constipation 0.570
## Human papilloma virus infection 0.720
## Gastric cancer 0.570
## Polycystic ovary syndrome 0.560
## Ulcerative colitis 0.230
## Age 0.350
## Aging 0.000
## Alzheimer's disease 0.700
## Balanced reciprocal translocation 0.000
## Atopic eczema 0.820
## Autism spectrum disorder 0.500
## Constipation 0.250
## Epilepsy 0.450
## Lung cancer 0.410
## Parkinson's disease 0.700
## Response to allogeneic hematopoietic stem cell transplant 0.000
## Urinary tract infection 0.570
## Cervical cancer 0.500
## Diet 0.660
## Helminthiasis 0.620
## Population 0.740
## Response to transplant 0.540
## Spontaneous preterm birth 0.210
## Acute lymphoblastic leukemia 0.560
## Acute pancreatitis 0.330
## Colitis 0.200
## Colorectal cancer 0.670
## Ethnic group 0.380
## Food allergy 0.700
## Human immunodeficiency virus 0.600
## Hypertension 0.380
## Periodontitis 0.620
## Pregnancy 0.330
## Response to antibiotic 0.670
## Alcohol drinking 0.400
## Atopic asthma 0.580
## Birth measurement 0.570
## Crohn's disease 0.420
## Delivery method 0.290
## Extraction protocol 0.290
## Irritable bowel syndrome 0.690
## Male homosexuality 0.670
## Oral lichen planus 0.570
## SARS-CoV-2-related disease 0.570
## Schizophrenia 0.740
## Type II diabetes mellitus 0.770
## Age at assessment 0.200
## Antimicrobial agent 0.580
## Breed 0.780
## Cervical glandular intraepithelial neoplasia 0.820
## Chronic kidney disease 0.450
## Cognitive impairment 0.670
## Depressive disorder 0.670
## Diarrhea 0.440
## Eczema 0.830
## Endometrial cancer 0.330
## Esophageal adenocarcinoma 0.670
## Iron biomarker measurement 0.330
## Milk allergic reaction 0.710
## Papillary thyroid carcinoma 0.830
## Phenylketonuria 0.500
## Response to anti-tuberculosis drug 0.420
## Response to antiviral drug 0.450
## Response to immunochemotherapy 0.430
## Sampling site 0.640
## Smoking behavior 0.530
## Squamous cell carcinoma 0.670
## Streptococcus pneumoniae 0.670
## Stroke 0.890
## Traditional Chinese medicine type 0.500
## Acute respiratory failure 0.000
## Air pollution 0.190
## Anxiety disorder 0.880
## Breast cancer 0.700
## Breastfeeding duration 0.640
## Chlamydia trachomatis 0.400
## Chronic fatigue syndrome 0.800
## Chronic hepatitis B virus infection 0.830
## Chronic obstructive pulmonary disease 0.290
## Diabetes mellitus 0.830
## Endometriosis 0.740
## Esophageal cancer 0.400
## Gestational diabetes 0.970
## Hepatocellular carcinoma 0.860
## Hypertrophy 0.800
## Multiple sclerosis 0.940
## Oral cavity carcinoma 0.880
## Oral squamous cell carcinoma 0.380
## Pancreatic carcinoma 0.800
## Psoriasis 0.920
## Respiratory Syncytial Virus Infection 0.830
## Response to diet 0.840
## Response to vaccine 0.830
## Rheumatoid arthritis 0.500
## Sample treatment protocol 0.800
## Sampling time 0.420
## Social interaction measurement 0.670
## Socioeconomic status 0.530
## Treatment 0.880
## Type I diabetes mellitus 0.860
## Vesicle membrane 0.170
## Vitiligo 0.800
## Abnormal stool composition 1.000
## Acute myeloid leukemia 0.670
## Arthritis 1.000
## Asthma 0.880
## Biological sex 0.750
## Bipolar disorder 1.000
## Celiac disease 1.000
## Clinical treatment 0.710
## Colorectal adenoma 0.710
## Contraception 1.000
## COVID-19 symptoms measurement 1.000
## Diarrhea, Infantile 1.000
## Disease progression measurement 1.000
## Gastric adenocarcinoma 1.000
## Head and neck squamous cell carcinoma 1.000
## Health study participation 0.900
## HIV mother to child transmission 1.000
## Lactose intolerance 1.000
## Lifestyle measurement 0.670
## Lung transplantation 0.330
## Obsessive-compulsive disorder 1.000
## Ovarian cancer 0.820
## Phenotype 0.830
## Psoriasis vulgaris 1.000
## Response to ketogenic diet 0.430
## Sample collection protocol 1.000
## SARS coronavirus 1.000
## Simian immunodeficiency virus infection 1.000
## Smoking cessation 1.000
## Transplant outcome measurement 1.000
## Viral load 1.000
## Waist circumference 1.000
# Per-condition counts of experiments with increased / decreased / unchanged
# Richness; rows appear to be ordered by |increased - decreased|
# (tabDiv() is defined elsewhere -- confirm).
tabDiv(exps, "Richness", "Condition")
## increased decreased
## Treatment outcome measurement 5 21
## Diet 4 19
## Helminthiasis 13 0
## HIV infection 3 15
## COVID-19 9 20
## Chronic constipation 8 0
## Parkinson's disease 18 26
## Phenotype 9 1
## Balanced reciprocal translocation 7 0
## Diarrhea 8 1
## Head and neck squamous cell carcinoma 0 7
## Polycystic ovary syndrome 0 7
## Increased intestinal transit time 6 0
## Response to allogeneic hematopoietic stem cell transplant 0 6
## Alcohol drinking 5 0
## Antimicrobial agent 2 7
## Human immunodeficiency virus 1 6
## Human papilloma virus infection 7 2
## Acute lymphoblastic leukemia 5 1
## Age 1 5
## Air pollution 9 5
## Cervical glandular intraepithelial neoplasia 4 0
## Dry eye syndrome 0 4
## Endometriosis 4 0
## Epilepsy 4 0
## Periodontitis 5 1
## Schizophrenia 1 5
## Vesicle membrane 5 1
## Atopic asthma 4 1
## Delivery method 4 1
## Food allergy 0 3
## Gastric cancer 5 8
## Gestational diabetes 3 6
## Hypertrophy 3 0
## Iron biomarker measurement 1 4
## Irritable bowel syndrome 2 5
## Oral squamous cell carcinoma 1 4
## Asthma 2 0
## Autism spectrum disorder 4 6
## Breast cancer 2 0
## Colorectal cancer 8 10
## Esophageal adenocarcinoma 0 2
## Hypertension 1 3
## Phenylketonuria 1 3
## Smoking behavior 6 8
## Smoking status measurement 2 0
## Streptococcus pneumoniae 0 2
## Traditional Chinese medicine type 1 3
## Transplant outcome measurement 0 2
## Treatment 1 3
## Ulcerative colitis 1 3
## Alzheimer's disease 6 5
## Atopic eczema 2 1
## Breastfeeding duration 1 0
## Cesarean section 3 2
## Colorectal adenoma 1 2
## Constipation 4 5
## Crohn's disease 2 3
## Endometrial cancer 1 2
## Health study participation 1 0
## Inflammatory bowel disease 2 3
## Lung cancer 0 1
## Obesity 8 7
## Obsessive-compulsive disorder 0 1
## Ovarian cancer 1 0
## Psoriasis 0 1
## Response to transplant 6 7
## Rheumatoid arthritis 3 4
## Sampling site 1 2
## Socioeconomic status 2 1
## Transport 1 2
## Type II diabetes mellitus 2 3
## Urinary tract infection 0 1
## Abnormal stool composition 0 0
## Chlamydia trachomatis 1 1
## Diarrhea, Infantile 0 0
## Ethnic group 2 2
## HIV mother to child transmission 0 0
## Male homosexuality 0 0
## Multiple sclerosis 0 0
## Papillary thyroid carcinoma 0 0
## Physical activity 2 2
## Psoriasis vulgaris 0 0
## Response to diet 3 3
## Sample collection protocol 0 0
## Smoking cessation 0 0
## Stroke 2 2
## Viral load 0 0
## unchanged
## Treatment outcome measurement 45
## Diet 30
## Helminthiasis 0
## HIV infection 10
## COVID-19 24
## Chronic constipation 6
## Parkinson's disease 28
## Phenotype 11
## Balanced reciprocal translocation 0
## Diarrhea 4
## Head and neck squamous cell carcinoma 4
## Polycystic ovary syndrome 3
## Increased intestinal transit time 0
## Response to allogeneic hematopoietic stem cell transplant 0
## Alcohol drinking 0
## Antimicrobial agent 10
## Human immunodeficiency virus 2
## Human papilloma virus infection 12
## Acute lymphoblastic leukemia 0
## Age 1
## Air pollution 6
## Cervical glandular intraepithelial neoplasia 2
## Dry eye syndrome 3
## Endometriosis 8
## Epilepsy 1
## Periodontitis 6
## Schizophrenia 8
## Vesicle membrane 0
## Atopic asthma 7
## Delivery method 1
## Food allergy 9
## Gastric cancer 14
## Gestational diabetes 25
## Hypertrophy 2
## Iron biomarker measurement 1
## Irritable bowel syndrome 13
## Oral squamous cell carcinoma 0
## Asthma 10
## Autism spectrum disorder 0
## Breast cancer 7
## Colorectal cancer 21
## Esophageal adenocarcinoma 4
## Hypertension 6
## Phenylketonuria 4
## Smoking behavior 8
## Smoking status measurement 3
## Streptococcus pneumoniae 3
## Traditional Chinese medicine type 4
## Transplant outcome measurement 3
## Treatment 6
## Ulcerative colitis 1
## Alzheimer's disease 24
## Atopic eczema 6
## Breastfeeding duration 9
## Cesarean section 10
## Colorectal adenoma 11
## Constipation 8
## Crohn's disease 2
## Endometrial cancer 3
## Health study participation 28
## Inflammatory bowel disease 0
## Lung cancer 10
## Obesity 19
## Obsessive-compulsive disorder 4
## Ovarian cancer 30
## Psoriasis 8
## Response to transplant 11
## Rheumatoid arthritis 1
## Sampling site 2
## Socioeconomic status 2
## Transport 3
## Type II diabetes mellitus 10
## Urinary tract infection 6
## Abnormal stool composition 6
## Chlamydia trachomatis 3
## Diarrhea, Infantile 27
## Ethnic group 1
## HIV mother to child transmission 8
## Male homosexuality 9
## Multiple sclerosis 17
## Papillary thyroid carcinoma 12
## Physical activity 1
## Psoriasis vulgaris 14
## Response to diet 4
## Sample collection protocol 9
## Smoking cessation 6
## Stroke 17
## Viral load 5
# Richness by condition as proportions: in the printed output the three
# categories of each condition sum to roughly 1.
tabDiv(exps, "Richness", "Condition", perc = TRUE)
## increased decreased
## Treatment outcome measurement 0.070 0.300
## Diet 0.075 0.360
## Helminthiasis 1.000 0.000
## HIV infection 0.110 0.540
## COVID-19 0.170 0.380
## Chronic constipation 0.570 0.000
## Parkinson's disease 0.250 0.360
## Phenotype 0.430 0.048
## Balanced reciprocal translocation 1.000 0.000
## Diarrhea 0.620 0.077
## Head and neck squamous cell carcinoma 0.000 0.640
## Polycystic ovary syndrome 0.000 0.700
## Increased intestinal transit time 1.000 0.000
## Response to allogeneic hematopoietic stem cell transplant 0.000 1.000
## Alcohol drinking 1.000 0.000
## Antimicrobial agent 0.110 0.370
## Human immunodeficiency virus 0.110 0.670
## Human papilloma virus infection 0.330 0.095
## Acute lymphoblastic leukemia 0.830 0.170
## Age 0.140 0.710
## Air pollution 0.450 0.250
## Cervical glandular intraepithelial neoplasia 0.670 0.000
## Dry eye syndrome 0.000 0.570
## Endometriosis 0.330 0.000
## Epilepsy 0.800 0.000
## Periodontitis 0.420 0.083
## Schizophrenia 0.071 0.360
## Vesicle membrane 0.830 0.170
## Atopic asthma 0.330 0.083
## Delivery method 0.670 0.170
## Food allergy 0.000 0.250
## Gastric cancer 0.190 0.300
## Gestational diabetes 0.088 0.180
## Hypertrophy 0.600 0.000
## Iron biomarker measurement 0.170 0.670
## Irritable bowel syndrome 0.100 0.250
## Oral squamous cell carcinoma 0.200 0.800
## Asthma 0.170 0.000
## Autism spectrum disorder 0.400 0.600
## Breast cancer 0.220 0.000
## Colorectal cancer 0.210 0.260
## Esophageal adenocarcinoma 0.000 0.330
## Hypertension 0.100 0.300
## Phenylketonuria 0.120 0.380
## Smoking behavior 0.270 0.360
## Smoking status measurement 0.400 0.000
## Streptococcus pneumoniae 0.000 0.400
## Traditional Chinese medicine type 0.120 0.380
## Transplant outcome measurement 0.000 0.400
## Treatment 0.100 0.300
## Ulcerative colitis 0.200 0.600
## Alzheimer's disease 0.170 0.140
## Atopic eczema 0.220 0.110
## Breastfeeding duration 0.100 0.000
## Cesarean section 0.200 0.130
## Colorectal adenoma 0.071 0.140
## Constipation 0.240 0.290
## Crohn's disease 0.290 0.430
## Endometrial cancer 0.170 0.330
## Health study participation 0.034 0.000
## Inflammatory bowel disease 0.400 0.600
## Lung cancer 0.000 0.091
## Obesity 0.240 0.210
## Obsessive-compulsive disorder 0.000 0.200
## Ovarian cancer 0.032 0.000
## Psoriasis 0.000 0.110
## Response to transplant 0.250 0.290
## Rheumatoid arthritis 0.380 0.500
## Sampling site 0.200 0.400
## Socioeconomic status 0.400 0.200
## Transport 0.170 0.330
## Type II diabetes mellitus 0.130 0.200
## Urinary tract infection 0.000 0.140
## Abnormal stool composition 0.000 0.000
## Chlamydia trachomatis 0.200 0.200
## Diarrhea, Infantile 0.000 0.000
## Ethnic group 0.400 0.400
## HIV mother to child transmission 0.000 0.000
## Male homosexuality 0.000 0.000
## Multiple sclerosis 0.000 0.000
## Papillary thyroid carcinoma 0.000 0.000
## Physical activity 0.400 0.400
## Psoriasis vulgaris 0.000 0.000
## Response to diet 0.300 0.300
## Sample collection protocol 0.000 0.000
## Smoking cessation 0.000 0.000
## Stroke 0.095 0.095
## Viral load 0.000 0.000
## unchanged
## Treatment outcome measurement 0.63
## Diet 0.57
## Helminthiasis 0.00
## HIV infection 0.36
## COVID-19 0.45
## Chronic constipation 0.43
## Parkinson's disease 0.39
## Phenotype 0.52
## Balanced reciprocal translocation 0.00
## Diarrhea 0.31
## Head and neck squamous cell carcinoma 0.36
## Polycystic ovary syndrome 0.30
## Increased intestinal transit time 0.00
## Response to allogeneic hematopoietic stem cell transplant 0.00
## Alcohol drinking 0.00
## Antimicrobial agent 0.53
## Human immunodeficiency virus 0.22
## Human papilloma virus infection 0.57
## Acute lymphoblastic leukemia 0.00
## Age 0.14
## Air pollution 0.30
## Cervical glandular intraepithelial neoplasia 0.33
## Dry eye syndrome 0.43
## Endometriosis 0.67
## Epilepsy 0.20
## Periodontitis 0.50
## Schizophrenia 0.57
## Vesicle membrane 0.00
## Atopic asthma 0.58
## Delivery method 0.17
## Food allergy 0.75
## Gastric cancer 0.52
## Gestational diabetes 0.74
## Hypertrophy 0.40
## Iron biomarker measurement 0.17
## Irritable bowel syndrome 0.65
## Oral squamous cell carcinoma 0.00
## Asthma 0.83
## Autism spectrum disorder 0.00
## Breast cancer 0.78
## Colorectal cancer 0.54
## Esophageal adenocarcinoma 0.67
## Hypertension 0.60
## Phenylketonuria 0.50
## Smoking behavior 0.36
## Smoking status measurement 0.60
## Streptococcus pneumoniae 0.60
## Traditional Chinese medicine type 0.50
## Transplant outcome measurement 0.60
## Treatment 0.60
## Ulcerative colitis 0.20
## Alzheimer's disease 0.69
## Atopic eczema 0.67
## Breastfeeding duration 0.90
## Cesarean section 0.67
## Colorectal adenoma 0.79
## Constipation 0.47
## Crohn's disease 0.29
## Endometrial cancer 0.50
## Health study participation 0.97
## Inflammatory bowel disease 0.00
## Lung cancer 0.91
## Obesity 0.56
## Obsessive-compulsive disorder 0.80
## Ovarian cancer 0.97
## Psoriasis 0.89
## Response to transplant 0.46
## Rheumatoid arthritis 0.12
## Sampling site 0.40
## Socioeconomic status 0.40
## Transport 0.50
## Type II diabetes mellitus 0.67
## Urinary tract infection 0.86
## Abnormal stool composition 1.00
## Chlamydia trachomatis 0.60
## Diarrhea, Infantile 1.00
## Ethnic group 0.20
## HIV mother to child transmission 1.00
## Male homosexuality 1.00
## Multiple sclerosis 1.00
## Papillary thyroid carcinoma 1.00
## Physical activity 0.20
## Psoriasis vulgaris 1.00
## Response to diet 0.40
## Sample collection protocol 1.00
## Smoking cessation 1.00
## Stroke 0.81
## Viral load 1.00
Body sites with consistently increased or decreased alpha diversity:
# Counts of increased / decreased / unchanged Shannon diversity by body site
tabDiv(exps, "Shannon", "Body site")
## increased decreased unchanged
## Feces 295 434 1153
## Vagina 16 6 27
## Posterior fornix of vagina 9 0 7
## Skin of body 7 15 8
## Uterine cervix 9 1 20
## Uterine cervix,Vaginal fluid 9 1 0
## Buccal epithelium 0 7 0
## Saliva 36 43 122
## Subgingival dental plaque 9 3 20
## Buccal mucosa 5 0 2
## Meconium 5 0 10
## Space surrounding organism 2 7 13
## Stomach 5 10 5
## Tongue 0 5 12
## Axilla skin 5 1 11
## Tear film 0 4 1
## Throat 0 4 9
## Caecum 1 4 22
## Cecum mucosa 1 4 6
## Colorectal mucosa 0 3 8
## Dental plaque 0 3 3
## Duodenum 0 3 5
## Nasopharynx 3 6 32
## Skin of forearm 3 0 3
## Bile 2 0 3
## Brachialis muscle 0 2 3
## Conjunctiva 1 3 6
## Conjunctival sac 1 3 1
## Esophagus 0 2 4
## Forelimb skin 2 0 4
## Lung 2 4 7
## Mouth 8 6 28
## Rumen 2 0 4
## Supragingival dental plaque 1 3 1
## Thyroid gland 2 0 10
## Uterus 3 1 11
## Blood 0 1 6
## Breast 3 4 4
## Breast,Milk 1 0 4
## Bulbar conjunctiva 3 2 5
## Colon 3 2 18
## Ileum 1 0 11
## Nasal cavity 0 1 5
## Oropharynx 1 2 3
## Small intestine 3 4 1
## Vagina,Uterine cervix 3 2 7
## Vaginal fluid 1 0 8
## Bronchus 0 0 6
## Endothelium of trachea 3 3 0
## Internal cheek pouch 0 0 11
## Intestine 1 1 14
## Jejunum 1 1 8
## Milk 0 0 9
## Oral cavity 5 5 7
## Ovary 0 0 7
## Peritoneal fluid 0 0 6
## Posterior wall of oropharynx 2 2 1
## Rectum 0 0 12
## Skin of abdomen 0 0 5
## Sputum 6 6 8
## Surface of tongue 2 2 3
## Urine 1 1 16
## Ventral side of post-anal tail 0 0 6
# Shannon diversity direction by body site as proportions (perc = TRUE);
# each printed row sums to 1 up to rounding
tabDiv(exps, "Shannon", "Body site", perc = TRUE)
## increased decreased unchanged
## Feces 0.160 0.230 0.61
## Vagina 0.330 0.120 0.55
## Posterior fornix of vagina 0.560 0.000 0.44
## Skin of body 0.230 0.500 0.27
## Uterine cervix 0.300 0.033 0.67
## Uterine cervix,Vaginal fluid 0.900 0.100 0.00
## Buccal epithelium 0.000 1.000 0.00
## Saliva 0.180 0.210 0.61
## Subgingival dental plaque 0.280 0.094 0.62
## Buccal mucosa 0.710 0.000 0.29
## Meconium 0.330 0.000 0.67
## Space surrounding organism 0.091 0.320 0.59
## Stomach 0.250 0.500 0.25
## Tongue 0.000 0.290 0.71
## Axilla skin 0.290 0.059 0.65
## Tear film 0.000 0.800 0.20
## Throat 0.000 0.310 0.69
## Caecum 0.037 0.150 0.81
## Cecum mucosa 0.091 0.360 0.55
## Colorectal mucosa 0.000 0.270 0.73
## Dental plaque 0.000 0.500 0.50
## Duodenum 0.000 0.380 0.62
## Nasopharynx 0.073 0.150 0.78
## Skin of forearm 0.500 0.000 0.50
## Bile 0.400 0.000 0.60
## Brachialis muscle 0.000 0.400 0.60
## Conjunctiva 0.100 0.300 0.60
## Conjunctival sac 0.200 0.600 0.20
## Esophagus 0.000 0.330 0.67
## Forelimb skin 0.330 0.000 0.67
## Lung 0.150 0.310 0.54
## Mouth 0.190 0.140 0.67
## Rumen 0.330 0.000 0.67
## Supragingival dental plaque 0.200 0.600 0.20
## Thyroid gland 0.170 0.000 0.83
## Uterus 0.200 0.067 0.73
## Blood 0.000 0.140 0.86
## Breast 0.270 0.360 0.36
## Breast,Milk 0.200 0.000 0.80
## Bulbar conjunctiva 0.300 0.200 0.50
## Colon 0.130 0.087 0.78
## Ileum 0.083 0.000 0.92
## Nasal cavity 0.000 0.170 0.83
## Oropharynx 0.170 0.330 0.50
## Small intestine 0.380 0.500 0.12
## Vagina,Uterine cervix 0.250 0.170 0.58
## Vaginal fluid 0.110 0.000 0.89
## Bronchus 0.000 0.000 1.00
## Endothelium of trachea 0.500 0.500 0.00
## Internal cheek pouch 0.000 0.000 1.00
## Intestine 0.062 0.062 0.88
## Jejunum 0.100 0.100 0.80
## Milk 0.000 0.000 1.00
## Oral cavity 0.290 0.290 0.41
## Ovary 0.000 0.000 1.00
## Peritoneal fluid 0.000 0.000 1.00
## Posterior wall of oropharynx 0.400 0.400 0.20
## Rectum 0.000 0.000 1.00
## Skin of abdomen 0.000 0.000 1.00
## Sputum 0.300 0.300 0.40
## Surface of tongue 0.290 0.290 0.43
## Urine 0.056 0.056 0.89
## Ventral side of post-anal tail 0.000 0.000 1.00
# Counts of increased / decreased / unchanged richness by body site
tabDiv(exps, "Richness", "Body site")
## increased decreased unchanged
## Feces 183 246 598
## Mouth 10 3 9
## Posterior fornix of vagina 8 1 2
## Uterine cervix 8 1 11
## Oropharynx 0 6 3
## Skin of body 3 9 6
## Subgingival dental plaque 7 2 17
## Uterine cervix,Vaginal fluid 7 2 1
## Nasopharynx 5 9 19
## Stomach 4 8 3
## Cecum mucosa 3 6 3
## Oral cavity 2 5 0
## Small intestine 1 4 0
## Throat 2 5 5
## Colon 6 4 10
## Ear 2 0 3
## Esophagus 0 2 4
## Rectum 0 2 7
## Saliva 20 22 43
## Surface of tongue 4 2 1
## Caecum 2 3 1
## Ileum 2 1 9
## Meconium 2 3 7
## Milk 2 1 5
## Nasal cavity 1 2 10
## Urine 3 2 12
## Vagina 3 2 11
## Vagina,Uterine cervix 1 0 11
## Breast 1 1 7
## Bronchus 0 0 6
## Conjunctiva 1 1 5
## Internal cheek pouch 0 0 7
## Intestine 0 0 13
## Ovary 0 0 7
## Peritoneal fluid 0 0 6
## Thyroid gland 0 0 12
## Tongue 2 2 7
# Richness direction by body site as proportions (perc = TRUE);
# each printed row sums to 1 up to rounding
tabDiv(exps, "Richness", "Body site", perc = TRUE)
## increased decreased unchanged
## Feces 0.180 0.240 0.58
## Mouth 0.450 0.140 0.41
## Posterior fornix of vagina 0.730 0.091 0.18
## Uterine cervix 0.400 0.050 0.55
## Oropharynx 0.000 0.670 0.33
## Skin of body 0.170 0.500 0.33
## Subgingival dental plaque 0.270 0.077 0.65
## Uterine cervix,Vaginal fluid 0.700 0.200 0.10
## Nasopharynx 0.150 0.270 0.58
## Stomach 0.270 0.530 0.20
## Cecum mucosa 0.250 0.500 0.25
## Oral cavity 0.290 0.710 0.00
## Small intestine 0.200 0.800 0.00
## Throat 0.170 0.420 0.42
## Colon 0.300 0.200 0.50
## Ear 0.400 0.000 0.60
## Esophagus 0.000 0.330 0.67
## Rectum 0.000 0.220 0.78
## Saliva 0.240 0.260 0.51
## Surface of tongue 0.570 0.290 0.14
## Caecum 0.330 0.500 0.17
## Ileum 0.170 0.083 0.75
## Meconium 0.170 0.250 0.58
## Milk 0.250 0.120 0.62
## Nasal cavity 0.077 0.150 0.77
## Urine 0.180 0.120 0.71
## Vagina 0.190 0.120 0.69
## Vagina,Uterine cervix 0.083 0.000 0.92
## Breast 0.110 0.110 0.78
## Bronchus 0.000 0.000 1.00
## Conjunctiva 0.140 0.140 0.71
## Internal cheek pouch 0.000 0.000 1.00
## Intestine 0.000 0.000 1.00
## Ovary 0.000 0.000 1.00
## Peritoneal fluid 0.000 0.000 1.00
## Thyroid gland 0.000 0.000 1.00
## Tongue 0.180 0.180 0.64
Signature stats
# extract the microbe signatures from dat, with taxa given as MetaPhlAn strings
sigs <- bugsigdbr::getSignatures(dat, tax.id.type = "metaphlan")
Unique microbes
Number of unique microbes contained in the signatures:
## [1] 7521
Development of unique microbes captured over time:
Microbe set size distribution
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 2.00 5.00 8.27 10.00 470.00
# distribution of signature sizes, i.e. the number of microbes per signature
gghistogram(lengths(sigs), bins = 30, ylab = "number of signatures",
xlab = "signature size", fill = "#00AFBB", ggtheme = theme_bw())
## [1] 4187
Microbe co-occurrence
# keep only the rows curated for fecal samples; which() drops rows whose
# body site is NA, just as subset() does
dat.feces <- dat[which(dat[["Body site"]] == "Feces"), ]
# genus-level matrix returned by microbeHeatmap(); it is rescaled and plotted
# further down under the row title "same direction"
cooc.mat <- microbeHeatmap(dat.feces,
                           tax.level = "genus",
                           anno = "genus")
## Loading required namespace: safe
# same microbeHeatmap() call with antagonistic = TRUE; the resulting matrix is
# plotted further down under the row title "opposite direction"
antag.mat <- microbeHeatmap(dat.feces, tax.level = "genus", anno = "genus", antagonistic = TRUE)
Get the top 20 genera most frequently reported as differentially abundant:
# Fecal signatures as taxonomic names at genus level; with
# exact.tax.level = FALSE, taxa given at a more specific rank are counted
# at genus level as well.
sigs.feces <- bugsigdbr::getSignatures(dat.feces, tax.id.type = "taxname",
                                       tax.level = "genus",
                                       exact.tax.level = FALSE)
# rank genera by the number of signatures that report them
top20 <- sort(table(unlist(sigs.feces)), decreasing = TRUE)
# keep at most 20 genera; a plain [1:20] would pad the result with NA
# entries whenever fewer than 20 genera are present
top20 <- top20[seq_len(min(20L, length(top20)))]
top20
##
## Bacteroides Bifidobacterium Faecalibacterium Clostridium
## 899 594 588 542
## Blautia Ruminococcus Streptococcus Roseburia
## 517 493 469 464
## Parabacteroides Prevotella Lactobacillus Alistipes
## 460 445 429 402
## Akkermansia Dorea Coprococcus Eubacterium
## 348 329 320 310
## Veillonella Enterococcus Lachnospira Escherichia
## 281 275 275 261
Subset heatmaps to the top 20 genera most frequently reported as differentially abundant:
## [1] TRUE
## [1] TRUE
Distinguish by direction of abundance change (increased / decreased):
# increased
# For each top-20 genus, count the fecal signatures that report it with
# increased abundance in Group 1.
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "increased")
sigs.feces.up <- bugsigdbr::getSignatures(sub.dat.feces,
                                          tax.id.type = "taxname",
                                          tax.level = "genus",
                                          exact.tax.level = FALSE)
# Tabulating a factor whose levels are the top-20 names keeps their order and
# yields 0 (instead of NA) for a genus that does not occur in this subset.
top20.up <- table(factor(unlist(sigs.feces.up), levels = names(top20)))
top20.up
##
## Bacteroides Bifidobacterium Faecalibacterium Clostridium
## 425 291 215 280
## Blautia Ruminococcus Streptococcus Roseburia
## 230 199 302 151
## Parabacteroides Prevotella Lactobacillus Alistipes
## 243 208 271 170
## Akkermansia Dorea Coprococcus Eubacterium
## 211 136 117 131
## Veillonella Enterococcus Lachnospira Escherichia
## 176 198 94 180
# decreased
# For each top-20 genus, count the fecal signatures that report it with
# decreased abundance in Group 1.
sub.dat.feces <- subset(dat.feces, `Abundance in Group 1` == "decreased")
sigs.feces.down <- bugsigdbr::getSignatures(sub.dat.feces,
                                            tax.id.type = "taxname",
                                            tax.level = "genus",
                                            exact.tax.level = FALSE)
# Tabulating a factor whose levels are the top-20 names keeps their order and
# yields 0 (instead of NA) for a genus that does not occur in this subset.
top20.down <- table(factor(unlist(sigs.feces.down), levels = names(top20)))
top20.down
##
## Bacteroides Bifidobacterium Faecalibacterium Clostridium
## 467 295 367 257
## Blautia Ruminococcus Streptococcus Roseburia
## 281 289 159 307
## Parabacteroides Prevotella Lactobacillus Alistipes
## 211 234 156 226
## Akkermansia Dorea Coprococcus Eubacterium
## 133 187 197 173
## Veillonella Enterococcus Lachnospira Escherichia
## 102 75 175 75
Plot the heatmap
# annotation
# Bar annotation showing, for each top-20 genus, the number of fecal
# signatures with increased and with decreased abundance in Group 1.
# One colour per direction, shared by the bars and the legend.
abund.cols <- c("#D55E00", "#0072B2")
# two-column count matrix (increased, decreased); `ncol` is spelled out
# instead of relying on partial matching of `nc`
mat <- matrix(cbind(top20.up, top20.down), ncol = 2)
bp <- ComplexHeatmap::anno_barplot(mat,
                                   gp = gpar(fill = abund.cols,
                                             col = abund.cols),
                                   height = unit(2, "cm"))
banno <- ComplexHeatmap::HeatmapAnnotation(`Abundance in Group 1` = bp)
# manual legend for the two bar colours
lgd_list <- list(
    Legend(labels = c("increased", "decreased"),
           title = "Abundance in Group 1",
           type = "grid",
           legend_gp = gpar(col = abund.cols, fill = abund.cols)))
# same direction
# lcm <- sweep(cooc.mat, 2, matrixStats::colMaxs(cooc.mat), FUN = "/")
# We need to dampen the maximum a bit here, otherwise the 100% self
# co-occurrence on the diagonal takes up a large fraction of the colour scale:
# cap each diagonal entry at 1.4 times the second-largest value of its column.
sec <- apply(cooc.mat, 2, function(x) sort(x, decreasing = TRUE)[2])
cooc.mat2 <- cooc.mat
# vectorised over the diagonal; unlike a loop over 1:ncol(cooc.mat2) this
# does not index out of bounds when the matrix has no columns
diag(cooc.mat2) <- pmin(diag(cooc.mat2), 1.4 * sec)
# scale every column by its (dampened) maximum
lcm <- sweep(cooc.mat2, 2, matrixStats::colMaxs(cooc.mat2), FUN = "/")
col <- circlize::colorRamp2(c(0, 1), c("#EEEEEE", "red"))
ht1 <- ComplexHeatmap::Heatmap(lcm,
                               col = col,
                               name = "Relative frequency (top)",
                               cluster_columns = FALSE,
                               row_km = 3,
                               row_title = "same direction",
                               column_names_rot = 45,
                               row_names_gp = gpar(fontsize = 8),
                               column_names_gp = gpar(fontsize = 8))
# opposite direction
# divide every column of antag.mat by its column maximum
acm <- sweep(x = antag.mat, MARGIN = 2,
             STATS = matrixStats::colMaxs(antag.mat), FUN = "/")
# colour ramp from light grey at 0 to blue at 1
col <- circlize::colorRamp2(breaks = c(0, 1), colors = c("#EEEEEE", "blue"))
ht2 <- ComplexHeatmap::Heatmap(
    matrix = acm,
    name = "Relative frequency (bottom)",
    col = col,
    row_title = "opposite direction",
    row_km = 3,
    cluster_columns = FALSE,
    column_names_rot = 45,
    column_names_gp = gpar(fontsize = 8),
    row_names_gp = gpar(fontsize = 8))
# phylum: one-row heatmap naming the phylum of each top-20 genus
# fecal signatures again, this time as MetaPhlAn strings at genus level
sfp <- bugsigdbr::getSignatures(dat.feces, tax.id.type = "metaphlan",
tax.level = "genus", exact.tax.level = FALSE)
# 20 most frequent MetaPhlAn genus entries
sfp20 <- sort(table(unlist(sfp)), decreasing = TRUE)[1:20]
# phylum name for each of those entries
uanno <- bugsigdbr::extractTaxLevel(names(sfp20),
tax.id.type = "taxname",
tax.level = "phylum",
exact.tax.level = FALSE)
# indices 1..k with k = number of distinct phyla; used below to pick k colours
phyla.grid <- seq_along(unique(uanno))
# NOTE(review): panno is not part of ht_list in the code shown here --
# confirm whether it is still needed
panno <- ComplexHeatmap::HeatmapAnnotation(phylum = uanno)
# reshape to a 1 x 20 character matrix so it can be drawn as a heatmap row
uanno <- matrix(uanno, nrow = 1)
# NOTE(review): columns are labelled with names(top20); this assumes sfp20
# ranks the genera in the same order as top20 -- TODO confirm
colnames(uanno) <- names(top20)
# five-colour palette; pcols[phyla.grid] contains NA if there are more than
# five distinct phyla
pcols <- c("#CC79A7", "#F0E442", "#009E73", "#56B4E9", "#E69F00")
# from here on uanno is the Heatmap object, no longer the character matrix
uanno <- ComplexHeatmap::Heatmap(uanno, name = "Phylum",
col = pcols[phyla.grid],
cluster_columns = FALSE,
column_names_rot = 45,
column_names_gp = gpar(fontsize = 8))
# put everything together
# stack vertically: same-direction heatmap, bar annotation,
# opposite-direction heatmap, phylum strip
ht_list <- ht1 %v% banno %v% ht2 %v% uanno
ComplexHeatmap::draw(ht_list,
                     merge_legend = TRUE,
                     annotation_legend_list = lgd_list)
decorate_annotation("Abundance in Group 1", {
    # rotated axis label to the left of the bars
    grid.text("# signatures", gp = gpar(fontsize = 8), just = "bottom",
              rot = 90, x = unit(-1, "cm"))
    # asterisks at hand-tuned (x, y) positions given in cm
    invisible(mapply(
        function(sx, sy) grid.text("*", x = unit(sx, "cm"), y = unit(sy, "cm")),
        c(2.45, 5.18, 6.55, 8.6, 10, 10.7),
        c(1.2, 1, 0.95, 0.85, 0.7, 0.7),
        SIMPLIFY = FALSE))
})
Signature similarity
Jaccard index
Inspect signature similarity for signatures from stomach samples based on Jaccard index:
# rows curated for stomach samples; which() drops rows whose body site is NA,
# just as subset() does
stomachsub <- dat[which(dat[["Body site"]] == "Stomach"), ]
# signatures of those rows, extracted with the getSignatures() defaults
sigsub <- bugsigdbr::getSignatures(stomachsub)
# pairwise Jaccard similarity between the stomach signatures
pair.jsim <- calcJaccardSimilarity(sigsub)
Create a dendrogram of Jaccard dissimilarities (1.0 means no overlap, 0.0 means identical signatures).