fieldworkanalysis_samara.Rmd
install.packages(c("devtools", "tidyverse", "kableExtra"))
devtools::install_github("waldronlab/bugSigSimple")
devtools::install_github("waldronlab/BugSigDBStats")
devtools::install_github("waldronlab/bugsigdbr")
##
## Attaching package: 'BugSigDBStats'
## The following object is masked from 'package:bugSigSimple':
##
## getMostFrequentTaxa
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(bugSigSimple)
dat <- bugsigdbr::importBugSigDB(cache= TRUE)
## Using cached version from 2025-01-30 22:02:34
dim(dat)
## [1] 5520 50
names(dat)
## [1] "BSDB ID" "Study"
## [3] "Study design" "PMID"
## [5] "DOI" "URL"
## [7] "Authors list" "Title"
## [9] "Journal" "Year"
## [11] "Keywords" "Experiment"
## [13] "Location of subjects" "Host species"
## [15] "Body site" "UBERON ID"
## [17] "Condition" "EFO ID"
## [19] "Group 0 name" "Group 1 name"
## [21] "Group 1 definition" "Group 0 sample size"
## [23] "Group 1 sample size" "Antibiotics exclusion"
## [25] "Sequencing type" "16S variable region"
## [27] "Sequencing platform" "Statistical test"
## [29] "Significance threshold" "MHT correction"
## [31] "LDA Score above" "Matched on"
## [33] "Confounders controlled for" "Pielou"
## [35] "Shannon" "Chao1"
## [37] "Simpson" "Inverse Simpson"
## [39] "Richness" "Signature page name"
## [41] "Source" "Curated date"
## [43] "Curator" "Revision editor"
## [45] "Description" "Abundance in Group 1"
## [47] "MetaPhlAn taxon names" "NCBI Taxonomy IDs"
## [49] "State" "Reviewer"
Subsetting only studies done on humans - 6 of 17 studies were excluded because they were either done on mice or not statistically reliable (Cregger et. al)
subset.dat <-
dat[which(
dat$PMID == "30778155" |
dat$PMID == "32192080" |
dat$PMID == "31087436" |
dat$PMID == "26901400" |
dat$PMID == "33839907" |
dat$PMID == "32046455" |
dat$PMID == "33925708" |
dat$PMID == "32299442" |
dat$PMID == "33313185" |
dat$PMID == "34268384" | dat$PMID == "33660232"
), ]
Summary of studies and most frequent taxa increased and decreased in endometriosis patients for all studies
bugSigSimple::createStudyTable(subset.dat)
## # A tibble: 11 × 5
## Study Condition Cases Controls `Study Design`
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Akiyama 2019 Endometriosis 30 39 cross-sectional obse…
## 2 Ata 2019 Endometriosis 14 14 prospective cohort
## 3 Chao 2021 Endometriosis 37 66 case-control
## 4 Chen 2020 Adenomyosis,Endometriosis 14 120 case-control
## 5 Hernandes 2020 Endometriosis 21 11 case-control
## 6 Khan 2016 Endometriosis 32 32 case-control
## 7 Lee 2021 Endometriosis 45 45 case-control
## 8 Perrotta 2020 Endometriosis 8 12 cross-sectional obse…
## 9 Shan 2021 Endometriosis 12 12 case-control
## 10 Svensson 2021 Endometriosis 66 198 case-control
## 11 Wei 2020 Endometriosis 36 14 cross-sectional obse…
getMostFrequentTaxa(subset.dat, n=30)
## 1301 1380 1578 194 543 838 84107 119852 1300 1350
## 7 5 4 4 4 4 4 3 3 3
## 135621 1375 171549 1940338 2050 28050 286 39948 469 906
## 3 3 3 3 3 3 3 3 3 3
## 13687 1508657 1582879 1654 1663 168808 1743 216851 2701 33038
## 2 2 2 2 2 2 2 2 2 2
getMostFrequentTaxa(subset.dat, direction="UP")
## 1301 543 119852 1300 1350 135621 1578 1940338 286 469
## 6 4 3 3 3 3 3 3 3 3
getMostFrequentTaxa(subset.dat, direction="DOWN")
## 1380 168808 171549 194 2050 28050 33958 838 906 1033
## 3 2 2 2 2 2 2 2 2 1
Summary of studies and most frequent taxa in only samples from female reproductive tract, excluding feces samples
subset.dat2 <-
dat[which(
dat$PMID == "30778155" |
dat$PMID == "32192080" |
dat$PMID == "31087436" |
dat$PMID == "26901400" |
dat$PMID == "32046455" |
dat$PMID == "33925708" |
dat$PMID == "32299442" |
dat$PMID == "33313185" | dat$PMID == "34268384"
), ]
reproductive_sigs <-
subset.dat2[which(subset.dat2$`Body site` != "feces" |
is.na(subset.dat2$`Body site`)), ]
bugSigSimple::createStudyTable(reproductive_sigs)
## # A tibble: 9 × 5
## Study Condition Cases Controls `Study Design`
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Akiyama 2019 Endometriosis 30 39 cross-sectional obser…
## 2 Ata 2019 Endometriosis 14 14 prospective cohort
## 3 Chao 2021 Endometriosis 37 66 case-control
## 4 Chen 2020 Adenomyosis,Endometriosis 14 120 case-control
## 5 Hernandes 2020 Endometriosis 21 11 case-control
## 6 Khan 2016 Endometriosis 32 32 case-control
## 7 Lee 2021 Endometriosis 45 45 case-control
## 8 Perrotta 2020 Endometriosis 8 12 cross-sectional obser…
## 9 Wei 2020 Endometriosis 36 14 cross-sectional obser…
allfreqs <- bugSigSimple::createTaxonTable(reproductive_sigs, n = 20) %>% #could change number
arrange(I(decreased_signatures - increased_signatures))
incfreqs <- filter(allfreqs, I(increased_signatures - decreased_signatures) > 0)
decfreqs <- filter(allfreqs, I(increased_signatures - decreased_signatures) < 0)
kableExtra::kbl(allfreqs) %>%
kable_paper("hover", full_width = FALSE)
Taxon Name | Taxonomic Level | total_signatures | increased_signatures | decreased_signatures | Binomial Test pval | kingdom | phylum | class | order | family | genus | species | n_signatures | metaphlan_name |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Streptococcus | genus | 6 | 5 | 1 | 0.22 | Bacteria | Bacillota | Bacilli | Lactobacillales | Streptococcaceae | Streptococcus | NA | 6 | k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus |
Coriobacteriaceae | family | 3 | 3 | 0 | 0.25 | Bacteria | Actinomycetota | Coriobacteriia | Coriobacteriales | Coriobacteriaceae | NA | NA | 3 | k__Bacteria|p__Actinomycetota|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae |
Streptococcaceae | family | 3 | 3 | 0 | 0.25 | Bacteria | Bacillota | Bacilli | Lactobacillales | Streptococcaceae | NA | NA | 9 | k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Streptococcaceae |
Enterobacteriaceae | family | 3 | 3 | 0 | 0.25 | Bacteria | Pseudomonadota | Gammaproteobacteria | Enterobacterales | Enterobacteriaceae | NA | NA | 6 | k__Bacteria|p__Pseudomonadota|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae |
Escherichia/Shigella sp. | species | 3 | 3 | 0 | 0.25 | Bacteria | Pseudomonadota | Gammaproteobacteria | Enterobacterales | Enterobacteriaceae | Escherichia/Shigella sp. | NA | 3 | k__Bacteria|p__Pseudomonadota|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|s__Escherichia/Shigella sp. |
Acinetobacter | genus | 3 | 3 | 0 | 0.25 | Bacteria | Pseudomonadota | Gammaproteobacteria | Moraxellales | Moraxellaceae | Acinetobacter | NA | 3 | k__Bacteria|p__Pseudomonadota|c__Gammaproteobacteria|o__Moraxellales|f__Moraxellaceae|g__Acinetobacter |
Pseudomonadaceae | family | 3 | 3 | 0 | 0.25 | Bacteria | Pseudomonadota | Gammaproteobacteria | Pseudomonadales | Pseudomonadaceae | NA | NA | 4 | k__Bacteria|p__Pseudomonadota|c__Gammaproteobacteria|o__Pseudomonadales|f__Pseudomonadaceae |
Pseudomonas | genus | 3 | 3 | 0 | 0.25 | Bacteria | Pseudomonadota | Gammaproteobacteria | Pseudomonadales | Pseudomonadaceae | Pseudomonas | NA | 3 | k__Bacteria|p__Pseudomonadota|c__Gammaproteobacteria|o__Pseudomonadales|f__Pseudomonadaceae|g__Pseudomonas |
Alloscardovia | genus | 2 | 2 | 0 | 0.50 | Bacteria | Actinomycetota | Actinomycetes | Bifidobacteriales | Bifidobacteriaceae | Alloscardovia | NA | 2 | k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Alloscardovia |
Arthrobacter | genus | 2 | 2 | 0 | 0.50 | Bacteria | Actinomycetota | Actinomycetes | Micrococcales | Micrococcaceae | Arthrobacter | NA | 2 | k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Micrococcales|f__Micrococcaceae|g__Arthrobacter |
Aerococcus | genus | 3 | 2 | 1 | 1.00 | Bacteria | Bacillota | Bacilli | Lactobacillales | Aerococcaceae | Aerococcus | NA | 3 | k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Aerococcaceae|g__Aerococcus |
Lactobacillus | genus | 3 | 2 | 1 | 1.00 | Bacteria | Bacillota | Bacilli | Lactobacillales | Lactobacillaceae | Lactobacillus | NA | 4 | k__Bacteria|p__Bacillota|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae|g__Lactobacillus |
Dialister | genus | 3 | 2 | 1 | 1.00 | Bacteria | Bacillota | Negativicutes | Veillonellales | Veillonellaceae | Dialister | NA | 3 | k__Bacteria|p__Bacillota|c__Negativicutes|o__Veillonellales|f__Veillonellaceae|g__Dialister |
Prevotella | genus | 4 | 2 | 2 | 1.00 | Bacteria | Bacteroidota | Bacteroidia | Bacteroidales | Prevotellaceae | Prevotella | NA | 4 | k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella |
Campylobacter | genus | 4 | 2 | 2 | 1.00 | Bacteria | Campylobacterota | Epsilonproteobacteria | Campylobacterales | Campylobacteraceae | Campylobacter | NA | 4 | k__Bacteria|p__Campylobacterota|c__Epsilonproteobacteria|o__Campylobacterales|f__Campylobacteraceae|g__Campylobacter |
Gardnerella | genus | 2 | 1 | 1 | 1.00 | Bacteria | Actinomycetota | Actinomycetes | Bifidobacteriales | Bifidobacteriaceae | Gardnerella | NA | 2 | k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Bifidobacteriales|f__Bifidobacteriaceae|g__Gardnerella |
Propionibacterium | genus | 2 | 1 | 1 | 1.00 | Bacteria | Actinomycetota | Actinomycetes | Propionibacteriales | Propionibacteriaceae | Propionibacterium | NA | 2 | k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Propionibacteriales|f__Propionibacteriaceae|g__Propionibacterium |
Atopobium | genus | 5 | 2 | 3 | 1.00 | Bacteria | Actinomycetota | Coriobacteriia | Coriobacteriales | Atopobiaceae | Atopobium | NA | 5 | k__Bacteria|p__Actinomycetota|c__Coriobacteriia|o__Coriobacteriales|f__Atopobiaceae|g__Atopobium |
Mobiluncus | genus | 3 | 1 | 2 | 1.00 | Bacteria | Actinomycetota | Actinomycetes | Actinomycetales | Actinomycetaceae | Mobiluncus | NA | 3 | k__Bacteria|p__Actinomycetota|c__Actinomycetes|o__Actinomycetales|f__Actinomycetaceae|g__Mobiluncus |
Megasphaera | genus | 3 | 1 | 2 | 1.00 | Bacteria | Bacillota | Negativicutes | Veillonellales | Veillonellaceae | Megasphaera | NA | 3 | k__Bacteria|p__Bacillota|c__Negativicutes|o__Veillonellales|f__Veillonellaceae|g__Megasphaera |
suppressPackageStartupMessages(library(curatedMetagenomicData))
# hmp_vagina_metadata <- filter(sampleMetadata, study_name == "HMP_2012" & body_site == "vagina")
all_healthy_adult_vagina_metadata <- filter(sampleMetadata, body_site == "vagina" & disease == "healthy" & age_category == "adult")
se <-
curatedMetagenomicData::returnSamples(all_healthy_adult_vagina_metadata, dataType = "relative_abundance", counts = FALSE)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(.fns = ~replace_na(.x, 0))`.
## Caused by warning:
## ! Using `across()` without supplying `.cols` was deprecated in dplyr 1.1.0.
## ℹ Please supply `.cols` instead.
Get matrices of species and genus relative abundance
allranks <- mia::splitByRanks(se)
#species_relab <- t(assay(allranks[["species"]]))
genus_relab <- t(assay(allranks[["genus"]]))
#family_relab <- t(assay(allranks[["family"]]))
Centered log-ratio transformation on genus level relative abundance:
genus_relab_clr <- Hotelling::clr(genus_relab + 1)
df <- dplyr::select(data.frame(genus_relab_clr), c("Streptococcus", "Lactobacillus"))
df$age <- se$age
df$BMI <- se$BMI
fit <- lm(Streptococcus ~ Lactobacillus + age + BMI, data = df)
summary(fit)
##
## Call:
## lm(formula = Streptococcus ~ Lactobacillus + age + BMI, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06004 -0.02525 -0.01591 -0.00853 0.71736
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.112785 0.117024 -0.964 0.339
## Lactobacillus 0.008483 0.015007 0.565 0.574
## age 0.001061 0.003520 0.301 0.764
## BMI 0.001921 0.004362 0.440 0.661
##
## Residual standard error: 0.1149 on 61 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.01484, Adjusted R-squared: -0.03361
## F-statistic: 0.3064 on 3 and 61 DF, p-value: 0.8207
##
## Call:
## lm(formula = Streptococcus ~ Lactobacillus, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6006 -0.0609 -0.0462 -0.0440 4.1909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.55246 0.20193 2.736 0.00745 **
## Lactobacillus -0.11409 0.04751 -2.401 0.01832 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5642 on 93 degrees of freedom
## Multiple R-squared: 0.05839, Adjusted R-squared: 0.04826
## F-statistic: 5.767 on 1 and 93 DF, p-value: 0.01832
wilcox.test(df$Streptococcus, df$Lactobacillus)
##
## Wilcoxon rank sum test with continuity correction
##
## data: df$Streptococcus and df$Lactobacillus
## W = 213.5, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
t.test(df$Streptococcus, df$Lactobacillus)
##
## Welch Two Sample t-test
##
## data: df$Streptococcus and df$Lactobacillus
## t = -28.666, df = 133.93, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.258800 -3.709054
## sample estimates:
## mean of x mean of y
## 0.0879008 4.0718280
secount <-
curatedMetagenomicData::returnSamples(all_healthy_adult_vagina_metadata, dataType = "relative_abundance", counts = TRUE)
##
## $`2021-03-31.FerrettiP_2018.relative_abundance`
## dropping rows without rowTree matches:
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Collinsella|s__Collinsella_stercoris
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Enorma|s__[Collinsella]_massiliensis
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales|f__Bacillales_unclassified|g__Gemella|s__Gemella_bergeri
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Granulicatella|s__Granulicatella_elegans
## k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae|g__Ruminococcus|s__Ruminococcus_champanellensis
## k__Bacteria|p__Firmicutes|c__Erysipelotrichia|o__Erysipelotrichales|f__Erysipelotrichaceae|g__Bulleidia|s__Bulleidia_extructa
## k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Burkholderiales|f__Sutterellaceae|g__Sutterella|s__Sutterella_parvirubra
## k__Bacteria|p__Spirochaetes|c__Spirochaetia|o__Leptospirales|f__Leptospiraceae|g__Leptospira|s__Leptospira_hartskeerlii
## k__Bacteria|p__Synergistetes|c__Synergistia|o__Synergistales|f__Synergistaceae|g__Cloacibacillus|s__Cloacibacillus_evryensis
## $`2021-03-31.HMP_2012.relative_abundance`
## dropping rows without rowTree matches:
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Atopobiaceae|g__Olsenella|s__Olsenella_profusa
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Collinsella|s__Collinsella_stercoris
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Enorma|s__[Collinsella]_massiliensis
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales|f__Bacillales_unclassified|g__Gemella|s__Gemella_bergeri
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Granulicatella|s__Granulicatella_elegans
## k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae|g__Ruminococcus|s__Ruminococcus_champanellensis
## k__Bacteria|p__Firmicutes|c__Erysipelotrichia|o__Erysipelotrichales|f__Erysipelotrichaceae|g__Bulleidia|s__Bulleidia_extructa
## k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Burkholderiales|f__Sutterellaceae|g__Sutterella|s__Sutterella_parvirubra
## k__Bacteria|p__Synergistetes|c__Synergistia|o__Synergistales|f__Synergistaceae|g__Cloacibacillus|s__Cloacibacillus_evryensis
## $`2021-03-31.WampachL_2018.relative_abundance`
## dropping rows without rowTree matches:
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Collinsella|s__Collinsella_stercoris
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Enorma|s__[Collinsella]_massiliensis
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Granulicatella|s__Granulicatella_elegans
## k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Burkholderiales|f__Sutterellaceae|g__Sutterella|s__Sutterella_parvirubra
## k__Bacteria|p__Synergistetes|c__Synergistia|o__Synergistales|f__Synergistaceae|g__Cloacibacillus|s__Cloacibacillus_evryensis
allranks <- mia::splitByRanks(secount)
genus_relab_count <- t(assay(allranks[["genus"]]))
df <- dplyr::select(data.frame(genus_relab_count), c("Streptococcus", "Lactobacillus"))
df$age <- secount$age
df$BMI <- secount$BMI
## Classes and Methods for R originally developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University (2002-2015),
## by and under the direction of Simon Jackman.
## hurdle and zeroinfl functions by Achim Zeileis.
fit.ZInegbin <- pscl::zeroinfl(Streptococcus ~ Lactobacillus + age + BMI | 1,
data = df,
dist = "negbin")
## Warning: glm.fit: algorithm did not converge
library(curatedMetagenomicData)
library(dplyr)
library(DT)
library(purrr)
taxon_strep <- c("1884" = "Streptococcus")
taxon_lact <- c("1578" = "Lactobacillus")
target_sample <- "vagina"
sample_metadata <- sampleMetadata %>%
filter(
grepl(target_sample, body_site) | grepl(target_sample, body_subsite)
) %>%
discard(~all(is.na(.x))) %>%
as_tibble()
dim(sample_metadata)
## [1] 96 26
unique(sample_metadata$study_name)
## [1] "FerrettiP_2018" "HMP_2012" "WampachL_2018"
tse <- returnSamples(
sampleMetadata = sample_metadata,
dataType = "relative_abundance",
counts = FALSE,
rownames = "short"
)
##
## $`2021-03-31.FerrettiP_2018.relative_abundance`
## dropping rows without rowTree matches:
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Collinsella|s__Collinsella_stercoris
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Enorma|s__[Collinsella]_massiliensis
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales|f__Bacillales_unclassified|g__Gemella|s__Gemella_bergeri
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Granulicatella|s__Granulicatella_elegans
## k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae|g__Ruminococcus|s__Ruminococcus_champanellensis
## k__Bacteria|p__Firmicutes|c__Erysipelotrichia|o__Erysipelotrichales|f__Erysipelotrichaceae|g__Bulleidia|s__Bulleidia_extructa
## k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Burkholderiales|f__Sutterellaceae|g__Sutterella|s__Sutterella_parvirubra
## k__Bacteria|p__Spirochaetes|c__Spirochaetia|o__Leptospirales|f__Leptospiraceae|g__Leptospira|s__Leptospira_hartskeerlii
## k__Bacteria|p__Synergistetes|c__Synergistia|o__Synergistales|f__Synergistaceae|g__Cloacibacillus|s__Cloacibacillus_evryensis
## $`2021-03-31.HMP_2012.relative_abundance`
## dropping rows without rowTree matches:
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Atopobiaceae|g__Olsenella|s__Olsenella_profusa
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Collinsella|s__Collinsella_stercoris
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Enorma|s__[Collinsella]_massiliensis
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales|f__Bacillales_unclassified|g__Gemella|s__Gemella_bergeri
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Granulicatella|s__Granulicatella_elegans
## k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae|g__Ruminococcus|s__Ruminococcus_champanellensis
## k__Bacteria|p__Firmicutes|c__Erysipelotrichia|o__Erysipelotrichales|f__Erysipelotrichaceae|g__Bulleidia|s__Bulleidia_extructa
## k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Burkholderiales|f__Sutterellaceae|g__Sutterella|s__Sutterella_parvirubra
## k__Bacteria|p__Synergistetes|c__Synergistia|o__Synergistales|f__Synergistaceae|g__Cloacibacillus|s__Cloacibacillus_evryensis
## $`2021-03-31.WampachL_2018.relative_abundance`
## dropping rows without rowTree matches:
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Collinsella|s__Collinsella_stercoris
## k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Enorma|s__[Collinsella]_massiliensis
## k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Granulicatella|s__Granulicatella_elegans
## k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Burkholderiales|f__Sutterellaceae|g__Sutterella|s__Sutterella_parvirubra
## k__Bacteria|p__Synergistetes|c__Synergistia|o__Synergistales|f__Synergistaceae|g__Cloacibacillus|s__Cloacibacillus_evryensis
tse
## class: TreeSummarizedExperiment
## dim: 902 96
## metadata(0):
## assays(1): relative_abundance
## rownames(902): [Clostridium] innocuum [Clostridium] leptum ... Rahnella
## aquatilis Rothia kristinae
## rowData names(7): superkingdom phylum ... genus species
## colnames(96): CA_C10001MS2003VA_t0M15 CA_C10003MS2031VA_t0M15 ...
## MV_C118 MV_C120
## colData names(25): study_name subject_id ... family_role body_subsite
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
## rowLinks: a LinkDataFrame (902 rows)
## rowTree: 1 phylo tree(s) (10430 leaves)
## colLinks: NULL
## colTree: NULL
## [1] 4049.558
var(df$Streptococcus)
## [1] 509068831
## [1] 0.8736842
#Fit log-linear models
fit.pois <- glm(Streptococcus ~ Lactobacillus + age + BMI,
data = df,
family = poisson(link = "log"))
## Warning: glm.fit: algorithm did not converge
summary (fit.pois)
##
## Call:
## glm(formula = Streptococcus ~ Lactobacillus + age + BMI, family = poisson(link = "log"),
## data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.211e+00 3.056e-02 170.485 < 2e-16 ***
## Lactobacillus 2.307e-08 6.221e-10 37.085 < 2e-16 ***
## age 5.232e-02 1.049e-03 49.868 < 2e-16 ***
## BMI -4.464e-03 1.356e-03 -3.291 0.000999 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 339596 on 64 degrees of freedom
## Residual deviance: 335673 on 61 degrees of freedom
## (30 observations deleted due to missingness)
## AIC: 335733
##
## Number of Fisher Scoring iterations: 25
library(pscl)
fit.ZIpois <- zeroinfl(Streptococcus ~ Lactobacillus + age + BMI | 1,
data = df,
dist = "poisson")
## Warning: glm.fit: algorithm did not converge
summary (fit.ZIpois)
##
## Call:
## zeroinfl(formula = Streptococcus ~ Lactobacillus + age + BMI | 1, data = df,
## dist = "poisson")
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.3189 -0.3189 -0.3189 -0.3189 9.6361
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.122e+01 NaN NaN NaN
## Lactobacillus -1.116e-07 NaN NaN NaN
## age -7.023e-01 NaN NaN NaN
## BMI 3.252e-01 NaN NaN NaN
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.286 NaN NaN NaN
##
## Number of iterations in BFGS optimization: 33
## Log-likelihood: -3.134e+04 on 5 Df
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
fit.negbin <-
glm.nb(
Streptococcus ~ Lactobacillus + age + BMI,
data = df,
control = glm.control(
epsilon = 1e-8,
maxit = 50,
trace = FALSE
)
)
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(Streptococcus ~ Lactobacillus + age + BMI, data = df, :
## alternation limit reached
## # A tibble: 4 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 5.21 3.06e- 2 170. 0
## 2 Lactobacillus 0.0000000229 6.52e-10 35.1 1.77e-269
## 3 age 0.0524 1.05e- 3 49.9 0
## 4 BMI -0.00453 1.36e- 3 -3.32 8.92e- 4
fit.ZInegbin <- zeroinfl(Streptococcus ~ Lactobacillus + age + BMI | 1,
data = df,
dist = "negbin")
## Warning: glm.fit: algorithm did not converge
summary(fit.ZInegbin)
##
## Call:
## zeroinfl(formula = Streptococcus ~ Lactobacillus + age + BMI | 1, data = df,
## dist = "negbin")
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.2128 -0.2128 -0.2128 -0.2127 6.3373
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.478e+01 NaN NaN NaN
## Lactobacillus -5.468e-07 NaN NaN NaN
## age -1.129e+00 NaN NaN NaN
## BMI 7.501e-01 NaN NaN NaN
## Log(theta) -2.122e-01 NaN NaN NaN
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.233 NaN NaN NaN
##
## Theta = 0.8088
## Number of iterations in BFGS optimization: 39
## Log-likelihood: -70.77 on 6 Df