Packages used here:

Cirrhosis exploratory analysis

# Build a single experiment object holding the relative abundances of the
# studies annotated with the "cirrhosis" condition; HMP_2019_ibdmdb is
# explicitly excluded.
se <- curatedMetagenomicAnalyses::makeSEforCondition(
  "cirrhosis",
  removestudies = "HMP_2019_ibdmdb",
  dataType = "relative_abundance"
)
## snapshotDate(): 2021-10-19
## 
## $`2021-10-14.LoombaR_2017.relative_abundance`
## dropping rows without rowTree matches:
##   k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Collinsella|s__Collinsella_stercoris
##   k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales|f__Bacillales_unclassified|g__Gemella|s__Gemella_bergeri
##   k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Granulicatella|s__Granulicatella_elegans
##   k__Bacteria|p__Firmicutes|c__Erysipelotrichia|o__Erysipelotrichales|f__Erysipelotrichaceae|g__Bulleidia|s__Bulleidia_extructa
##   k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Burkholderiales|f__Sutterellaceae|g__Sutterella|s__Sutterella_parvirubra
##   k__Bacteria|p__Synergistetes|c__Synergistia|o__Synergistales|f__Synergistaceae|g__Cloacibacillus|s__Cloacibacillus_evryensis
## $`2021-03-31.QinN_2014.relative_abundance`
## dropping rows without rowTree matches:
##   k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Atopobiaceae|g__Olsenella|s__Olsenella_profusa
##   k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Collinsella|s__Collinsella_stercoris
##   k__Bacteria|p__Actinobacteria|c__Coriobacteriia|o__Coriobacteriales|f__Coriobacteriaceae|g__Enorma|s__[Collinsella]_massiliensis
##   k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales|f__Bacillales_unclassified|g__Gemella|s__Gemella_bergeri
##   k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Granulicatella|s__Granulicatella_elegans
##   k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae|g__Ruminococcus|s__Ruminococcus_champanellensis
##   k__Bacteria|p__Firmicutes|c__Erysipelotrichia|o__Erysipelotrichales|f__Erysipelotrichaceae|g__Bulleidia|s__Bulleidia_extructa
##   k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Burkholderiales|f__Sutterellaceae|g__Sutterella|s__Sutterella_parvirubra
##   k__Bacteria|p__Synergistetes|c__Synergistia|o__Synergistales|f__Synergistaceae|g__Cloacibacillus|s__Cloacibacillus_evryensis
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Per-study summary table of the first ten sample-metadata columns.
df <- data.frame(colData(se)[, 1:10])

# Display labels and units used by table1 when rendering the table.
label(df$study_name) <- "Study Name"
label(df$body_site) <- "Body site"
label(df$study_condition) <- "Study condition"
label(df$antibiotics_current_use) <- "Current Antibiotics Use"
units(df$age) <- "Years"

# Summarise every column except subject_id, stratified by study.
table1(~ . - subject_id | study_name, data = df)
LoombaR_2017
(N=45)
QinN_2014
(N=237)
Overall
(N=282)
Body site
stool 45 (100%) 237 (100%) 282 (100%)
Current Antibiotics Use
no 0 (0%) 180 (75.9%) 180 (63.8%)
yes 0 (0%) 57 (24.1%) 57 (20.2%)
Missing 45 (100%) 0 (0%) 45 (16.0%)
Study condition
cirrhosis 9 (20.0%) 123 (51.9%) 132 (46.8%)
control 36 (80.0%) 114 (48.1%) 150 (53.2%)
disease
cirrhosis 9 (20.0%) 9 (3.8%) 18 (6.4%)
fatty_liver 36 (80.0%) 0 (0%) 36 (12.8%)
ascites;cirrhosis 0 (0%) 9 (3.8%) 9 (3.2%)
ascites;cirrhosis;hepatitis 0 (0%) 55 (23.2%) 55 (19.5%)
ascites;cirrhosis;hepatitis;schistosoma 0 (0%) 3 (1.3%) 3 (1.1%)
ascites;cirrhosis;hepatitis;wilson 0 (0%) 1 (0.4%) 1 (0.4%)
ascites;cirrhosis;schistosoma 0 (0%) 1 (0.4%) 1 (0.4%)
ascites;cirrhosis;wilson 0 (0%) 1 (0.4%) 1 (0.4%)
cirrhosis;hepatitis 0 (0%) 43 (18.1%) 43 (15.2%)
healthy 0 (0%) 114 (48.1%) 114 (40.4%)
hepatitis 0 (0%) 1 (0.4%) 1 (0.4%)
age (Years)
Mean (SD) NA (NA) 46.5 (10.9) 46.5 (10.9)
Median [Min, Max] NA [NA, NA] 45.0 [18.0, 78.0] 45.0 [18.0, 78.0]
Missing 45 (100%) 0 (0%) 45 (16.0%)
age_category
adult 36 (80.0%) 221 (93.2%) 257 (91.1%)
senior 9 (20.0%) 15 (6.3%) 24 (8.5%)
schoolage 0 (0%) 1 (0.4%) 1 (0.4%)
gender
female 0 (0%) 81 (34.2%) 81 (28.7%)
male 0 (0%) 156 (65.8%) 156 (55.3%)
Missing 45 (100%) 0 (0%) 45 (16.0%)
country
USA 45 (100%) 0 (0%) 45 (16.0%)
CHN 0 (0%) 237 (100%) 237 (84.0%)

There seems to be a strong correlation between study condition and current antibiotics use, which raises questions about the direction of causality in these predictions:

# Cross-tabulate study condition against current antibiotics use.
# table() leaves out NA entries by default, so samples without an
# antibiotics annotation are not counted here.
table(
  df[["study_condition"]],
  df[["antibiotics_current_use"]]
)
##            
##              no yes
##   cirrhosis  66  57
##   control   114   0

Compositionality

Many, but not all, columns add up to 100%:

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   97.04   99.98  100.00   99.92  100.00  100.00

The observation with minimum sum of relative abundances is:

## DataFrame with 1 row and 31 columns
##       study_name  subject_id   body_site antibiotics_current_use
##      <character> <character> <character>             <character>
## LV-1   QinN_2014        LV-1       stool                      no
##      study_condition             disease       age age_category      gender
##          <character>         <character> <integer>  <character> <character>
## LV-1       cirrhosis cirrhosis;hepatitis        42        adult        male
##          country non_westernized sequencing_platform DNA_extraction_kit
##      <character>     <character>         <character>        <character>
## LV-1         CHN              no       IlluminaHiSeq                 NA
##             PMID number_reads number_bases minimum_read_length
##      <character>    <integer>    <numeric>           <integer>
## LV-1    25079328     34522116   3452211600                 100
##      median_read_length      NCBI_accession      curator       BMI
##               <integer>         <character>  <character> <numeric>
## LV-1                100 ERR528291;ERR528292 Paolo_Manghi     19.03
##      antibiotics_family disease_subtype  creatine  albumine     alcohol
##             <character>     <character> <numeric> <numeric> <character>
## LV-1                 NA   HBV;cirrhosis        65      31.2         yes
##      disease_stage bilubirin prothrombin_time       inr       ctp
##          <integer> <numeric>        <numeric> <numeric> <integer>
## LV-1            NA      1.82               NA      1.18         6

Note that this is a result of dropping species for which phylogenetic information was unavailable when building the TreeSummarizedExperiment. It would be possible to take the data directly from ExperimentHub to avoid this data loss, but the loss is rare, small, and probably unimportant for most purposes.

SIAMCAT analysis

From the SIAMCAT vignette.

# Install SIAMCAT through BiocManager when it is not available yet,
# then attach it for the rest of the analysis.
if (!requireNamespace("SIAMCAT")) {
  BiocManager::install("SIAMCAT")
}
library(SIAMCAT)
# Derive the SIAMCAT label from the study_condition metadata column,
# using "cirrhosis" as the case group.
labs <- create.label(
  meta = data.frame(colData(se)),
  label = "study_condition",
  case = "cirrhosis"
)
## Label used as case:
##    cirrhosis
## Label used as control:
##    control
## + finished create.label.from.metadata in 0.009 s
# Assemble the siamcat object from features, label and sample metadata.
# The assay is divided by 100 so that abundances given in percent are
# passed on as proportions.
sc.obj <- siamcat(
  feat = assay(se) / 100,
  label = labs,
  meta = data.frame(colData(se))
)
## + starting validate.data
## +++ checking overlap between labels and features
## + Keeping labels of 282 sample(s).
## +++ checking sample number per class
## +++ checking overlap between samples and metadata
## + finished validate.data in 0.082 s
# Print a summary of the assembled siamcat object.
methods::show(sc.obj)
## siamcat-class object
## label()                Label object:         150 control and 132 cirrhosis samples
## 
## contains phyloseq-class experiment-level object @phyloseq:
## phyloseq@otu_table()   OTU Table:            [ 697 taxa and 282 samples ]
## phyloseq@sam_data()    Sample Data:          [ 282 samples by 31 sample variables ]

Unsupervised filtering:

# Unsupervised feature filtering by abundance with a cutoff of 0.001.
sc.obj <- filter.features(
  sc.obj,
  filter.method = "abundance",
  cutoff = 1e-3
)
## Features successfully filtered

Association testing

# Test the features for association with the label and draw the
# association plot with the "fc", "prevalence" and "auroc" panels.
# No pdf file is given, so the plot goes to the active graphics device.
# `prompt = FALSE` turns off the interactive "Are you sure that you want
# to continue?" question asked in that situation, so the call can run
# unattended (for example while knitting this document).
sc.obj <- check.associations(
  sc.obj,
  sort.by = "fc",
  alpha = 0.05,
  mult.corr = "fdr",
  detect.lim = 10^-6,
  plot.type = "quantile.box",
  panels = c("fc", "prevalence", "auroc"),
  prompt = FALSE
)
## ### WARNING: Not plotting to a pdf-file.
## ### The plot is optimized for landscape DIN-A4 (or similar) layout.
## ### Please make sure that your plotting region is large enough!!!
## ### Use at your own risk...
## Are you sure that you want to continue? (Yes/no/cancel)