Bearkland capstone DED 12.2.2024.Rmd
# Set a CRAN mirror
options(repos = c(CRAN = "https://cran.rstudio.com"))
install.packages("xfun")
## Installing package into '/home/runner/work/_temp/Library'
## (as 'lib' is unspecified)
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(c("devtools", "tidyverse", "kableExtra"))
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cran.rstudio.com
## Bioconductor version 3.20 (BiocManager 1.30.25), R 4.4.2 (2024-10-31)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'devtools' 'tidyverse' 'kableExtra'
## Installation paths not writeable, unable to update packages
## path: /opt/R/4.4.2/lib/R/library
## packages:
## class, cluster, foreign, KernSmooth, MASS, Matrix, nlme, nnet, rpart,
## spatial, survival
## Old packages: 'pak'
BiocManager::install(c("waldronlab/bugSigSimple", "waldronlab/BugSigDBStats", "waldronlab/bugsigdbr"))
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cran.rstudio.com
## Bioconductor version 3.20 (BiocManager 1.30.25), R 4.4.2 (2024-10-31)
## Installing github package(s) 'waldronlab/bugSigSimple',
## 'waldronlab/BugSigDBStats', 'waldronlab/bugsigdbr'
## Using github PAT from envvar GITHUB_PAT. Use `gitcreds::gitcreds_set()` and unset GITHUB_PAT in .Renviron (or elsewhere) if you want to use the more secure git credential store instead.
## Skipping install of 'bugSigSimple' from a github remote, the SHA1 (ef75ae54) has not changed since last install.
## Use `force = TRUE` to force installation
## Using github PAT from envvar GITHUB_PAT. Use `gitcreds::gitcreds_set()` and unset GITHUB_PAT in .Renviron (or elsewhere) if you want to use the more secure git credential store instead.
## Skipping install of 'BugSigDBStats' from a github remote, the SHA1 (37341f83) has not changed since last install.
## Use `force = TRUE` to force installation
## Using github PAT from envvar GITHUB_PAT. Use `gitcreds::gitcreds_set()` and unset GITHUB_PAT in .Renviron (or elsewhere) if you want to use the more secure git credential store instead.
## Skipping install of 'bugsigdbr' from a github remote, the SHA1 (3d187dcd) has not changed since last install.
## Use `force = TRUE` to force installation
## Installation paths not writeable, unable to update packages
## path: /opt/R/4.4.2/lib/R/library
## packages:
## class, cluster, foreign, KernSmooth, MASS, Matrix, nlme, nnet, rpart,
## spatial, survival
## Old packages: 'pak'
suppressPackageStartupMessages({
library(bugSigSimple)
library(BugSigDBStats)
library(bugsigdbr)
library(tidyverse)
library(stringr)
library(kableExtra)
library(dplyr)
library(magrittr)
library(boot)
})
use version=“devel” and cache = FALSE to take the latest version from bugsigdb.org
dat <- bugsigdbr::importBugSigDB(version = "devel", cache = FALSE)
dim(dat)
Examine the data pulled in to view variable names
names(dat)
Subset to include only PMID numbers of interest
included.pmid <-
c(
31463790,
32931939,
37586456,
37762390,
34900990,
37803284,
35350577,
37026303,
32694705,
38111925
)
subset.dat <-
dat[dat$PMID %in% included.pmid,]
dim(subset.dat)
Confirm all studies included
subset.dat
Examine group 0 names
unique(subset.dat$`Group 0 name`)
Examine group 1 names
unique(subset.dat$`Group 1 name`)
To omit the double comparison in Study 856 by removing the ANOSIM analysis
Final subset to include only those Group 0 names of interest (controls with no DED) and only those Group 1 names of interest (some variation on DED)
subset.final <- subset.dat[which(subset.dat$`Group 0 name` %in% c("Healthy Control", "Control", "control", "healthy controls", "Normal Control (NC)","Healthy control", "Healthy Controls","Normal healthy (NDM) children", "Healthy controls")& subset.dat$`Group 1 name`%in% c("Meibomian Gland Dysfunction + Lacrimal Dysfunction", "Meibomian Gland Dysfunction", "Dry Eye","ADDE", "DED patients", "MGD", "Meibomian Gland Dysfunction (MGD)", "Meibomian Gland Dysfunction (MGD) DED", "Meibomian Gland Dysfunction (MGD) Groups", "Mixed DED", "Sjogrens Syndrome Dry Eye (SSDE)", "Non Sjogrens Syndrome Dry Eye (NSSDE)", "Dry Eye Disease patients", "Sjogren's patients with Dry Eye Disease", "Dry Eye Disease patients without Sjogrens", "Diabetic children with Dry Eye Disease (DM-DE)", "Mild Dry Eye", "Mild and Moderate to Severe Dry Eye")),]
subset.final
dim(subset.final)
These are the studies included in the review
bugSigSimple::createStudyTable(subset.final)|> kableExtra::kbl()
This table summarizes the results for the identified taxa.
# Install and load necessary packages
install.packages("openxlsx")
## Installing package into '/home/runner/work/_temp/Library'
## (as 'lib' is unspecified)
library(openxlsx)
library(knitr)
# Create and format the table
taxon_table <- bugSigSimple::createTaxonTable(subset.final, n = 2000)
## Warning: Expected 7 pieces. Additional pieces discarded in 4 rows [112,
## 204, 205, 207].
styled_table <- kable_styling(kbl(taxon_table))
# Create a new workbook
wb <- createWorkbook()
# Add a worksheet
addWorksheet(wb, "Taxon Table")
# Write the data to the worksheet
writeData(wb, "Taxon Table", taxon_table)
# Save the workbook
saveWorkbook(wb, "TaxonTable.xlsx", overwrite = TRUE)
install.packages("writexl")
## Installing package into '/home/runner/work/_temp/Library'
## (as 'lib' is unspecified)
install.packages("ontologyIndex")
## Installing package into '/home/runner/work/_temp/Library'
## (as 'lib' is unspecified)
efo<-getOntology("efo")
## Loading required namespace: ontologyIndex
## Using cached version from 2025-01-30 21:55:38
efo
Top 1000 taxa decreased in relation to the control
mostfreqdec<-bugSigSimple::getMostFrequentTaxa(subset.final, n=1000,sig.type = "decreased")
mostfreqdec
Top 1000 taxa increased in relation to the control
mostfreqinc<-bugSigSimple::getMostFrequentTaxa(subset.final,n=1000, sig.type = "increased")
mostfreqinc
To obtain signatures
allsigs <- bugsigdbr::getSignatures(subset.final, tax.id.type = "taxname")
allsigs <- allsigs[sapply(allsigs, length) > 0] #require length > 0
dim(allsigs)
To obtain signature lengths displayed in a bar graph and import to a word document
# Install and load necessary packages
library(ggplot2)
# Create the bar graph
# Calculate the lengths of signatures
siglengths <- sapply(allsigs, length)
# Create a dataframe
siglengths.df <- data.frame(siglengths = siglengths)
# Plot the bar graph with Y-axis label
plot <- ggplot(siglengths.df, aes(x = siglengths)) +
geom_bar() +
labs(y = "Number of signatures", x = "Signature Lengths")
# Display the plot
print(plot)
# Save the plot to a file
ggsave("bar_graph.png", plot)
## Saving 7.29 x 4.51 in image
Table of signature lengths
table(siglengths)
Calculate pairwise overlap and obtain Jaccard value to determine similarity of signatures
mydists <- BugSigDBStats::calcPairwiseOverlaps(allsigs)
dim(mydists)
library(grid)
library(ComplexHeatmap)
## ========================================
## ComplexHeatmap version 2.22.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
jmat <- BugSigDBStats::calcJaccardSimilarity(allsigs)
# Truncate row names at the first underscore
rownames(jmat) <- sub("_.+$", "", rownames(jmat))
# Truncate column names at the first underscore
colnames(jmat) <- sub("_.+$", "", colnames(jmat))
# Check the truncated names
cat("Truncated row names: ", rownames(jmat), "\n")
cat("Truncated column names: ", colnames(jmat), "\n")
# Ensure row_labels and column_labels lengths match the matrix dimensions
row_labels <- rownames(jmat)
col_labels <- colnames(jmat)
# Verify that lengths match
cat("Length of row_labels: ", length(row_labels), "\n")
cat("Length of col_labels: ", length(col_labels), "\n")
# Create custom annotations with line breaks
ha <- HeatmapAnnotation(
`Signature\nLength` = anno_barplot(siglengths, gp = gpar(fill = 4:13),
annotation_name_gp = gpar(fontsize = 10, lineheight = 0.8))
)
hr <- rowAnnotation(
`Signature\nLength` = anno_barplot(siglengths, gp = gpar(fill = 4:13),
annotation_name_gp = gpar(fontsize = 10, lineheight = 0.8))
)
# Adjust heatmap dimensions and settings
hm <- Heatmap(
jmat,
top_annotation = ha,
left_annotation = hr,
row_names_max_width = unit(10, "cm"), # Adjusted width
column_names_max_height = unit(3, "cm"), # Adjusted height for column names
row_labels = row_labels,
column_labels = col_labels,
column_title = "Fig 3 Heatmap of Similarity of Signatures Between Studies", # Title of the heatmap
column_title_gp = gpar(fontsize = 15), # Adjust font size for title
heatmap_legend_param = list(
title = "Legend",
direction = "horizontal" # Set legend direction to horizontal
) # Customizing legend
)
# Draw the heatmap
draw(hm,
heatmap_legend_side = "bottom",
annotation_legend_side = "right")
# Save the heatmap as an image
png("heatmap.png", width = 12, height = 8, units = "in", res = 300)
draw(hm,
heatmap_legend_side = "bottom",
annotation_legend_side = "right")
dev.off()
## ========================================
## InteractiveComplexHeatmap version 1.14.0
## Bioconductor page: http://bioconductor.org/packages/InteractiveComplexHeatmap/
## Github page: https://github.com/jokergoo/InteractiveComplexHeatmap
##
## If you use it in published research, please cite:
## Gu, Z. Make Interactive Complex Heatmaps in R, 2021, Bioinformatics
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(InteractiveComplexHeatmap))
## ========================================
hm <- draw(hm)
htShiny(hm)
library(ComplexHeatmap)
library(grid)
# Assuming jmat and other variables are already defined as before
# Perform hierarchical clustering
hc <- hclust(dist(jmat))
plot(hc)
# Save the cluster map as an image
png("cluster_map.png", width = 12, height = 8, units = "in", res = 300)
plot(hc, main = "Hierarchical Cluster Map of Signatures", xlab = "", sub = "", cex = 0.6)
dev.off()
#all taxon increased and decreased freq
allfreqs <- bugSigSimple::createTaxonTable(subset.final, n = 500) %>% #could change number
arrange(I(decreased_signatures - increased_signatures))
## Warning: Expected 7 pieces. Additional pieces discarded in 4 rows [112,
## 204, 205, 207].