RaggedExperiment_Manuscript_Example.Rmd
https://github.com/LiNk-NY/RaggedExperiment_SoftNote cloned and updated from https://github.com/vjcitn/raggedchk
# Install BiocManager from CRAN only when it is not already available.
# requireNamespace() tests for the package without attaching it; attaching is
# unnecessary because install() is called through the BiocManager:: prefix.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Install RaggedExperiment from Bioconductor.
BiocManager::install("RaggedExperiment")
library(RaggedExperiment)
library(curatedTCGAData)
library(GenomeInfoDb)
library(RaggedExperiment.SoftNote)
# Retrieve the BRCA CNASeq and Mutation assays (data version 2.0.1).
# dry.run = FALSE requests the data itself rather than a listing.
BRCA <- curatedTCGAData(
  diseaseCode = "BRCA",
  assays = c("CNASeq", "Mutation"),
  version = "2.0.1",
  dry.run = FALSE
)
# Display the object; the recorded summary follows.
BRCA
## A MultiAssayExperiment object of 2 listed
## experiments with user-defined names and respective classes.
## Containing an ExperimentList class object of length 2:
## [1] BRCA_CNASeq-20160128: RaggedExperiment with 5298 rows and 38 columns
## [2] BRCA_Mutation-20160128: RaggedExperiment with 90490 rows and 993 columns
## Functionality:
## experiments() - obtain the ExperimentList instance
## colData() - the primary/phenotype DataFrame
## sampleMap() - the sample coordination DataFrame
## `$`, `[`, `[[` - extract colData columns, subset, or experiment
## *Format() - convert into a long or wide DataFrame
## assays() - convert ExperimentList to a SimpleList of matrices
## exportClass() - save data to flat files
# Report the in-memory size of the CNASeq assay object and of the matrices
# derived from it: sparseAssay(sparse = TRUE), compactAssay() and the default
# sparseAssay(). These appear to correspond to the "sparse Matrix",
# "matrix (reduced rows)" and "matrix (sparse)" columns of the size table.
# NOTE(review): object_size() is not defined in the visible code; presumably it
# is provided by one of the attached packages (or pryr/lobstr) -- confirm.
object_size(BRCA[["BRCA_CNASeq-20160128"]])
object_size(sparseAssay(BRCA[["BRCA_CNASeq-20160128"]], sparse = TRUE))
object_size(compactAssay(BRCA[["BRCA_CNASeq-20160128"]]))
object_size(sparseAssay(BRCA[["BRCA_CNASeq-20160128"]]))
# Same measurements for the Mutation assay.
object_size(BRCA[["BRCA_Mutation-20160128"]])
# Left disabled: the sparse = TRUE variant is not measured for the Mutation
# assay, presumably because its values are character (the size table lists NA).
# object_size(sparseAssay(BRCA[["BRCA_Mutation-20160128"]], sparse = TRUE))
object_size(compactAssay(BRCA[["BRCA_Mutation-20160128"]]))
object_size(sparseAssay(BRCA[["BRCA_Mutation-20160128"]]))
# curatedTCGAData
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
# Annotation database from which the gene ranges are taken.
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
# Gene ranges (requested with single.strand.genes.only = FALSE), restricted to
# the standard chromosomes using coarse pruning.
genes <- keepStandardChromosomes(
  genes(txdb, single.strand.genes.only = FALSE),
  pruning.mode = "coarse"
)
# Unlisted form of the gene ranges, used later as the overlap target.
ugenes <- unlist(genes)
## `seqlevelsStyle` to UCSC
# https://gdac.broadinstitute.org/runs/analyses__2016_01_28/reports/cancer/BRCA-TP/CopyNumberLowPass_Gistic2/nozzle.html
# Extract the CNASeq assay and annotate it: UCSC-style sequence names, hg19
# genome.
re <- BRCA[["BRCA_CNASeq-20160128"]]
## possible bug -- seqlevelsStyle must go first, then genome:
## applying the same two setters in the opposite order yields an object that
## is not identical() to this one (see the re2 comparison that follows).
seqlevelsStyle(re) <- "UCSC"
genome(rowRanges(re)) <- "hg19"
# Print the annotated ranges; the recorded output follows.
rowRanges(re)
## GRanges object with 5298 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 10209-2583075 *
## [2] chr1 2583076-249240606 *
## [3] chr2 10002-243189359 *
## [4] chr3 60175-162511435 *
## [5] chr3 162511436-162626067 *
## ... ... ... ...
## [5294] chr20 60001-62965506 *
## [5295] chr21 9422166-48119869 *
## [5296] chr22 16051206-51244552 *
## [5297] chr23 2699503-116067549 *
## [5298] chr24 2649450-28784074 *
## -------
## seqinfo: 24 sequences from hg19 genome; no seqlengths
# Counter-example for the ordering issue noted for `re`: start from the same
# assay but set the genome before the seqlevels style.
re2 <- BRCA[["BRCA_CNASeq-20160128"]]
genome(rowRanges(re2)) <- "hg19"
seqlevelsStyle(re2) <- "UCSC"
rowRanges(re2)
# The two objects differ although only the order of the two setters changed.
identical(re, re2)
## [1] FALSE
# Subset the annotated CNASeq assay to the ranges overlapping `ugenes`, then
# report the size of the compactAssay() and sparseAssay() matrices of that
# subset.
ingenes <- subsetByOverlaps(re, ugenes)
object_size(compactAssay(ingenes))
object_size(sparseAssay(ingenes))
# https://gdac.broadinstitute.org/runs/analyses__2016_01_28/reports/cancer/BRCA-TP/CopyNumberLowPass_Gistic2/nozzle.html
# NOTE(review): this link is the CopyNumberLowPass_Gistic2 report, the same one
# cited for the CNASeq assay; it looks copy-pasted -- confirm the intended
# reference for the Mutation assay.
# Extract the Mutation assay and annotate it the same way as the CNASeq assay.
mre <- BRCA[["BRCA_Mutation-20160128"]]
## possible bug -- seqlevelsStyle must go first, then genome
seqlevelsStyle(mre) <- "UCSC"
## Warning in (function (seqlevels, genome, new_style) : cannot switch some
## GRCh37's seqlevels from NCBI to UCSC style
genome(rowRanges(mre)) <- "hg19"
# Print the annotated ranges; the recorded output follows.
rowRanges(mre)
## GRanges object with 90490 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr10 116247760 +
## [2] chr12 43944926 +
## [3] chr3 85932472 +
## [4] chr2 25678299 +
## [5] chr17 40272381 +
## ... ... ... ...
## [90486] chr3 48299430 +
## [90487] chr19 52394623 +
## [90488] chr16 30537313 +
## [90489] chr19 35449130 +
## [90490] chr19 53994951 +
## -------
## seqinfo: 26 sequences from hg19 genome; no seqlengths
# Subset the annotated Mutation assay to the ranges overlapping `ugenes`, then
# report the size of the compactAssay() and sparseAssay() matrices of that
# subset.
mingenes <- subsetByOverlaps(mre, ugenes)
object_size(compactAssay(mingenes))
object_size(sparseAssay(mingenes))
# RTCGAToolbox
library(RTCGAToolbox)
# Show the Firehose archive link for the BRCA CNASeq data.
getLinks("BRCA", CNASeq = TRUE)
## [1] "https://gdac.broadinstitute.org/runs/stddata__2016_01_28/data/BRCA/20160128/gdac.broadinstitute.org_BRCA.Merge_cna__illuminahiseq_dnaseqc__hms_harvard_edu__Level_3__segmentation__seg.Level_3.2016012800.0.0.tar.gz"
# Fetch the BRCA Firehose data with the CNASeq component; the recorded messages
# show locally cached files being reused.
BRCAseq <- getFirehoseData("BRCA", CNASeq = TRUE)
## RTCGAToolbox cache directory set to:
## /home/mr148/.cache/R/RTCGAToolbox
## Using locally cached version of /home/mr148/.cache/R/RTCGAToolbox/20160128-BRCA-Clinical.txt
## Using locally cached version of /home/mr148/.cache/R/RTCGAToolbox/20160128-BRCA-CNAseq.txt
# Extract the CNASeq component as an object that carries rowRanges().
cnatoolbox <- biocExtract(BRCAseq, "CNASeq")
## working on: CNASeq
# Annotate in the same order as for the curatedTCGAData assays: seqlevels
# style first, then genome. Here genome<- is applied to the object itself
# rather than to rowRanges() of the object.
seqlevelsStyle(cnatoolbox) <- "UCSC"
genome(cnatoolbox) <- "hg19"
# Print the annotated ranges; the recorded output follows.
rowRanges(cnatoolbox)
## GRanges object with 5298 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 10209-2583075 *
## [2] chr1 2583076-249240606 *
## [3] chr2 10002-243189359 *
## [4] chr3 60175-162511435 *
## [5] chr3 162511436-162626067 *
## ... ... ... ...
## [5294] chr20 60001-62965506 *
## [5295] chr21 9422166-48119869 *
## [5296] chr22 16051206-51244552 *
## [5297] chr23 2699503-116067549 *
## [5298] chr24 2649450-28784074 *
## -------
## seqinfo: 24 sequences from hg19 genome; no seqlengths
# Show the Firehose archive link for the BRCA Mutation data.
getLinks("BRCA", Mutation = TRUE)
## [1] "https://gdac.broadinstitute.org/runs/stddata__2016_01_28/data/BRCA/20160128/gdac.broadinstitute.org_BRCA.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz"
# Fetch the BRCA Firehose data with the Mutation component; the recorded
# messages show a locally cached file being reused.
BRCAmut <- getFirehoseData("BRCA", Mutation = TRUE)
## RTCGAToolbox cache directory set to:
## /home/mr148/.cache/R/RTCGAToolbox
## Using locally cached version of /home/mr148/.cache/R/RTCGAToolbox/20160128-BRCA-Clinical.txt
# Extract the Mutation component as an object that carries rowRanges().
muttoolbox <- biocExtract(BRCAmut, "Mutation")
## working on: Mutation
# Seqlevels style first, then genome. The style change emits the warning
# recorded below: some seqlevels could not be switched to UCSC style.
seqlevelsStyle(muttoolbox) <- "UCSC"
## Warning in (function (seqlevels, genome, new_style) : cannot switch some
## GRCh37's seqlevels from NCBI to UCSC style
genome(muttoolbox) <- "hg19"
# Print the annotated ranges; the recorded output follows.
rowRanges(muttoolbox)
## GRanges object with 90490 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr10 116247760 +
## [2] chr12 43944926 +
## [3] chr3 85932472 +
## [4] chr2 25678299 +
## [5] chr17 40272381 +
## ... ... ... ...
## [90486] chr3 48299430 +
## [90487] chr19 52394623 +
## [90488] chr16 30537313 +
## [90489] chr19 35449130 +
## [90490] chr19 53994951 +
## -------
## seqinfo: 26 sequences from hg19 genome; no seqlengths
## RTCGAToolbox: object sizes
# Sizes for the CNASeq data obtained through RTCGAToolbox: the extracted
# object, the CNASeq slot of the Firehose object it was extracted from, and
# the three matrix representations.
object_size(cnatoolbox)
object_size(BRCAseq@CNASeq)
object_size(sparseAssay(cnatoolbox, sparse = TRUE))
object_size(compactAssay(cnatoolbox))
object_size(sparseAssay(cnatoolbox))
# Same measurements for the Mutation data.
object_size(muttoolbox)
object_size(BRCAmut@Mutation)
# Left disabled: the sparse = TRUE variant is not measured for the Mutation
# data; the author's note gives the reason as the character type of the values.
# object_size(sparseAssay(muttoolbox, sparse = TRUE)) # typeof character
object_size(compactAssay(muttoolbox))
object_size(sparseAssay(muttoolbox))
# CNASeq (RTCGAToolbox): keep the ranges overlapping `ugenes`, then report the
# size of the subset and of its matrix representations.
incnabox <- subsetByOverlaps(cnatoolbox, ugenes)
object_size(incnabox)
# NA
object_size(sparseAssay(incnabox, sparse = TRUE))
object_size(compactAssay(incnabox))
object_size(sparseAssay(incnabox))

# Mutation (RTCGAToolbox): same subset and measurements; the sparse = TRUE
# variant stays disabled, as in the other Mutation measurements.
inmutbox <- subsetByOverlaps(muttoolbox, ugenes)
object_size(inmutbox)
# NA
# object_size(sparseAssay(inmutbox, sparse = TRUE))
object_size(compactAssay(inmutbox))
object_size(sparseAssay(inmutbox))
Data Source | Assay | Data Type | RaggedExperiment | as.data.frame | sparse Matrix | matrix (reduced rows) | matrix (sparse) |
---|---|---|---|---|---|---|---|
curatedTCGAData | CNASeq | numeric | 0.2 MB | 0.3 MB | 0.3 MB | 1 MB | 1.9 MB |
curatedTCGAData | CNASeq (in genes) | numeric | 0.2 MB | 0.2 MB | 0.3 MB | 0.9 MB | 1.7 MB |
curatedTCGAData | Mutation | character | 70.6 MB | 71.8 MB | NA | 680.3 MB | 726.2 MB |
curatedTCGAData | Mutation (in genes) | character | 37.6 MB | 38.1 MB | NA | 351.3 MB | 375.5 MB |