library(curatedTCGAData)
library(TCGAutils)
library(RaggedExperiment)
library(RaggedExperiment.SoftNote)
# Dry run for the "CNVSNP" assay: with dry.run = TRUE the call returns a
# listing of matching resources (one row each, with a `file_size` column
# that is summed below). No diseaseCode is supplied, so the default is used.
cnvdry <- curatedTCGAData(
  dry.run = TRUE,
  version = "2.0.1",
  assays = "CNVSNP"
)
## snapshotDate(): 2023-03-13
## See '?curatedTCGAData' for 'diseaseCode' and 'assays' inputs
# Number of resources listed.
nrow(cnvdry)
## [1] 33
# Drop the literal " Mb" suffix from every reported size and total the values.
sum(
  as.numeric(
    gsub(" Mb", "", cnvdry[["file_size"]], fixed = TRUE)
  )
)
## [1] 26.4
# Load the BRCA "CNVSNP" data for real (dry.run = FALSE). The disease code
# is passed by name so the call reads the same way as the Mutation request
# made later in this script.
cnv <- curatedTCGAData(
  diseaseCode = "BRCA",
  assays = "CNVSNP",
  version = "2.0.1",
  dry.run = FALSE
)
## snapshotDate(): 2023-03-13
## Working on: BRCA_CNVSNP-20160128
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## loading from cache
## Working on: BRCA_colData-20160128
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## loading from cache
## Working on: BRCA_metadata-20160128
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## loading from cache
## Working on: BRCA_sampleMap-20160128
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## loading from cache
## harmonizing input:
##   removing 13386 sampleMap rows not in names(experiments)
# Dry run for the "Mutation" assay: with dry.run = TRUE the call returns a
# listing of matching resources (one row each, with a `file_size` column
# that is summed below). No diseaseCode is supplied, so the default is used.
mutsdry <-
  curatedTCGAData(assays = "Mutation",
                  version = "2.0.1",
                  dry.run = TRUE)
## snapshotDate(): 2023-03-13
## See '?curatedTCGAData' for 'diseaseCode' and 'assays' inputs
# Number of resources listed.
nrow(mutsdry)
## [1] 32
# Strip the " Mb" suffix as a literal string (fixed = TRUE), the same way the
# CNVSNP sizes are handled, rather than interpreting it as a regular
# expression; then total the numeric sizes.
sum(as.numeric(sub(" Mb", "", mutsdry$file_size, fixed = TRUE)))
## [1] 96.5
# Load the BRCA "Mutation" data for real (dry.run = FALSE); the recorded
# console output below shows each resource being read from the local cache.
muts <- curatedTCGAData(
  diseaseCode = "BRCA",
  assays = "Mutation",
  dry.run = FALSE,
  version = "2.0.1"
)
## snapshotDate(): 2023-03-13
## Working on: BRCA_Mutation-20160128
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## loading from cache
## Working on: BRCA_colData-20160128
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## loading from cache
## Working on: BRCA_metadata-20160128
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## loading from cache
## Working on: BRCA_sampleMap-20160128
## see ?curatedTCGAData and browseVignettes('curatedTCGAData') for documentation
## loading from cache
## harmonizing input:
##   removing 14592 sampleMap rows not in names(experiments)
##   removing 121 colData rownames not in sampleMap 'primary'
# Pull the experiments out of the Mutation object and summarise them.
mutsexplist <- experiments(muts)
# How many experiments were returned.
length(mutsexplist)
## [1] 1
# Total number of columns across all experiments. vapply() pins the
# per-experiment result to a single integer, so an unexpected return shape
# fails loudly instead of silently changing the type handed to sum().
sum(vapply(mutsexplist, ncol, integer(1L)))
## [1] 993
# In-memory footprint of the experiment list, with an automatically chosen unit.
print(object.size(mutsexplist), units = "auto")
## 67.4 Mb
# Time the simplifyTCGA() call on the BRCA CNVSNP experiment. The assignment
# happens inside the timed expression, so `cnvsimp` is available afterwards.
system.time({
  cnvsimp <- simplifyTCGA(cnv[, , "BRCA_CNVSNP-20160128"])
})
## 
##   403 genes were dropped because they have exons located on both strands
##   of the same reference sequence or on more than one reference sequence,
##   so cannot be represented by a single genomic range.
##   Use 'single.strand.genes.only=FALSE' to get all the genes in a
##   GRangesList object, or use suppressMessages() to suppress this message.
## Warning in (function (seqlevels, genome, new_style) : cannot switch some hg19's
## seqlevels from UCSC to NCBI style
## 'select()' returned 1:1 mapping between keys and columns
## Warning in .normarg_seqlevelsStyle(value): more than one seqlevels style
## supplied, using the 1st one only
## Warning in (function (seqlevels, genome, new_style) : cannot switch hg19's
## seqlevels from UCSC to NCBI style
## Warning: 'experiments' dropped; see 'metadata'
## harmonizing input:
##   removing 2199 sampleMap rows not in names(experiments)
##    user  system elapsed 
## 169.025   8.955 179.228
# Compare the in-memory size of the CNVSNP experiment before and after
# simplification. Base R's object.size() is used here, formatted as a
# character string with an automatically chosen SI unit.
# NOTE(review): the original called object_size(), which is not defined in
# this script and does not appear to come from any attached package; if a
# project helper of that name exists elsewhere, confirm it reports the same
# quantity before relying on the recorded figures below.
format(object.size(cnv[["BRCA_CNVSNP-20160128"]]),
       units = "auto", standard = "SI")
## [1] "8.6 MB"
format(object.size(cnvsimp[["BRCA_CNVSNP-20160128_simplified"]]),
       units = "auto", standard = "SI")
## [1] "406.9 MB"