The purpose of this helper function is to faciltate the creation of a MultiAssayExperiment object by detecting any inconsistencies with all types of names in either the ExperimentList, the colData, or sampleMap.

prepMultiAssay(ExperimentList, colData, sampleMap, ...)

Arguments

ExperimentList

A list of all combined experiments

colData

A DataFrame of the phenotype data for all participants

sampleMap

A DataFrame of sample identifiers, assay samples, and assay names

...

Optional arguments for the MultiAssayExperiment constructor function such as metadata and drops.

Value

A list containing all the essential components of a MultiAssayExperiment as well as a "drops" metadata element that indicates non-matched names. The names of the resulting list correspond to the arguments of the MultiAssayExperiment constructor function.

Checks

The prepMultiAssay function checks that all columns in the sampleMap are character.

It checks that all names and lengths match in both the ExperimentList and in the unique assay names of the sampleMap.

If ExperimentList names and assay names only differ by case and are not duplicated, the function will standardize all names to lowercase.

If names cannot be matched between the colname column of the sampleMap and the colnames of the ExperimentList, those unmatched will be dropped and found in the "drops" element of the resulting list.

Names in the "primary" column of the sampleMap, will be matched to those in the colData. Unmatched "primary" column rows will be dropped from the sampleMap. Suggestions for name fixes in either the ExperimentList or colnames will be made when necessary.

Examples

## Run example
example("MultiAssayExperiment")
#> 
#> MltAsE> ## Run the example ExperimentList
#> MltAsE> example("ExperimentList")
#> 
#> ExprmL> ## Create an empty ExperimentList instance
#> ExprmL> ExperimentList()
#> ExperimentList class object of length 0:
#>  
#> ExprmL> ## Create array matrix and AnnotatedDataFrame to create an ExpressionSet class
#> ExprmL> arraydat <- matrix(data = seq(101, length.out = 20), ncol = 4,
#> ExprmL+     dimnames = list(
#> ExprmL+         c("ENST00000294241", "ENST00000355076",
#> ExprmL+         "ENST00000383706","ENST00000234812", "ENST00000383323"),
#> ExprmL+         c("array1", "array2", "array3", "array4")
#> ExprmL+     ))
#> 
#> ExprmL> colDat <- data.frame(slope53 = rnorm(4),
#> ExprmL+     row.names = c("array1", "array2", "array3", "array4"))
#> 
#> ExprmL> ## SummarizedExperiment constructor
#> ExprmL> exprdat <- SummarizedExperiment::SummarizedExperiment(arraydat,
#> ExprmL+     colData = colDat)
#> 
#> ExprmL> ## Create a sample methylation dataset
#> ExprmL> methyldat <- matrix(data = seq(1, length.out = 25), ncol = 5,
#> ExprmL+     dimnames = list(
#> ExprmL+         c("ENST00000355076", "ENST00000383706",
#> ExprmL+           "ENST00000383323", "ENST00000234812", "ENST00000294241"),
#> ExprmL+         c("methyl1", "methyl2", "methyl3",
#> ExprmL+           "methyl4", "methyl5")
#> ExprmL+     ))
#> 
#> ExprmL> ## Create a sample RNASeqGene dataset
#> ExprmL> rnadat <- matrix(
#> ExprmL+     data = sample(c(46851, 5, 19, 13, 2197, 507,
#> ExprmL+         84318, 126, 17, 21, 23979, 614), size = 20, replace = TRUE),
#> ExprmL+     ncol = 4,
#> ExprmL+     dimnames = list(
#> ExprmL+         c("XIST", "RPS4Y1", "KDM5D", "ENST00000383323", "ENST00000234812"),
#> ExprmL+         c("samparray1", "samparray2", "samparray3", "samparray4")
#> ExprmL+     ))
#> 
#> ExprmL> ## Create a mock RangedSummarizedExperiment from a data.frame
#> ExprmL> rangedat <- data.frame(chr="chr2", start = 11:15, end = 12:16,
#> ExprmL+     strand = c("+", "-", "+", "*", "."),
#> ExprmL+     samp0 = c(0,0,1,1,1), samp1 = c(1,0,1,0,1), samp2 = c(0,1,0,1,0),
#> ExprmL+     row.names = c(paste0("ENST", "00000", 135411:135414), "ENST00000383323"))
#> 
#> ExprmL> rangeSE <- SummarizedExperiment::makeSummarizedExperimentFromDataFrame(rangedat)
#> 
#> ExprmL> ## Combine to a named list and call the ExperimentList constructor function
#> ExprmL> assayList <- list(Affy = exprdat, Methyl450k = methyldat, RNASeqGene = rnadat,
#> ExprmL+                 GISTIC = rangeSE)
#> 
#> ExprmL> ## Use the ExperimentList constructor
#> ExprmL> ExpList <- ExperimentList(assayList)
#> 
#> MltAsE> ## Create sample maps for each experiment
#> MltAsE> exprmap <- data.frame(
#> MltAsE+     primary = c("Jack", "Jill", "Barbara", "Bob"),
#> MltAsE+     colname = c("array1", "array2", "array3", "array4"),
#> MltAsE+     stringsAsFactors = FALSE)
#> 
#> MltAsE> methylmap <- data.frame(
#> MltAsE+     primary = c("Jack", "Jack", "Jill", "Barbara", "Bob"),
#> MltAsE+     colname = c("methyl1", "methyl2", "methyl3", "methyl4", "methyl5"),
#> MltAsE+     stringsAsFactors = FALSE)
#> 
#> MltAsE> rnamap <- data.frame(
#> MltAsE+     primary = c("Jack", "Jill", "Bob", "Barbara"),
#> MltAsE+     colname = c("samparray1", "samparray2", "samparray3", "samparray4"),
#> MltAsE+     stringsAsFactors = FALSE)
#> 
#> MltAsE> gistmap <- data.frame(
#> MltAsE+     primary = c("Jack", "Bob", "Jill"),
#> MltAsE+     colname = c("samp0", "samp1", "samp2"),
#> MltAsE+     stringsAsFactors = FALSE)
#> 
#> MltAsE> ## Combine as a named list and convert to a DataFrame
#> MltAsE> maplist <- list(Affy = exprmap, Methyl450k = methylmap,
#> MltAsE+     RNASeqGene = rnamap, GISTIC = gistmap)
#> 
#> MltAsE> ## Create a sampleMap
#> MltAsE> sampMap <- listToMap(maplist)
#> 
#> MltAsE> ## Create an example phenotype data
#> MltAsE> colDat <- data.frame(sex = c("M", "F", "M", "F"), age = 38:41,
#> MltAsE+     row.names = c("Jack", "Jill", "Bob", "Barbara"))
#> 
#> MltAsE> ## Create a MultiAssayExperiment instance
#> MltAsE> mae <- MultiAssayExperiment(experiments = ExpList, colData = colDat,
#> MltAsE+     sampleMap = sampMap)

## Check if there are any inconsistencies within the different names
preparedMAE <- prepMultiAssay(ExpList, colDat, sampMap)

## Results in a list of components for the MultiAssayExperiment constructor
## function
MultiAssayExperiment(preparedMAE$experiments, preparedMAE$colData,
preparedMAE$sampleMap)
#> A MultiAssayExperiment object of 4 listed
#>  experiments with user-defined names and respective classes.
#>  Containing an ExperimentList class object of length 4:
#>  [1] Affy: SummarizedExperiment with 5 rows and 4 columns
#>  [2] Methyl450k: matrix with 5 rows and 5 columns
#>  [3] RNASeqGene: matrix with 5 rows and 4 columns
#>  [4] GISTIC: RangedSummarizedExperiment with 5 rows and 3 columns
#> Functionality:
#>  experiments() - obtain the ExperimentList instance
#>  colData() - the primary/phenotype DataFrame
#>  sampleMap() - the sample coordination DataFrame
#>  `$`, `[`, `[[` - extract colData columns, subset, or experiment
#>  *Format() - convert into a long or wide DataFrame
#>  assays() - convert ExperimentList to a SimpleList of matrices
#>  exportClass() - save data to flat files

## Alternatively, use the do.call function
do.call(MultiAssayExperiment, preparedMAE)
#> A MultiAssayExperiment object of 4 listed
#>  experiments with user-defined names and respective classes.
#>  Containing an ExperimentList class object of length 4:
#>  [1] Affy: SummarizedExperiment with 5 rows and 4 columns
#>  [2] Methyl450k: matrix with 5 rows and 5 columns
#>  [3] RNASeqGene: matrix with 5 rows and 4 columns
#>  [4] GISTIC: RangedSummarizedExperiment with 5 rows and 3 columns
#> Functionality:
#>  experiments() - obtain the ExperimentList instance
#>  colData() - the primary/phenotype DataFrame
#>  sampleMap() - the sample coordination DataFrame
#>  `$`, `[`, `[[` - extract colData columns, subset, or experiment
#>  *Format() - convert into a long or wide DataFrame
#>  assays() - convert ExperimentList to a SimpleList of matrices
#>  exportClass() - save data to flat files