## ----echo = FALSE, message = FALSE-------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4, tibble.print_max = 4) ## ----eval = FALSE------------------------------------------------------------- # ## Install BiocManager, which is required to install packages from Bioconductor # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install(version = "devel") # BiocManager::install("TENET") ## ----eval = FALSE------------------------------------------------------------- # ## Install prerequisite packages to install the development version from GitHub # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # if (!requireNamespace("remotes", quietly = TRUE)) { # install.packages("remotes") # } # # BiocManager::install(version = "devel") # BiocManager::install("rhielab/TENET.ExperimentHub") # BiocManager::install("rhielab/TENET") ## ----message = FALSE---------------------------------------------------------- library(TENET) ## ----message = FALSE---------------------------------------------------------- library(TENET.ExperimentHub) ## ----message = FALSE---------------------------------------------------------- library(TENET.AnnotationHub) ## ----message = FALSE---------------------------------------------------------- ## Load the MultiAssayExperiment package. This is not strictly necessary, but ## allows the user to avoid typing MultiAssayExperiment:: before its functions. library(MultiAssayExperiment) ## ----------------------------------------------------------------------------- ## Load in the example MultiAssayExperiment dataset from the TENET.ExperimentHub ## package exampleTENETMultiAssayExperiment <- TENET.ExperimentHub::exampleTENETMultiAssayExperiment() ## Examine the SummarizedExperiments that should be contained in a ## MultiAssayExperiment object appropriate for use in TENET analyses, including ## the "expression" object experiments(exampleTENETMultiAssayExperiment) class( experiments( exampleTENETMultiAssayExperiment )[["expression"]] ) ## Examine data for the first 6 genes and samples in the assay object of the ## "expression" SummarizedExperiment object assays( experiments( exampleTENETMultiAssayExperiment )[["expression"]] )[[1]][ seq_len(6), seq_len(6) ] ## ----------------------------------------------------------------------------- ## Examine the rowRanges of the "expression" SummarizedExperiment object for the ## first genes. ## Note: Additional columns are included here, but only the chromosome ## (seqnames), coordinates (rowRanges), strand, and geneName columns are used. head( rowRanges( experiments( exampleTENETMultiAssayExperiment )[["expression"]] ) ) ## The names of this object are the gene IDs head( names( rowRanges( experiments( exampleTENETMultiAssayExperiment )[["expression"]] ) ) ) ## ----------------------------------------------------------------------------- ## Again, examine the SummarizedExperiments included in the MultiAssayExperiment ## object, focusing on the "methylation" object here experiments(exampleTENETMultiAssayExperiment) class( experiments( exampleTENETMultiAssayExperiment )[["methylation"]] ) ## Examine data for the first six RE DNA methylation sites and samples in the ## assay object of the "methylation" SummarizedExperiment object assays( experiments( exampleTENETMultiAssayExperiment )[["methylation"]] )[[1]][ seq_len(6), seq_len(6) ] ## ----------------------------------------------------------------------------- ## Examine the rowRanges of the "methylation" SummarizedExperiment object for ## the first six RE DNA methylation sites. ## Note: Additional columns are included here, but only the chromosome ## (seqnames) and coordinates (ranges) are used. head( rowRanges( experiments( exampleTENETMultiAssayExperiment )[["methylation"]] )[, seq_len(6)] ) ## The names of this object are the RE DNA methylation site IDs (usually probe ## IDs) head( names( rowRanges( experiments( exampleTENETMultiAssayExperiment )[["methylation"]] ) ) ) ## ----------------------------------------------------------------------------- ## Examine the number of rows in the colData of the MultiAssayExperiment object ## compared to the number of samples (columns) in the "expression" and ## "methylation" summarized experiment objects. The number of patient entries ## does not need to match the number of samples included in the "expression" or ## "methylation" objects, as a single "Control" and "Case" sample could be ## derived from the same patient (though ideally, no more than one of each) nrow(colData(exampleTENETMultiAssayExperiment)) experiments(exampleTENETMultiAssayExperiment) ## Examine some of the rownames, which should contain a unique identifier for ## each patient. These will be used in the MultiAssayExperiment's sampleMap ## object to match them to the samples included in the "expression" and ## "methylation" objects rownames(colData(exampleTENETMultiAssayExperiment))[seq_len(6)] ## ----------------------------------------------------------------------------- ## The sampleMap object should contain a row for each of the samples included ## in both the "expression" and "methylation" objects nrow(sampleMap(exampleTENETMultiAssayExperiment)) experiments(exampleTENETMultiAssayExperiment) ## 4 columns of data should be included in the sampleMap, "assay", "primary", ## "colname", and "sampleType" colnames(sampleMap(exampleTENETMultiAssayExperiment)) ## The "assay" column should be a factor which lists which data object each ## sample is from ("expression", or "methylation) sampleMap(exampleTENETMultiAssayExperiment)$assay[seq_len(6)] levels(sampleMap(exampleTENETMultiAssayExperiment)$assay) table(sampleMap(exampleTENETMultiAssayExperiment)$assay) ## The "primary" column should note which of the patient IDs from the ## MultiAssayExperiment's colData object each sample maps to. sampleMap(exampleTENETMultiAssayExperiment)$primary[seq_len(6)] table( sampleMap(exampleTENETMultiAssayExperiment)$primary %in% rownames(colData(exampleTENETMultiAssayExperiment)) ) ## The "colname" column should include the sample names of each of the samples ## as they are listed in the colnames of either the "expression" or ## "methylation" SummarizedExperiments' assay objects sampleMap(exampleTENETMultiAssayExperiment)$colname[seq_len(6)] table( sampleMap(exampleTENETMultiAssayExperiment)$colname %in% c( colnames( assays( experiments( exampleTENETMultiAssayExperiment )[["expression"]] )[[1]] ), colnames( assays( experiments( exampleTENETMultiAssayExperiment )[["methylation"]] )[[1]] ) ) ) ## Finally, the "sampleType" column should list whether each sample in the ## "expression" or "methylation" SummarizedExperiment objects is a "Case" or ## "Control" sample for the purposes of TENET analyses. sampleMap(exampleTENETMultiAssayExperiment)$sampleType[seq_len(6)] table(sampleMap(exampleTENETMultiAssayExperiment)$sampleType) ## ----eval = FALSE------------------------------------------------------------- # ## This example first creates a dataset of putative enhancer regulatory elements # ## from consensus datasets and breast invasive carcinoma-relevant sources # ## collected in the TENET.AnnotationHub package, then runs the step 2 through # ## step 6 functions analyzing RE DNA methylation sites in potential # ## enhancer elements located over 1500 bp from transcription start sites # ## listed for genes and transcripts in the GENCODE v36 human genome # ## annotations (already contained in the exampleTENETMultiAssayExperiment # ## object loaded earlier), using a minimum case sample count of 5 and one # ## CPU core to perform the analysis. # exampleObject <- easyTENET( # TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment, # extHM = NA, # extNDR = NA, # publicEnhancer = TRUE, # publicNDR = TRUE, # cancerType = "BRCA", # ENCODEdELS = TRUE, # minCaseCount = 5 # ) # # ## The exampleObject should have data from the step 2 through step 6 functions, # ## as the dataset created by the step1MakeExternalDatasets function is used by # ## and saved in the output of the step2GetDifferentiallyMethylatedSites # ## function. # # ## See the types of data that were saved by the step 2 function # names(metadata(exampleObject)$step2GetDifferentiallyMethylatedSites) # # ## See the GRanges object created by the step 1 function # metadata( # exampleObject # )$step2GetDifferentiallyMethylatedSites$regulatoryElementGRanges # # ## See the types of data that were saved by the step 3 function # names(metadata(exampleObject)$step3GetAnalysisZScores) # # ## See the types of data that were saved by the step 4 function # names( # metadata(exampleObject)$step4SelectMostSignificantLinksPerDNAMethylationSite # ) # # ## See the types of data that were saved by the step 5 function # names(metadata(exampleObject)$step5OptimizeLinks) # # ## See the types of data that were saved by the step 6 function # names( # metadata( # exampleObject # )$step6DNAMethylationSitesPerGeneTabulation # ) ## ----------------------------------------------------------------------------- ## Create an example GRanges object representing putative enhancer regions for ## BRCA using all available enhancer-relevant BRCA datasets present in the ## TENET.ExperimentHub package. These datasets include consensus enhancer ## histone mark and open chromatin datasets from a wide variety of tissue and ## cell types, enhancer histone mark and open chromatin datasets from a ## variety of BRCA-relevant samples from the Cistrome database and TCGA, as well ## as identified distal enhancer regions from the ENCODE SCREEN project. step1Output <- step1MakeExternalDatasets( consensusEnhancer = TRUE, consensusNDR = TRUE, publicEnhancer = TRUE, publicNDR = TRUE, cancerType = "BRCA", ENCODEdELS = TRUE ) ## View the first regions in the created GRanges object head(step1Output) ## ----------------------------------------------------------------------------- ## Identify differentially methylated RE DNA methylation sites using the ## step2GetDifferentiallyMethylatedSites function, using the ## exampleTENETMultiAssayExperiment object loaded previously from the ## TENET.ExperimentHub package as a reference, and the GRanges object that was ## just created using the step1MakeExternalDatasets function. At least 5 case ## samples in the dataset are required to show methylation levels above/below ## the hyper/hypomethylation cutoff for a given RE DNA methylation site to be ## potentially considered differentially methylated. ## All transcription start sites (TSS) included in the rowRanges of the ## elementMetadata of the TENETMultiAssayExperiment object will be considered ## when selecting enhancer DNA methylation sites (which must be at least 1500 ## bp from these TSS). exampleObject <- step2GetDifferentiallyMethylatedSites( TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment, regulatoryElementGRanges = step1Output, minCaseCount = 5 ) ## See the data that were saved by the step 2 function names(metadata(exampleObject)$step2GetDifferentiallyMethylatedSites) ## Since cutoffs were set automatically by the step 2 function in this case, ## we can see what they are set to, using the hypomethylation cutoff as an ## example. metadata( exampleObject )$step2GetDifferentiallyMethylatedSites$hypomethCutoff ## The identities of all identified RE DNA methylation sites, as well as the ## methylated, unmethylated, and most importantly, hyper- and hypomethylated ## RE DNA methylation sites are also recorded in the siteIdentitiesList. To ## demonstrate this, view the first hypomethylated RE DNA methylation sites ## that were identified. head( metadata( exampleObject )$step2GetDifferentiallyMethylatedSites$siteIdentitiesList$ hypomethylatedSites ) ## ----------------------------------------------------------------------------- ## Identify significant Z-scores and initial RE DNA methylation site-gene links ## using the exampleTENETMultiAssayExperiment with results from the ## step2GetDifferentiallyMethylatedSites function. For this analysis, we will ## use the one-sample Z-score function, consider only TFs, rather than all ## genes, and save only significant Z-scores, to cut down on computational time ## and reduce the size of the returned MultiAssayExperiment object. Two CPU ## cores will be used if they exist (except on Windows where the ## parallel::mclapply function is not supported). exampleObject <- step3GetAnalysisZScores( TENETMultiAssayExperiment = exampleObject, pValue = 0.05, TFOnly = TRUE, zScoreCalculation = "oneSample", hypermethAnalysis = TRUE, hypomethAnalysis = TRUE, includeControl = FALSE, sparseResults = TRUE, coreCount = min( parallel::detectCores(), ifelse(.Platform$OS.type != "windows", 2, 1) ) ) ## See the data that were saved by the step 3 function. They are subdivided into ## hypermeth and/or hypometh results based on function options. names( metadata( exampleObject )$step3GetAnalysisZScores ) ## Since the sparseResults argument was set to TRUE, only ## significant Z-scores are saved, and since the TFOnly argument was also set ## to TRUE, only TF genes were analyzed. ## View the significant Z scores for the first TF genes with links to ## hypomethylated RE DNA methylation sites. head( metadata( exampleObject )$step3GetAnalysisZScores$hypomethResults ) ## ----------------------------------------------------------------------------- ## Get the 25 (if present) most significant links per RE DNA methylation site ## identified by the step3GetAnalysisZScores function exampleObject <- step4SelectMostSignificantLinksPerDNAMethylationSite( TENETMultiAssayExperiment = exampleObject, hypermethGplusAnalysis = TRUE, hypomethGplusAnalysis = TRUE, linksPerREDNAMethylationSiteMaximum = 25 ) ## See the data that were saved by the step 4 function. They are subdivided into ## hypermeth and/or hypometh results based on function options. names( metadata(exampleObject)$step4SelectMostSignificantLinksPerDNAMethylationSite ) ## View the results for the most significant links to the hypomethylated RE ## DNA methylation sites head( metadata( exampleObject )$step4SelectMostSignificantLinksPerDNAMethylationSite$hypomethGplusResults ) ## ----------------------------------------------------------------------------- ## Identify final optimized RE DNA methylation site-gene links exampleObject <- step5OptimizeLinks( TENETMultiAssayExperiment = exampleObject, hypermethGplusAnalysis = TRUE, hypomethGplusAnalysis = TRUE, expressionPvalCutoff = 0.05 ) ## See the data that were saved by the step 5 function. They are again ## subdivided into hypermeth and/or hypometh results based on function options. names(metadata(exampleObject)$step5OptimizeLinks) ## Check the results, which include various metrics used to priortize the ## optimized final RE DNA methylation site-gene links. ## This is a subsection of the data frame detailing all the hypomethylated RE ## DNA methylation site-gene links as an example. head( metadata( exampleObject )$step5OptimizeLinks$hypomethGplusResults ) ## ----------------------------------------------------------------------------- ## Prioritize the top genes by adding up the number of RE DNA methylation sites ## linked to each of the genes exampleObject <- step6DNAMethylationSitesPerGeneTabulation( TENETMultiAssayExperiment = exampleObject ) ## See the data that were saved by the step 6 function. They are again ## subdivided into hypermeth and/or hypometh results based on function options. names( metadata( exampleObject )$step6DNAMethylationSitesPerGeneTabulation ) ## Check the results, which include a count of the RE DNA methylation sites per ## gene, and is organized by decreasing RE DNA methylation site count. ## This is a subsection of the data frame detailing the number of hypomethylated ## RE DNA methylation site links to the top TFs. head( metadata( exampleObject )$step6DNAMethylationSitesPerGeneTabulation$hypomethGplusResults ) ## ----eval = FALSE------------------------------------------------------------- # ## Download a TCGA BRCA dataset with log2-normalized # ## FPKM-UQ expression values from tumor and adjacent normal tissue samples # ## with matching expression and methylation data and keeping only one tumor # ## sample from each patient. Additionally, survival data will be combined # ## from three clinical datasets downloaded by TCGAbiolinks. Raw data files # ## will be saved to the working directory, and the processed dataset will # ## be returned as a variable. # TCGADataset <- TCGADownloader( # rawDataDownloadDirectory = ".", # TCGAStudyAbbreviation = "BRCA", # RNASeqWorkflow = "STAR - FPKM-UQ", # RNASeqLog2Normalization = TRUE, # matchingExpAndMetSamples = TRUE, # clinicalSurvivalData = "combined", # outputFile = NA # ) ## ----eval = FALSE------------------------------------------------------------- # ## Cache all online datasets required by optional TENET features. # ## This function takes no arguments and returns NULL. # TENETCacheAllData() ## ----------------------------------------------------------------------------- ## Load the exampleTENETClinicalDataFrame object from the TENET.ExperimentHub ## package. It contains copy number variation (CNV), somatic mutation (SM), ## and purity data for the top 10 TFs by linked hypomethylated RE ## DNA methylation sites in the exampleTENETMultiAssayExperiment object. exampleTENETClinicalDataFrame <- TENET.ExperimentHub::exampleTENETClinicalDataFrame() CNVData <- subset(exampleTENETClinicalDataFrame, select = grepl("_CNV$", colnames(exampleTENETClinicalDataFrame)) ) SMData <- subset(exampleTENETClinicalDataFrame, select = grepl("_SM$", colnames(exampleTENETClinicalDataFrame)) ) purityData <- subset(exampleTENETClinicalDataFrame, select = "purity") ## ----------------------------------------------------------------------------- ## Show the CNV data for the first 4 TFs head(CNVData[, 1:4]) ## ----------------------------------------------------------------------------- ## Show the SM data for the first 4 TFs head(SMData[, 1:4]) ## ----------------------------------------------------------------------------- ## Show the first few rows of the purity data head(purityData) ## ----------------------------------------------------------------------------- ## Create complex scatterplots using the previously acquired data. ## Since we performed analyses only using TFs in the step 3 function, the ## top genes are all TFs, so a message that separate output for TFs will be ## skipped is displayed. exampleObject <- step7ExpressionVsDNAMethylationScatterplots( TENETMultiAssayExperiment = exampleObject, hypermethGplusAnalysis = FALSE, hypomethGplusAnalysis = TRUE, purityData = purityData, SMData = SMData, CNVData = CNVData, simpleOrComplex = "complex", topGeneNumber = 10 ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7ExpressionVsDNAMethylationScatterplots list. ## For each analysis type, results are included in sub-lists, each of which ## contains results for topGenes and topTFs, unless the top genes are ## all TFs, in which case the separate top TFs output is skipped. names( metadata( exampleObject )$step7ExpressionVsDNAMethylationScatterplots$hypomethGplusResults$topGenes ) ## For each gene, scatterplots are generated showing the expression of that ## gene and the methylation of each RE DNA methylation site linked to it for ## the given analysis type. head( names( metadata( exampleObject )$step7ExpressionVsDNAMethylationScatterplots$hypomethGplusResults$ topGenes$ENSG00000124664 ) ) ## ----fig.width=10, fig.height=7----------------------------------------------- metadata( exampleObject )$step7ExpressionVsDNAMethylationScatterplots$hypomethGplusResults$ topGenes$ENSG00000124664$cg25731211 ## ----fig.width=10, fig.height=7----------------------------------------------- ## Run the step7LinkedDNAMethylationSiteCountHistograms function. ## Since we performed analyses only using TFs in the step 3 function, the ## top genes are all TFs, so a message that separate output for ## TFs will be skipped is displayed. exampleObject <- step7LinkedDNAMethylationSiteCountHistograms( TENETMultiAssayExperiment = exampleObject, hypomethGplusAnalysis = TRUE, hypermethGplusAnalysis = FALSE ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7LinkedDNAMethylationSiteCountHistograms list. ## For each analysis type, histograms are included in sub-lists, each of which ## contains results for topGenes and topTFs, unless the top genes are all TFs, ## in which case the separate top TFs output is skipped. ## Display the histogram. Note the relatively small number of top TF genes with ## larger numbers of linked RE DNA methylation sites. metadata( exampleObject )$step7LinkedDNAMethylationSiteCountHistograms$hypomethGplusResults$topGenes ## ----------------------------------------------------------------------------- ## View the first few available motifs for the FOXM1 TF head(names(MotifDb::query(MotifDb::MotifDb, "FOXA1"))) ## View the first few available motifs for the ESR1 TF head(names(MotifDb::query(MotifDb::MotifDb, "ESR1"))) ## ----------------------------------------------------------------------------- ## The human HOCOMOCO v11 core motif is the 3rd listed for FOXA1, and 4th for ## ESR1 TFMotifList <- list( "FOXA1" = MotifDb::query(MotifDb::MotifDb, "FOXA1")[[3]], "ESR1" = MotifDb::query(MotifDb::MotifDb, "ESR1")[[4]] ) TFMotifList ## ----eval = FALSE------------------------------------------------------------- # exampleObject <- step7LinkedDNAMethylationSitesMotifSearching( # TENETMultiAssayExperiment = exampleObject, # TFMotifList = TFMotifList, # matchPWMMinScore = "80%" # ) # # ## For each analysis type and TF, a seqLogo diagram of the chosen PWM and two # ## data frames with information on the found motifs in the vicinity of RE # ## DNA methylation sites, and total motifs found per RE DNA methylation site # ## linked to those TFs, respectively, are saved in the metadata of the returned # ## MultiAssayExperiment object under the # ## step7LinkedDNAMethylationSitesMotifSearching list # names( # metadata( # exampleObject # )$step7LinkedDNAMethylationSitesMotifSearching$hypomethGplusResults$FOXA1 # ) # # ## View the motif occurrences near hypomethylated RE DNA methylation sites # ## linked to the FOXA1 TF # head( # metadata( # exampleObject # )$step7LinkedDNAMethylationSitesMotifSearching$hypomethGplusResults$ # FOXA1$DNAMethylationSiteMotifOccurrences # ) # # ## Also see the total number of motifs found in the vicinity of each RE DNA # ## methylation site # head( # metadata( # exampleObject # )$step7LinkedDNAMethylationSitesMotifSearching$hypomethGplusResults$ # FOXA1$totalMotifOccurrencesPerREDNAMethylationSite # ) ## ----------------------------------------------------------------------------- head( metadata( exampleObject )$step6DNAMethylationSitesPerGeneTabulation$hypomethGplusResults ) ## ----------------------------------------------------------------------------- DNAMethylationSites <- subset( metadata( exampleObject )$step5OptimizeLinks$hypomethGplusResults, geneID == "ENSG00000129514" )$DNAMethylationSiteID head(DNAMethylationSites) ## ----------------------------------------------------------------------------- exampleObject <- step7SelectedDNAMethylationSitesCaseVsControlBoxplots( TENETMultiAssayExperiment = exampleObject, DNAMethylationSites = DNAMethylationSites ) ## Each plot is saved under the ID of the RE DNA methylation site and included ## in the metadata of the returned MultiAssayExperiment object under the ## step7SelectedDNAMethylationSitesCaseVsControlBoxplots list head(names( metadata( exampleObject )$step7SelectedDNAMethylationSitesCaseVsControlBoxplots )) ## ----fig.width=10, fig.height=7----------------------------------------------- ## Note: There may be a warning for "rows containing non-finite values" if there ## are any samples lacking methylation data for the RE DNA methylation site. metadata( exampleObject )$step7SelectedDNAMethylationSitesCaseVsControlBoxplots$cg13776095 ## ----------------------------------------------------------------------------- ## Calculate potential link status for case samples for hypomethylated RE DNA ## methylation site-gene links exampleObject <- step7StatesForLinks( TENETMultiAssayExperiment = exampleObject, hypomethGplusAnalysis = TRUE, hypermethGplusAnalysis = FALSE ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7StatesForLinks list. ## A single data frame is returned for each analysis type, with the combined RE ## DNA methylation site ID and gene ID from each link in the rows, and each case ## sample in the columns. dim( metadata( exampleObject )$step7StatesForLinks$hypomethGplusResults ) ## Show the results for the first 6 case samples. 1 indicates that a given case ## sample might harbor a given RE DNA methylation site-gene link, and 0 ## indicates that it does not. NA values are shown for samples that lack ## methylation data for the site or expression data for the gene. head( metadata( exampleObject )$step7StatesForLinks$hypomethGplusResults[ , seq_len(6) ] ) ## ----------------------------------------------------------------------------- ## Run the step7TopGenesCaseVsControlExpressionBoxplots function. ## Since we performed analyses only using TFs in the step 3 function, the ## top genes are all TFs, so a message that separate output for ## TFs will be skipped is displayed. exampleObject <- step7TopGenesCaseVsControlExpressionBoxplots( TENETMultiAssayExperiment = exampleObject, hypomethGplusAnalysis = TRUE, hypermethGplusAnalysis = FALSE, topGeneNumber = 10 ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7TopGenesCaseVsControlExpressionBoxplots list. ## For each analysis type, results are included in sub-lists, each of which ## contains lists with results for topGenes and topTFs, unless the ## top genes are all TFs, in which case the separate top TFs output is skipped. ## Each boxplot is saved under its gene ID. names( metadata( exampleObject )$step7TopGenesCaseVsControlExpressionBoxplots$ hypomethGplusResults$topGenes ) ## ----fig.width=10, fig.height=7----------------------------------------------- ## Note: There may be a warning for "rows containing non-finite values" if there ## are any samples lacking expression data for the given gene. metadata( exampleObject )$step7TopGenesCaseVsControlExpressionBoxplots$hypomethGplusResults$ topGenes$ENSG00000107485 ## ----fig.width=8, fig.height=8, out.width="80%", out.height="80%"------------- ## Run the step7TopGenesCircosPlots function exampleObject <- step7TopGenesCircosPlots( TENETMultiAssayExperiment = exampleObject, hypermethGplusAnalysis = FALSE, hypomethGplusAnalysis = TRUE, topGeneNumber = 10 ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7TopGenesCircosPlots list. ## For each analysis type, results are included in sub-lists, each ## of which contains lists with results for topGenes and topTFs, unless the ## top genes are all TFs, in which case the separate top TFs output is skipped. ## Each Circos plot is saved under its gene ID. ## Note: Since we performed analyses only using TFs in the step 3 function, ## the top genes are all TFs, so topTFs will be NA here. names( metadata( exampleObject )$step7TopGenesCircosPlots$hypomethGplusResults$topGenes ) ## Display an example Circos plot for ENSG00000091831 (ESR1). ## Note: Plots may take some time to display. metadata( exampleObject )$step7TopGenesCircosPlots$hypomethGplusResults$topGenes$ENSG00000091831 ## ----fig.width=10, fig.height=10.5, out.width="80%", out.height="80%"--------- ## Run the step7TopGenesExpressionCorrelationHeatmaps function exampleObject <- step7TopGenesExpressionCorrelationHeatmaps( TENETMultiAssayExperiment = exampleObject, hypermethGplusAnalysis = FALSE, hypomethGplusAnalysis = TRUE, topGeneNumber = 10 ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7TopGenesExpressionCorrelationHeatmaps list. ## For each analysis type, results are included in sub-lists, each ## of which contains lists with results for the topGenes and topTFs, unless ## the top genes are all TFs, in which case the separate top TFs output is ## skipped. For each of these, the heatmap is generated along with a data frame ## with the correlation values displayed in the heatmap. ## Display the mirrored heatmap. ## Note: Since we performed analyses only using TFs in the step 3 function, ## the top genes are all TFs, so topTFs will be NA here. metadata( exampleObject )$step7TopGenesExpressionCorrelationHeatmaps$hypomethGplusResults$ topGenes$heatmap ## Display the data frame with correlation values head( metadata( exampleObject )$step7TopGenesExpressionCorrelationHeatmaps$hypomethGplusResults$ topGenes$correlationMatrix ) ## ----------------------------------------------------------------------------- ## Run the step7TopGenesDNAMethylationHeatmaps function exampleObject <- step7TopGenesDNAMethylationHeatmaps( TENETMultiAssayExperiment = exampleObject, hypermethGplusAnalysis = FALSE, hypomethGplusAnalysis = TRUE, topGeneNumber = 10 ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7TopGenesDNAMethylationHeatmaps list. ## For each analysis type, results are included in sub-lists, each ## of which contains heatmaps for the topGenes and topTFs, unless the ## top genes are all TFs, in which case the separate top TFs output is skipped. ## Note: Since we performed analyses only using TFs in the step 3 function, ## the top genes are all TFs, so topTFs will be NA here. names(metadata( exampleObject )$step7TopGenesDNAMethylationHeatmaps$hypomethGplusResults) ## ----fig.width=20, fig.height=14, out.width="80%", out.height="80%"----------- ## Run the step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps function exampleObject <- step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps( TENETMultiAssayExperiment = exampleObject, hypermethGplusAnalysis = FALSE, hypomethGplusAnalysis = TRUE, topGeneNumber = 10 ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps ## list. For each analysis type, results are included in sub-lists, each of ## which contains lists with results for the topGenes and topTFs, unless ## the top genes are all top TFs, in which case the separate top TFs output is ## skipped. For each of these, the heatmap is generated along with a data frame ## with the correlation values displayed in the heatmap. ## Display the binary heatmap. ## Note: Since we performed analyses only using TFs in the step 3 function, ## the top genes are all TFs, so topTFs will be NA here. metadata( exampleObject )$step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps$ hypomethGplusResults$topGenes$heatmap ## Display a subset of the data frame noting the presence/absence of links. ## 1 indicates a link, while 0 indicates no link. head( metadata( exampleObject )$step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps$ hypomethGplusResults$topGenes$linkTable[ , seq_len(6) ] ) ## ----------------------------------------------------------------------------- ## Load the exampleTENETClinicalDataFrame object from the TENET.ExperimentHub ## package. It contains the vital_status (survival status) and time (survival ## time) data for each sample in the exampleTENETMultiAssayExperiment exampleTENETClinicalDataFrame <- TENET.ExperimentHub::exampleTENETClinicalDataFrame() vitalStatusData <- subset( exampleTENETClinicalDataFrame, select = "vital_status" ) survivalTimeData <- subset(exampleTENETClinicalDataFrame, select = "time") ## ----------------------------------------------------------------------------- ## Show the vital status data head(vitalStatusData) ## Show the survival time data head(survivalTimeData) ## ----fig.width=10, fig.height=7----------------------------------------------- ## Since we performed analyses only using TFs in the step 3 function, the ## top genes are all TFs, so a message that separate output for ## TFs will be skipped is displayed. exampleObject <- step7TopGenesSurvival( TENETMultiAssayExperiment = exampleObject, hypermethGplusAnalysis = FALSE, hypomethGplusAnalysis = TRUE, vitalStatusData = vitalStatusData, survivalTimeData = survivalTimeData, topGeneNumber = 10, generatePlots = TRUE ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7TopGenesSurvival list. ## For each analysis type, results are included in sub-lists, each ## of which contains lists with results for topGenes and topTFs, unless the ## top genes are all TFs, in which case the separate top TFs output is skipped. ## Each includes two data frames with the survival statistics for Kaplan-Meier ## and Cox regression survival analyses, and if the generatePlots ## argument is TRUE, topGenesSurvivalPlots and topMethylationSitesSurvivalPlots ## lists are included which contain the Kaplan-Meier survival plots for the top ## genes and each of their unique linked RE DNA methylation sites, respectively. names( metadata( exampleObject )$step7TopGenesSurvival$hypomethGplusResults$topGenes ) ## The topGenesSurvivalStats and topMethylationSitesSurvivalStats variables are ## data frames containing survival statistics. ## Note: A significant amount of data is output, so selected values are shown ## here. head( metadata( exampleObject )$step7TopGenesSurvival$hypomethGplusResults$ topGenes$topGenesSurvivalStats[ , c(1:2, 15, 17, 22:24, 26) ] ) head( metadata( exampleObject )$step7TopGenesSurvival$hypomethGplusResults$ topGenes$topMethylationSitesSurvivalStats[ , c(1, 24, 26, 31:33, 35) ] ) ## Show the names of the gene survival plots names( metadata( exampleObject )$step7TopGenesSurvival$hypomethGplusResults$ topGenes$topGenesSurvivalPlots ) ## Plot the Kaplan-Meier survival plot for GATA3 (ENSG00000107485) as an example metadata( exampleObject )$step7TopGenesSurvival$hypomethGplusResults$ topGenes$topGenesSurvivalPlots$ENSG00000107485 ## ----------------------------------------------------------------------------- ## Load the example TAD dataset from the TENET.ExperimentHub package exampleTADRegions <- TENET.ExperimentHub::exampleTENETTADRegions() ## TAD files for this function must include the chromosome of each TAD region ## in the first column, and the start and end positions of each in the second ## and third columns respectively. Additional columns can be included but ## are not considered in this function. class(exampleTADRegions) head(exampleTADRegions) ## ----------------------------------------------------------------------------- ## Use the example TAD object to perform TAD overlapping. ## Since we performed analyses only using TFs in the step 3 function, the ## top genes are all TFs, so a message that separate output for ## TFs will be skipped is displayed. exampleObject <- step7TopGenesTADTables( TENETMultiAssayExperiment = exampleObject, TADFiles = exampleTADRegions, hypomethGplusAnalysis = TRUE, hypermethGplusAnalysis = FALSE, topGeneNumber = 10 ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7TopGenesTADTables list. ## For each analysis type, results are included in sub-lists, each ## of which contains results in the form of a data frame for topGenes and ## topTFs, unless the top genes are all TFs, in which case ## the separate top TFs output is skipped. class( metadata( exampleObject )$step7TopGenesTADTables$hypomethGplusResults$topGenes ) ## Display results for selected hypomethylated RE DNA methylation sites. A ## variety of data are included for each RE DNA methylation site, including its ## location, the top genes it is linked to, and information on the count and ## identities of other genes found within the same TAD of the RE DNA methylation ## site. Note: A significant amount of data is output, so selected values are ## shown here. head( metadata( exampleObject )$step7TopGenesTADTables$hypomethGplusResults$topGenes[ c(1:6, 16:17) ] ) ## ----------------------------------------------------------------------------- ## Get the path to a temporary directory in which to save the output interact ## file tempDirectory <- tempdir() ## Run the step7TopGenesUCSCBedFiles function. ## Since we performed analyses only using TFs in the step 3 function, the ## top genes are all TFs, so a message that separate output for ## TFs will be skipped is displayed. filePaths <- step7TopGenesUCSCBedFiles( TENETMultiAssayExperiment = exampleObject, outputDirectory = tempDirectory, hypomethGplusAnalysis = TRUE, hypermethGplusAnalysis = FALSE, topGeneNumber = 10 ) ## Unlike other functions, this function does not return the ## given TENETMultiAssayExperiment with additional information generated by the ## function in its metadata, but rather returns an object with information on ## where to find the created interact file. ## Get the path of the output file for top genes with hypomethylated G+ links bedPath <- filePaths$hypoGplus$topGenes ## Read the first few lines of the file. ## The file largely contains information about each RE DNA methylation site-gene ## link, with additional information in the first line which allows it to be ## loaded by the UCSC Genome Browser. cat(head(readLines(bedPath)), sep = "\n") ## Delete the output file, since this is just an example. ## Do not use this line if running on real data, as it will delete your created ## file. invisible(file.remove(unlist(bedPath))) ## ----------------------------------------------------------------------------- ## Load the example peak dataset from the TENET.ExperimentHub package examplePeakFile <- TENET.ExperimentHub::exampleTENETPeakRegions() ## Peak files for this function must include the chromosome of each peak region ## in the first column, and the start and end positions of each peak in the ## second and third columns respectively. Additional columns can be included, ## but are not considered in this function. class(examplePeakFile) head(examplePeakFile) ## ----------------------------------------------------------------------------- ## Run the step7TopGenesUserPeakOverlap function. ## Since we performed analyses only using TFs in the step 3 function, the ## top genes are all TFs, so a message that separate output for ## TFs will be skipped is displayed. exampleObject <- step7TopGenesUserPeakOverlap( TENETMultiAssayExperiment = exampleObject, peakData = examplePeakFile, hypermethGplusAnalysis = FALSE, hypomethGplusAnalysis = TRUE, topGeneNumber = 10, distanceFromREDNAMethylationSites = 100 ) ## Results are included in the metadata of the returned MultiAssayExperiment ## object under the step7TopGenesSurvival list. ## For each analysis type, data frames with peak overlap information are ## included in sub-lists, with each data frame saved under the names ## topGenes and topTFs, unless the top genes are all TFs, in which case ## the separate top TFs output is skipped. ## Display the data frame of results for RE DNA methylation sites linked to the ## top TFs. A variety of data are included for each RE DNA methylation site, ## including its location, the coordinates of its search window, the top genes ## it is linked to, and whether it was found within the specified distance to ## any peak in each of the peak files. head( metadata( exampleObject )$step7TopGenesUserPeakOverlap$hypomethGplusResults$topGenes ) ## ----------------------------------------------------------------------------- ## Load the humanTranscriptionFactorList dataset data("humanTranscriptionFactorList", package = "TENET") ## Display the names of the first few TFs on the list head(humanTranscriptionFactorList) ## ----------------------------------------------------------------------------- sessionInfo()