% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step2GetDifferentiallyMethylatedSites.R
\name{step2GetDifferentiallyMethylatedSites}
\alias{step2GetDifferentiallyMethylatedSites}
\title{Identify differentially methylated RE DNA methylation sites}
\usage{
step2GetDifferentiallyMethylatedSites(
  TENETMultiAssayExperiment,
  regulatoryElementGRanges = NA,
  geneAnnotationDataset = NA,
  DNAMethylationArray = NA,
  assessPromoter = FALSE,
  TSSDist = 1500,
  purityData = NA,
  methCutoff = NA,
  hypomethCutoff = NA,
  hypermethCutoff = NA,
  unmethCutoff = NA,
  methUnmethProportionOffset = 0.2,
  hypomethHypermethProportionOffset = 0.1,
  minCaseCount,
  cgDNAMethylationSitesOnly = TRUE
)
}
\arguments{
\item{TENETMultiAssayExperiment}{Specify a MultiAssayExperiment object
containing expression and methylation SummarizedExperiment objects,
such as one created by the TCGADownloader function. Coordinates for
genes and DNA methylation sites must be included in the rowRanges
of their respective SummarizedExperiment objects and should be annotated
to the same genome build as the regions given in the
regulatoryElementGRanges object.}

\item{regulatoryElementGRanges}{Specify a GRanges object containing genomic
regions representing regulatory elements of interest to the user.
Coordinates for the regulatory element regions should be annotated to the
same genome build as the gene and DNA methylation site coordinates given
in the TENETMultiAssayExperiment object. If this argument is set to NA or
not specified, this function will use all DNA methylation sites
representing regulatory elements of interest as defined by the
assessPromoter and TSSDist arguments. Defaults to NA.}

\item{geneAnnotationDataset}{Specify a gene annotation dataset which is
used to identify transcription start sites in order to find DNA methylation
sites within regulatory elements of interest (promoters or enhancers) in
conjunction with the settings of the assessPromoter and TSSDist arguments.
The dataset will be filtered to only genes and transcripts. The argument must
be either a GRanges object (such as one imported via \code{rtracklayer::import})
or a path to a GFF3 or GTF file. Both GENCODE and Ensembl annotations are
supported. Other annotation datasets may work, but have not been tested.
See the "Input data" section of the vignette for information on the required
dataset format.
Specify NA to use the start coordinates of all entries in the elementMetadata
of the rowRanges of the "expression" SummarizedExperiment object within the
TENETMultiAssayExperiment object, in which case no filtering will be done
and all entries will be assumed to represent transcripts. Defaults to NA.}

\item{DNAMethylationArray}{Specify the name of a DNA methylation probe array
supported by the sesameData package (see
\code{?sesameData::sesameData_getManifestGRanges}). If an array is specified, RE
DNA methylation sites and their locations in that array's manifest are
cross-referenced with RE DNA methylation site IDs included in the rownames
of the methylation dataset provided in the "methylation"
SummarizedExperiment object within the TENETMultiAssayExperiment object, and
only those overlapping will be considered for analysis. If set to NA, all RE
DNA methylation sites with locations listed in the rowRanges of the
"methylation" SummarizedExperiment object are used. Defaults to NA.}

\item{assessPromoter}{Set to TRUE to identify DNA methylation sites
that mark promoter regions or FALSE to identify distal enhancer regions.
Defaults to FALSE.}

\item{TSSDist}{Specify a positive integer distance in base pairs to any
transcription start site (see \code{geneAnnotationDataset}) within which DNA
methylation sites are considered promoter DNA methylation sites. DNA
methylation sites outside this distance from any transcription start site
will be considered enhancer methylation sites. Defaults to 1500.}

\item{purityData}{Specify a SummarizedExperiment object which contains DNA
methylation datasets collected from potential cell types which might affect
the purity of the patient samples contained in the
TENETMultiAssayExperiment. The coordinates for DNA methylation sites in
this dataset should be included in the rowRanges of the purityData
SummarizedExperiment object. Additionally, the DNA methylation site IDs
in the purityData SummarizedExperiment object should overlap with DNA
methylation sites present in the TENETMultiAssayExperiment and only those
that do overlap will be considered for analysis. Defaults to NA.}

\item{methCutoff}{Specify a number from 0 to 1 to be the beta-value cutoff
for methylated RE DNA methylation sites. If unspecified or NA, an algorithm
will be used to find the optimal cutoff value.}

\item{hypomethCutoff}{Specify a number from 0 to 1 to be the beta-value
cutoff for hypomethylated RE DNA methylation sites. Should be set lower than
the methCutoff. If unspecified or NA, an algorithm will be used to find the
optimal cutoff value.}

\item{hypermethCutoff}{Specify a number from 0 to 1 to be the beta-value
cutoff for hypermethylated RE DNA methylation sites. Should be set higher
than the unmethCutoff. If unspecified or NA, an algorithm will be used to
find the optimal cutoff value.}

\item{unmethCutoff}{Specify a number from 0 to 1 to be the beta-value cutoff
for unmethylated RE DNA methylation sites. If unspecified or NA, an
algorithm will be used to find the optimal cutoff value.}

\item{methUnmethProportionOffset}{Specify a number from 0 to 1 indicating a
proportion of the size of the region between the first and last
local maxima in the density plot of the mean methylation values of the
RE DNA methylation sites in the control samples. This proportion will be
added to or subtracted from the position of these local maxima to set the
unmethylation and methylation cutoffs, respectively, if they are not defined
by the user. Ideally should not exceed 0.5. Defaults to 0.2.}

\item{hypomethHypermethProportionOffset}{Specify a number from 0 to 1
indicating a proportion of the size of the region between the first and
last local maxima in the density plot of the mean methylation values
of the RE DNA methylation sites in the case samples. This proportion will be
added to or subtracted from the calculated unmethylation and methylation
cutoffs to set the hypermethylation and hypomethylation cutoffs,
respectively, if they are not defined by the user. Ideally should not exceed
0.5. Defaults to 0.1.}

\item{minCaseCount}{Specify the minimum number of case samples to be
considered for the hyper- and/or hypomethylated groups. Must be a positive
integer less than the total number of case samples.}

\item{cgDNAMethylationSitesOnly}{Set to TRUE to include only RE DNA
methylation sites with IDs that start with "cg". TRUE means that RE DNA
methylation sites whose IDs do not start with "cg" will be removed from
TENET analyses. Defaults to TRUE.}
}
\value{
Returns the MultiAssayExperiment object given as the
TENETMultiAssayExperiment argument with an additional list named
"step2GetDifferentiallyMethylatedSites" in its metadata containing the output
of this function. These data include the set of calculated cutoff values,
the identities and counts of the classified RE DNA methylation sites, as
well as plots of the mean methylation distributions of the identified
regulatory element DNA methylation sites in the case and control samples and
the set cutoff values. \strong{Note:} If assessPromoter is TRUE, two distribution
plots are saved, one using all promoter DNA methylation sites, and one using
only promoter DNA methylation sites which are identified to overlap REs.
}
\description{
This function identifies DNA methylation sites that mark putative regulatory
elements (REs), including enhancer and promoter regions. These are sites
that lie within regions from a user-supplied GRanges object, such as one
created by the \code{step1MakeExternalDatasets} function, and which are located at
a user-specified distance relative to the transcription start sites (TSS)
listed in either the elementMetadata of the rowRanges of the "expression"
SummarizedExperiment in the TENETMultiAssayExperiment object, or the
selected \code{geneAnnotationDataset} (which will be filtered to only genes and
transcripts). After identifying DNA methylation sites representing the
specified REs, the function classifies the RE DNA methylation sites as
methylated, unmethylated, hypermethylated, or hypomethylated based on their
differential methylation between the control and case samples supplied by
the user, defined by cutoff values which are either automatically determined
based on the mean methylation densities of the identified RE DNA methylation
sites, or manually set by the user. \strong{Note:} Using the algorithm to set
cutoffs is recommended for use with DNA methylation array data, and may not
work for whole-genome DNA methylation data.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
## This example uses datasets provided in the TENET.ExperimentHub package to
## perform an example analysis, considering RE DNA methylation sites in
## potential enhancer elements located over 1500 bp from transcription
## start sites listed for genes and transcripts in the GENCODE v36 human
## genome annotations, using a minimum case sample count of 5, and otherwise
## using default settings.

## Load the example TENET MultiAssayExperiment object, and the example
## GRanges object created by the TENET step 1 function, from the
## TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()
exampleStep1MakeExternalDatasetsGRangesObject <-
    TENET.ExperimentHub::exampleTENETStep1MakeExternalDatasetsGRanges()

## Use the example datasets to identify differentially methylated
## RE DNA methylation sites. The regulatoryElementGRanges argument is given
## the GRanges object loaded above.
returnValue <- step2GetDifferentiallyMethylatedSites(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    regulatoryElementGRanges =
        exampleStep1MakeExternalDatasetsGRangesObject,
    minCaseCount = 5
)

## This example uses the same datasets, this time analyzing DNA methylation
## sites in promoter elements, considering all RE DNA methylation sites
## found within 2000 bp of only the transcription start sites provided in the
## MultiAssayExperiment. All methylation cutoffs are manually specified, the
## minimum case sample count is set to 10, and all RE DNA methylation sites
## are considered regardless of whether their IDs begin with "cg".

## Load the example TENET MultiAssayExperiment object, and the example
## GRanges object created by the TENET step 1 function, from the
## TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()
exampleStep1MakeExternalDatasetsGRangesObject <-
    TENET.ExperimentHub::exampleTENETStep1MakeExternalDatasetsGRanges()

## Use the example datasets to identify differentially methylated
## RE DNA methylation sites. The regulatoryElementGRanges argument is given
## the GRanges object loaded above, and geneAnnotationDataset = NA selects
## the transcription start sites stored in the MultiAssayExperiment.
returnValue <- step2GetDifferentiallyMethylatedSites(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    regulatoryElementGRanges =
        exampleStep1MakeExternalDatasetsGRangesObject,
    geneAnnotationDataset = NA,
    assessPromoter = TRUE,
    TSSDist = 2000,
    methCutoff = 0.8,
    hypomethCutoff = 0.7,
    hypermethCutoff = 0.3,
    unmethCutoff = 0.2,
    minCaseCount = 10,
    cgDNAMethylationSitesOnly = FALSE
)
\dontshow{\}) # examplesIf}
}
