% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step7LinkedDNAMethylationSitesMotifSearching.R
\name{step7LinkedDNAMethylationSitesMotifSearching}
\alias{step7LinkedDNAMethylationSitesMotifSearching}
\title{Search for transcription factor motifs in the vicinity of
DNA methylation sites and/or within custom regions defined by the user}
\usage{
step7LinkedDNAMethylationSitesMotifSearching(
  TENETMultiAssayExperiment,
  hypermethGplusAnalysis = TRUE,
  hypomethGplusAnalysis = TRUE,
  DNAMethylationSites = NA,
  distanceFromREDNAMethylationSites = 100,
  GRangesToSearch = NA,
  andStrings = NULL,
  orStrings = NULL,
  notStrings = NULL,
  TFMotifList,
  useOnlyDNAMethylationSitesLinkedToTFs = TRUE,
  geneAnnotationDataset = NA,
  DNAMethylationArray = NA,
  matchPWMMinScore = "75\%",
  coreCount = 1
)
}
\arguments{
\item{TENETMultiAssayExperiment}{Specify a MultiAssayExperiment object
containing expression and methylation SummarizedExperiment objects, such as
one created by the TCGADownloader function. The object's metadata must
contain the results from the \code{step5OptimizeLinks} function if either
\code{hypermethGplusAnalysis} or \code{hypomethGplusAnalysis} is TRUE.}

\item{hypermethGplusAnalysis}{Set to TRUE to search for motifs in the
vicinity of hypermethylated RE DNA methylation sites with at least one linked
TF. \strong{Note:} If \code{useOnlyDNAMethylationSitesLinkedToTFs} is also TRUE, only
RE DNA methylation sites linked to TFs specified via the
\code{TFMotifList} argument will be used. Defaults to TRUE.}

\item{hypomethGplusAnalysis}{Set to TRUE to search for motifs in the
vicinity of hypomethylated RE DNA methylation sites with at least one linked
TF. \strong{Note:} If \code{useOnlyDNAMethylationSitesLinkedToTFs} is also TRUE, only
RE DNA methylation sites linked to TFs specified via the
\code{TFMotifList} argument will be used. Defaults to TRUE.}

\item{DNAMethylationSites}{Supply a vector of DNA methylation site IDs.
Motifs will be searched for in the vicinity of these sites, in addition to
any RE DNA methylation sites selected by the \code{hypermethGplusAnalysis}
and \code{hypomethGplusAnalysis} arguments. If set to NA, no additional DNA
methylation sites will be included in the search. Defaults to NA.}

\item{distanceFromREDNAMethylationSites}{Specify the distance, as a positive
integer number of base pairs, from the DNA methylation sites selected by the
\code{hypermethGplusAnalysis}, \code{hypomethGplusAnalysis}, and
\code{DNAMethylationSites} arguments within which motif searching will be
performed. Defaults to 100.}

\item{GRangesToSearch}{Specify a GRanges object which contains genomic
coordinates of regions within which to search for motifs. The coordinates
should correspond to the human hg38 genome. Any regions included in this
GRanges object will be combined with regions defined by the
\code{hypermethGplusAnalysis}, \code{hypomethGplusAnalysis}, \code{DNAMethylationSites}, and
\code{distanceFromREDNAMethylationSites} arguments. If set to NA, no additional
regions will be included in the motif search. Defaults to NA.}

\item{andStrings}{Specify a vector of values which will be provided to the
\code{andStrings} argument of the \code{query()} function in the MotifDb package, used
to search for motif PWMs. Potential values include species and transcription
factor database names to refine the search. Set to NULL to include no terms
in this search. Defaults to NULL. \strong{Note:} If both \code{andStrings} and
\code{orStrings} are set to NULL, only the PWMs specified by the \code{TFMotifList}
argument will be used.}

\item{orStrings}{Specify a vector of values which will be provided to the
\code{orStrings} argument of the \code{query()} function in the MotifDb package, used
to search for motif PWMs. Potential values include names of specific TFs to
limit the search to. The value "humanTranscriptionFactors" may be specified
to use all TFs identified in 'The Human Transcription Factors' by Lambert et
al. 2018. Set to NULL to include no terms in this search. Defaults to NULL.
\strong{Note:} If both \code{andStrings} and \code{orStrings} are set to NULL, only the PWMs
specified by the \code{TFMotifList} argument will be used.}

\item{notStrings}{Specify a vector of values which will be provided to the
\code{notStrings} argument of the \code{query()} function in the MotifDb package, used
to exclude results from the motif PWM search. The value
"humanTranscriptionFactors" may be specified to use all TFs identified in
'The Human Transcription Factors' by Lambert et al. 2018. Set to NULL to
exclude no terms from this search. Defaults to NULL.}

\item{TFMotifList}{Specify a named list mapping transcription factor gene
names and/or IDs to their respective motif position weight matrix (PWM). The
PWMs should be in the form of a 4xN matrix. PWMs specified in this list are
combined with any TF PWMs retrieved via the MotifDb package using the
\code{andStrings}, \code{orStrings}, and \code{notStrings} arguments. Set to NA to only
include PWMs retrieved by the MotifDb package in the search.}

\item{useOnlyDNAMethylationSitesLinkedToTFs}{If set to TRUE, only those
hypomethylated or hypermethylated RE DNA methylation sites (as selected by
the \code{hypermethGplusAnalysis} and \code{hypomethGplusAnalysis} arguments) which
TENET found to be linked to the TFs in the given \code{TFMotifList} will be
analyzed. To use this functionality, at least one of \code{hypermethGplusAnalysis}
or \code{hypomethGplusAnalysis} must be set to TRUE, \code{DNAMethylationSites},
\code{andStrings}, and \code{orStrings} must be NA, and the name of each PWM in the
list given to \code{TFMotifList} must match the gene name or Ensembl ID of a gene
in the TENETMultiAssayExperiment with RE DNA methylation sites linked to it
for the specified analysis types. Defaults to TRUE.}

\item{geneAnnotationDataset}{Specify a gene annotation dataset which is
used to identify names for genes by their Ensembl IDs. The argument must be
either a GRanges object (such as one imported via \code{rtracklayer::import}) or a
path to a GFF3 or GTF file. Both GENCODE and Ensembl annotations are
supported. Other annotation datasets may work, but have not been tested.
See the "Input data" section of the vignette for information on the required
dataset format.
Specify NA to use the gene names listed in the "geneName" column of the
elementMetadata of the rowRanges of the "expression" SummarizedExperiment
object within the TENETMultiAssayExperiment object. Defaults to NA.}

\item{DNAMethylationArray}{Specify the name of a DNA methylation probe array
supported by the sesameData package (see
\code{?sesameData::sesameData_getManifestGRanges}). If an array is specified, RE
DNA methylation sites and their locations in that array's manifest are
cross-referenced with RE DNA methylation site IDs included in the rownames
of the methylation dataset provided in the "methylation"
SummarizedExperiment object within the TENETMultiAssayExperiment object, and
only those overlapping will be considered for analysis. If set to NA, all RE
DNA methylation sites with locations listed in the rowRanges of the
"methylation" SummarizedExperiment object are used. Defaults to NA.}

\item{matchPWMMinScore}{Specify the \code{min.score} argument passed to the
matchPWM function for motif searching. See \code{?Biostrings::matchPWM} for more
details. Defaults to "75\%".}

\item{coreCount}{Specify the number of CPU cores to use for the analysis.
This value is passed as the \code{mc.cores} argument to \code{mclapply}. See
\code{?parallel::mclapply} for more details. Defaults to 1.}
}
\value{
Returns the MultiAssayExperiment object given as the
TENETMultiAssayExperiment argument with an additional list
named 'step7LinkedDNAMethylationSitesMotifSearching' in its metadata
containing the output of this function. This list includes the object
"DNAMethylationSitesGRanges" containing the regions in which motif searching
was performed, "TFMotifPWMList" containing the TF PWMs searched for,
"TFMotifSeqLogoList" which includes visual sequence logo representations of
these PWMs, the "DNAMethylationSitesMotifOccurrences" data frame, which notes
the location and PWM of each motif found and the region it was found within,
and the "totalMotifOccurrencesPerDNAMethylationSite" data frame, which notes
how many times each PWM listed in the "TFMotifPWMList" was found in each
region in the "DNAMethylationSitesGRanges" object. If
\code{useOnlyDNAMethylationSitesLinkedToTFs} was set to TRUE, an additional data
frame "linkedUniqueDNAMethylationSitesTFOverlap" is included, which notes
which TFs in the "TFMotifPWMList" the hyper- or hypomethylated RE DNA
methylation sites used in the analysis were linked to; otherwise, it will be
NA.
}
\description{
This function takes a user-specified named list of transcription factors
(TFs) and their binding motifs in the form of position weight matrices
(PWMs), and/or search terms to identify additional TF binding motifs. The
function identifies if each motif is found within a user-specified distance
from RE DNA methylation sites in the hyper- and/or hypomethylated G+ analysis
quadrants and/or sites specified by the user, and/or within specified genomic
regions.
}
\details{
\strong{Note:} Using many input motifs or RE DNA methylation sites may cause the
search to take a significant amount of time, so in this case, using multiple
CPU cores is highly recommended.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
## Show available motifs for example TF FOXA1
names(MotifDb::query(MotifDb::MotifDb, "FOXA1"))

## The sequence logos for all input motifs will be included in the output
## of this function. Alternatively, individual motifs can be visualized
## with the seqLogo function from the seqLogo package.
seqLogo::seqLogo(MotifDb::query(MotifDb::MotifDb, "FOXA1")[[3]])

## This example uses the example MultiAssayExperiment provided in the
## TENET.ExperimentHub package to perform motif searching in the vicinity of
## all hyper- and hypomethylated RE DNA methylation sites linked to the
## FOXA1 and ESR1 TF genes. The motifs these TFs bind to will be retrieved
## via the MotifDb package. Gene names and locations, and the locations of RE
## DNA methylation sites, will be retrieved from the rowRanges of the
## 'expression' and 'methylation' SummarizedExperiment objects in the
## example MultiAssayExperiment. Regions within 100 bp of linked RE DNA
## methylation sites will be considered in the search, and a motif similarity
## threshold of 75\% will be used. The analysis will be performed using one
## CPU core.

## Load the example TENET MultiAssayExperiment object
## from the TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Use the example dataset to perform the motif searching
returnValue <- step7LinkedDNAMethylationSitesMotifSearching(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    orStrings = c("FOXA1", "ESR1")
)

## This example is similar, but performs motif searching in the vicinity
## of only hypomethylated RE DNA methylation sites linked to the FOXA1 and
## ESR1 TF genes. Regions within 50 bp of linked RE DNA methylation sites
## will be considered in the search, and a motif similarity threshold of 80\%
## will be used. The analysis will be performed using 8 CPU cores.

## Load the example TENET MultiAssayExperiment object
## from the TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Use the example dataset to perform the motif searching
returnValue <- step7LinkedDNAMethylationSitesMotifSearching(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    orStrings = c("FOXA1", "ESR1"),
    hypermethGplusAnalysis = FALSE,
    distanceFromREDNAMethylationSites = 50,
    matchPWMMinScore = "80\%",
    coreCount = 8
)

## This example demonstrates how to search for motifs in the vicinity of only
## specific DNA methylation sites, regardless of whether they are linked to
## TFs, and how to specify custom motif position weight matrices (PWMs),
## while also including motifs for all human transcription factors in the
## SwissRegulon database accessed by the `MotifDb::query()` function. The
## rest of the options are set to the default values described in the first
## example above.

## Create a list of example PWMs. For the purposes of this example, they
## are retrieved using the MotifDb package, although this functionality is
## intended for user-specified motifs that do not appear in the MotifDb
## database.
exampleTFMotifList <- list(
    "FOXA1" = MotifDb::query(MotifDb::MotifDb, "FOXA1")[[3]],
    "MYBL2" = MotifDb::query(MotifDb::MotifDb, "MYBL2")[[5]]
)

## Load the example TENET MultiAssayExperiment object
## from the TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Use the example dataset to perform the motif searching
returnValue <- step7LinkedDNAMethylationSitesMotifSearching(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    hypermethGplusAnalysis = FALSE,
    hypomethGplusAnalysis = FALSE,
    DNAMethylationSites = c("cg04134755", "cg10216151"),
    andStrings = c("Hsapiens", "SwissRegulon"),
    orStrings = "humanTranscriptionFactors",
    TFMotifList = exampleTFMotifList,
    useOnlyDNAMethylationSitesLinkedToTFs = FALSE
)
\dontshow{\}) # examplesIf}
}
