% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Annotations.R
\name{ranges2annot}
\alias{ranges2annot}
\title{Hierarchical annotation of genomic regions.}
\usage{
ranges2annot(ranges, annot, upstream = 500, downstream = 500)
}
\arguments{
\item{ranges}{A \code{\link[GenomicRanges:GRanges-class]{GenomicRanges::GRanges}} object, for example extracted from
a \code{RangedSummarizedExperiment} object with the \code{\link[MatrixGenerics:rowRanges]{MatrixGenerics::rowRanges}}
command.}

\item{annot}{A \code{GRanges} from which promoter positions will be inferred.
Typically GENCODE.  If the \code{type} metadata is present, it should
contain \code{gene}, \code{exon} and \code{transcript} among its values.  Otherwise,
all entries are considered transcripts. If the \code{transcript_type}
metadata is available, the entries that may not be primary products
(for instance \sQuote{snoRNA}) are discarded.}

\item{upstream}{Number of bases \emph{upstream} of the start of the transcript
models to be considered as part of the \emph{promoter region}.}

\item{downstream}{Number of bases \emph{downstream} of the start of the
transcript models to be considered as part of the \emph{promoter region}.}
}
\value{
A run-length-encoded (\code{\link[S4Vectors:Rle-class]{S4Vectors::Rle}}) factor of the same length as the
\code{ranges} object, indicating if each interval is \code{promoter}, \code{exon},
\code{intron} or \code{unknown}, or just \code{promoter}, \code{gene} or \code{unknown} if the
\code{type} metadata is absent.
}
\description{
Assigns region types such as \code{promoter}, \code{exon} or \code{unknown} to genomic
regions such as \emph{CTSS}, \emph{tag clusters}, or \emph{consensus clusters}.
}
\details{
Only the biotypes that are likely to have a pol II promoter will be
kept.  This is currently hardcoded in the function; see its source
code.  Examples of biotypes without a pol II promoter: VDJ segments, miRNA,
but also snoRNA, etc.  Thus, the \emph{Intergenic} category displayed in the
output of \code{\link{plotAnnot}} may include counts overlapping with real exons
of discarded transcribed regions: be careful that large percentages do not
necessarily suggest abundance of novel promoters.
}
\examples{
# Annotate the CTSS coordinates of the example object with the example annotation.
CAGEr:::ranges2annot(CTSScoordinatesGR(exampleCAGEexp), exampleZv9_annot)

# Toy example: four single-base positions on the plus strand of chr1,
# converted to a CTSS object.
ctss <- GenomicRanges::GRanges("chr1", IRanges::IPos(c(1,100,200,1500)), "+")
ctss <- GenomicRanges::GPos(ctss, stitch = FALSE)
ctss <- as(ctss, "CTSS")

# Annotation without type metadata: three ranges that all end at position 2000.
gr1 <- GenomicRanges::GRanges("chr1", IRanges::IRanges(c(650, 650, 1400), 2000), "+")
CAGEr:::ranges2annot(ctss, gr1)

# Same ranges, now carrying type and transcript_type metadata.
gr2 <- gr1
gr2$type            <- c("transcript",     "exon",           "transcript")
gr2$transcript_type <- c("protein_coding", "protein_coding", "miRNA")
# Argument names are written in full instead of relying on partial matching.
CAGEr:::ranges2annot(ctss, gr2, upstream = 500, downstream = 20)

}
\seealso{
\code{\link{CTSScoordinatesGR}}, \code{\link{exampleZv9_annot}}

Other CAGEr annotation functions: 
\code{\link{annotateCTSS}()},
\code{\link{plotAnnot}()},
\code{\link{ranges2genes}()},
\code{\link{ranges2names}()}
}
\author{
Charles Plessy
}
\concept{CAGEr annotation functions}
