% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sequence_features.R
\name{kozakSequenceScore}
\alias{kozakSequenceScore}
\title{Make a score for each ORF's start region by proximity to Kozak}
\usage{
kozakSequenceScore(grl, tx, faFile, species = "human", include.N = FALSE)
}
\arguments{
\item{grl}{a \code{\link{GRangesList}} grouped by ORF}

\item{tx}{a \code{\link{GRangesList}}, the reference area for ORFs, each ORF
must have a corresponding tx.}

\item{faFile}{\code{\link{FaFile}}, BSgenome, fasta/index file path or an
ORFik \code{\link{experiment}}. This file is usually used to find the
transcript sequences from some GRangesList.}

\item{species}{("human"), which species to use,
currently supports human (Homo sapiens), zebrafish (Danio rerio) and mouse (Mus musculus).
Either the scientific or the common name of these species will work.
You can also specify a pfm for your own species.
Syntax of pfm is a rectangular integer matrix,
where all columns must sum to the same value, normally 100.
See example for more information.
Rows are in order: c("A", "C", "G", "T")}

\item{include.N}{logical (F), if TRUE, allow N bases to be counted as hits;
their score will be the average of the other bases. If TRUE, N bases will be
added to the pfm automatically, so don't include them if you make your own pfm.}
}
\value{
a numeric vector with values between 0 and 1, one score per ORF
}
\description{
The closer the sequence is to the Kozak sequence
the higher the score, based on the experimental pwms
from the referenced article.
Minimum score is 0 (worst correlation), max is 1 (the best
base per column was chosen).
}
\details{
Ranges that do not have a minimum length of 15 (the kozak requirement as a
sliding window of size 15 around grl start)
will be set to score 0, since they should not have the possibility to make
an efficient ribosome binding.
}
\examples{
# ORFs normally come from the ORFik ORF finders, which also name them.
# This example builds a small annotation by hand instead.
chrom <- "Chromosome"
# First group: two 60-wide ranges on the plus strand
ORF1 <- GRanges(seqnames = chrom,
                ranges = IRanges(start = c(1007, 1096), width = 60),
                strand = c("+", "+"))
# Second group: two 30-wide ranges on the minus strand
ORF2 <- GRanges(seqnames = chrom,
                ranges = IRanges(start = c(400, 100), width = 30),
                strand = c("-", "-"))
# Group the ranges and give them ORF names (required by the scoring call)
ORFs <- makeORFNames(GRangesList(tx1 = ORF1, tx2 = ORF2))
# Reference transcripts, made here by calling extendLeaders on the ORFs
tx <- extendLeaders(ORFs, 100)
# Open the dummy genome shipped with ORFik as the sequence source
fastaPath <- system.file("extdata/references/danio_rerio",
                         "genome_dummy.fasta", package = "ORFik")
faFile <- FaFile(fastaPath)
kozakSequenceScore(grl = ORFs, tx = tx, faFile = faFile)
# For more details see vignettes.
}
\references{
doi: \url{https://doi.org/10.1371/journal.pone.0108475}
}
\seealso{
Other features: 
\code{\link{computeFeatures}()},
\code{\link{computeFeaturesCage}()},
\code{\link{countOverlapsW}()},
\code{\link{disengagementScore}()},
\code{\link{distToCds}()},
\code{\link{distToTSS}()},
\code{\link{entropy}()},
\code{\link{floss}()},
\code{\link{fpkm}()},
\code{\link{fpkm_calc}()},
\code{\link{fractionLength}()},
\code{\link{initiationScore}()},
\code{\link{insideOutsideORF}()},
\code{\link{isInFrame}()},
\code{\link{isOverlapping}()},
\code{\link{orfScore}()},
\code{\link{rankOrder}()},
\code{\link{ribosomeReleaseScore}()},
\code{\link{ribosomeStallingScore}()},
\code{\link{startRegion}()},
\code{\link{startRegionCoverage}()},
\code{\link{stopRegion}()},
\code{\link{subsetCoverage}()},
\code{\link{translationalEff}()}
}
\concept{features}
