% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/buildFeatureVectorForScoring.R
\name{buildFeatureVectorForScoring}
\alias{buildFeatureVectorForScoring}
\title{Build feature vectors}
\usage{
buildFeatureVectorForScoring(
  hits,
  gRNA.size = 20,
  canonical.PAM = "NGG",
  subPAM.position = c(22, 23),
  PAM.size = 3,
  PAM.location = "3prime"
)
}
\arguments{
\item{hits}{A data frame generated from searchHits, which contains
\itemize{
 \item IsMismatch.posX - indicator variable for whether position X is a
 mismatch (1 means yes and 0 means no). X takes on values from 1 to
 gRNA.size, representing all positions in the guide RNA (gRNA).
 \item strand - strand of the off-target, + for plus and - for minus strand
 \item chrom - chromosome of the off-target
 \item chromStart - start position of the off-target
 \item chromEnd - end position of the off-target
 \item name - gRNA name
 \item gRNAPlusPAM - gRNA sequence with PAM sequence concatenated
 \item OffTargetSequence - the genomic sequence of the off-target
 \item n.mismatch - number of mismatches between the off-target and the gRNA
 \item forViewInUCSC - string for viewing in UCSC genome browser, e.g., 
 chr14:31665685-31665707
 \item score - Set to 100, and will be calculated in getOfftargetScore
}}

\item{gRNA.size}{gRNA size. The default is 20}

\item{canonical.PAM}{Canonical PAM. The default is NGG for spCas9; set to TTTN for Cpf1}

\item{subPAM.position}{The start and end positions of the sub PAM to fetch.
Defaults to 22 and 23 for spCas9 with a 20-bp gRNA and NGG as the preferred PAM}

\item{PAM.size}{Size of PAM. Defaults to 3 for spCas9; set to 4 for Cpf1}

\item{PAM.location}{PAM location relative to gRNA. Defaults to 3prime for
the spCas9 PAM. Please set to 5prime for the Cpf1 PAM since its PAM is
located on the 5 prime end}
}
\value{
A data frame with hits plus features used for calculating scores and
for generating the report, including
\itemize{
 \item IsMismatch.posX - indicator variable for whether position X is a
 mismatch (1 means yes and 0 means no). X ranges from 1 to gRNA.size,
 representing all positions in the gRNA.
 \item strand - strand of the off-target, + for plus and - for minus strand
 \item chrom - chromosome of the off-target
 \item chromStart - start position of the off-target
 \item chromEnd - end position of the off-target
 \item name - gRNA name
 \item gRNAPlusPAM - gRNA sequence with PAM sequence concatenated
 \item OffTargetSequence - the genomic sequence of the off-target
 \item n.mismatch - number of mismatches between the off-target and the gRNA
 \item forViewInUCSC - string for viewing in UCSC genome browser, e.g., 
 chr14:31665685-31665707
 \item score - score of the off-target
 \item mismatch.distance2PAM - a comma-separated list of all mismatches' 
 distances to PAM, e.g., 14,11 means one mismatch is 14 bp away from PAM and 
 the other mismatch is 11 bp away from PAM
 \item alignment - alignment between gRNA and off-target, 
 e.g., ......G..C.......... means that this off-target aligns with gRNA 
 except that G and C are mismatches
 \item NGG - whether this off-target contains canonical PAM (1 for yes and 
 0 for no)
 \item mean.neighbor.distance.mismatch - mean distance between neighboring 
 mismatches
}
}
\description{
Build feature vectors for calculating scores of off-targets
}
\examples{

    hitsFile <-  system.file("extdata", "hits.txt", package = "CRISPRseek")
    hits <- read.table(hitsFile, sep= "\t", header = TRUE,
        stringsAsFactors = FALSE)
    buildFeatureVectorForScoring(hits)
}
\seealso{
offTargetAnalysis
}
\author{
Lihua Julie Zhu
}
\keyword{misc}
