% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RiboORFs_detect_and_classify.R
\name{detect_ribo_orfs}
\alias{detect_ribo_orfs}
\title{Detect ORFs by Ribosome profiling data}
\usage{
detect_ribo_orfs(
  df,
  out_folder,
  ORF_categories_to_keep,
  prefix_result = paste(c(ORF_categories_to_keep, gsub(" ", "_", organism(df))), collapse
    = "_"),
  mrna = loadRegion(df, "mrna"),
  cds = loadRegion(df, "cds"),
  libraries = outputLibs(df, type = "pshifted", output.mode = "envirlist"),
  orf_candidate_ranges = findORFs(seqs = txSeqsFromFa(mrna, df, TRUE), longestORF =
    longestORF, startCodon = startCodon, stopCodon = stopCodon, minimumLength =
    minimumLength),
  orfs_gr = categorize_and_filter_ORFs(orf_candidate_ranges, ORF_categories_to_keep, cds,
    mrna),
  export_metrics_table = TRUE,
  longestORF = FALSE,
  startCodon = startDefinition(1),
  stopCodon = stopDefinition(1),
  minimumLength = 0,
  minimum_reads_ORF = 10,
  minimum_reads_start = 3
)
}
\arguments{
\item{df}{an ORFik \code{\link{experiment}}}

\item{out_folder}{Directory to save files}

\item{ORF_categories_to_keep}{options, any subset of: \code{c("uORF", "uoORF", "annotated", "NTE",
"NTT", "internal", "doORF", "dORF", "ncORF", "a_error", "all")}.
\itemize{
 \item{uORF : Upstream ORFs (Starting in 5' UTR), not overlapping CDS}
 \item{uoORF : Upstream ORFs (Starting in 5' UTR), overlapping CDS}
 \item{annotated : The defined CDS for that transcript}
 \item{NTE : 5' Start codon extension of annotated CDS}
 \item{NTT : 5' Start codon truncation of annotated CDS}
 \item{CTE : 3' stop codon extension of annotated CDS, i.e. readthrough}
 \item{CTT : 3' stop codon truncation of annotated CDS, original cds was defined with readthrough}
 \item{internal : Starting inside CDS, ending before CDS ends}
 \item{doORF : Downstream ORFs (Ending in 3' UTR), overlapping CDS}
 \item{dORF : Downstream ORFs (Ending in 3' UTR), not overlapping CDS}
 \item{ncORF : Any ORF on a transcript without a defined CDS}
 \item{a_error : Any ORF detected that is not in the above categories}
 \item{all : use all ORF types above}
}}

\item{prefix_result}{the prefix name of output files to out_folder. Default:
\code{paste(c(ORF_categories_to_keep, gsub(" ", "_", organism(df))), collapse = "_")}}

\item{mrna}{= \code{loadRegion(df, "mrna")}}

\item{cds}{= \code{loadRegion(df, "cds")}}

\item{libraries}{the ribo-seq libraries loaded into R as list, default:
\code{outputLibs(df, type = "pshifted", output.mode = "envirlist")}}

\item{orf_candidate_ranges}{IRangesList, =
\code{findORFs(seqs = txSeqsFromFa(mrna, df, TRUE),
longestORF = longestORF, startCodon = startCodon, stopCodon = stopCodon,
minimumLength = minimumLength)}}

\item{orfs_gr}{= \code{categorize_and_filter_ORFs(orf_candidate_ranges,
ORF_categories_to_keep, cds, mrna)}. The GRangesList set of ORFs to actually search.}

\item{export_metrics_table}{logical, default TRUE. Export table of statistics to file
with suffix: "_prediction_table.rds"}

\item{longestORF}{(logical) Default FALSE. Keep only the longest ORF per
unique stop codon: (seqname, strand, stopcodon) combination. Note: Not longest
per transcript! You can also use function
\code{\link{longestORFs}} after creation of ORFs for same result.}

\item{startCodon}{(character vector) Possible START codons to search for.
Check \code{\link{startDefinition}} for helper function. Note that it is
case sensitive, so "atg" would give 0 hits for a sequence with only capital
"ATG" ORFs.}

\item{stopCodon}{(character vector) Possible STOP codons to search for.
Check \code{\link{stopDefinition}} for helper function. Note that it is
case sensitive, so "tga" would give 0 hits for a sequence with only capital
"TGA" ORFs.}

\item{minimumLength}{(integer) Default is 0. Which is START + STOP = 6 bp.
Minimum length of ORF, without counting 3bps for START and STOP codons.
For example minimumLength = 8 will result in size of ORFs to be at least
START + 8*3 (bp) + STOP = 30 bases. Use this param to restrict search.}

\item{minimum_reads_ORF}{numeric, default 10, ORF removed if fewer reads overlap the whole ORF}

\item{minimum_reads_start}{numeric, default 3, ORF removed if fewer reads overlap the start site}
}
\value{
invisible(NULL), all ORF results saved to disc
}
\description{
Finding all ORFs:
1. Find all ORFs in mRNA using ORFik findORFs, with defined parameters.\cr
To create the candidate ORFs (all ORFs returned):\cr
Steps (candidate set):\cr
Define a candidate search set by these 3 rules:\cr
  1.a Allowed ORF type: uORF, NTE, etc (only keep these in candidate list)\cr
  1.b Must have at least x reads over whole orf (default 10 reads)\cr
  1.c Must have at least x reads over start site (default 3 reads)\cr
The total list is defined by these names, and saved according to allowed ORF type/types.\cr
To create the prediction status (TRUE/FALSE) per candidate\cr
 Steps (prediction status)\cr
(UP_NT is a 20nt window upstream of ORF, that stops 2NT before ORF starts) :\cr
  1. ORF mean reads per NT > (UP_NT mean reads per NT * 1.3)\cr
  2. ORFScore > 2.5\cr
  3. TIS total reads + 3 >  ORF median reads per NT\cr
  4. Given the expressions above, a TRUE prediction is defined with the AND operator: 1. & 2. & 3.
\cr\cr
In code that is:\cr
\code{predicted <- (orfs_cov_stats$mean > upstream_cov_stats$mean*1.3) & orfs_cov_stats$ORFScores > 2.5 &
 ((reads_start[candidates] + 3) >  orfs_cov_stats$median)}
}
\examples{
# Prerequisites
# 1. Create ORFik experiment
#  ORFik::create.experiment(...)
# 2. Create ORFik optimized annotation:
# makeTxdbFromGenome(gtf = ORFik:::getGtfPathFromTxdb(df), genome = df@fafile,
#                      organism = organism(df), optimize = TRUE)
# 3. There must exist pshifted reads, either as default files, or in a relative folder called
# "./pshifted/". See ?shiftFootprintsByExperiment
# EXAMPLE:
df <- ORFik.template.experiment()
df <- df[df$libtype == "RFP",][c(1,2),]
result_folder <- riboORFsFolder(df, tempdir())
results <- detect_ribo_orfs(df, result_folder, c("uORF", "uoORF", "annotated", "NTE"))

# Load results of annotated ORFs
table <- riboORFs(df[1,], type = "table", result_folder)
table # See all statistics
sum(table$predicted) # How many were predicted as Ribo-seq ORFs
# Load 2 results
table <- riboORFs(df[1:2,], type = "table", result_folder)
table # See all statistics
sum(table$predicted) # How many were predicted as Ribo-seq ORFs

# Load GRangesList
candidates_gr <- riboORFs(df[1,], type = "ranges_candidates", result_folder)
prediction <- riboORFs(df[1,], type = "predictions", result_folder)

predicted_gr <- riboORFs(df[1:2,], type = "ranges_predictions", result_folder)
identical(predicted_gr[[1]], candidates_gr[[1]][prediction[[1]]])
## Inspect predictions in RiboCrypt
# library(RiboCrypt)
# Inspect Predicted
view <- predicted_gr[[1]][1]
#multiOmicsPlot_ORFikExp(view, df, view, leader_extension = 100, trailer_extension = 100)
# Inspect not predicted
view <- candidates_gr[[1]][!prediction[[1]]][1]
#multiOmicsPlot_ORFikExp(view, df, view, leader_extension = 100, trailer_extension = 100)
}
