% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/txdbHelpers.R
\name{makeTxdbFromGenome}
\alias{makeTxdbFromGenome}
\title{Make txdb from genome}
\usage{
makeTxdbFromGenome(
  gtf,
  genome = NULL,
  organism,
  optimize = FALSE,
  gene_symbols = FALSE,
  uniprot_id = FALSE,
  pseudo_5UTRS_if_needed = NULL,
  minimum_5UTR_percentage = 30,
  return = is.null(txdb_file_out_path),
  txdb_file_out_path = paste0(gtf, ".db"),
  symbols_file_out_path = file.path(dirname(gtf), "gene_symbol_tx_table.fst")
)
}
\arguments{
\item{gtf}{path to gtf file}

\item{genome}{character, default NULL. Path to fasta genome
corresponding to the gtf. If NULL, can not set seqlevels.
If value is NULL or FALSE, it will be ignored.}

\item{organism}{Scientific name of organism, first letter
must be capital! Example: Homo sapiens. Will force first letter
to capital and convert any "_" (underscore) to " " (space)}

\item{optimize}{logical, default FALSE. Create a folder
within the output folder (defined by txdb_file_out_path),
that includes optimized objects
to speed up loading of annotation regions from up to 15 seconds
on human genome down to 0.1 second. ORFik will then load these optimized
objects instead. Currently optimizes filterTranscript() function and
loadRegion() function for 5' UTRs, 3' UTRs, CDS,
mRNA (all transcripts with CDS) and tx (all transcripts).}

\item{gene_symbols}{logical, default FALSE. If TRUE, will download
and store all gene symbols for all transcripts (coding and noncoding)
in a file called "gene_symbol_tx_table.fst" in the same folder as the txdb.
HGNC symbols for human, mouse symbols for mouse and rat, more to be added.}

\item{uniprot_id}{logical, default FALSE. If TRUE, will download
and store all uniprot ids for all transcripts (coding and noncoding)
in a file called "gene_symbol_tx_table.fst" in the same folder as the txdb.}

\item{pseudo_5UTRS_if_needed}{integer, default NULL. If defined > 0,
will add pseudo 5' UTRs of maximum this length if fewer than
'minimum_5UTR_percentage' (default 30\%) of mRNAs (coding transcripts)
have a leader. (NULL and 0 both mean: do not add pseudo 5' UTRs)}

\item{minimum_5UTR_percentage}{numeric, default 30. The minimum percentage
of mRNAs that must have a 5' UTR (leader) for the pseudo 5' UTR addition
to be skipped. If the observed percentage is higher, the addition is
skipped; set to 101 to always do it.}

\item{return}{logical, default is.null(txdb_file_out_path).
If TRUE, return TXDB object, else invisible(NULL).}

\item{txdb_file_out_path}{character path, default paste0(gtf, ".db").
Set to NULL to not write file to disc.}

\item{symbols_file_out_path}{character path, default
file.path(dirname(gtf), "gene_symbol_tx_table.fst").
Must be defined as character if "gene_symbols" is TRUE. Ignored if
"gene_symbols" is FALSE.}
}
\value{
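A TxDb object if the 'return' argument is TRUE
(default is.null(txdb_file_out_path)), otherwise invisible(NULL).
Unless 'txdb_file_out_path' is NULL, the Txdb is also saved to disc,
by default named paste0(gtf, ".db").
Set the 'return' argument to TRUE to also get the txdb back as an object.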
}
\description{
Make a Txdb with defined seqlevels and
seqlevelsstyle from the fasta genome.
This makes it more fail safe than standard Txdb creation.
For example, you can then not create a coverage window outside
the chromosome boundary; this safeguard is only possible if you have
set the seqlengths.
}
\examples{
gtf <- "/path/to/local/annotation.gtf"
genome <- "/path/to/local/genome.fasta"
#makeTxdbFromGenome(gtf, genome, organism = "Saccharomyces cerevisiae")
# Runnable full example
df <- ORFik.template.experiment()
gtf <- sub("\\\\.db$", "", df@txdb)
genome <- df@fafile
txdb <- makeTxdbFromGenome(gtf, genome, organism = "Saccharomyces cerevisiae",
  txdb_file_out_path = NULL)
## Add pseudo UTRs if needed (< 30\% of cds have a defined 5'UTR)
}
