% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/velocity_methods.R
\name{.get_velocity_files}
\alias{.get_velocity_files}
\title{Generate RNA velocity files for GRanges}
\usage{
.get_velocity_files(
  gr,
  L,
  Genome,
  Transcriptome = NULL,
  out_path = ".",
  style = c("genome", "Ensembl", "UCSC", "NCBI", "other"),
  isoform_action = c("separate", "collapse"),
  exon_option = c("full", "junction"),
  transcript_id = "transcript_id",
  gene_id = "gene_id",
  transcript_version = "transcript_version",
  gene_version = "gene_version",
  version_sep = ".",
  transcript_biotype_col = "transcript_biotype",
  gene_biotype_col = "gene_biotype",
  transcript_biotype_use = "all",
  gene_biotype_use = "all",
  chrs_only = TRUE,
  save_filtered_gtf = FALSE,
  compress_fa = FALSE,
  width = 80L
)
}
\arguments{
\item{gr}{A \code{GRanges} object for gene annotation.}

\item{L}{Length of the biological read. For instance, 10xv1: 98 nt,
10xv2: 98 nt, 10xv3: 91 nt, Drop-seq: 50 nt. If in doubt check read length
in a fastq file for biological reads with the following \code{bash} commands.
If the fastq file is gzipped, then do \verb{zcat your_file.fastq.gz | head} on
Linux. If on Mac, then \code{zcat < your_file.fastq.gz | head}. Then you will see
lines with nucleotide bases. Copy one of those lines and determine its length
with \code{\link{str_length}} in R or \verb{echo -n <the sequence> | wc -c} in
\code{bash}. Which file corresponds to biological reads depends on the particular
technology.}

\item{Genome}{Either a \code{\link{BSgenome}} or a \code{\link{XStringSet}}
object of genomic sequences, where the intronic sequences will be extracted
from. Use \code{\link{genomeStyles}} to check which styles are supported for
your organism of interest; supported styles can be interconverted. If the
style in your genome or annotation is not supported, then the style of
chromosome names in the genome and annotation should be manually set to be
consistent.}

\item{Transcriptome}{A \code{\link{XStringSet}}, a path to a fasta
file (can be gzipped) of the transcriptome which contains sequences of
spliced transcripts, or \code{NULL}. The transcriptome here will be concatenated
with the intronic sequences to give one fasta file. When \code{NULL}, the
transcriptome sequences will be extracted from the genome
given the gene annotation, so it will be guaranteed that transcript IDs in
the transcriptome and in the annotation match. Otherwise, the type of
transcript ID in the transcriptome must match that in the gene annotation
supplied via argument \code{gr}.}

\item{out_path}{Directory to save the outputs written to disk. If this
directory does not exist, then it will be created. Defaults to the current
working directory.}

\item{style}{Formatting of chromosome names. Use
\code{\link{genomeStyles}} to check which styles are supported for your
organism of interest and what those styles look like. This can also be a
style supported for your organism different from the style used by the
annotation and the genome. Then this style will be used for both the
annotation and the genome. Can take the following values:
\describe{
\item{genome}{If style of the annotation is different from that of the
genome, then the style of the genome will be used.}
\item{other}{Custom style, need to manually ensure that the style in
annotation matches that of the genome.}
\item{Ensembl}{Or \code{UCSC} or \code{NCBI}, whichever is supported by your species
of interest.}
}}

\item{isoform_action}{Character, indicating action to take with different
transcripts of the same gene. Must be one of the following:
\describe{
\item{collapse}{First, the union of all exons of different transcripts of a
gene will be taken. Then the introns will be inferred from this union. Only
the flanked intronic sequences are affected; isoforms will always be taken
into account for spliced sequences or exon-exon junctions.}
\item{separate}{Introns from different transcripts will be kept separate.}
}}

\item{exon_option}{Character, indicating how exonic sequences should be
included in the kallisto index. Must be one of the following:
\describe{
\item{full}{The full cDNA sequences, which include the full exonic sequences,
will be used. This is the default.}
\item{junction}{Only the exon-exon junctions, with L-1 bases on each side
of the junctions, will be used.}
}}

\item{transcript_id}{Character vector of length 1. Tag in \code{attribute}
field corresponding to transcript IDs. This argument must be supplied and
cannot be \code{NA} or \code{NULL}. Will throw error if tag indicated in this
argument does not exist.}

\item{gene_id}{Character vector of length 1. Tag in \code{attribute}
field corresponding to gene IDs. This argument must be supplied and
cannot be \code{NA} or \code{NULL}. Note that this is different from gene
symbols, which do not have to be unique. This can be Ensembl or Entrez IDs.
However, if the gene symbols are in fact unique for each gene, you may
supply the tag for human readable gene symbols to this argument. Will throw
error if tag indicated in this argument does not exist. This is typically
"gene_id" for annotations from Ensembl and "gene" for refseq.}

\item{transcript_version}{Character vector of length 1. Tag in \code{attribute}
field corresponding to \emph{transcript} version number. If your GTF file does not
include transcript version numbers, or if you do not wish to include the
version number, then use \code{NULL} for this argument. To decide whether to
include transcript version number, check whether version numbers are included
in the \code{transcripts.txt} in the \code{kallisto} output directory. If that file
includes version numbers, then transcript version numbers must be included
here as well. If that file does not include version numbers, then transcript
version numbers must not be included here.}

\item{gene_version}{Character vector of length 1. Tag in \code{attribute}
field corresponding to \emph{gene} version number. If your GTF file does not
include gene version numbers, or if you do not wish to include the
version number, then use \code{NULL} for this argument. Unlike transcript
version number, it's up to you whether to include gene version number.}

\item{version_sep}{Character to separate the main ID and the version
number. Defaults to ".", as in Ensembl.}

\item{transcript_biotype_col}{Character vector of length 1. Tag in
\code{attribute} field corresponding to \emph{transcript} biotype.}

\item{gene_biotype_col}{Character vector of length 1. Tag in \code{attribute}
field corresponding to \emph{gene} biotype.}

\item{transcript_biotype_use}{Character, can be "all" or
a vector of \emph{transcript} biotypes to be used. Transcript biotypes aren't
entirely the same as gene biotypes. For instance, in Ensembl annotation,
\code{retained_intron} is a transcript biotype, but not a gene biotype. If
"cellranger", then a warning will be given. See \code{data("ensembl_tx_biotypes")}
for all available transcript biotypes from Ensembl.}

\item{gene_biotype_use}{Character, can be "all", "cellranger", or
a vector of \emph{gene} biotypes to be used. If "cellranger", then the biotypes
used by Cell Ranger's reference are used. See \code{data("cellranger_biotypes")}
for gene biotypes the Cell Ranger reference uses. See
\code{data("ensembl_gene_biotypes")} for all available gene biotypes from Ensembl.
Note that gene biotypes and transcript biotypes are not always the same.}

\item{chrs_only}{Logical, whether to include chromosomes only, since GTF and
GFF files can contain annotations for scaffolds, which are not incorporated
into chromosomes. This will also exclude haplotypes. Defaults to \code{TRUE}.
Only applicable to species found in \code{genomeStyles()}.}

\item{save_filtered_gtf}{Logical. If filtering type, biotypes, and/or
chromosomes, whether to save the filtered \code{GRanges} as a GTF file.}

\item{compress_fa}{Logical, whether to compress the output fasta file. If
\code{TRUE}, then the fasta file will be gzipped.}

\item{width}{Maximum number of letters per line of sequence in the output
fasta file. Must be an integer.}
}
\value{
See \code{\link{get_velocity_files}}
}
\description{
Generate RNA velocity files for GRanges
}
