% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulation.R
\name{generate_reads}
\alias{generate_reads}
\title{Generate synthetic sequence reads}
\usage{
generate_reads(
  n = 10000,
  seq = NULL,
  output = NULL,
  adapter = NULL,
  srna_length = NULL,
  read_length = 150,
  mismatch_prob = 0,
  peaks = NULL,
  read_name_prefix = NULL
)
}
\arguments{
\item{n}{Number of reads to be generated.}

\item{seq}{A path to a FASTA format file containing a genome sequence, or
a character string of the genome sequence.}

\item{output}{A path to the FASTQ format file in which the synthetic reads are stored.
The extension should be either \code{.fq} or \code{.fastq}.
Note that to compress the FASTQ format file, add \code{.gz} or \code{.gzip} to the
extension (e.g., \code{.fq.gz}, \code{.fq.gzip}).}

\item{adapter}{A path to a FASTA format file containing an adapter
sequence.
If \code{NULL} is specified, the sequence "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC"
is used as the adapter sequence.
If \code{NA} is specified, the adapter sequence is not included in the
synthetic reads.}

\item{srna_length}{A data frame to specify the lengths of sequence reads
sampled from the genome sequence.
The data frame should contain two columns named as \code{length} and \code{prob}.
The values in the \code{length} column are used to specify
the lengths of sequence reads;
the values in the \code{prob} column are used to specify the probability
of reads with the specified length among all reads.
If the argument is not given (i.e., \code{srna_length = NULL}),
a data frame is randomly generated before sampling the reads.}

\item{read_length}{The length of synthetic reads.
If \code{adapter} is specified, the reads are generated by concatenating
sequence reads and adapter sequences until the specified length.
If \code{adapter = NA}, this argument is ignored.}

\item{mismatch_prob}{A vector to specify probabilities
of mismatches occurring in the reads.
In order not to allow any mismatches in the reads,
set the argument to \code{0}.
To allow multiple mismatches in the reads, set multiple probabilities
(e.g., \code{c(0.05, 0.01)}).}

\item{peaks}{A data frame to specify the peaks of the alignment coverage.
The data frame should contain four columns named as \code{mean}, \code{std},
\code{strand}, and \code{prob}.
The values in the \code{mean} and \code{std} columns are used to sample the start
position of sequence reads from the genome sequence given by \code{seq}.
The values in the \code{strand} column should be \code{+} or \code{-} to specify
which read strand generates the peak.
The values in the \code{prob} column should be probabilities to use the
\code{mean}, \code{std}, and \code{strand} of the same row for read generation.
If the argument is not given (i.e., \code{peaks = NULL}),
a data frame is randomly generated before sampling the reads.}

\item{read_name_prefix}{The prefix of read names in the FASTQ format file.
If \code{NULL}, the prefix is generated randomly.}
}
\value{
A \code{\link{CircSeqAlignTkSim-class}} object containing parameters
for read generation.
}
\description{
This function generates synthetic sequence reads in FASTQ format
that mimic RNA-seq reads sequenced from organelles or organisms
with circular genome sequences.
}
\examples{
output_dpath <- tempdir()

sim <- generate_reads(output = file.path(output_dpath, 'sample1.fq.gz'))

srna_length <- data.frame(length = c(21, 22, 23, 24),
                          prob = c(0.5, 0.3, 0.1, 0.1))
sim <- generate_reads(output = file.path(output_dpath, 'sample2.fq.gz'),
                      srna_length = srna_length)


sim <- generate_reads(output = file.path(output_dpath, 'sample3.fq.gz'),
                      mismatch_prob = c(0.1, 0.1))


peaks <- data.frame(mean =   c( 50, 100, 150),
                    std =    c(  3,   5,   5),
                    strand = c('+', '-', '+'),
                    prob =   c(0.4, 0.4, 0.2))
sim <- generate_reads(output = file.path(output_dpath, 'sample4.fq.gz'),
                      peaks = peaks)
}
\seealso{
\code{\link{CircSeqAlignTkSim-class}}
}
