% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils-sequence.R
\name{utils-sequence}
\alias{utils-sequence}
\alias{calc_complexity}
\alias{calc_windows}
\alias{count_klets}
\alias{get_klets}
\alias{mask_ranges}
\alias{mask_seqs}
\alias{meme_alph}
\alias{shuffle_string}
\alias{slide_fun}
\alias{window_string}
\title{Sequence-related utility functions.}
\usage{
calc_complexity(string, complexity.method = c("WoottonFederhen",
  "WoottonFederhenFast", "Trifonov", "TrifonovFast", "DUST"), alph = NULL,
  trifonov.max.word.size = 7)

calc_windows(n, window = 1, overlap = 0, return.incomp = TRUE)

count_klets(string, k = 1, alph)

get_klets(lets, k = 1)

mask_ranges(seqs, ranges, letter = "-")

mask_seqs(seqs, pattern, RC = FALSE, letter = "-")

meme_alph(core, file = stdout(), complements = NULL, ambiguity = NULL,
  like = NULL, alph.name = NULL, letter.names = NULL, colours = NULL)

shuffle_string(string, k = 1, method = c("euler", "linear", "markov"),
  rng.seed = sample.int(10000, 1))

slide_fun(string, FUN, FUN.VALUE, window = 1, overlap = 0,
  return.incomp = TRUE)

window_string(string, window = 1, overlap = 0, return.incomp = TRUE,
  nthreads = 1)
}
\arguments{
\item{string}{\code{character(1)} A character vector containing a single string,
with the exception of \code{\link[=calc_complexity]{calc_complexity()}} where \code{string} can have a length
greater than one.}

\item{complexity.method}{\code{character(1)} Complexity algorithm. See
\code{\link[=sequence_complexity]{sequence_complexity()}}.}

\item{alph}{\code{character(1)} A single character string with the desired
sequence alphabet. If missing, finds the unique letters within each
string.}

\item{trifonov.max.word.size}{\code{integer(1)} Maximum word size for use
in the Trifonov complexity methods. See \code{\link[=sequence_complexity]{sequence_complexity()}}.}

\item{n}{\code{integer(1)} Total size from which to calculate sliding windows.}

\item{window}{\code{integer(1)} Window size to slide along.}

\item{overlap}{\code{integer(1)} Overlap size between windows.}

\item{return.incomp}{\code{logical(1)} Whether to return the last window if it is
smaller than the requested window size.}

\item{k}{\code{integer(1)} K-let size.}

\item{lets}{\code{character} A character vector where each element will be
considered a single unit.}

\item{seqs}{\code{XStringSet} Sequences to mask. Cannot be \code{BStringSet}.}

\item{ranges}{\code{GRanges} The ranges to mask. Must be a \code{GRanges} object
from the \code{GenomicRanges} package.}

\item{letter}{\code{character(1)} Character to use for masking.}

\item{pattern}{\code{character(1)} Pattern to mask.}

\item{RC}{\code{logical(1)} Whether to mask the reverse complement of the pattern.}

\item{core}{\code{character(1)} Core alphabet symbols. If complements are also
provided, then only half of the letters should be provided to this argument.}

\item{file}{Output file.}

\item{complements}{\code{character(1)}, \code{NULL} Complementary letters to the core symbols.}

\item{ambiguity}{\code{character(1)}, \code{NULL} A named vector providing ambiguity codes for
the custom alphabet.}

\item{like}{\code{character(1)}, \code{NULL} How to classify the custom alphabet. If not
\code{NULL}, then one of \code{c("DNA", "RNA", "PROTEIN")}.}

\item{alph.name}{\code{character(1)}, \code{NULL} Custom alphabet name.}

\item{letter.names}{\code{character}, \code{NULL} Named vector of core symbol names.}

\item{colours}{\code{character}, \code{NULL} Named vector of core symbol colours.
MEME requires hex colours.}

\item{method}{\code{character(1)} Shuffling method. One of \code{c("euler", "linear", "markov")}. See \code{\link[=shuffle_sequences]{shuffle_sequences()}}.}

\item{rng.seed}{\code{numeric(1)} Set random number generator seed. Since shuffling
in \code{\link[=shuffle_sequences]{shuffle_sequences()}} can occur simultaneously in multiple threads using C++,
it cannot communicate
with the regular \code{R} random number generator state and thus requires an
independent seed. Since \code{\link[=shuffle_string]{shuffle_string()}} uses the same underlying code
as \code{\link[=shuffle_sequences]{shuffle_sequences()}}, it also requires a separate seed even if it is
run in serial.}

\item{FUN}{\code{closure} The function to apply per window. (See \code{?vapply}.)}

\item{FUN.VALUE}{The expected return type for \code{FUN}. (See \code{?vapply}.)}

\item{nthreads}{\code{integer(1)} Number of threads to use. Zero uses all
available threads.}
}
\value{
For \code{\link[=calc_complexity]{calc_complexity()}}: A vector of \code{numeric} values.

For \code{\link[=calc_windows]{calc_windows()}}: A \code{data.frame} with columns \code{start} and \code{stop}.

For \code{\link[=count_klets]{count_klets()}}: A \code{data.frame} with columns \code{lets} and \code{counts}.

For \code{\link[=get_klets]{get_klets()}}: A \code{character} vector of k-lets.

For \code{\link[=mask_ranges]{mask_ranges()}}: The masked \code{XStringSet} object.

For \code{\link[=mask_seqs]{mask_seqs()}}: The masked \code{XStringSet} object.

For \code{\link[=meme_alph]{meme_alph()}}: \code{NULL}, invisibly.

For \code{\link[=shuffle_string]{shuffle_string()}}: A single \code{character} string.

For \code{\link[=slide_fun]{slide_fun()}}: A vector with type \code{FUN.VALUE}.

For \code{\link[=window_string]{window_string()}}: A \code{character} vector.
}
\description{
Sequence-related utility functions.
}
\examples{
#######################################################################
## calc_complexity
## Calculate complexity for arbitrary strings
calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "WoottonFederhen")
calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "WoottonFederhenFast")
calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "Trifonov")
calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "TrifonovFast")
calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "DUST")

#######################################################################
## calc_windows
## Calculate window coordinates for any value 'n'.
calc_windows(100, 10, 5)

#######################################################################
## count_klets
## Count k-lets for any string of characters
count_klets("GCAAATGTACGCAGGGCCGA", k = 2)
## The default 'k' value (1) counts individual letters
count_klets("GCAAATGTACGCAGGGCCGA")

#######################################################################
## get_klets
## Generate all possible k-lets for a set of characters
get_klets(c("A", "C", "G", "T"), 3)
## Note that each element in 'lets' is considered a single unit;
## see:
get_klets(c("AA", "B"), k = 2)

#######################################################################
## mask_ranges
## Mask arbitrary ranges
if (requireNamespace("GenomicRanges", quiet = TRUE)) {
ranges <- GenomicRanges::GRanges("A", IRanges::IRanges(1, 5))
seq <- Biostrings::DNAStringSet(c(A = "ATGACTGATTACTTATA"))
mask_ranges(seq, ranges, "-")
}

#######################################################################
## mask_seqs
## Mask repetitive sequences
data(ArabidopsisPromoters)
mask_seqs(ArabidopsisPromoters, "AAAAAA")

#######################################################################
## meme_alph
## Create MEME custom alphabet definition files
meme_alph("ACm", complements = "TGM", alph.name = "MethDNA",
  letter.names = c(A = "Adenine", C = "Cytosine", G = "Guanine",
    T = "Thymine", m = "Methylcytosine", M = "mC:Guanine"),
  like = "DNA", ambiguity = c(N = "ACGTmM"))

#######################################################################
## shuffle_string
## Shuffle any string of characters
shuffle_string("ASDADASDASDASD", k = 1)

#######################################################################
## slide_fun
## Apply a function to a character vector along sliding windows
FUN <- function(x) grepl("[GC]", x)
data.frame(
  Window = window_string("ATGCATCTATGCA", 2, 1),
  HasGC = slide_fun("ATGCATCTATGCA", FUN, logical(1), 2, 1)
)

#######################################################################
## window_string
## Get sliding windows for a string of characters
window_string("ABCDEFGHIJ", 2, 1)

}
\seealso{
\code{\link[=create_sequences]{create_sequences()}}, \code{\link[=get_bkg]{get_bkg()}}, \code{\link[=sequence_complexity]{sequence_complexity()}},
\code{\link[=shuffle_sequences]{shuffle_sequences()}}
}
\author{
Benjamin Jean-Marie Tremblay, \email{benjamin.tremblay@uwaterloo.ca}
}
