% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sequenceEncoder.R
\name{sequenceEncoder}
\alias{sequenceEncoder}
\alias{onehotEncoder}
\alias{propertyEncoder}
\alias{geometricEncoder}
\title{Universal Amino-acid Sequence Encoder}
\usage{
sequenceEncoder(
  input.sequences,
  mode = c("onehot", "property", "geometric"),
  property.set = NULL,
  property.matrix = NULL,
  method = "BLOSUM62",
  theta = pi/3,
  sequence.dictionary = amino.acids,
  padding.symbol = ".",
  summary.fun = "",
  max.length = NULL,
  nthreads = parallel::detectCores(),
  verbose = TRUE,
  ...
)

onehotEncoder(..., mode = "onehot")

propertyEncoder(..., mode = "property")

geometricEncoder(..., mode = "geometric")
}
\arguments{
\item{input.sequences}{`character` vector. Sequences (uppercase
single-letter code).}

\item{mode}{Either `"onehot"`, `"property"`, or `"geometric"`.}

\item{property.set}{Character string naming one of the supported property
sets. Defaults to `"atchleyFactors"`; other options are `"crucianiProperties"`,
`"FASGAI"`, `"kideraFactors"`, `"MSWHIM"`, `"ProtFP"`, `"stScales"`,
`"tScales"`, `"VHSE"`, and `"zScales"`. Ignored if `property.matrix` is supplied.}

\item{property.matrix}{*Optional numeric matrix (`20 × P`)*. Overrides
`property.set` in `"property"` mode.}

\item{method}{*(For geometric mode)* Character key for a built-in substitution
matrix (e.g., "BLOSUM62"), or a 20x20 numeric matrix itself.}

\item{theta}{*(For geometric mode)* Rotation angle in radians (default `pi/3`).}

\item{sequence.dictionary}{Character vector of the alphabet (default = 20
standard amino acids).}

\item{padding.symbol}{Single character for right-padding (non-geometric modes).}

\item{summary.fun}{For property mode only: `"mean"` or `""` (none).}

\item{max.length}{Integer for truncation/padding. If `NULL` (default), the
longest sequence sets the maximum. Not used in geometric mode.}

\item{nthreads}{Number of threads for C++ backend. Not used in geometric mode.}

\item{verbose}{Logical. If `TRUE` (default), prints a progress message.}

\item{...}{Additional arguments passed to `sequenceEncoder()` when using 
wrapper functions (`onehotEncoder`, `propertyEncoder`, `geometricEncoder`).}
}
\value{
A named `list` containing the encoded data and metadata.
\describe{
  \item{`cube`}{3D numeric array. `NULL` in geometric mode.}
  \item{`flattened`}{2D numeric matrix. `NULL` in geometric mode.}
  \item{`summary`}{2D Numeric matrix containing sequence-level representations.
    This is the primary output for geometric mode.}
  \item{...}{Other metadata related to the encoding process.}
}
}
\description{
\code{sequenceEncoder()} is a high-level function that converts a character vector
of amino-acid sequences into one of three representations:
\enumerate{
  \item \strong{one-hot}: A binary representation for each amino acid position.
  \item \strong{property-based}: A numerical representation based on amino acid
    properties (e.g., atchleyFactors, kideraFactors).
  \item \strong{geometric}: A fixed-length 20-dimensional vector for each sequence,
    derived from a substitution matrix and geometric rotation.
}
}
\details{
The function acts as a wrapper for either the C++ backend (for one-hot and
property modes) or the R-based geometric transformation.
}
\section{Property Mode}{

If you supply `property.matrix` directly, it **must** be a numeric matrix
whose **rows correspond to the 20 canonical amino acids in the order of
`sequence.dictionary`** and whose columns are the property scales.
}

\section{Geometric Mode}{

This mode projects sequences into a 20D space. It calculates the average
vector for each sequence using a substitution matrix (e.g., "BLOSUM62")
and then applies a planar rotation to the resulting vector.
}

\examples{
aa <- c("CARDRST", "YYYGMD", "ACACACAC")

# One-hot encoding
enc_onehot <- sequenceEncoder(aa, 
                              mode = "onehot")

# Property-based encoding
enc_prop <- sequenceEncoder(aa, 
                            mode = "property", 
                            property.set = "atchleyFactors")

# Geometric encoding
enc_geo <- sequenceEncoder(aa, 
                           mode = "geometric", 
                           method = "BLOSUM62")

}
