% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/splsda.R
\name{splsda}
\alias{splsda}
\title{Sparse Partial Least Squares Discriminant Analysis (sPLS-DA)}
\usage{
splsda(
  X,
  Y,
  ncomp = 2,
  keepX,
  scale = TRUE,
  tol = 1e-06,
  max.iter = 100,
  near.zero.var = FALSE,
  logratio = "none",
  multilevel = NULL,
  all.outputs = TRUE
)
}
\arguments{
\item{X}{numeric matrix of predictors with the rows as individual
observations. Missing values (\code{NA}s) are allowed.}

\item{Y}{a factor or a class vector for the discrete outcome.}

\item{ncomp}{Positive Integer. The number of components to include in the
model. Default to 2.}

\item{keepX}{numeric vector of length \code{ncomp}, the number of variables
to keep in \eqn{X}-loadings. By default all variables are kept in the model.}

\item{scale}{Logical. If \code{scale = TRUE}, each block is standardized to
zero mean and unit variance (default: \code{TRUE}).}

\item{tol}{Positive numeric used as the convergence criterion (tolerance)
during the iterative process. Default to \code{1e-06}.}

\item{max.iter}{Integer, the maximum number of iterations. Default to 100.}

\item{near.zero.var}{Logical, see the internal \code{\link{nearZeroVar}}
function (should be set to TRUE in particular for data with many zero
values). Setting this argument to FALSE (when appropriate) will speed up the
computations. Default value is FALSE.}

\item{logratio}{Character, one of \code{'none'} or \code{'CLR'}. Specifies
the log ratio transformation to deal with compositional values that may
arise from specific normalisation in sequencing data. Default to \code{'none'}.
See \code{?logratio.transfo} for details.}

\item{multilevel}{sample information for multilevel decomposition for
repeated measurements. A numeric matrix or data frame indicating the
repeated measures on each individual, i.e. the individuals ID. See examples
in \code{?splsda}.}

\item{all.outputs}{Logical. Computation can be faster when some specific
(and non-essential) outputs are not calculated. Default = \code{TRUE}.}
}
\value{
\code{splsda} returns an object of class \code{"splsda"}, a list
that contains the following components:

\item{X}{the centered and standardized original predictor matrix.}
\item{Y}{the centered and standardized indicator response vector or matrix.}
\item{ind.mat}{the indicator matrix.} \item{ncomp}{the number of components
included in the model.} \item{keepX}{number of \eqn{X} variables kept in the
model on each component.} \item{variates}{list containing the variates.}
\item{loadings}{list containing the estimated loadings for the \code{X} and
\code{Y} variates.} \item{names}{list containing the names to be used for
individuals and variables.} \item{nzv}{list containing the zero- or
near-zero predictors information.} \item{tol}{the tolerance used in the
iterative algorithm, used for subsequent S3 methods.} \item{iter}{Number of
iterations of the algorithm for each component.} \item{max.iter}{the maximum
number of iterations, used for subsequent S3 methods.} \item{scale}{Logical
indicating whether the data were scaled in MINT S3 methods.}
\item{logratio}{whether logratio transformations were used for compositional
data} \item{prop_expl_var}{Proportion of variance explained per
component after setting possible missing values in the data to zero (note
that contrary to PCA, this amount may not decrease as the aim of the method
is not to maximise the variance, but the covariance between X and the
dummy matrix Y).} \item{mat.c}{matrix of coefficients from the regression of
X / residual matrices X on the X-variates, to be used internally by
\code{predict}.} \item{defl.matrix}{residual matrices X for each dimension.}
}
\description{
Function to perform sparse Partial Least Squares to classify samples
(supervised analysis) and select variables.
}
\details{
\code{splsda} function fits an sPLS model with \eqn{1, \ldots ,}\code{ncomp}
components to the factor or class vector \code{Y}. The appropriate indicator
(dummy) matrix is created. 

Logratio transformation and multilevel analysis are
performed sequentially as internal pre-processing steps, through
\code{\link{logratio.transfo}} and \code{\link{withinVariation}}
respectively. Logratio can only be applied if the data do not contain any 0 value (for
count data, we thus advise to normalise raw data with a 1 offset).

The type of deflation used is \code{'regression'} for discriminant algorithms,
i.e. no deflation is performed on Y.
}
\examples{
## First example
data(breast.tumors)
X <- breast.tumors$gene.exp
# Y will be transformed as a factor in the function,
# but we set it as a factor to set up the colors.
Y <- as.factor(breast.tumors$sample$treatment)

res <- splsda(X, Y, ncomp = 2, keepX = c(25, 25))


# individual names appear
plotIndiv(res, ind.names = Y, legend = TRUE, ellipse =TRUE)

\dontrun{
## Second example: one-factor analysis with sPLS-DA, selecting a subset of variables
# as in the paper Liquet et al.
#--------------------------------------------------------------
data(vac18)
X <- vac18$genes
Y <- vac18$stimulation
# sample indicates the repeated measurements
design <- data.frame(sample = vac18$sample)
Y = data.frame(stimul = vac18$stimulation)

# multilevel sPLS-DA model
res.1level <- splsda(X, Y = Y, ncomp = 3, multilevel = design,
keepX = c(30, 137, 123))

# set up colors for plotIndiv
col.stim <- c("darkblue", "purple", "green4","red3")
plotIndiv(res.1level, ind.names = Y, col = col.stim)

## Third example: two-factor analysis with sPLS-DA, selecting a subset of variables
# as in the paper Liquet et al.
#--------------------------------------------------------------

data(vac18.simulated) # simulated data

X <- vac18.simulated$genes
design <- data.frame(sample = vac18.simulated$sample)
Y = data.frame( stimu = vac18.simulated$stimulation,
time = vac18.simulated$time)

res.2level <- splsda(X, Y = Y, ncomp = 2, multilevel = design,
keepX = c(200, 200))

plotIndiv(res.2level, group = Y$stimu, ind.names = vac18.simulated$time,
legend = TRUE, style = 'lattice')



## Fourth example: with more than two classes
# ------------------------------------------------

data(liver.toxicity)
X <- as.matrix(liver.toxicity$gene)
# Y will be transformed as a factor in the function,
# but we set it as a factor to set up the colors.
Y <- as.factor(liver.toxicity$treatment[, 4])

splsda.liver <- splsda(X, Y, ncomp = 2, keepX = c(20, 20))

# individual name is set to the treatment
plotIndiv(splsda.liver, ind.names = Y, ellipse = TRUE, legend = TRUE)


## Fifth example: 16S data with multilevel decomposition and log ratio transformation
# ------------------------------------------------

data(diverse.16S)
splsda.16S = splsda(
X = diverse.16S$data.TSS,  # TSS normalised data
Y =  diverse.16S$bodysite,
multilevel = diverse.16S$sample, # multilevel decomposition
ncomp = 2,
keepX =  c(10, 150),
logratio= 'CLR')  # CLR log ratio transformation


plotIndiv(splsda.16S, ind.names = FALSE, pch = 16, ellipse = TRUE, legend = TRUE)
#OTUs selected at the family level
diverse.16S$taxonomy[selectVar(splsda.16S, comp = 1)$name,'Family']
}
}
\references{
On sPLS-DA: Lê Cao, K.-A., Boitard, S. and Besse, P. (2011).
Sparse PLS Discriminant Analysis: biologically relevant feature selection
and graphical displays for multiclass problems. \emph{BMC Bioinformatics}
\bold{12}:253. On log ratio transformations: Filzmoser, P., Hron, K.,
Reimann, C.: Principal component analysis for compositional data with
outliers. Environmetrics 20(6), 621-632 (2009). Lê Cao K.-A., Costello ME,
Lakis VA, Bartolo F, Chua XY, Brazeilles R, Rondeau P. MixMC: Multivariate
insights into Microbial Communities. PLoS ONE, 11(8): e0160169 (2016). On
multilevel decomposition: Westerhuis, J.A., van Velzen, E.J., Hoefsloot,
H.C., Smilde, A.K.: Multivariate paired data analysis: multilevel plsda
versus oplsda. Metabolomics 6(1), 119-128 (2010). Liquet, B., Lê Cao K.-A.,
Hocini, H., Thiebaut, R.: A novel approach for biomarker selection and the
integration of repeated measures experiments from two assays. BMC
bioinformatics 13(1), 325 (2012).
}
\seealso{
\code{\link{spls}}, \code{\link{summary}}, \code{\link{plotIndiv}},
\code{\link{plotVar}}, \code{\link{cim}}, \code{\link{network}},
\code{\link{predict}}, \code{\link{perf}}, \code{\link{mint.block.splsda}},
\code{\link{block.splsda}} and \url{http://www.mixOmics.org} for more details.
}
\author{
Florian Rohart, Ignacio González, Kim-Anh Lê Cao, Al J Abadi
}
\keyword{multivariate}
\keyword{regression}
