% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/scudoClassify.R
\name{scudoClassify}
\alias{scudoClassify}
\title{Performs classification using SCUDO}
\usage{
scudoClassify(trainExpData, testExpData, N, nTop, nBottom,
    trainGroups, maxDist = 1, weighted = TRUE, complete = FALSE, beta = 1,
    alpha = 0.1, foldChange = TRUE, featureSel = TRUE, logTransformed = NULL,
    parametric = FALSE, pAdj = "none", distFun = NULL)
}
\arguments{
\item{trainExpData}{either an
\code{\link[Biobase:class.ExpressionSet]{ExpressionSet}}, a
\code{\link[SummarizedExperiment:SummarizedExperiment-class]{
SummarizedExperiment}}, a data.frame or a matrix of gene expression data,
with a column for each sample and a row for each feature}

\item{testExpData}{either an
\code{\link[Biobase:class.ExpressionSet]{ExpressionSet}}, a
\code{\link[SummarizedExperiment:SummarizedExperiment-class]{
SummarizedExperiment}}, a data.frame or a matrix of gene expression data,
with a column for each sample and a row for each feature}

\item{N}{a number between 0 and 1, representing the fraction of the
signature-to-signature distances that will be used to draw the graph}

\item{nTop}{number of up-regulated features to include in the signatures}

\item{nBottom}{number of down-regulated features to include in the
signatures}

\item{trainGroups}{factor containing group labels for each sample in
\code{trainExpData}}

\item{maxDist}{an integer. Only nodes with a distance from a testing node
less than or equal to \code{maxDist} are used to perform the classification}

\item{weighted}{logical, whether to consider the distances associated to the
edges to compute the scores for the classification. For a description of the
classification method, see Details below}

\item{complete}{logical, whether to consider all the nodes in the training
set to perform the classification. If TRUE, the arguments \code{N},
\code{maxDist}, \code{weighted} and \code{beta} are ignored. For a
description of the classification method, see Details below}

\item{beta}{a coefficient used to down-weight the influence of distant nodes
on the classification outcome. For a description of the
classification method, see Details below}

\item{alpha}{p-value cutoff for the optional feature selection step. If
feature selection is skipped, alpha is ignored}

\item{foldChange}{logical, whether or not to compute fold-changes from
expression data}

\item{featureSel}{logical, whether or not to perform a feature selection.
Feature selection is performed using one of four tests: Student's t-test,
ANOVA, Wilcoxon-Mann-Whitney test, or Kruskal-Wallis test. The test
used depends on the number of groups and the \code{parametric} argument}

\item{logTransformed}{logical or NULL. It indicates whether the data is
log-transformed. If NULL, an attempt is made to guess if the data is
log-transformed}

\item{parametric}{logical, whether to use a parametric or a non-parametric
test for the feature selection}

\item{pAdj}{method used to adjust the p-values in the feature selection
step. See \code{\link[stats:p.adjust]{p.adjust.methods}} for a list of
adjustment methods}

\item{distFun}{the function used to compute the distance between two
samples. See Details of \code{\link{scudoTrain}} for the specification of
this function}
}
\value{
A \code{list} containing a factor with the predicted class for
each sample in \code{testExpData} and a data.frame of the classification
scores used to generate the predictions.
}
\description{
Performs supervised classification of samples in a testing set using a
network of samples generated by SCUDO during a training step.
}
\details{
This function performs supervised classification of samples in a testing set,
using networks similar to the one generated by \code{\link{scudoTrain}}
and \code{\link{scudoNetwork}} as a model.

For each sample S in the testing set, a new distance matrix is computed using
the expression profiles in the training set and the expression profile of S.
The distance matrix is computed as described in the Details of
\code{\link{scudoTrain}}.

If the argument \code{complete} is \code{TRUE}, the distance matrix is
converted into a similarity score matrix. Then, the similarity scores between S
and all the samples in the training set are aggregated according to groups.
The mean similarity scores are computed for each group and classification
scores are generated dividing them by their sum, obtaining values between 0
and 1.

If the argument \code{complete} is \code{FALSE}, the distance matrix obtained
from S and the training set is used to generate a network of samples, using
the parameter \code{N} as a threshold for edge selection (see Details of
\code{\link{scudoNetwork}} for a more complete description). Then the
neighbors of S in the network are explored, up to a distance controlled by
the parameter \code{maxDist}. If the \code{weighted} parameter is
\code{FALSE}, the classification scores for each group are computed as the
number of edges connecting S or one of its neighbors to a node of that group.
The scores are then rescaled dividing them by their sum, in order to obtain
values between 0 and 1. If the \code{weighted} parameter is \code{TRUE}, the
classification scores for each group are computed as the sum of the
similarity scores associated to edges connecting S or one of its neighbors to
nodes of that group. The scores are then rescaled dividing them by their sum,
in order to obtain values between 0 and 1. The parameter \code{beta} can be
used to down-weight the contribution to the classification scores of edges
connecting nodes distant from S, both in the weighted and unweighted cases.

The predicted group for each sample is the one with the largest
classification score. Both predictions and classification scores are
returned. Note that if the argument \code{complete} is \code{FALSE}, the
classification scores for a sample may be all zero, which happens when the
corresponding node is isolated in the network of samples. In this case the
predicted group is \code{NA}.
The tuning of the parameters can be performed automatically using the
\code{\link[caret]{train}} function from the package \code{caret} and the
function \code{\link{scudoModel}}.
}
\examples{
expData <- data.frame(a = 1:10, b = 2:11, c = 10:1, d = 11:2,
    e = c(1:4, 10:5), f = c(7:10, 6:1), g = c(8:4, 1:3, 10, 9),
    h = c(6:10, 5:1), i = c(5:1, 6:10))
rownames(expData) <- letters[1:10]
groups <- factor(c(1,1,1,2,2,2,1,1,1))
inTrain <- 1:5

# perform classification
res <- scudoClassify(expData[, inTrain], expData[, -inTrain], 0.9, 3, 3,
    groups[inTrain], featureSel = FALSE)

#explore predictions
predictions <- res$predicted
scores <- res$scores

}
\seealso{
\code{\link{scudoTrain}}, \code{\link{scudoModel}}
}
\author{
Matteo Ciciani \email{matteo.ciciani@gmail.com}, Thomas Cantore
\email{cantorethomas@gmail.com}
}
