% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mainClustering.R
\name{mainClustering}
\alias{mainClustering}
\alias{mainClustering,character-method}
\alias{mainClustering,ClusterFunction-method}
\alias{getPostProcessingArgs,ClusterFunction-method}
\alias{getPostProcessingArgs}
\title{Cluster distance matrix from subsampling}
\usage{
\S4method{mainClustering}{character}(clusterFunction, ...)

\S4method{mainClustering}{ClusterFunction}(
  clusterFunction,
  inputMatrix,
  inputType,
  clusterArgs = NULL,
  minSize = 1,
  orderBy = c("size", "best"),
  format = c("vector", "list"),
  returnData = FALSE,
  warnings = TRUE,
  ...
)

\S4method{getPostProcessingArgs}{ClusterFunction}(clusterFunction)
}
\arguments{
\item{clusterFunction}{a \code{\link{ClusterFunction}} object that defines
the clustering routine. See \code{\link{ClusterFunction}} for required
format of user-defined clustering routines. User can also give a character
value to the argument \code{clusterFunction} to indicate the use of
clustering routines provided in package. Type
\code{\link{listBuiltInFunctions}} at command prompt to see the built-in
clustering routines. If \code{clusterFunction} is missing, the default is
set to "pam".}

\item{...}{arguments passed to the post-processing steps of the clustering.
The available post-processing arguments for a \code{ClusterFunction} object
depend on its algorithm type and can be found by calling
\code{getPostProcessingArgs}. See details below for documentation.}

\item{inputMatrix}{numerical matrix on which to run the clustering or a
\code{\link[SummarizedExperiment]{SummarizedExperiment}},
\code{\link{SingleCellExperiment}}, or \code{\link{ClusterExperiment}}
object.}

\item{inputType}{a character vector defining what type of input is given in
the \code{inputMatrix} argument. Must consist of values "diss", "X", or
"cat" (see details). "X" and "cat" indicate
matrices with features in the row and samples in the column; "cat"
corresponds to the features being numerical integers corresponding to
categories, while "X" are continuous valued features. "diss" corresponds to
an \code{inputMatrix} that is a NxN dissimilarity matrix. "cat" is largely
used internally for clustering of sets of clusterings.}

\item{clusterArgs}{arguments to be passed directly to the \code{clusterFUN}
slot of the \code{ClusterFunction} object}

\item{minSize}{the minimum number of samples in a cluster. Clusters found 
below this size will be discarded and samples in the cluster will be given 
a cluster assignment of "-1" to indicate that they were not clustered.}

\item{orderBy}{how to order the clusters (either by size or by maximum alpha
value). If orderBy="size" the numbering of the clusters is reordered by
the size of the cluster, instead of by the internal ordering of the
\code{clusterFUN} defined in the \code{ClusterFunction} object (an internal
ordering is only possible if slot \code{outputType} of the
\code{ClusterFunction} is \code{"list"}).}

\item{format}{whether to return a list of indices in a cluster or a vector of
clustering assignments. List is mainly for compatibility with sequential 
part.}

\item{returnData}{logical as to whether to return the \code{diss} or \code{x}
matrix in the output. If \code{FALSE} only the clustering vector is
returned.}

\item{warnings}{logical as to whether to give a warning if arguments are given
that don't match the clustering choices given. Otherwise, inapplicable
arguments will be ignored without warning.}
}
\value{
If \code{returnData=FALSE}, mainClustering returns a vector of cluster assignments (if
  format="vector") or a list of indices for each cluster (if format="list").
  Clusters smaller than minSize are removed (their samples are assigned "-1"). If \code{returnData=TRUE}, then mainClustering returns a list with the following elements:
\describe{
\item{results}{The clusterings of each sample.}
\item{inputMatrix}{The input matrix given to argument \code{inputMatrix}. Useful if input is result of subsampling, in which case input is the set of clusterings found over subsampling.}
}
}
\description{
Given input data, this function will try to find the clusters
  based on the given ClusterFunction object.
}
\details{
\code{mainClustering} is not meant to be called by the user. It is
  only an exported function so as to be able to clearly document the
  arguments for \code{mainClustering} which can be passed via the argument
  \code{mainClusterArgs} in functions like \code{\link{clusterSingle}} and
  \code{\link{clusterMany}}.

Post-processing Arguments: For post-processing the clustering,
  currently only type 'K' algorithms have defined post-processing
  arguments. Specifically:
\describe{
 \item{"findBestK"}{logical, whether to find the best K based on average
  silhouette width (only used if clusterFunction is of type "K").}
 \item{"kRange"}{vector of integers to try for k values if findBestK=TRUE. If
 \code{k} is given in \code{clusterArgs}, then default is k-2 to k+20,
 subject to those values being greater than 2; if not the default is
 \code{2:20}. Note that default values depend on the input k, so running for
 different choices of k and findBestK=TRUE can give different answers unless
 kRange is set to be the same.}
 \item{"removeSil"}{logical as to whether to remove the assignment of a sample
 to a cluster when the sample's silhouette value is less than
 \code{silCutoff}.}
 \item{"silCutoff"}{Cutoff on the minimum silhouette width to be included in
  cluster (only used if removeSil=TRUE).}
}
}
\examples{
# Load the simulated data set used throughout these examples.
data(simData)
# Cluster the data matrix directly (inputType "X": features in rows, samples
# in columns) with the built-in "pam" routine; k is handed to the clustering
# function through clusterArgs.
cl1<-mainClustering(inputMatrix=simData, inputType="X", 
    clusterFunction="pam",clusterArgs=list(k=3))
#supply a dissimilarity, use algorithm type "01"
# (simData is transposed so that dist() compares samples, i.e. its columns)
diss<-as.matrix(dist(t(simData),method="manhattan"))
cl2<-mainClustering(diss, inputType="diss", clusterFunction="hierarchical01",
    clusterArgs=list(alpha=.1))
# The same dissimilarity matrix can also be given to "pam" (inputType "diss").
cl3<-mainClustering(inputMatrix=diss, inputType="diss", clusterFunction="pam",
    clusterArgs=list(k=3))

# run hierarchical method for finding blocks, with method of evaluating
# coherence of block set to evalClusterMethod="average", and the hierarchical
# clustering using single linkage:
# (clustering function requires type 'diss'),
# minSize=5 discards clusters with fewer than 5 samples (those samples get -1).
clustSubHier <- mainClustering(diss, inputType="diss",
    clusterFunction="hierarchical01", minSize=5,
    clusterArgs=list(alpha=0.1,evalClusterMethod="average", method="single"))

#post-process results of pam -- must pass diss for silhouette calculation
# silCutoff and removeSil are post-processing arguments (see Details): samples
# whose silhouette value is below silCutoff lose their cluster assignment.
clustSubPamK <- mainClustering(simData, inputType="X", clusterFunction="pam", 
    silCutoff=0, minSize=5, diss=diss, removeSil=TRUE, clusterArgs=list(k=3))
# findBestK=TRUE chooses K by average silhouette width over the values in
# kRange, so no k is given in clusterArgs here.
clustSubPamBestK <- mainClustering(simData, inputType="X", clusterFunction="pam", silCutoff=0,
    minSize=5, diss=diss, removeSil=TRUE, findBestK=TRUE, kRange=2:10)

# note that passing the wrong arguments for an algorithm results in warnings
# (which can be turned off with warnings=FALSE)
# Here removeSil is a post-processing argument documented only for type 'K'
# algorithms, yet it is passed along with the "tight" routine.
clustSubTight_test <- mainClustering(diss, inputType="diss", 
   clusterFunction="tight", 
   clusterArgs=list(alpha=0.1), minSize=5, removeSil=TRUE)
# NOTE(review): presumably evalClusterMethod is not an argument of the "tight"
# routine (it is used with "hierarchical01" above) -- confirm against the
# ClusterFunction definition.
clustSubTight_test2 <- mainClustering(diss, inputType="diss",
   clusterFunction="tight",
   clusterArgs=list(alpha=0.1,evalClusterMethod="average"))
}
