% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/packClust.R
\name{packClust}
\alias{packClust}
\title{Cluster Transposons with VSEARCH}
\usage{
packClust(
  packMatches,
  Genome,
  identity = 0.6,
  threads = 1,
  identityDefinition = 2,
  maxWildcards = 0.05,
  strand = "both",
  saveFolder = NULL,
  vSearchPath = "vsearch"
)
}
\arguments{
\item{packMatches}{A dataframe of potential Pack-TYPE transposable elements, 
in the format given by \code{\link{packSearch}}. This 
dataframe is in the format produced by coercing a 
\code{\link[GenomicRanges:GRanges-class]{GRanges}} 
object to a dataframe: \code{data.frame(GRanges)}. 
Will be saved as a FASTA file for VSEARCH.}

\item{Genome}{A DNAStringSet object containing sequences referred to 
in \code{packMatches} (the object originally used to 
predict the transposons using \code{\link{packSearch}}).}

\item{identity}{The sequence identity of two transposable elements in 
\code{packMatches} required to be grouped into a cluster.}

\item{threads}{The number of threads to be used by VSEARCH.}

\item{identityDefinition}{The pairwise identity definition used by VSEARCH. 
Defaults to 2, the standard VSEARCH definition.}

\item{maxWildcards}{The maximal allowable proportion of wildcards in the 
sequence of each match (defaults to \code{0.05}).}

\item{strand}{The strand direction to be clustered, given in VSEARCH 
notation (\code{"plus"} or \code{"both"}); defaults to \code{"both"}.}

\item{saveFolder}{The folder in which to save output files (uc, blast6out, FASTA).}

\item{vSearchPath}{When the package is run on Windows systems, the 
location of the VSEARCH executable file must be 
given; this should be left as default on 
Linux/MacOS systems.}
}
\value{
Saves cluster information, including a \code{uc} and 
\code{blast6out} file, to the specified location. Returns 
the given \code{packMatches} dataframe with an additional 
column, \code{cluster}, containing cluster IDs.
}
\description{
Cluster potential Pack-TYPE elements by sequence 
similarity. Resulting groups may be aligned with 
\code{\link{packAlign}}, or the clusters may be 
analysed with \code{\link{tirClust}}.
}
\note{
In order to cluster sequences using VSEARCH, the 
executable file must first be installed.
}
\examples{
# Load the example datasets: the genome to pass as the Genome argument,
# and the predicted transposons to be clustered
data(arabidopsisThalianaRefseq)
data(packMatches)

# packClust run on a Linux/MacOS system (vSearchPath left as default)
\dontrun{
    packClust(packMatches, arabidopsisThalianaRefseq)
}

# packClust run on a Windows system, where the location of the
# VSEARCH executable must be given explicitly
\dontrun{
    packClust(packMatches, arabidopsisThalianaRefseq, 
            vSearchPath = "path/to/vsearch/vsearch.exe")
}

}
\references{
VSEARCH may be downloaded from 
\url{https://github.com/torognes/vsearch}. See 
\url{https://www.ncbi.nlm.nih.gov/pubmed/27781170} 
for further information.
}
\seealso{
\code{\link{tirClust}}, \code{\link{packAlign}}, 
\code{\link{readBlast}}, \code{\link{readUc}},
\code{\link{filterWildcards}}, \code{\link{packSearch}}
}
\author{
Jack Gisby
}
