% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CompDbSource.R, R/matchSpectra.R
\name{matchSpectra,Spectra,CompDbSource,Param-method}
\alias{matchSpectra,Spectra,CompDbSource,Param-method}
\alias{CompareSpectraParam}
\alias{CompareSpectraParam-class}
\alias{MatchForwardReverseParam-class}
\alias{MatchForwardReverseParam}
\alias{matchSpectra,Spectra,Spectra,CompareSpectraParam-method}
\alias{matchSpectra,Spectra,CompDb,Param-method}
\alias{matchSpectra,Spectra,Spectra,MatchForwardReverseParam-method}
\title{Matching MS Spectra against a reference}
\usage{
\S4method{matchSpectra}{Spectra,CompDbSource,Param}(
  query,
  target,
  param,
  BPPARAM = BiocParallel::SerialParam(),
  addOriginalQueryIndex = TRUE
)

CompareSpectraParam(
  MAPFUN = joinPeaks,
  tolerance = 0,
  ppm = 5,
  FUN = MsCoreUtils::ndotproduct,
  requirePrecursor = TRUE,
  requirePrecursorPeak = FALSE,
  THRESHFUN = function(x) which(x >= 0.7),
  toleranceRt = Inf,
  percentRt = 0,
  matchedPeaksCount = FALSE,
  ...
)

MatchForwardReverseParam(
  MAPFUN = joinPeaks,
  tolerance = 0,
  ppm = 5,
  FUN = MsCoreUtils::ndotproduct,
  requirePrecursor = TRUE,
  requirePrecursorPeak = FALSE,
  THRESHFUN = function(x) which(x >= 0.7),
  THRESHFUN_REVERSE = NULL,
  toleranceRt = Inf,
  percentRt = 0,
  ...
)

\S4method{matchSpectra}{Spectra,Spectra,CompareSpectraParam}(
  query,
  target,
  param,
  rtColname = c("rtime", "rtime"),
  BPPARAM = BiocParallel::SerialParam(),
  addOriginalQueryIndex = TRUE
)

\S4method{matchSpectra}{Spectra,CompDb,Param}(
  query,
  target,
  param,
  rtColname = c("rtime", "rtime"),
  BPPARAM = BiocParallel::SerialParam(),
  addOriginalQueryIndex = TRUE
)

\S4method{matchSpectra}{Spectra,Spectra,MatchForwardReverseParam}(
  query,
  target,
  param,
  rtColname = c("rtime", "rtime"),
  BPPARAM = BiocParallel::SerialParam(),
  addOriginalQueryIndex = TRUE
)
}
\arguments{
\item{query}{for \code{matchSpectra}: \link[Spectra:Spectra]{Spectra::Spectra} object with the query
spectra.}

\item{target}{for \code{matchSpectra}: \link[Spectra:Spectra]{Spectra::Spectra}, \link[CompoundDb:CompDb]{CompoundDb::CompDb}
or object extending \link{CompAnnotationSource} (such as \link{CompDbSource}) with
the target (reference) spectra to compare \code{query} against.}

\item{param}{for \code{matchSpectra}: parameter object (such as
\code{CompareSpectraParam}) defining the settings for the matching.}

\item{BPPARAM}{for \code{matchSpectra}: parallel processing setup (see the
\code{BiocParallel} package for more information). Parallel processing is
disabled by default (with the default setting \code{BPPARAM = SerialParam()}).}

\item{addOriginalQueryIndex}{for \code{matchSpectra()}: \code{logical(1)} whether an
additional spectra variable \code{".original_query_index"} should be added to
the \code{query} \code{Spectra} object providing the index of each spectrum in the
originally provided object. This spectra variable can be useful to link
back to the original \code{Spectra} object if the \code{MatchedSpectra} object gets
subsetted/processed.}

\item{MAPFUN}{\code{function} used to map peaks between the compared spectra.
Defaults for \code{CompareSpectraParam} to \code{\link[Spectra:joinPeaks]{Spectra::joinPeaks()}}. See
\code{\link[Spectra:compareSpectra]{Spectra::compareSpectra()}} for details.}

\item{tolerance}{\code{numeric(1)} for an absolute maximal accepted difference
between m/z values. This will be used in \code{compareSpectra} as well as for
eventual precursor m/z matching.}

\item{ppm}{\code{numeric(1)} for a relative, m/z-dependent, maximal accepted
difference between m/z values. This will be used in \code{compareSpectra} as
well as for eventual precursor m/z matching.}

\item{FUN}{\code{function} used to calculate similarity between spectra. Defaults
for \code{CompareSpectraParam} to \code{\link[MsCoreUtils:distance]{MsCoreUtils::ndotproduct()}}. See
\code{\link[MsCoreUtils:distance]{MsCoreUtils::ndotproduct()}} for details.}

\item{requirePrecursor}{\code{logical(1)} whether only target spectra with a
precursor m/z matching the precursor m/z of the query spectrum (considering
also \code{ppm} and \code{tolerance}) are considered in the similarity
calculation. With \code{requirePrecursor = TRUE} (the default) the function
will complete much faster, but will not find any hits for target (or query)
spectra with missing precursor m/z. It is suggested to check first the
availability of the precursor m/z in \code{target} and \code{query}.}

\item{requirePrecursorPeak}{\code{logical(1)} whether only target spectra will be
considered in the spectra similarity calculation that have a peak with an
m/z matching the precursor m/z of the query spectrum. Defaults to
\code{requirePrecursorPeak = FALSE}. It is suggested to check first the
availability of the precursor m/z in \code{query}, as no match will be reported
for query spectra with missing precursor m/z.}

\item{THRESHFUN}{\code{function} applied to the similarity score to define which
target spectra are considered \emph{matching}. Defaults to
\code{THRESHFUN = function(x) which(x >= 0.7)}, hence selecting
all target spectra matching a query spectrum with a similarity higher than or
equal to \code{0.7}. Any function that takes a numeric vector with similarity
scores from the comparison of a query spectrum with all target spectra (as
returned by \code{\link[Spectra:compareSpectra]{Spectra::compareSpectra()}}) as input and returns a
\code{logical} vector (of the same length as the similarity scores) or an
integer vector with the indices of the matches is supported.}

\item{toleranceRt}{\code{numeric} of length 1 or equal to the number of query
spectra defining the maximal accepted (absolute) difference in retention
time between query and target spectra. By default
(with \code{toleranceRt = Inf}) the retention time-based filter is not
considered. See help of \code{CompareSpectraParam} above for more
information.}

\item{percentRt}{\code{numeric} of length 1 or equal to the number of query
spectra defining the maximal accepted relative difference in retention
time between query and target spectra expressed in percentage of the query
rt. For \code{percentRt = 10}, similarities are defined between the query
spectrum and all target spectra with a retention time within query rt
+/- 10\% of the query. By default (with \code{toleranceRt = Inf}) the retention
time-based filter is not considered. Thus, to consider the \code{percentRt}
parameter, \code{toleranceRt} should be set to a value different from \code{Inf}.
See help of \code{CompareSpectraParam} above for more information.}

\item{matchedPeaksCount}{\code{logical(1)} for \code{CompareSpectraParam()}: whether
also the number of matching peaks should be reported (in column
\code{"matched_peaks_count"}). This number represents the number of peaks
reported \emph{matching} by the \code{MAPFUN}.}

\item{...}{for \code{CompareSpectraParam}: additional parameters passed along
to the \code{\link[Spectra:compareSpectra]{Spectra::compareSpectra()}} call, including eventual additional
parameters of the selected mapping or similarity calculation functions.}

\item{THRESHFUN_REVERSE}{for \code{MatchForwardReverseParam}: optional additional
\emph{thresholding function} to filter the results on the reverse score. If
specified, the same format as for \code{THRESHFUN} is expected.}

\item{rtColname}{\code{character(2)} with the name of the spectra variable
containing the retention time information for compounds to be used in
retention time matching (only used if \code{toleranceRt} is not \code{Inf}).
It can also be \code{character(1)} if the two names are the same.
Defaults to \code{rtColname = c("rtime", "rtime")}.}
}
\value{
\code{matchSpectra} returns a \code{\link[=MatchedSpectra]{MatchedSpectra()}} object with the matching
results. If \code{target} is a \code{CompAnnotationSource} only matching target
spectra will be reported.

Constructor functions return an instance of the class.
}
\description{
\code{matchSpectra} compares experimental (\emph{query}) MS2 spectra against
reference (\emph{target}) MS2 spectra and reports matches with a similarity that
passes a specified threshold. The function performs the similarity
calculation between each query spectrum and each target spectrum.
Parameters \code{query} and \code{target} can be used to define the query and target
spectra, respectively, while parameter \code{param} allows to define and configure
the similarity calculation and matching condition. Parameter \code{query} takes
a \link[Spectra:Spectra]{Spectra::Spectra} object while \code{target} can be either a
\link[Spectra:Spectra]{Spectra::Spectra} object, a \link[CompoundDb:CompDb]{CompoundDb::CompDb} (reference library)
object defined in the \code{CompoundDb} package or
a \link{CompAnnotationSource} (e.g. a \code{\link[=CompDbSource]{CompDbSource()}})
with the reference or connection information to a supported annotation
resource.

Some notes on performance and information on parallel processing are
provided in the vignette.

Currently supported parameter objects defining the matching are:
\itemize{
\item \code{CompareSpectraParam}: the \emph{generic} parameter object allowing to set all
settings for the \code{\link[Spectra:compareSpectra]{Spectra::compareSpectra()}} call that is used to
perform the similarity calculation.
This includes \code{MAPFUN} and \code{FUN} defining the peak-mapping and similarity
calculation functions and \code{ppm} and \code{tolerance} to define an acceptable
difference between m/z values of the compared peaks. Parameter
\code{matchedPeaksCount} is also passed to \code{compareSpectra()} and, if set to
\code{TRUE} (default is \code{FALSE}) will report the number of peaks defined to be
\emph{matching} by the \code{MAPFUN}.
Additional parameters to the \code{compareSpectra} call can be passed along
with \code{...}. See the help of \code{\link[Spectra:Spectra]{Spectra::Spectra()}} for more information on
these parameters. Importantly, if \emph{msentropy} or a GNPS-like similarity
calculation is used, \code{MAPFUN} should be selected accordingly (see section
\emph{Using alternative spectra similarity functions} in the package vignette
for more information).
By default, parameters \code{ppm} and \code{tolerance} are passed to the similarity
calculation function, but if this function uses different parameters
(e.g., \code{msentropy_similarity()} uses \code{ms2_tolerance_in_ppm} instead of
\code{ppm}), these should be submitted to the \code{CompareSpectraParam()} function
through the \code{...} parameter.
Parameters \code{requirePrecursor} (default \code{TRUE}) and \code{requirePrecursorPeak}
(default \code{FALSE}) allow to pre-filter the target spectra prior to the
actual similarity calculation for each individual query spectrum.
Parameters \code{ppm} and \code{tolerance} are also used to define the maximal
acceptable difference in precursor m/z if \code{requirePrecursor} or
\code{requirePrecursorPeak} are set to \code{TRUE}.
Target spectra can also be pre-filtered based on
retention time if parameter \code{toleranceRt} is set to a value different from
the default \code{toleranceRt = Inf}. Only target spectra with a retention time
within the query's retention time +/- (\code{toleranceRt} + \code{percentRt}\% of the
query's retention time) are considered. Note that while for \code{ppm} and
\code{tolerance} only a single value is accepted, \code{toleranceRt} and \code{percentRt}
can be also of length equal to the number of query spectra hence allowing
to define different rt boundaries for each query spectrum.
While these pre-filters can considerably improve performance, it should be
noted that no matches will be found between query and target spectra with
missing values in the considered variable (precursor m/z or retention
time). For target spectra without retention times (such as for \code{Spectra}
from a public reference database such as MassBank) the default
\code{toleranceRt = Inf} should thus be used.
Finally, parameter \code{THRESHFUN} allows to define a function to be applied to
the similarity scores to define which matches to report. See below for more
details.
\item \code{MatchForwardReverseParam}: performs spectra matching as with
\code{CompareSpectraParam} but reports, similar to MS-DIAL, also the \emph{reverse}
similarity score and the \emph{presence ratio}. Please refer to the
documentation of \code{CompareSpectraParam} for explanation of the parameters.
With \code{MatchForwardReverseParam}, the matching of query
spectra to target spectra is performed by considering all peaks from the
query and all peaks from the target (reference) spectrum (i.e. \emph{forward}
matching using an \emph{outer join}-based peak matching strategy). For matching
spectra also the \emph{reverse} similarity is calculated considering only peaks
present in the target (reference) spectrum (i.e. using a \emph{right join}-based
peak matching). This is reported as spectra variable \code{"reverse_score"}.
In addition, the ratio between the number of matched peaks and the total
number of peaks in the target (reference) spectra is reported as the
\emph{presence ratio} (spectra variable \code{"presence_ratio"}) and the total
number of matched peaks as \code{"matched_peaks_count"}. See examples below
for details. Parameter \code{THRESHFUN_REVERSE} allows to define an additional
\emph{threshold function} to filter matches. If \code{THRESHFUN_REVERSE} is defined
only matches with a spectra similarity fulfilling both \code{THRESHFUN} \strong{and}
\code{THRESHFUN_REVERSE} are returned. With the default
\code{THRESHFUN_REVERSE = NULL} all matches passing \code{THRESHFUN} are reported.
}
}
\examples{

library(Spectra)
library(msdata)
fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", package = "msdata")
pest_ms2 <- filterMsLevel(Spectra(fl), 2L)

## subset to selected spectra.
pest_ms2 <- pest_ms2[c(808, 809, 945:955)]

## Load a small example MassBank data set
load(system.file("extdata", "minimb.RData", package = "MetaboAnnotation"))

## Match spectra with the default similarity score (normalized dot product)
csp <- CompareSpectraParam(requirePrecursor = TRUE, ppm = 10)
mtches <- matchSpectra(pest_ms2, minimb, csp)

mtches

## Are there any matching spectra for the first query spectrum?
mtches[1]
## No

## And for the second query spectrum?
mtches[2]
## The second query spectrum matches 4 target spectra. The scores for these
## matches are:
mtches[2]$score

## To access the score for the full data set
mtches$score

## Below we use a THRESHFUN that returns for each query spectrum the (first)
## best matching target spectrum.
csp <- CompareSpectraParam(requirePrecursor = FALSE, ppm = 10,
    THRESHFUN = function(x) which.max(x))
mtches <- matchSpectra(pest_ms2, minimb, csp)
mtches

## Each of the query spectra is matched to one target spectrum
length(mtches)
matches(mtches)

## Match spectra considering also measured retention times. This requires
## that both query and target spectra have non-missing retention times.
rtime(pest_ms2)
rtime(minimb)

## Target spectra don't have retention times. Below we artificially set
## retention times to show how an additional retention time filter would
## work.
rtime(minimb) <- rep(361, length(minimb))

## Matching spectra requiring a matching precursor m/z and the difference
## of retention times between query and target spectra to be <= 2 seconds.
csp <- CompareSpectraParam(requirePrecursor = TRUE, ppm = 10,
    toleranceRt = 2)
mtches <- matchSpectra(pest_ms2, minimb, csp)
mtches
matches(mtches)

## Note that parameter `rtColname` can be used to define different spectra
## variables with retention time information (such as retention indices etc).

## A `CompDb` compound annotation database could also be used with
## parameter `target`. Below we load the test `CompDb` database from the
## `CompoundDb` Bioconductor package.
library(CompoundDb)
fl <- system.file("sql", "CompDb.MassBank.sql", package = "CompoundDb")
cdb <- CompDb(fl)
res <- matchSpectra(pest_ms2, cdb, CompareSpectraParam())

## However, we do not find any matches since the compound annotation
## database used contains only a very small subset of MassBank.
res

## As `target` we have now however the MS2 spectra data from the compound
## annotation database
target(res)

## See the package vignette for details, descriptions and more examples,
## also on how to retrieve e.g. MassBank reference databases from
## Bioconductor's AnnotationHub.
}
\author{
Johannes Rainer, Michael Witting
}
