% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/XcmsExperiment.R,
%   R/functions-Params.R, R/methods-XCMSnExp.R
\name{refineChromPeaks}
\alias{refineChromPeaks}
\alias{FilterIntensityParam-class}
\alias{show,FilterIntensityParam-method}
\alias{CleanPeaksParam-class}
\alias{show,CleanPeaksParam-method}
\alias{MergeNeighboringPeaksParam-class}
\alias{show,MergeNeighboringPeaksParam-method}
\alias{refineChromPeaks,XcmsExperiment,CleanPeaksParam-method}
\alias{refineChromPeaks,XcmsExperiment,MergeNeighboringPeaksParam-method}
\alias{refineChromPeaks,XcmsExperiment,FilterIntensityParam-method}
\alias{CleanPeaksParam}
\alias{MergeNeighboringPeaksParam}
\alias{FilterIntensityParam}
\alias{refineChromPeaks,XCMSnExp,CleanPeaksParam-method}
\alias{refineChromPeaks,XCMSnExp,MergeNeighboringPeaksParam-method}
\alias{refineChromPeaks,XCMSnExp,FilterIntensityParam-method}
\title{Refine Identified Chromatographic Peaks}
\usage{
refineChromPeaks(object, param, ...)

\S4method{refineChromPeaks}{XcmsExperiment,CleanPeaksParam}(object, param = CleanPeaksParam(), msLevel = 1L)

\S4method{refineChromPeaks}{XcmsExperiment,MergeNeighboringPeaksParam}(
  object,
  param,
  msLevel = 1L,
  chunkSize = 2L,
  BPPARAM = bpparam()
)

\S4method{refineChromPeaks}{XcmsExperiment,FilterIntensityParam}(
  object,
  param,
  msLevel = 1L,
  chunkSize = 2L,
  BPPARAM = bpparam()
)

CleanPeaksParam(maxPeakwidth = 10)

MergeNeighboringPeaksParam(
  expandRt = 2,
  expandMz = 0,
  ppm = 10,
  minProp = 0.75
)

FilterIntensityParam(threshold = 0, nValues = 1L, value = "maxo")

\S4method{refineChromPeaks}{XCMSnExp,CleanPeaksParam}(object, param = CleanPeaksParam(), msLevel = 1L)

\S4method{refineChromPeaks}{XCMSnExp,MergeNeighboringPeaksParam}(
  object,
  param = MergeNeighboringPeaksParam(),
  msLevel = 1L,
  BPPARAM = bpparam()
)

\S4method{refineChromPeaks}{XCMSnExp,FilterIntensityParam}(
  object,
  param = FilterIntensityParam(),
  msLevel = 1L,
  BPPARAM = bpparam()
)
}
\arguments{
\item{object}{\link{XCMSnExp} or \link{XcmsExperiment} object with identified
chromatographic peaks.}

\item{param}{Object defining the refinement method and its settings.}

\item{...}{ignored.}

\item{msLevel}{\code{integer} defining for which MS level(s) the chromatographic
peaks should be refined.}

\item{chunkSize}{For \code{refineChromPeaks} if \code{object} is an
\code{XcmsExperiment}: \code{integer(1)} defining the number of files (samples)
that should be loaded into memory and processed at the same time.
Peak refinement is then performed in parallel (per sample) on this subset
of data. This setting thus allows balancing memory
demand and speed (due to parallel processing). Because parallel
processing can only be performed on the subset of data currently loaded
into memory in each iteration, the value for \code{chunkSize} should match
the defined parallel processing setup. A parallel processing setup
with 4 CPUs (separate processes) but with \code{chunkSize = 1} will not
perform any parallel processing, as only the data from one sample is loaded
into memory at a time. On the other hand, setting \code{chunkSize} to
the total number of samples in an experiment will load the full MS data
into memory and will thus in most settings cause an out-of-memory error.}

\item{BPPARAM}{parameter object to set up parallel processing. Uses the
default parallel processing setup returned by \code{bpparam()}. See
\code{\link[BiocParallel:register]{BiocParallel::bpparam()}} for details and examples.}

\item{maxPeakwidth}{For \code{CleanPeaksParam}: \code{numeric(1)} defining the maximal
allowed peak width (in retention time).}

\item{expandRt}{For \code{MergeNeighboringPeaksParam}: \code{numeric(1)} defining by
how many seconds the retention time window is expanded on both sides to
check for overlapping peaks.}

\item{expandMz}{For \code{MergeNeighboringPeaksParam}: \code{numeric(1)} constant
value by which the m/z range of each chromatographic peak is expanded
(on both sides!) to check for overlapping peaks.}

\item{ppm}{For \code{MergeNeighboringPeaksParam}: \code{numeric(1)} defining a m/z
relative value (in parts per million) by which the m/z range of each
chromatographic peak is expanded (on each side) to check for overlapping
peaks.}

\item{minProp}{For \code{MergeNeighboringPeaksParam}: \code{numeric(1)} between \code{0}
and \code{1} representing the proportion of intensity required for peaks to be
joined. See section \emph{Details for MergeNeighboringPeaksParam} for more
details. With the default (\code{minProp = 0.75}) peaks are only joined if the
signal half way between them is larger than 75\% of the smaller of the two
peaks' \code{"maxo"} (maximal intensity at peak apex).}

\item{threshold}{For \code{FilterIntensityParam}: \code{numeric(1)} defining the
threshold below which peaks are removed.}

\item{nValues}{For \code{FilterIntensityParam}: \code{integer(1)} defining the number
of data points (for each chromatographic peak) that have to be
\verb{>= threshold}. Defaults to \code{nValues = 1}.}

\item{value}{For \code{FilterIntensityParam}: \code{character(1)} defining the name
of the column in \code{chromPeaks} that contains the values to be used for
the filtering.}
}
\value{
\link{XCMSnExp} or \link{XcmsExperiment} object with the refined
chromatographic peaks.
}
\description{
The \code{refineChromPeaks} method performs a post-processing of the
chromatographic peak detection step to clean and improve the
results. The function can be applied to a \code{\link[=XcmsExperiment]{XcmsExperiment()}} or \code{\link[=XCMSnExp]{XCMSnExp()}}
object \strong{after} peak detection with \code{\link[=findChromPeaks]{findChromPeaks()}}. The type of peak
refinement and cleaning can be defined, along with all its settings, using
one of the following parameter objects:
\itemize{
\item \code{CleanPeaksParam}: remove chromatographic peaks with a retention time
range larger than the provided maximal acceptable width (\code{maxPeakwidth}).
\item \code{FilterIntensityParam}: remove chromatographic peaks with intensities
below the specified threshold. By default (with \code{nValues = 1}) values in
the \code{chromPeaks} matrix are evaluated: all peaks for which the value in the
column defined with parameter \code{value} is \code{>=} the threshold (defined
with parameter \code{threshold}) are retained. If \code{nValues} is larger than 1,
the individual peak intensities from the raw MS files are evaluated:
chromatographic peaks with at least \code{nValues} mass peaks \verb{>= threshold}
are retained.
\item \code{MergeNeighboringPeaksParam}: peak detection sometimes fails to identify a
chromatographic peak correctly, especially for broad peaks and if the peak
shape is irregular (mostly for HILIC data). In such cases several smaller
peaks are reported. Also, peak detection with \emph{centWave} can result in
partially or completely overlapping peaks. This method aims to reduce
such peak detection artifacts by merging chromatographic peaks that are
overlapping or close in RT and m/z dimension (considering also the measured
signal between them). See section \emph{Details for MergeNeighboringPeaksParam}
for details and a comprehensive description of the approach.
}

\code{refineChromPeaks} methods will always remove feature definitions, because
a call to this method can change or remove identified chromatographic peaks,
which may be part of features.
}
\section{Details for MergeNeighboringPeaksParam}{


For peak refinement using the \code{MergeNeighboringPeaksParam}, chromatographic
peaks are first expanded in m/z and retention time dimension (based on
parameters \code{expandMz}, \code{ppm} and \code{expandRt}) and subsequently grouped into
sets of merge candidates if they are (after expansion) overlapping in both
m/z and rt (within the \strong{same} sample). Note that \strong{each} peak gets
expanded by \code{expandRt} and \code{expandMz}, thus peaks differing by less than
\code{2 * expandMz} (or \code{2 * expandRt}) will be evaluated for merging.
Peak merging is performed along the retention time axis, i.e., the peaks are
first ordered by their \code{"rtmin"} and merge candidates are defined iteratively
starting with the first peak.
Candidate peaks are merged if the
average intensity of the 3 data points in the middle position between them
(i.e., at half the distance between \code{"rtmax"} of the first and \code{"rtmin"} of
the second peak) is larger than a certain proportion (\code{minProp}) of the
smaller (\code{"maxo"}) intensity of both peaks. In cases in which this calculated
mid point is not located between the apexes of the two peaks (e.g., if the
peaks are largely overlapping) the average signal intensity at half way
between the apexes is used instead. Candidate peaks are not merged if all 3
data points between them have \code{NA} intensities.

Merged peaks get the \code{"mz"}, \code{"rt"}, \code{"sn"} and \code{"maxo"} values from the
peak with the largest signal (\code{"maxo"}) as well as its row in the metadata
of the peak (\code{chromPeakData}). The \code{"rtmin"} and \code{"rtmax"} of the merged
peaks are updated and \code{"into"} is recalculated based on all signal between
\code{"rtmin"} and \code{"rtmax"} and the newly defined \code{"mzmin"} and \code{"mzmax"} (which
is the range of \code{"mzmin"} and \code{"mzmax"} of the merged peaks after expanding
by \code{expandMz} and \code{ppm}). The reported \code{"mzmin"} and \code{"mzmax"} for the
merged peak represent the m/z range of all non-NA intensities used for the
calculation of the peak signal (\code{"into"}).
}

\examples{

## Load a test data set with detected peaks
library(xcms)
library(MsExperiment)
faahko_sub <- loadXcmsData("faahko_sub2")

## Disable parallel processing for this example
register(SerialParam())

####
## CleanPeaksParam:

## Distribution of chromatographic peak widths (rtmax - rtmin) before
## the refinement
quantile(chromPeaks(faahko_sub)[, "rtmax"] - chromPeaks(faahko_sub)[, "rtmin"])

## Remove all chromatographic peaks with a width larger than 60 seconds
data <- refineChromPeaks(faahko_sub, param = CleanPeaksParam(60))

## Distribution of chromatographic peak widths after the refinement
quantile(chromPeaks(data)[, "rtmax"] - chromPeaks(data)[, "rtmin"])

####
## FilterIntensityParam:

## Remove all peaks with a maximal intensity below 50000. With the default
## settings (value = "maxo", nValues = 1) the "maxo" column of the
## chromPeaks matrix is compared against the threshold.
res <- refineChromPeaks(faahko_sub,
    param = FilterIntensityParam(threshold = 50000))

## Number of chromatographic peaks before and after the filtering
nrow(chromPeaks(faahko_sub))
nrow(chromPeaks(res))

####
## MergeNeighboringPeaksParam:

## Subset to a single file
xd <- filterFile(faahko_sub, file = 1)

## Example of a split peak that will be merged: extract and plot the
## chromatogram for the m/z range of that peak
mzr <- 305.1 + c(-0.01, 0.01)
chr <- chromatogram(xd, mz = mzr, rt = c(2700, 3700))
plot(chr)

## Merge neighboring peaks; expandRt = 4 expands the retention time window
## of each peak by 4 seconds on both sides to check for overlapping peaks
res <- refineChromPeaks(xd, param = MergeNeighboringPeaksParam(expandRt = 4))
chr_res <- chromatogram(res, mz = mzr, rt = c(2700, 3700))
plot(chr_res)

## Example of peaks that were not merged, because the signal between them
## is lower than the cut-off minProp
mzr <- 496.2 + c(-0.01, 0.01)
chr <- chromatogram(xd, mz = mzr, rt = c(3200, 3500))
plot(chr)
chr_res <- chromatogram(res, mz = mzr, rt = c(3200, 3500))
plot(chr_res)
}
\author{
Johannes Rainer, Mar Garcia-Aloy
}
\concept{chromatographic peak refinement methods}
