% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MsExperiment-functions.R, R/MsExperiment.R
\docType{class}
\name{experimentFiles}
\alias{experimentFiles}
\alias{experimentFiles<-}
\alias{sampleData}
\alias{sampleData<-}
\alias{qdata}
\alias{qdata<-}
\alias{spectraSampleIndex}
\alias{MsExperiment}
\alias{MsExperiment-class}
\alias{show,MsExperiment-method}
\alias{length,MsExperiment-method}
\alias{spectra,MsExperiment-method}
\alias{spectra<-,MsExperiment-method}
\alias{otherData}
\alias{otherData<-}
\alias{linkSampleData}
\alias{[,MsExperiment,ANY,ANY,ANY-method}
\alias{filterSpectra,MsExperiment,function-method}
\title{Managing Mass Spectrometry Experiments}
\usage{
experimentFiles(object)

experimentFiles(object) <- value

sampleData(object)

sampleData(object) <- value

qdata(object)

qdata(object) <- value

spectraSampleIndex(x, duplicates = c("first", "keep"))

MsExperiment(
  experimentFiles = MsExperimentFiles(),
  otherData = List(),
  qdata = NULL,
  sampleData = DataFrame(),
  spectra = NULL
)

\S4method{show}{MsExperiment}(object)

\S4method{length}{MsExperiment}(x)

\S4method{spectra}{MsExperiment}(object)

\S4method{spectra}{MsExperiment}(object) <- value

otherData(object)

otherData(object) <- value

linkSampleData(
  object,
  with = character(),
  sampleIndex = seq_len(nrow(sampleData(object))),
  withIndex = integer(),
  subsetBy = 1L
)

\S4method{[}{MsExperiment,ANY,ANY,ANY}(x, i, j, ..., drop = FALSE)

\S4method{filterSpectra}{MsExperiment,function}(object, filter, ...)
}
\arguments{
\item{object}{An instance of class \code{MsExperiment}.}

\item{value}{An object of the appropriate class for the slot to be
populated.}

\item{x}{an \code{MsExperiment}.}

\item{duplicates}{for \code{spectraSampleIndex()}: \code{character(1)} defining the
type of result returned by \code{spectraSampleIndex()}. With
\code{duplicates = "first"} an \code{integer} vector is returned with the first
match while \code{duplicates = "keep"} returns a \code{list} of \code{integer} with
the index of all matches.}

\item{experimentFiles}{\code{\link[=MsExperimentFiles]{MsExperimentFiles()}} defining (external) files
to data or annotation.}

\item{otherData}{\code{List} with arbitrary additional (\emph{other}) information or
data.}

\item{qdata}{\code{QFeatures} or \code{SummarizedExperiment} with the quantification
data.}

\item{sampleData}{\code{DataFrame} (or \code{data.frame}) with information on
individual samples of the experiment.}

\item{spectra}{\code{\link[Spectra:Spectra]{Spectra::Spectra()}} object with the MS spectra data of the
experiment.}

\item{with}{for \code{linkSampleData()}: \code{character(1)} defining the data to which
samples should be linked. See section \emph{Linking sample data to other
experimental data} for details.}

\item{sampleIndex}{for \code{linkSampleData()}: \code{integer} with the indices of the
samples in \code{sampleData(object)} that should be linked.}

\item{withIndex}{for \code{linkSampleData()}: \code{integer} with the indices of the
elements in \code{with} to which the samples (specified by \code{sampleIndex})
should be linked.}

\item{subsetBy}{for \code{linkSampleData()}: optional \code{integer(1)} defining the
dimension of the linked data on which the subsetting will occur.
Defaults to \code{subsetBy = 1L} thus subsetting will happen on the first
dimension (rows or elements).}

\item{i}{for \code{[}: an \code{integer}, \code{character} or \code{logical} referring to the
indices or names (rowname of \code{sampleData}) of the samples to subset.}

\item{j}{for \code{[}: not supported.}

\item{...}{optional additional parameters. For \code{filterSpectra()}: parameters
to be passed to the filter function (parameter \code{filter}).}

\item{drop}{for \code{[}: ignored.}

\item{filter}{for \code{filterSpectra()}: any filter function supported by
\code{\link[Spectra:Spectra]{Spectra::Spectra()}} to filter the spectra object (such as \code{filterRt} or
\code{filterMsLevel}). Parameters for the filter function can be passed
through \code{...}.}
}
\value{
See help of the individual functions.
}
\description{
The \code{MsExperiment} class allows the storage and management of all
aspects related to a complete proteomics or metabolomics mass
spectrometry experiment. This includes experimental design (i.e. a table
with samples), raw mass spectrometry data as spectra and chromatograms,
quantitative features, and identification data or any other relevant data
files.

For details, see \url{https://rformassspectrometry.github.io/MsExperiment}

This package is part of the RforMassSpectrometry initiative:
\url{https://www.rformassspectrometry.org/}
}
\section{Slots}{

\describe{
\item{\code{experimentFiles}}{An instance of class \code{MsExperimentFiles} or \code{NULL}.}

\item{\code{spectra}}{An instance of class \code{Spectra} or \code{NULL}.}

\item{\code{qdata}}{An instance of class \code{QFeatures}, \code{SummarizedExperiment} or
\code{NULL}.}

\item{\code{otherData}}{A \code{List} to store any additional data objects.}

\item{\code{sampleData}}{A \code{DataFrame} documenting the experimental design.}

\item{\code{sampleDataLinks}}{A \code{List} with link definitions between samples and
data elements. Should not be directly accessed or modified by the user.}

\item{\code{metadata}}{A \code{list} to store additional metadata.}
}}

\section{General information}{


An experiment is typically composed of several items
\itemize{
\item Description and information (covariates etc) of each sample from
the experiment. These are stored in the \code{sampleData} slot as a
\code{DataFrame}, each row describing a sample with columns containing
all relevant information on that sample.
\item Files to data or annotations. These are stored in the
\verb{@experimentFiles} slot as an instance of class \code{MsExperimentFiles}.
\item General metadata about the experiment, stored as a \code{list} in the
\verb{@metadata} slot.
\item Mass spectrometry data. Spectra and their metadata are stored as
a \code{\link[Spectra:Spectra]{Spectra::Spectra()}} object in the \code{spectra} slot. Chromatographic
data is not yet supported but will be stored as a \code{Chromatograms()}
object in the \verb{@chromatograms} slot.
\item Quantification data is stored as \code{QFeatures} or
\code{SummarizedExperiment} objects in the \verb{@qdata} slot and can be accessed or
replaced with the \code{qdata()} or \verb{qdata<-} functions, respectively.
\item Any additional data, be it other spectra data, or proteomics
identification data (i.e. peptide-spectrum matches defined as
\code{PSM} objects) can be added as elements to the list stored in
the \code{otherData} slot.
}

The \emph{length} of a \code{MsExperiment} is defined by the number of samples (i.e.
the number of rows of the object's \code{sampleData}). A \code{MsExperiment} with two
samples will thus have a length of two, independently of the number of files
or length of raw data in the object. This also defines the subsetting of the
object using the \code{[} function which will always subset by samples. See the
section for filtering and subsetting below for more information.

\code{MsExperiment} objects can be created using the \code{MsExperiment()} function
providing the data with the parameters listed below. If the
\code{\link[Spectra:Spectra]{Spectra::Spectra()}}
object provided with the \code{spectra} param uses a \code{MsBackendSql} backend,
sample data could be retrieved from the associated SQL database (see
section \emph{Using \code{MsExperiment} with \code{MsBackendSql}} in the vignette for
details). Alternatively, it is also possible to subsequently add data and
information to an existing \code{MsExperiment}.
Finally, with the \code{\link[=readMsExperiment]{readMsExperiment()}} function it is possible to create
a \code{MsExperiment} by importing MS spectra data directly from provided data
files. See examples below or the package vignette for more information.
}

\section{Accessing data}{


Data from an \code{MsExperiment} object can be accessed with the dedicated
accessor functions:
\itemize{
\item \code{experimentFiles()}, \verb{experimentFiles<-}: gets or sets experiment files.
\item \code{length()}: gets the \emph{length} of the object which represents the number of
samples available in the object's \code{sampleData}.
\item \code{metadata()}, \verb{metadata<-}: gets or sets the object's metadata.
\item \code{sampleData()}, \verb{sampleData<-}: gets or sets the object's sample data
(i.e. a \code{DataFrame} containing sample descriptions).
\item \code{spectra()}, \verb{spectra<-}: gets or sets spectra data. \code{spectra()} returns a
\code{\link[Spectra:Spectra]{Spectra::Spectra()}} object, \verb{spectra<-} takes a \code{Spectra} object as input
and returns the updated \code{MsExperiment}.
\item \code{spectraSampleIndex()}: depending on parameter \code{duplicates} it returns
either an \code{integer} (\code{duplicates = "first"}, the default) or a \code{list}
(\code{duplicates = "keep"}) of length equal to the number of spectra within
the object with the indices of the sample(s) (in \code{sampleData()}) a
spectrum is assigned to. With \code{duplicates = "first"}, an \code{integer} with
the index is returned for each spectrum. If a spectrum was assigned to
more than one sample a warning is shown and only the first sample index
is returned for that spectrum. For \code{duplicates = "keep"}, assignments are
returned as a \code{list} of \code{integer} vectors, each element being the
index(es) of the sample(s) a spectrum is assigned to. For spectra that are
not linked to any sample an \code{NA_integer_} is returned as index for
\code{duplicates = "first"} and an empty integer (\code{integer()}) for
\code{duplicates = "keep"}.
Note that the default \code{duplicates = "first"} will work in almost all use
cases, as generally, a spectrum will be assigned to a single sample.
\item \code{qdata()}, \verb{qdata<-}: gets or sets the quantification data, which can be a
\code{QFeatures} or \code{SummarizedExperiment}.
\item \code{otherData()}, \verb{otherData<-}: gets or sets the additional data
types, stored as a \code{List} in the object's \code{otherData} slot.
}
}

\section{Linking sample data to other experimental data}{


To start with, an \code{MsExperiment} is just a loose collection of files and data
related to an experiment; no explicit links or associations are present
between the samples and related data. Such links can however be created with
the \code{linkSampleData()} function. This function can establish links between
individual (or all) samples within the object's \code{sampleData} to individual,
or multiple, data elements or files, such as \code{Spectra} or raw data files.

The presence of such links enables a (consistent) subsetting of an
\code{MsExperiment} by samples. Thus, once the link is defined, any subsetting by
sample will also correctly subset the linked data. All other, not linked,
data elements are always retained as in the original \code{MsExperiment}.

To be able to link different elements within an \code{MsExperiment} it is also
required to \emph{identify} them with a consistent naming scheme. The naming
scheme of slots and data elements within follows an SQL-like scheme, in which
the variable (element) is identified by the name of the database table,
followed by a \code{"."} and the name of the database table column. For
\code{MsExperiment}, the naming scheme is defined as
\code{"<slot name>.<element name>"}. A column called \code{"sample_name"} within the
\code{sampleData} data frame can thus be addressed with
\code{"sampleData.sample_name"}, while \code{spectra.msLevel} would represent the
spectra variable called \code{msLevel} within the \code{Spectra} stored in the
\code{spectra} slot.

Links between sample data rows and any other data element are stored as
\code{integer} matrices within the \verb{@sampleDataLinks} slot of the object (see also
the vignette for examples and illustrations). The first column of a matrix
is always the index of the sample, and the second column the index of the
element that is linked to that sample, with one row per element.
Links can be defined/added with the \code{linkSampleData()} function which adds
a relationship between rows in \code{sampleData} to elements in any other data
within the \code{MsExperiment} that are specified with parameter \code{with}.
\code{linkSampleData()} supports two different ways to define the link:
\itemize{
\item Parameter \code{with} defines the data to which the link should be established.
To link samples to raw data files that would for example be available as a
\code{character} in an element called \code{"raw_files"} within the object's
\code{experimentFiles}, \code{with = "experimentFiles.raw_files"} would have to be
used. Next it is required to specify which samples should be linked with
which elements in \code{with}. This needs to be defined with the parameters
\code{sampleIndex} and \code{withIndex}, both are expected to be \code{integer} vectors
specifying which sample in \code{sampleData} should be linked to which element
in \code{with} (see examples below or vignette for examples and details).
\item As an alternative way, a link could be defined with an SQL-like syntax
that relates a column in \code{sampleData} to a column/element in the data to
which the link should be established. To link for example individual
spectra to the corresponding samples
\code{with = "sampleData.raw_file = spectra.dataOrigin"} could be used assuming
that \code{sampleData} contains a column named \code{"raw_file"} with the (full path)
of the raw data file for each sample from which the spectra were imported.
In this case both \code{sampleIndex} and \code{withIndex} can be omitted, but it is
expected/required that the columns/elements from \code{sampleData} and the data
element to which the link should be established contain matching values.
}

Note that \code{linkSampleData} will \strong{replace} a previously existing link to the
same data element.
\itemize{
\item \code{spectraSampleIndex()} is a convenience function that extracts for each
spectrum in the object's \code{spectra()} the index of the sample it is
associated with (see function's help above for more information).
}
}

\section{Subsetting and filtering}{

\itemize{
\item \code{[}: \code{MsExperiment} objects can be subset \strong{by samples} with \verb{[i]}
where \code{i} is the index or a logical defining to which samples the data
should be subset. Subsetting by sample will (correctly) subset all
linked data to the respective samples. If multiple samples are linked to
the same data element, subsetting might duplicate that data element. This
duplication of \emph{n:m} relationships between samples and elements does however
not affect data consistency (see examples below for more information).
Not linked data (slots) will be returned as they are. Subsetting in
arbitrary order is supported.
See the vignette for details and examples.
\item \code{filterSpectra()}: subsets the \code{Spectra} within an \code{MsExperiment} using a
provided filter function (parameter \code{filter}). Parameters for the filter
function can be passed with parameter \code{...}. Any of the filter functions
of a \code{\link[Spectra:Spectra]{Spectra::Spectra()}} object can be passed with parameter \code{filter}.
Possibly present relationships between samples and spectra (\emph{links}, see
also \code{linkSampleData()}) are updated. Filtering affects only the spectra
data of the object, none of the other slots and data (e.g. \code{sampleData})
are modified.
The function returns an \code{MsExperiment} with the filtered \code{Spectra} object.
}
}

\examples{

## An empty MsExperiment object
msexp <- MsExperiment()
msexp

example(MsExperimentFiles)
experimentFiles(msexp) <- fls
msexp

## Linking samples to data elements

## Create a small experiment
library(S4Vectors)
mse <- MsExperiment()
sd <- DataFrame(sample_id = c("QC1", "QC2"),
                sample_name = c("QC Pool", "QC Pool"),
                injection_idx = c(1, 3))
sampleData(mse) <- sd

## define file names containing spectra data for the samples and
## add them, along with other arbitrary files to the experiment
fls <- dir(system.file("sciex", package = "msdata"), full.names = TRUE)
experimentFiles(mse) <- MsExperimentFiles(
    mzML_files = fls,
    annotations = "internal_standards.txt")

## Link samples to data files: first sample to first file in "mzML_files",
## second sample to second file in "mzML_files"
mse <- linkSampleData(mse, with = "experimentFiles.mzML_files",
    sampleIndex = c(1, 2), withIndex = c(1, 2))

## Link all samples to the one file in "annotations"
mse <- linkSampleData(mse, with = "experimentFiles.annotations",
    sampleIndex = c(1, 2), withIndex = c(1, 1))
mse

## Import the spectra data and add it to the experiment
library(Spectra)
spectra(mse) <- Spectra(fls, backend = MsBackendMzR())

## Link each spectrum to the respective sample. We use the alternative
## link definition that does not require sampleIndex and withIndex but
## links elements based on matching values in the specified data elements.
## We need to add the full file name as an additional column to sampleData
## in order to allow matching these file names with the value in
## spectra(mse)$dataOrigin which contains the original file names from which
## the spectra were imported.
sampleData(mse)$raw_file <- normalizePath(fls)

## The links can be added using the short notation below
mse <- linkSampleData(mse, with = "sampleData.raw_file = spectra.dataOrigin")
mse

## With sampleData links present, any subsetting of the experiment by sample
## will ensure that all linked elements are subset accordingly
b <- mse[2]
b
sampleData(b)
experimentFiles(b)$mzML_files

## The `spectraSampleIndex()` function returns, for each spectrum, the
## index in the object's `sampleData` to which it is linked/assigned
spectraSampleIndex(mse)

## Subsetting with duplication of n:m sample to data relationships
##
## Both samples were assigned above to one "annotation" file in
## `experimentFiles`:
experimentFiles(mse[1])[["annotations"]]
experimentFiles(mse[2])[["annotations"]]

## Subsetting will always keep the relationship between samples and linked
## data elements. Subsetting will however possibly duplicate data elements
## that are shared among samples. Thus, while in the original object the
## element "annotations" has a single entry, subsetting with [1:2] will
## result in an MsExperiment with duplicated entries in "annotations"
experimentFiles(mse)[["annotations"]]
experimentFiles(mse[1:2])[["annotations"]]

## Spectra within an MsExperiment can be filtered/subset with the
## `filterSpectra` function and any of the filter functions supported
## by `Spectra` objects. Below we restrict the spectra data to spectra
## with a retention time between 200 and 210 seconds.
res <- filterSpectra(mse, filterRt, rt = c(200, 210))
res

## The object now contains far fewer spectra. Their retention times are:
rtime(spectra(res))

## Relationship between samples and spectra was preserved by the filtering
a <- res[1L]
spectra(a)
}
\author{
Laurent Gatto, Johannes Rainer
}
