% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compare.R
\name{rankAgainstReference}
\alias{rankAgainstReference}
\title{Compare multiple methods and rank against reference accordingly}
\usage{
rankAgainstReference(
  input,
  reference,
  method = c("spearman", "pearson", "gsea"),
  geneSize = 150,
  cellLines = NULL,
  cellLineMean = "auto",
  rankByAscending = TRUE,
  rankPerCellLine = FALSE,
  threads = 1,
  chunkGiB = 1,
  verbose = FALSE
)
}
\arguments{
\item{input}{\code{Named numeric vector} of differentially expressed genes
whose names are gene identifiers and respective values are a statistic that
represents significance and magnitude of differentially expressed genes
(e.g. t-statistics); or \code{character} of gene symbols composing a gene
set that is tested for enrichment in reference data (only used if
\code{method} includes \code{gsea})}

\item{reference}{Data matrix or \code{character} object with file path to
CMap perturbations (see \code{\link{prepareCMapPerturbations}()}) or gene
expression and drug sensitivity association (see
\code{\link{loadExpressionDrugSensitivityAssociation}()})}

\item{method}{Character: comparison method (\code{spearman}, \code{pearson}
or \code{gsea}; multiple methods may be selected at once)}

\item{geneSize}{Numeric: number of top up-/down-regulated genes to use as
gene sets to test for enrichment in reference data; if a 2-length numeric
vector, the first element is the number of top up-regulated genes and the
second element is the number of top down-regulated genes used to create gene
sets; only used if \code{method} includes \code{gsea} and if \code{input}
is not a gene set}

\item{cellLines}{Integer: number of unique cell lines}

\item{cellLineMean}{Boolean or \code{"auto"}: add rows with the mean of \code{method} across
cell lines? If \code{cellLineMean = "auto"} (default), rows will be added
when data for more than one cell line is available.}

\item{rankByAscending}{Boolean: rank values based on their ascending
(\code{TRUE}) or descending (\code{FALSE}) order?}

\item{rankPerCellLine}{Boolean: rank results based on both individual cell
lines and mean scores across cell lines (\code{TRUE}) or based on mean
scores alone (\code{FALSE})? If \code{cellLineMean = FALSE}, individual
cell line conditions are always ranked.}

\item{threads}{Integer: number of parallel threads}

\item{chunkGiB}{Numeric: if \code{reference} is a path to an HDF5 file
(\code{.h5} extension), that file is loaded and processed in chunks of a
given size in gibibytes (GiB); lower values decrease peak RAM usage (see
section "Process data by chunks" below)}

\item{verbose}{Boolean: print additional details?}
}
\value{
Data table with correlation and/or GSEA score results
}
\description{
Compare \code{input} against \code{reference} data using one or more
comparison methods and rank the results accordingly
}
\section{Process data by chunks}{

  If a file path to a valid HDF5 (\code{.h5}) file is provided instead of a
  data matrix, that file can be loaded and processed in chunks of size
  \code{chunkGiB}, resulting in decreased peak memory usage.

  The default value of 1 GiB (1 GiB = 1024^3 bytes) allows loading chunks of
  approximately 10000 columns by 14000 rows
  (\code{10000 * 14000 * 8 bytes / 1024^3 = 1.04 GiB}).
}

\section{GSEA score}{

  When \code{method = "gsea"}, weighted connectivity scores (WTCS) are
  calculated (\url{https://clue.io/connectopedia/cmap_algorithms}).
}

\keyword{internal}
