% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/transform_counts.R
\name{compute_scale_factors}
\alias{compute_scale_factors}
\title{Compute count scaling factors}
\usage{
compute_scale_factors(
  x,
  by = c("auc", "mapped_reads"),
  targetSize = 4e+07,
  L = 100,
  auc = "recount_qc.bc_auc.all_reads_all_bases",
  avg_mapped_read_length = "recount_qc.star.average_mapped_length",
  mapped_reads = "recount_qc.star.all_mapped_reads",
  paired_end = is_paired_end(x, avg_mapped_read_length)
)
}
\arguments{
\item{x}{Either a
\link[SummarizedExperiment:RangedSummarizedExperiment-class]{RangedSummarizedExperiment-class}
created by \code{create_rse()} or the sample metadata created by
\code{read_metadata()}.}

\item{by}{Either \code{auc} or \code{mapped_reads}. If set to \code{auc} it
will compute the scaling factor by the total coverage of the sample. That is,
the area under the curve (AUC) of the coverage. If set to \code{mapped_reads} it
will scale the counts by the number of mapped reads (in the QC annotation),
whether the library was paired-end or not, and the desired read length (\code{L}).}

\item{targetSize}{A \code{numeric(1)} specifying the target library size in number
of single-end reads.}

\item{L}{An \code{integer(1)} specifying the target read length. It is only used
when \code{by = 'mapped_reads'} since it cancels out in the calculation when
using \code{by = 'auc'}.}

\item{auc}{A \code{character(1)} specifying the metadata column
name that contains the area under the coverage (AUC). Note that there are
several possible AUC columns provided in the sample metadata generated
by \code{create_rse()}.}

\item{avg_mapped_read_length}{A \code{character(1)} specifying the metadata column
name that contains the average fragment length after aligning. This is
typically twice the average read length for paired-end reads.}

\item{mapped_reads}{A \code{character(1)} specifying the metadata column
name that contains the number of mapped reads.}

\item{paired_end}{A \code{logical()} vector specifying whether each
sample is paired-end or not.}
}
\value{
A \code{numeric()} with the sample scale factors that are used by
\code{transform_counts()}.
}
\description{
This function computes the count scaling factors used by
\code{transform_counts()}. This function is similar to
\code{recount::scale_counts(factor_only = TRUE)}, but it is more general.
}
\examples{

## Download the metadata for SRP009615, a single-end study
SRP009615_meta <- read_metadata(
    metadata_files = file_retrieve(
        locate_url(
            "SRP009615",
            "data_sources/sra",
        )
    )
)

## Compute the scaling factors
compute_scale_factors(SRP009615_meta, by = "auc")
compute_scale_factors(SRP009615_meta, by = "mapped_reads")

## Download the metadata for DRP000499, a paired-end study
DRP000499_meta <- read_metadata(
    metadata_files = file_retrieve(
        locate_url(
            "DRP000499",
            "data_sources/sra",
        )
    )
)

## Compute the scaling factors
compute_scale_factors(DRP000499_meta, by = "auc")
compute_scale_factors(DRP000499_meta, by = "mapped_reads")

## You can compare the factors against those from recount::scale_counts()
## from the recount2 project which used a different RNA-seq aligner
## If needed, install recount, the R/Bioconductor package for recount2:
# BiocManager::install("recount")
recount2_factors <- recount::scale_counts(
    recount::rse_gene_SRP009615,
    by = "auc", factor_only = TRUE
)
recount3_factors <- compute_scale_factors(SRP009615_meta, by = "auc")
recount_factors <- data.frame(
    recount2 = recount2_factors[order(names(recount2_factors))],
    recount3 = recount3_factors[order(names(recount3_factors))]
)
plot(recount2 ~ recount3, data = recount_factors)
abline(a = 0, b = 1, col = "purple", lwd = 2, lty = 2)
}
\seealso{
Other count transformation functions: 
\code{\link{compute_read_counts}()},
\code{\link{is_paired_end}()},
\code{\link{transform_counts}()}
}
\concept{count transformation functions}
