% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gene_selection.R
\name{gene_peakedness_spread_selection}
\alias{gene_peakedness_spread_selection}
\title{Gene Peakedness Spread Selection}
\usage{
gene_peakedness_spread_selection(
  sce,
  gene_peakedness_df,
  genes_per_bin = 10,
  n_gene_bins = 10,
  pseudotime_slot = "slingPseudotime_1"
)
}
\arguments{
\item{sce}{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment::SingleCellExperiment} to
obtain pseudotime values from}

\item{gene_peakedness_df}{Gene peakedness DF generated by
\code{\link[=calculate_gene_peakedness]{calculate_gene_peakedness()}}}

\item{genes_per_bin}{Integer. Number of genes to select per gene bin.}

\item{n_gene_bins}{Integer. Number of gene bins to create over pseudotime.
We recommend around 1-2x the number of pseudotime bins you want to use.}

\item{pseudotime_slot}{String. The name of the pseudotime column in the SCE
column data (\code{colData}).}
}
\value{
A list of gene IDs with the highest peakedness ratios in each region of
pseudotime.
}
\description{
This function selects genes whose peaks are evenly distributed across a
pseudotime trajectory. It does this by splitting pseudotime into evenly
sized regions, and then selecting, for each region, the
genes with the highest peakedness ratio whose peak lies inside that region
of pseudotime. The number of regions and genes per region can be tuned.
}
\examples{
ncells <- 70
ngenes <- 100
# Simulate one expression profile per gene: gene i is a Gaussian bump over
# the cell index, centred on ncells / i, so different genes peak at
# different positions along pseudotime.
# vapply() yields an ncells x ngenes matrix (one column per gene), so it is
# transposed to put genes in rows and cells in columns, matching the row
# and column names assigned below.
counts_matrix <- t(vapply(
    seq_len(ngenes),
    function(i) dnorm(seq_len(ncells), mean = (ncells / i), sd = 1),
    numeric(ncells)
))

# The same simulated values back all three assays, on different scales.
# Densities far from a peak underflow to 0, so logcounts holds -Inf there.
sce <- SingleCellExperiment::SingleCellExperiment(assays = list(
    counts = counts_matrix * 3,
    normcounts = counts_matrix,
    logcounts = log(counts_matrix)
))
colnames(sce) <- paste0("cell", seq_len(ncells))
rownames(sce) <- paste0("gene", seq_len(ngenes))
# First half of the cells is one cell type, second half the other.
sce$cell_type <- c(
    rep("celltype_1", ncells / 2),
    rep("celltype_2", ncells / 2)
)

# Pseudotime simply follows cell order, starting at 0.
sce$pseudotime <- seq_len(ncells) - 1

# calculate_gene_peakedness
gene_peakedness <- calculate_gene_peakedness(
    sce,
    pseudotime_slot = "pseudotime"
)

head(gene_peakedness)

# plot_gene_peakedness
plot_gene_peakedness(sce, gene_peakedness, "gene20",
    pseudotime_slot = "pseudotime"
)

# smooth_gene
smoothed_gene20 <- smooth_gene(
    sce, "gene20",
    pseudotime_slot = "pseudotime"
)
head(smoothed_gene20)

# Select best spread of genes
genes_to_use <- gene_peakedness_spread_selection(sce, gene_peakedness,
    genes_per_bin = 2, n_gene_bins = 1, pseudotime_slot = "pseudotime"
)

print(genes_to_use)
# Peak pseudotime against peakedness ratio for the selected genes only.
plot(
    x = gene_peakedness[
        gene_peakedness$gene \%in\% genes_to_use, "peak_pseudotime"
    ],
    y = gene_peakedness[gene_peakedness$gene \%in\% genes_to_use, "ratio"]
)

}
\concept{gene-selection}
