% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/colonHealthy.R
\name{colonHealthy}
\alias{colonHealthy}
\title{Single-cell analysis of samples from healthy human colon}
\format{
\code{MultiAssayExperiment} obtained from an \code{ArchR} project. Annotated with the Hg38 genome build.
Contains the following experiments:
\itemize{
\item{\strong{TileMatrix}: SingleCellExperiment with 6062095 rows and 59231 columns}
\item{\strong{GeneIntegrationMatrix}: SingleCellExperiment with 19020 rows and 59231 columns}
\item{\strong{GeneScoreMatrix}: SingleCellExperiment with 24919 rows and 59231 columns}
\item{\strong{MotifMatrix}: SingleCellExperiment with 870 rows and 59231 columns}
\item{\strong{PeakMatrix}: SingleCellExperiment with 406946 rows and 59231 columns}
}
}
\usage{
colonHealthy(
  metadata = FALSE,
  experiments = c("TileMatrix", "GeneIntegrationMatrix", "GeneScoreMatrix",
    "MotifMatrix", "PeakMatrix")
)
}
\arguments{
\item{metadata}{logical flag; if \code{TRUE}, only the data set's metadata is returned instead of the data}

\item{experiments}{character vector of matrices to return; see \code{Format}}
}
\value{
\code{MultiAssayExperiment} made up of \code{SingleCellExperiment}s
with assays stored as \code{DelayedMatrix} objects.
If \code{metadata = TRUE}, an \code{ExperimentHub} object listing this data set's metadata.
}
\description{
ATACseq and RNAseq data obtained from the analysis of colon tissues. Samples were
collected from adult human donors.
}
\section{Data preparation}{


scATAC data was downloaded from Gene Expression Omnibus
(acc. no. \href{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE165659}{GSE165659})
and analyzed with SingleCell ATAC - 10X pipeline v2.0.0.
scRNAseq data in the form of Seurat objects was downloaded from \url{https://drive.google.com/drive/folders/12j9ufV1L0uWbUlab-VoXRznDLKDO7PQ_}. In case
the data storage location changes in the future, the new location will be given in the README file in the project's GitHub repository (\url{https://github.com/winstonbecker/scCRC_continuum}).

Downstream analysis was performed with the \code{ArchR} package v. 1.0.2:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(ArchR)
library(parallel)

catlas_files <- <FRAGMENT_FILES>
outputDir <- <OUTPUT_DIRECTORY>
arrow_files <- createArrowFiles(inputFiles = catlas_files, sampleNames = <SAMPLE_NAMES>)
doubScores <- addDoubletScores(input = arrow_files)

# create ArchR project
project <- ArchRProject(arrow_files, outputDirectory = outputDir)

# filtering out doublet cells
project <- filterDoublets(project)

# add Iterative Latent Semantic Indexing reduced-dimensionality space
project <- addIterativeLSI(ArchRProj = project, useMatrix = "TileMatrix", 
                           name = "IterativeLSI", clusterParams = list(resolution = c(0.2), 
                                                    sampleCells = 10000, n.start = 10))

# batch correction
project  <- addHarmony( ArchRProj = project, reducedDims = "IterativeLSI",
    name = "Harmony", groupBy = "Sample")

project <- addClusters( input = project, reducedDims = "IterativeLSI",
    method = "Seurat", name = "Clusters", resolution = 0.8)

# add clusters after Harmony batch correction
project <- addClusters(input = project, reducedDims = "Harmony",
    method = "Seurat", name = "Clusters_Harmony", resolution = 0.8)

# add UMAP embedding 
project <- addUMAP(ArchRProj = project, reducedDims = "IterativeLSI",
    nNeighbors = 30, minDist = 0.5, name ="UMAP_LSI")

project <- addUMAP(ArchRProj = project, reducedDims = "IterativeLSI",
    nNeighbors = 30, minDist = 0.5, name ="UMAP_Harmony")

# add column with log base 10 of the fragment numbers
project <- addCellColData(ArchRProj = project, data = log10(project$nFrags),
                                  name = "log10_nFrags", cells = project$cellNames)




# load gene expression data
# use files downloaded from 
# https://drive.google.com/drive/folders/12j9ufV1L0uWbUlab-VoXRznDLKDO7PQ_?usp=sharing

data_files <- c("Final_scHTAN_colon_normal_epithelial_220213.rds",
                "Final_scHTAN_colon_immune_220213.rds",
                "Final_scHTAN_colon_stromal_220213.rds")

# define object names
RNAseq_se_names <- gsub(".*scHTAN_|_220213.rds", "", data_files)

# create objects as instances of SingleCellExperiment class
for (i in seq_along(RNAseq_se_names)) assign(RNAseq_se_names[i], Seurat::as.SingleCellExperiment(readRDS(data_files[i])))

# add column with cell types and disease state
for (obj in RNAseq_se_names)\{
    eval(parse(text = paste0("colData(", obj, ")$CellType <-", obj,"@colData@listData$CellType")))
    eval(parse(text = paste0("colData(", obj, ")$DiseaseState <-", obj,"@colData@listData$DiseaseState")))
\}

# uniformize colData columns before merging
shared_cols <- purrr::map(list(colon_immune, colon_stromal, colon_normal_epithelial), colData) \%>\%
    purrr::map(colnames) \%>\%
    purrr::reduce(intersect)

# remove reducedDims since their column names differ across objects
for (obj in RNAseq_se_names)\{
    eval(parse(text = paste0(obj, "@colData <- ", obj, "@colData[,colnames(", obj, "@colData) \%in\% shared_cols]")))
    eval(parse(text = paste0("SingleCellExperiment::reducedDim(", obj, ") <- NULL")))
    eval(parse(text = paste0(obj, "@int_colData@listData <- list()")))
\}

# merge RNAseq data objects

colon_RNAseq <- cbind(colon_immune, colon_normal_epithelial, colon_stromal)


RNA_se <- SummarizedExperiment(assay = list(counts = as(assay(colon_RNAseq, "counts"), "dgCMatrix")),
                               colData = colData(colon_RNAseq), rowData = rowData(colon_RNAseq))


# select samples from healthy donors (no cancer)
RNA_se <- RNA_se[,colData(RNA_se)$DiseaseState == "Normal"]

# RNA integration
project <- addGeneIntegrationMatrix(
    ArchRProj = project,
    useMatrix = "GeneScoreMatrix",
    reducedDims = "IterativeLSI",
    seRNA = RNA_se,
    addToArrow = TRUE,
    groupRNA = "CellType",
    nameCell = "predicted_cell_un",
    nameGroup = "predicted_group_un",
    nameScore = "predicted_score_un")


project <- addGroupCoverages(ArchRProj = project, groupBy = "predicted_group_un")

# call reproducible peaks on the pseudo-bulk replicates
## requires MACS2 installation

project <- addReproduciblePeakSet( ArchRProj = project,
    groupBy = "predicted_group_un", pathToMacs2 = <PATH_TO_MACS2>)


# LSI reduced dimensionality based on the GeneIntegrationMatrix

project <- addIterativeLSI(ArchRProj = project, clusterParams = list(resolution = 0.2,
        sampleCells = 1000, n.start = 10), saveIterations = FALSE,
    useMatrix = "GeneIntegrationMatrix", varFeatures = 2500,
    firstSelection = "variable", binarize = FALSE, name = "LSI_RNA")

# add clusters based on the new reduced-dimensionality space
project <- addClusters( input = project, reducedDims = "LSI_RNA",
    method = "Seurat", name = "Clusters_RNA", resolution = 0.8)

# add UMAP embedding
project <- addUMAP(ArchRProj = project, reducedDims = "LSI_RNA",
    nNeighbors = 30, minDist = 0.5, name ="UMAP_LSI_RNA", metric = "cosine",
    force = TRUE)

# batch correction 
project  <- addHarmony( ArchRProj = project, reducedDims = "LSI_RNA",
    name = "Harmony_RNA", groupBy = "Sample")

# UMAP embedding after batch correction
project <- addUMAP(ArchRProj = project, reducedDims = "Harmony_RNA",
    nNeighbors = 30, minDist = 0.5, name ="UMAP_LSI_RNA_Harmony", metric = "cosine",
    force = TRUE
)

# find clusters after batch correction
project <- addClusters( input = project, reducedDims = "Harmony_RNA",
    method = "Seurat", name = "Clusters_RNA_Harmony", resolution = 0.8)


# combine reduced-dimensionality spaces produced from ATACseq and RNAseq data
project <- addCombinedDims(project, reducedDims = c("IterativeLSI", "LSI_RNA"),
                            name =  "LSI_Combined")

# add UMAP embedding
project <- addUMAP(ArchRProj = project, name = "UMAP_combined", reducedDims = "LSI_Combined",
        nNeighbors = 30, minDist = 0.5, metric = "cosine")


# find clusters in combined reduced space
project <- addClusters(input = project, reducedDims = "LSI_Combined",
                        method = "Seurat", name = "Clusters_combined",
                        resolution = 0.4)

# add information about sequence motifs recognized by known transcription factors
project <- addMotifAnnotations(ArchRProj = project,
                                       motifSet = "cisbp", name = "Motif")


# add background peaks to be compared against during peak variation assessment
project <- addBgdPeaks(project)

# calculate per-cell deviations of motif annotations
project <- addDeviationsMatrix(project, peakAnnotation = "Motif")

# save project
saveArchRProject(project, outputDir)

# convert project into MultiAssayExperiment object 
MAE <- maw.archr::create.mae.with.multiple.sces.from.archr(outputDir, tile.sizes = 500)

saveRDS(MAE, <OUTPUT_PATH>)

}\if{html}{\out{</div>}}
}

\section{Data storage and access}{

The \code{MultiAssayExperiment} is split into separate \code{SingleCellExperiment}
objects and they in turn are split into components, all of which are stored in a
single hdf5 file. Data can be accessed with a special function that extracts
elements of the requested experiment(s), reassembles them, and builds an MAE.
}

\examples{
# check metadata of dataset
colonHealthy(metadata = TRUE)
# download data
\dontrun{
colonHealthy()
}

}
\references{
\enumerate{
\item Zhang K, Hocker JD, Miller M, Hou X, Chiou J, Poirion OB, Qiu Y, Li YE,
Gaulton KJ, Wang A, Preissl S, Ren B. A single-cell atlas of
chromatin accessibility in the human genome.
Cell. 2021 Nov 24;184(24):5985-6001.e19. doi: 10.1016/j.cell.2021.10.024.
Epub 2021 Nov 12. PMID: 34774128; PMCID: PMC8664161.
\item Becker, W.R., Nevins, S.A., Chen, D.C. et al. Single-cell analyses
define a continuum of cell state and composition changes in the malignant
transformation of polyps to colorectal cancer. Nat Genet 54, 985–995 (2022).
https://doi.org/10.1038/s41588-022-01088-x
}
}
