## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, fig.align = "center") ## ----packages, message=FALSE, warning=FALSE----------------------------------- # Packages library("scran") library("ggplot2") library("Coralysis") library("SingleCellExperiment") library("MouseGastrulationData") ## ----data, message=FALSE, warning=FALSE--------------------------------------- # Import data sce <- EmbryoAtlasData(samples = c(1, 3)) ## ----Seurat normalisation, echo=TRUE, message=FALSE, warning=FALSE------------ ## Normalize the data # log1p normalization SeuratNormalisation <- function(object) { # 'SeuratNormalisation()' function applies the basic Seurat normalization to # a SingleCellExperiment object with a 'counts' assay. Normalized data # is saved in the 'logcounts' assay. logcounts(object) <- apply(counts(object), 2, function(x) { log1p(x / sum(x) * 10000) }) # log1p normalization w/ 10K scaling factor logcounts(object) <- as(logcounts(object), "sparseMatrix") return(object) } sce <- SeuratNormalisation(object = sce) # Remove 'counts' to reduce data size object counts(sce) <- NULL ## ----hvg---------------------------------------------------------------------- # Feature selection with 'scran' package nhvg <- 500 batch.label <- "pool" sce[[batch.label]] <- factor(sce[[batch.label]]) m.hvg <- scran::modelGeneVar(sce, block = sce[[batch.label]]) top.hvg <- getTopHVGs(m.hvg, n = nhvg) # Subset object sce <- sce[top.hvg, ] # Rename genes: 'Ensembl ID' to SYMBOL row.names(sce) <- rowData(sce)$SYMBOL rowData(sce) <- NULL ## ----dimred: pre-integration, fig.width=8.5, fig.height=7.25------------------ # Compute PCA & TSNE set.seed(123) sce <- RunPCA( object = sce, assay.name = "logcounts", p = 30, dimred.name = "unintPCA" ) set.seed(123) sce <- RunTSNE(sce, dimred.type = "unintPCA", dimred.name = "unintTSNE" ) # Plot TSNE highlighting the batch & cell type unint.batch.plot <- PlotDimRed( object = sce, color.by = "pool", dimred = "unintTSNE", point.size = 0.01, legend.nrow = 1, seed.color = 1024 ) unint.cell.plot <- PlotDimRed( object = sce, color.by = "celltype", dimred = "unintTSNE", point.size = 0.01, legend.nrow = 14, seed.color = 7 ) cowplot::plot_grid(unint.batch.plot, unint.cell.plot, ncol = 2, align = "vh") ## ----multi-level integration, message=TRUE, warning=FALSE--------------------- # Prepare data for integration: # remove non-expressing genes & logcounts is from `dgCMatrix` class sce <- PrepareData(object = sce) # Perform integration with Coralysis set.seed(1024) sce <- RunParallelDivisiveICP( object = sce, batch.label = "pool", build.train.set = FALSE, L = 10, threads = 2 ) ## ----dimred: post-integration, fig.width=8.5, fig.height=7.25----------------- # Compute PCA with joint cluster probabilities & TSNE set.seed(123) sce <- RunPCA(sce, assay.name = "joint.probability", dimred.name = "intPCA" ) set.seed(123) sce <- RunTSNE(sce, dimred.type = "intPCA", dimred.name = "intTSNE" ) # Plot TSNE highlighting the batch & cell type int.batch.plot <- PlotDimRed( object = sce, color.by = "pool", dimred = "intTSNE", point.size = 0.01, legend.nrow = 1, seed.color = 1024 ) int.cell.plot <- PlotDimRed( object = sce, color.by = "celltype", dimred = "intTSNE", point.size = 0.01, legend.nrow = 14, seed.color = 7 ) cowplot::plot_grid(int.batch.plot, int.cell.plot, ncol = 2, align = "vh" ) ## ----clustering, fig.width=11.5, fig.height=7.25------------------------------ # Graph-based clustering on the integrated PCA w/ 'scran' package blusparams <- bluster::SNNGraphParam(k = 15, cluster.fun = "louvain") set.seed(123) sce$cluster <- scran::clusterCells(sce, use.dimred = "intPCA", BLUSPARAM = blusparams ) # Plot clustering clt.plot <- PlotDimRed( object = sce, color.by = "cluster", dimred = "intTSNE", point.size = 0.01, legend.nrow = 3, seed.color = 65 ) cowplot::plot_grid(int.batch.plot, int.cell.plot, clt.plot, ncol = 3, align = "h" ) ## ----cluster markers, fig.width=5, fig.height=5------------------------------- # Cluster markers cluster.markers <- FindAllClusterMarkers(object = sce, clustering.label = "cluster") # Select the top 3 positive markers per cluster top3.markers <- lapply(X = split(x = cluster.markers, f = cluster.markers$cluster), FUN = function(x) { head(x[order(x$log2FC, decreasing = TRUE), ], n = 3) }) top3.markers <- do.call(rbind, top3.markers) top3.markers <- top3.markers[order(as.numeric(top3.markers$cluster)), ] # Heatmap of the top 3 positive markers per cluster HeatmapFeatures( object = sce, clustering.label = "cluster", features = top3.markers$marker, seed.color = 65 ) ## ----dge, fig.width=9, fig.height=4.5----------------------------------------- # DGE analysis: cluster 3 vs 6 dge.clt3vs6 <- FindClusterMarkers(sce, clustering.label = "cluster", clusters.1 = "3", clusters.2 = "6" ) head(dge.clt3vs6[order(abs(dge.clt3vs6$log2FC), decreasing = TRUE), ]) top6.degs <- head(dge.clt3vs6[order(abs(dge.clt3vs6$log2FC), decreasing = TRUE ), "marker"]) exp.plots <- lapply(X = top6.degs, FUN = function(x) { PlotExpression( object = sce, color.by = x, scale.values = TRUE, point.size = 0.5, point.stroke = 0.5 ) }) cowplot::plot_grid(plotlist = exp.plots, align = "vh", ncol = 3) ## ----rsession----------------------------------------------------------------- # R session sessionInfo()