## ----setup, include = FALSE, fig.show='hide'---------------------------------- knitr::knit_hooks$set(pngquant = knitr::hook_pngquant) knitr::opts_chunk$set( collapse = TRUE, comment = "#>", dev = "ragg_png", dpi = 72, fig.retina = 2, fig.align = "center", out.width = "100%", pngquant = "--speed=1 --quality=5-10" ) ## ----message=FALSE, fig.show='hide'------------------------------------------- # Load library library(scDiagnostics) # Load datasets data("reference_data") data("query_data") data("qc_data") # Set seed for reproducibility set.seed(0) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Generate the MDS scatter plot with cell type coloring plotCellTypeMDS(query_data = query_data, reference_data = reference_data, cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"), query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Generate the MDS scatter plot with cell type coloring plotCellTypePCA(query_data = query_data, reference_data = reference_data, cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"), query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation", pc_subset = 1:3) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Plot the PCA data as boxplots boxplotPCA(query_data = query_data, reference_data = reference_data, cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"), query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation", pc_subset = 1:3) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Compute important variables for all pairwise cell comparisons disc_output <- calculateDiscriminantSpace( reference_data = reference_data, query_data = query_data, query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation") # Generate scatter and boxplot plot(disc_output, plot_type = "scatterplot") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Perform discriminant analysis and projection discriminant_results <- calculateDiscriminantSpace( reference_data = reference_data, query_data = query_data, ref_cell_type_col = "expert_annotation", query_cell_type_col = "SingleR_annotation", calculate_metrics = TRUE, # Compute Mahalanobis distance and cosine similarity alpha = 0.01 # Significance level for Mahalanobis distance cutoff ) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Extract Mahalanobis distances mahalanobis_distances <- discriminant_results$`CD4-CD8`$query_mahalanobis_dist # Identify anomalies based on a threshold threshold <- discriminant_results$`CD4-CD8`$mahalanobis_crit # Use the computed cutoff value anomalies <- mahalanobis_distances[mahalanobis_distances > threshold] ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Load ggplot2 for visualization library(ggplot2) # Convert distances to a data frame distances_df <- data.frame(Distance = mahalanobis_distances, Anomaly = ifelse(mahalanobis_distances > threshold, "Anomaly", "Normal")) # Plot histogram of Mahalanobis distances ggplot(distances_df, aes(x = Distance, fill = Anomaly)) + geom_histogram(binwidth = 0.5, position = "identity", alpha = 0.7) + labs(title = "Histogram of Mahalanobis Distances", x = "Mahalanobis Distance", y = "Frequency") + theme_bw() ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Calculate the SIR space projections sir_output <- calculateSIRSpace( reference_data = reference_data, query_data = query_data, query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation", multiple_cond_means = TRUE ) # Plot the SIR space projections plot(sir_output, plot_type = "boxplot", sir_subset = 1:3) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Plot the top contributing markers plot(sir_output, plot_type = "varplot", sir_subset = 1:3, n_top_vars = 10) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Visualize VPREB3 gene on a PCA plot plotGeneExpressionDimred(se_object = query_data, method = "PCA", pc_subset = 1:3, feature = "VPREB3") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ plotMarkerExpression( reference_data = reference_data, query_data = query_data, ref_cell_type_col = "expert_annotation", query_cell_type_col = "SingleR_annotation", gene_name = "VPREB3", cell_type = "B_and_plasma" ) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Generate scatter plot p1 <- plotQCvsAnnotation(se_object = qc_data, cell_type_col = "SingleR_annotation", qc_col = "total", score_col = "annotation_scores") p1 + ggplot2::xlab("Library Size") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Generate histograms histQCvsAnnotation(se_object = query_data, cell_type_col = "SingleR_annotation", qc_col = "percent_mito", score_col = "annotation_scores") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Plot gene set scores on PCA plotGeneSetScores(se_object = query_data, method = "PCA", score_col = "gene_set_scores", pc_subset = 1:3) ## ----SessionInfo, echo=FALSE, message=FALSE, warning=FALSE, comment=NA, fig.show='hide'---- options(width = 80) sessionInfo()