## ----eval=FALSE--------------------------------------------------------------- # # Install from github # library(devtools) # install_github("brennanhilton/RNAseqCovarImpute") # # # Install from Bioconductor (not yet on Bioconductor) # # if (!require("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("RNAseqCovarImpute") ## ----------------------------------------------------------------------------- library(RNAseqCovarImpute) library(dplyr) library(BiocParallel) library(PCAtools) library(limma) library(mice) data(example_data) data(example_DGE) ## ----------------------------------------------------------------------------- # We use voom to convert counts to logCPM values, adding 0.5 to all the counts to avoid taking the logarithm of zero, and normalized for library size. start.pca = Sys.time() # To calculate runtime pca_data = limma::voom(example_DGE)$E # Conduct pca p = PCAtools::pca(pca_data) # Determine the number of PCs that account for >80% explained variation which(cumsum(p$variance) > 80)[1] # Extract the PCs and append to our data pcs = p$rotated[,1:78] example_data = cbind(example_data, pcs) ## ----------------------------------------------------------------------------- # Conduct mice. In practice, m should be much larger (e.g., 10-100) imp = mice::mice(example_data, m=3) ## ----------------------------------------------------------------------------- mi_pca_res = RNAseqCovarImpute::limmavoom_imputed_data_pca(imp = imp, DGE = example_DGE, voom_formula = "~x + y + z + a + b", BPPARAM = SerialParam()) # Display the results for the first 5 genes for the x variable in the model. mi_pca_res[1:5, grep("^x", colnames(mi_pca_res))] ## ----------------------------------------------------------------------------- mi_pca_res_x = mi_pca_res %>% arrange(x_p) %>% mutate(x_p_adj = p.adjust(x_p, method = "fdr")) %>% dplyr::select(probe, x_coef, x_p, x_p_adj) end.pca = Sys.time() # To calculate runtime time.pca = end.pca - start.pca # To calculate runtime # Display the results for the first 5 genes mi_pca_res_x[1:5,] ## ----------------------------------------------------------------------------- # Get back the original example_data without the PCs appended data(example_data) start.old.method = Sys.time() # To calculate runtime intervals <- get_gene_bin_intervals(example_DGE, example_data, n = 10) ## ----message=FALSE, warning=FALSE, echo=FALSE--------------------------------- intervals %>% head(10) %>% knitr::kable(digits = 3, caption = "The first 10 gene bins. Start and end columns indicate row numbers for the beginning and end of each bin. Number indicates the number of genes in each bin.") ## ----------------------------------------------------------------------------- # Randomize the order of gene identifiers annot <- example_DGE$genes annot <- annot[sample(seq_len(nrow(annot))), ] # Match order of the genes in the DGE to the randomized order of genes in the annotation example_DGE <- example_DGE[annot, ] ## ----------------------------------------------------------------------------- gene_bin_impute <- impute_by_gene_bin(example_data, intervals, example_DGE, m = 3 ) ## ----------------------------------------------------------------------------- gene_bin_impute <- impute_by_gene_bin(example_data, intervals, example_DGE, m = 3, BPPARAM = SerialParam() ) ## ----------------------------------------------------------------------------- coef_se <- limmavoom_imputed_data_list( gene_intervals = intervals, DGE = example_DGE, imputed_data_list = gene_bin_impute, m = 3, voom_formula = "~x + y + z + a + b" ) ## ----------------------------------------------------------------------------- final_res <- combine_rubins( DGE = example_DGE, model_results = coef_se, predictor = "x" ) end.old.method = Sys.time() # To calculate runtime time.old.method = end.old.method - start.old.method ## ----message=FALSE, warning=FALSE, echo=FALSE--------------------------------- final_res %>% dplyr::select(probe, coef_combined, combined_p_bayes, combined_p_adj_bayes) %>% head(10) %>% knitr::kable(digits = 3, caption = "The top 10 genes associated with predictor x sorted by lowest P-value") ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()