## ----installScaleSim, warning = FALSE, message = FALSE------------------------ ##If needed, install devtools ##install.packages("devtools") # devtools::load_all("~/Documents/0_git/ALDEx_bioc") library(ALDEx2) set.seed(1234) ## ----dataSim1, message = FALSE, warning = FALSE------------------------------- library(tidyverse) ##Function to create the true abundances via Poisson resampling create_true_abundances <- function(d, n){ dd <- length(d)/2 dat <- d %>% sapply(function(x) rpois(n, lambda=x)) %>% t() %>% as.data.frame() %>% split(rep(1:2, each=dd)) %>% purrr::map(~`rownames<-`(.x, paste0("Taxa", 1:dd))) %>% purrr::map(t) %>% do.call(rbind, .) %>% as.data.frame() %>% cbind(Condition=factor(rep(c("Pre", "Post"), each=n), levels = c("Pre", "Post")), .) %>% `rownames<-`(., NULL) return(dat) } ## ----dataSim2, message = FALSE------------------------------------------------ ##Function to resample data to an arbitrary size resample_data <- function(dat, seq.depth){ ddat <- as.matrix(dat[,-1])/rowSums(as.matrix(dat[,-1])) for (i in 1:nrow(dat)){ dat[i,-1] <- rmultinom(1, size=seq.depth, prob=ddat[i,]) } return(dat) } ## ----dataSim3, message = FALSE------------------------------------------------ ###Setting the data parameters for the simulation ##Denotes the mean for the 20 taxa ##Note only taxa 3, 4, 15, and 20 change d.pre <- c(4000, 4000, 4000, 4000, 4000, 400,400,400,400,4000,400,500,500,500, 400,400,400,400,400,400) d.post <- d.pre d.post[c(3,4,15,20)] <- c(3000, 2000, 200, 100) #Combining together d <- c(d.pre, d.post) ##Create scale abundances dat <- create_true_abundances(d, n=100) ##Create resampled data rdat <- resample_data(dat, seq.depth=5000) ## ----dataSim5, message = FALSE------------------------------------------------ ## Finding sample totals totals <- rowSums(dat[,-1]) flow_cytometry <- function(totals, samp.names, replicates = 3){ samp.names <- rep(samp.names, each = replicates) flow_vals <- sapply(totals, FUN = function(totals, replicates){rnorm(replicates,totals,3e2)}, replicates = replicates, simplify = TRUE) flow_data <- data.frame("sample" = samp.names, "flow" = c(flow_vals)) return(flow_data) } flow_data <- flow_cytometry(totals, rownames(dat)) ## ----dataElements------------------------------------------------------------- ##Inspecting elements Y <- t(rdat[,-1]) Y[1:5,1:5] conds <- as.character(rdat[,1]) conds head(flow_data) ## ----defaultModel, message = FALSE-------------------------------------------- ## Original ALDEx2 mod.base <- aldex(Y, conds, gamma = NULL) mod.base %>% filter(we.eBH < 0.05) ## ----defaultModel2, message = FALSE------------------------------------------- ## Recreating ALDEx2 mod.clr <- aldex(Y, conds, gamma = 1e-3) mod.clr %>% filter(we.eBH < 0.05) ## ----defaultModel3, message = FALSE------------------------------------------- plot(mod.base$effect, mod.clr$effect, xlab = "Original ALDEx2 Effect Size", ylab = "Minimal Noise Effect Size") abline(a=0,b=1, col = "red", lty = "dashed") ## ----defaultModelNoise, message = FALSE--------------------------------------- ## Adding noise mod.ss <- aldex(Y, conds, gamma = .25) mod.ss %>% filter(we.eBH < 0.05) ## ----defaultModelNoise2, message = FALSE-------------------------------------- ## Adding more noise mod.coda <- aldex(Y, conds, gamma = 1) mod.coda %>% filter(we.eBH < 0.05) ## ----measureMod--------------------------------------------------------------- flow_data_collapse <- flow_data %>% group_by(sample) %>% mutate(mean = mean(flow)) %>% mutate(stdev = sd(flow)) %>% select(-flow) %>% ungroup() %>% unique() ## ----measureModSamps---------------------------------------------------------- scale_samps <- matrix(NA, nrow = 200, ncol = 128) for(i in 1:nrow(scale_samps)){ scale_samps[i,] <- rnorm(128, flow_data_collapse$mean[i], flow_data_collapse$stdev[i]) } ## ----measureModFit, message = FALSE------------------------------------------- mod.flow <- aldex(Y, conds, gamma = scale_samps) mod.flow %>% filter(we.eBH < 0.05) ## ----knowModSamps------------------------------------------------------------- # make a per-sample vector of scales scales <- c(rep(1, 100), rep(0.9, 100)) scale_samps <- aldex.makeScaleMatrix(.15, scales, conds, log=FALSE) # note 10% difference is about 2^.13 # can exactly replicate the above in log space with: # scales <- c(rep(.13, 100), rep(0, 100)) # scale_samps <- aldex.makeScaleMatrix(.15,scales, conds, log=TRUE) ## ----knowModFit, message = FALSE---------------------------------------------- mod.know <- aldex(Y, conds, gamma = scale_samps) mod.know %>% filter(we.eBH < 0.05) ## ----aldex.plot, echo=FALSE, warning=FALSE------------------------------------ par(mfrow=c(2,2)) aldex.plot(mod.clr, xlim=c(0.1,0.8), ylim=c(-0.5,2), main='Default/CLR') abline(h=0, lty=2, col='grey') aldex.plot(mod.ss, xlim=c(0.1,0.8), ylim=c(-0.5,2), main='Relaxed') abline(h=0, lty=2, col='grey') aldex.plot(mod.flow, xlim=c(0.1,0.8), ylim=c(-0.5,2), main='Flow') abline(h=0, lty=2, col='grey') aldex.plot(mod.know, xlim=c(0.1,0.8), ylim=c(-0.5,2), main='Knowledge') abline(h=0, lty=2, col='grey') ## ----graph, echo = FALSE, warning = FALSE------------------------------------- library(ggplot2) library(ggpattern) library(cowplot) ##Function to label True/false positive/negatives sig_code <- function(sig, Taxa, truth){ out <- rep("TN", length(Taxa)) out[sig &(Taxa %in% truth)] <- "TP" # True Positives out[sig & (out!="TP")] <- "FP" # False Positives out[!sig & (Taxa %in% truth)] <- "FN" # False Negatives return(out) } ##Function to summarize aldex2 output summary_aldex2 <- function(fit, pval = 0.05){ fit %>% as.data.frame() %>% rownames_to_column("category") %>% select(category, effect, we.ep, we.eBH) %>% mutate(padj=we.eBH) %>% mutate(mean=effect) %>% mutate(low=NA, high=NA) %>% mutate(sig = ifelse(padj <= pval, TRUE, FALSE)) } ##Function to create the grid plot plot_sig2 <- function(rrs, truth, ...){ names(rrs) <- model.names[names(rrs)] bind_rows(rrs, .id="Model") %>% select(Model, category, sig) %>% mutate(Taxa = category) %>% mutate(Taxa=as.numeric(sub("Taxa", "", Taxa))) %>% mutate(sigcode = sig_code(sig, Taxa, truth)) %>% mutate(Taxa=factor(Taxa), sigcode=factor(sigcode, levels=c("TP", "TN", "FP", "FN"))) %>% mutate(Model=factor(Model, levels=model.name.levels)) %>% ggplot(aes(x=Taxa, y=Model)) + geom_tile_pattern(aes(fill=sigcode, pattern = sigcode), color="darkgrey",pattern_fill = 'grey',pattern_colour = 'grey', pattern_density = 0.015) + theme_minimal() + theme(panel.grid = element_blank(), legend.title=element_blank(), text = element_text(size=16), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + scale_pattern_manual(values = c(TP = "none", TN = "none", FP = "none", FN = "stripe")) + scale_fill_manual(values= c("black", "white", "grey", "white")) } ##Plotting the results ##Pvalue at default of 0.05 p1 <- gather(dat, Taxa, Count, -Condition) %>% mutate(Taxa=as.numeric(sub("Taxa", "", Taxa))) %>% mutate(Taxa=factor(Taxa)) %>% ggplot(aes(x=Taxa, y=Count)) + geom_boxplot(aes(fill = Condition, color = Condition), position=position_dodge(width=1), size=1)+ scale_y_log10() + theme_bw() + scale_fill_manual(values = c("#fdae61", "#2b83ba")) + scale_color_manual(values = c("#fdae61", "#2b83ba")) + labs(color='Antibiotic\nTreatment') + labs(fill='Antibiotic\nTreatment') + theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x=element_blank(), text = element_text(size=16)) dd <- length(d)/2 truth1 <- !(d[1:dd]==d[(dd+1):(2*dd)])##testing if the mean is different truth2 <- (1:dd)[truth1]##Locations of the differences model.names <- c("mod.base"="ALDEx2", "mod.clr" = "CLR", "mod.ss"= "Relaxed", "mod.know" = "Knowledged-Based", "mod.flow" = "Flow-Based") model.name.levels <- c("Flow-Based", "Knowledged-Based", "Relaxed", "CLR", "ALDEx2") rrs <- list(mod.base=summary_aldex2(mod.base), mod.clr = summary_aldex2(mod.clr), mod.ss = summary_aldex2(mod.ss), mod.know = summary_aldex2(mod.know), mod.flow = summary_aldex2(mod.flow)) p2 <- plot_sig2(rrs, truth=truth2) p <- plot_grid(p1, p2, nrow=2, align="v", rel_heights=c(1.7, 1)) p ## ----senAnalysis, message = FALSE--------------------------------------------- gamma_to_test <- c(1e-3, .1, .25, .5, .75, 1, 2, 3, 4, 5) ##Run the CLR function clr <- aldex.clr(Y, conds) ##Run sensitivity analysis function sen_res <- aldex.senAnalysis(clr, gamma = gamma_to_test) ## ----senAnalysis2, message = FALSE-------------------------------------------- length(sen_res) length(gamma_to_test) head(sen_res[[1]]) ## ----senAnalysis3, message = FALSE-------------------------------------------- ##Plotting plotGamma(sen_res, thresh = .1) ## ----session------------------------------------------------------------------ sessionInfo()