### R code from vignette source 'GrowingTrees.Rnw' ################################################### ### code chunk number 1: GrowingTrees.Rnw:51-53 ################################################### options(continue=" ") options(width=80) ################################################### ### code chunk number 2: expr1 ################################################### library(DECIPHER) # specify the path to your sequence file: fas <- "<>" # OR find the example sequence file used in this tutorial: fas <- system.file("extdata", "Streptomyces_ITS_aligned.fas", package="DECIPHER") seqs <- readDNAStringSet(fas) # use readAAStringSet for amino acid sequences seqs # the aligned sequences ################################################### ### code chunk number 3: expr2 ################################################### seqs <- unique(seqs) # remove duplicated sequences ns <- gsub("^.*Streptomyces( subsp\\. | sp\\. | | sp_)([^ ]+).*$", "\\2", names(seqs)) names(seqs) <- ns # name by species (or any other preferred names) seqs <- seqs[!duplicated(ns)] # remove redundant sequences from the same species seqs ################################################### ### code chunk number 4: expr3 ################################################### MODELS ################################################### ### code chunk number 5: expr4 ################################################### set.seed(123) # set the random number seed treeME <- Treeline(seqs, verbose=FALSE, processors=1) set.seed(NULL) # reset the seed ################################################### ### code chunk number 6: expr5 ################################################### treeME attributes(treeME) str(treeME, max.level=4) ################################################### ### code chunk number 7: expr6 ################################################### getOption("SweaveHooks")[["fig"]]() set.seed(123) # set the random number seed tree <- Treeline(seqs, method="ML", model="GTR+G4", maxTime=0.01, verbose=FALSE, processors=1) set.seed(NULL) # reset the seed plot(tree) ################################################### ### code chunk number 8: expr7 ################################################### attr(tree, "members") # number of leaves below this (root) node attr(tree, "height") # height of the node (in this case, the midpoint root) attr(tree, "score") # best score (in this case, the -LnL) attr(tree, "model") # either the specified or automatically select transition model attr(tree, "parameters") # the free model parameters (or NA if unoptimized) attr(tree, "midpoint") # center of the edge (for plotting) ################################################### ### code chunk number 9: expr8 ################################################### getOption("SweaveHooks")[["fig"]]() plot(dendrapply(tree, function(x) { s <- attr(x, "probability") # choose "probability" (aBayes) if (!is.null(s) && !is.na(s)) { s <- formatC(as.numeric(s), digits=2, format="f") attr(x, "edgetext") <- paste(s, "\n") } attr(x, "edgePar") <- list(p.col=NA, p.border=NA, t.col="#CC55AA", t.cex=0.7) if (is.leaf(x)) attr(x, "nodePar") <- list(lab.font=3, pch=NA) x }), horiz=TRUE, yaxt='n') # add a scale bar (placed manually) arrows(2, 0, 2.4, 0, code=3, angle=90, len=0.05, xpd=TRUE) text(2.2, 0, "0.4 subs./site", pos=3, xpd=TRUE) ################################################### ### code chunk number 10: expr9 ################################################### set.seed(123) # set the random number seed tree_UniformCosts <- Treeline(seqs, method="MP", reconstruct=TRUE, verbose=FALSE, processors=1) set.seed(NULL) # reset the seed ################################################### ### code chunk number 11: expr10 ################################################### mat <- attr(tree_UniformCosts, "transitions") mat # count of state transitions mat <- mat + t(mat) # make symmetric mat <- mat/(sum(mat)/2) # normalize mat <- -log2(mat) # convert to bits diag(mat) <- 0 # reset diagonal mat # a derived cost matrix ################################################### ### code chunk number 12: expr11 ################################################### getOption("SweaveHooks")[["fig"]]() set.seed(123) # set the random number seed tree_NonUniformCosts <- Treeline(seqs, method="MP", costMatrix=mat, reconstruct=TRUE, verbose=FALSE, processors=1) set.seed(NULL) # reset the seed splits <- function(x) { y <- sapply(x, function(x) paste(sort(unlist(x)), collapse=" ")) if (!is.leaf(x)) y <- c(y, splits(x[[1]]), splits(x[[2]])) y } splits_UniformCosts <- splits(tree_UniformCosts) splits_NonUniformCosts <- splits(tree_NonUniformCosts) dashEdges <- function(x, splits) { y <- paste(sort(unlist(x)), collapse=" ") if (!y %in% splits) attr(x, "edgePar") <- list(lty=2) x } layout(matrix(1:2, nrow=1)) plot(dendrapply(tree_UniformCosts, dashEdges, splits_NonUniformCosts), main="MP uniform costs") plot(dendrapply(tree_NonUniformCosts, dashEdges, splits_UniformCosts), main="MP non-uniform costs") ################################################### ### code chunk number 13: expr12 ################################################### getOption("SweaveHooks")[["fig"]]() new_tree <- MapCharacters(tree_NonUniformCosts, labelEdges=TRUE) plot(new_tree, edgePar=list(p.col=NA, p.border=NA, t.col="#55CC99", t.cex=0.7)) attr(new_tree[[1]], "change") # state changes on first branch left of (virtual) root ################################################### ### code chunk number 14: expr13 ################################################### reps <- 100 # number of bootstrap replicates tree1 <- Treeline(seqs, verbose=FALSE, processors=1) partitions <- function(x) { if (is.leaf(x)) return(NULL) x0 <- paste(sort(unlist(x)), collapse=" ") x1 <- partitions(x[[1]]) x2 <- partitions(x[[2]]) return(list(x0, x1, x2)) } pBar <- txtProgressBar() bootstraps <- vector("list", reps) for (i in seq_len(reps)) { r <- sample(width(seqs)[1], replace=TRUE) at <- IRanges(r, width=1) seqs2 <- extractAt(seqs, at) seqs2 <- lapply(seqs2, unlist) seqs2 <- DNAStringSet(seqs2) temp <- Treeline(seqs2, verbose=FALSE) bootstraps[[i]] <- unlist(partitions(temp)) setTxtProgressBar(pBar, i/reps) } close(pBar) ################################################### ### code chunk number 15: expr14 ################################################### getOption("SweaveHooks")[["fig"]]() bootstraps <- table(unlist(bootstraps)) original <- unlist(partitions(tree1)) hits <- bootstraps[original] names(hits) <- original w <- which(is.na(hits)) if (length(w) > 0) hits[w] <- 0 hits <- round(hits/reps*100) labelEdges <- function(x) { if (is.null(attributes(x)$leaf)) { part <- paste(sort(unlist(x)), collapse=" ") attr(x, "edgetext") <- as.character(hits[part]) } return(x) } tree2 <- dendrapply(tree1, labelEdges) attr(tree2, "edgetext") <- NULL # remove text from (virtual) root branch plot(tree2, edgePar=list(t.cex=0.5), nodePar=list(lab.cex=0.7, pch=NA)) ################################################### ### code chunk number 16: expr15 ################################################### rapply(tree, attr, which="label") # label of each leaf (left to right) labels(tree) # alternative rapply(tree, attr, which="height") # height of each leaf (left to right) italicize <- function(x) { if(is.leaf(x)) attr(x, "label") <- as.expression(substitute(italic(leaf), list(leaf=attr(x, "label")))) x } rapply(tree, italicize, how="replace") # italicize leaf labels ################################################### ### code chunk number 17: expr16 ################################################### getOption("SweaveHooks")[["fig"]]() d <- DistanceMatrix(seqs, correction="F81+F", verbose=FALSE, processors=1) exclusive <- function(x) { if (!is.leaf(x)) { # leaves are trivially exclusive leaves <- unlist(x) max_dist <- max(d[leaves, leaves]) # max within group if (all(max_dist < d[-leaves, leaves])) attr(x, "edgePar") <- list(col="orange") } x } plot(dendrapply(tree, exclusive)) ################################################### ### code chunk number 18: expr17 ################################################### Spp <- c("coelicolor", "lividans", "AA4", "Mg1", "scabiei") # species to retain extractClade <- function(x) { if (is.leaf(x)) { if (sum(Spp %in% labels(x)) > 0L) { labels(x) } else { NULL } } else { x <- lapply(x, extractClade) x <- x[lengths(x) > 0] if (length(x) == 1) x <- x[[1]] x } } extractClade(tree) ################################################### ### code chunk number 19: expr18 ################################################### freqs <- alphabetFrequency(seqs, baseOnly=TRUE) head(freqs) # summarize the number of non-base characters (gaps/ambiguities) summary(freqs) # "other" is non-base characters # index of sequence with the most non-base characters which.max(freqs[, "other"]) freqs <- freqs[, DNA_BASES] background <- colMeans(freqs) background # look for sequences deviating from background frequencies chi2 <- colSums((t(freqs) - background)^2/background) pval <- pchisq(chi2, length(background) - 1, lower.tail=FALSE) w <- which(pval < 0.05) seqs[w] # outlier sequences freqs[w,] # frequencies of outliers # get sequence index of any very distant outlier sequences D <- DistanceMatrix(seqs, verbose=FALSE, processors=1) t <- table(which(D > 0.9, arr.ind=TRUE)) # choose a cutoff head(sort(t, decreasing=TRUE)) # index of top outliers, if any ################################################### ### code chunk number 20: expr19 ################################################### getOption("SweaveHooks")[["fig"]]() P <- Cophenetic(treeME) # patristic distances D <- as.dist(D) # conver to 'dist' object plot(D, P, xlab="Pairwise distance", ylab="Patristic distance", log="xy") abline(a=0, b=1) # for ME trees we want explained variance > 0.9 V <- 1 - sum((P - D)^2)/sum((D - mean(D))^2) V # check the input data if V << 1 cor(P, D) # should be >> 0 cor(log(P), log(D)) # should be >> 0 ################################################### ### code chunk number 21: expr20 ################################################### WriteDendrogram(tree, file="") ################################################### ### code chunk number 22: expr21 ################################################### params <- attr(tree, "parameters") cat("[", paste(names(params), params, sep="=", collapse=","), "]", sep="", append=TRUE, file="") ################################################### ### code chunk number 23: sessinfo ################################################### toLatex(sessionInfo(), locale=FALSE)