## ----eval=FALSE--------------------------------------------------------------- # # Install via BioConductor # if (!require("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("katdetectr") # # # or the latest version # BiocManager::install("katdetectr", version = "devel") # # # or from github # devtools::install_github("https://github.com/ErasmusMC-CCBC/katdetectr") ## ----------------------------------------------------------------------------- library(katdetectr) ## ----label = "Importing genomic variants", eval = TRUE------------------------ # Genomic variants stored within the VCF format. pathToVCF <- system.file(package = "katdetectr", "extdata/CPTAC_Breast.vcf") # Genomic variants stored within the MAF format. pathToMAF <- system.file(package = "katdetectr", "extdata/APL_primary.maf") # In addition, we can generate synthetic genomic variants including kataegis foci using generateSyntheticData(). # This functions returns a VRanges object. syntheticData <- generateSyntheticData(nBackgroundVariants = 2500, nKataegisFoci = 1) ## ----label = "Detection of kataegis foci", eval = TRUE------------------------ # Detect kataegis foci within the given VCF file. kdVCF <- detectKataegis(genomicVariants = pathToVCF) # # Detect kataegis foci within the given MAF file. # As this file contains multiple samples, we set aggregateRecords = TRUE. kdMAF <- detectKataegis(genomicVariants = pathToMAF, aggregateRecords = TRUE) # Detect kataegis foci within the synthetic data. kdSynthetic <- detectKataegis(genomicVariants = syntheticData) ## ----label = "Overview of KatDetect objects", eval = TRUE--------------------- summary(kdVCF) print(kdVCF) show(kdVCF) # Or simply: kdVCF ## ----label = "Retrieving information from KatDetect objects", eval = TRUE----- getGenomicVariants(kdVCF) getSegments(kdVCF) getKataegisFoci(kdVCF) getInfo(kdVCF) ## ----label = "Visualisation of KatDetect objects", eval = TRUE---------------- rainfallPlot(kdVCF) # With showSegmentation, the detected segments (changepoints) as visualized with their mean IMD. rainfallPlot(kdMAF, showSegmentation = TRUE) # With showSequence, we can display specific chromosomes or all chromosomes in which a putative kataegis foci has been detected. rainfallPlot(kdSynthetic, showKataegis = TRUE, showSegmentation = TRUE, showSequence = "Kataegis") ## ----------------------------------------------------------------------------- # function for modeling sample mutation rate modelSampleRate <- function(IMDs) { lambda <- log(2) / median(IMDs) return(lambda) } # function for calculating the nth root of x nthroot <- function(x, n) { y <- x^(1 / n) return(y) } # Function that defines the IMD cutoff specific for each segment # Within this function you can use all variables available in the slots: genomicVariants and segments IMDcutoffFun <- function(genomicVariants, segments) { IMDs <- genomicVariants$IMD totalVariants <- segments$totalVariants width <- segments |> dplyr::as_tibble() |> dplyr::pull(width) sampleRate <- modelSampleRate(IMDs) IMDcutoff <- -log(1 - nthroot(0.01 / width, ifelse(totalVariants != 0, totalVariants - 1, 1))) / sampleRate IMDcutoff <- replace(IMDcutoff, IMDcutoff > 1000, 1000) return(IMDcutoff) } kdCustom <- detectKataegis(syntheticData, IMDcutoff = IMDcutoffFun) ## ----------------------------------------------------------------------------- # generate data that contains non standard sequences syndata1 <- generateSyntheticData(seqnames = c("chr1_gl000191_random", "chr4_ctg9_hap1")) syndata2 <- generateSyntheticData(seqnames = "chr1") syndata <- suppressWarnings(c(syndata1, syndata2)) # construct a dataframe that contains the length of the sequences # each column name (name of the sequence) sequenceLength <- data.frame( chr1 = 249250621, chr1_gl000191_random = 106433, chr4_ctg9_hap1 = 590426 ) # provide the dataframe with the sequence lengths using the refSeq argument kdNonStandard <- detectKataegis(genomicVariants = syndata, refSeq = sequenceLength) ## ----label = "Session Information", eval = TRUE------------------------------- utils::sessionInfo()