## ----include=FALSE------------------------------------------------------------
library(knitr)
opts_chunk$set(concordance=FALSE)
set.seed(12345677)

## ----echo=FALSE, warning=FALSE, message=FALSE---------------------------------
library(RgnTX)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)

## ----Fig1, fig.cap = "The genome space is linear. The figure shows four genomic features in the genome.", echo=FALSE, out.width='.60\\linewidth'----
knitr::include_graphics("figures/section2.1.png")

## ----Fig2, fig.cap = "The transcriptome space is heterogeneous. It is usually unclear which specific isoform transcript is associated with the transcriptome element because it overlaps with multiple isoforms when mapped to the genome, which is often the real scenario in biological research.", echo=FALSE, out.width='.60\\linewidth'----
knitr::include_graphics("figures/section2.2.png")

## ----Fig3, fig.cap = "Permutation space for each feature is distinct. Each feature will be permutated only within the set of isoform transcripts it is associated with.", echo=FALSE, out.width='.60\\linewidth'----
knitr::include_graphics("figures/section2.3.png")

## ----Fig4, fig.cap = "Overlapping counts between random region sets in different permutation spaces. Box boundaries represent the 25th and 75th percentiles; center line represents the median.", echo=FALSE, out.width='.80\\linewidth'----
knitr::include_graphics("figures/section2.4.png")

## ----eval = FALSE-------------------------------------------------------------
#  if (!requireNamespace("devtools", quietly = TRUE))
#      install.packages("devtools")
#  
#  devtools::install_github("yue-wang-biomath/RgnTX", build_vignettes = TRUE)

## ----eval = FALSE-------------------------------------------------------------
#  if (!require("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("RgnTX")

## ----message=FALSE, warning=FALSE---------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene

set.seed(12345677)
example.list <- randomizeTx(txdb, random_num = 10, random_length = 200)
example.list

## ----message=FALSE, warning=FALSE---------------------------------------------
example.gr <- GRangesList2GRanges(example.list)
example.gr
example.list <- GRanges2GRangesList(example.gr)

## ----warning=FALSE, message=FALSE---------------------------------------------
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene 
type <- "mature"
trans.ids <- c("170", "782", "974", "1364", "1387")

## ----warning=FALSE, message=FALSE---------------------------------------------
set.seed(12345677)
randomResults <- randomizeTx(txdb, trans_ids = trans.ids, random_num = 10,
                             random_length = 100)

## ----warning=FALSE, message=FALSE---------------------------------------------
randomResults 

## ----warning=FALSE, message=FALSE---------------------------------------------
width(randomResults)

## ----warning=FALSE, message=FALSE---------------------------------------------
trans.ids <- c("170", "782", "974", "1364", "1387")
exons.tx0 <- exonsBy(txdb)
regions.A <- exons.tx0[trans.ids]
regions.A

## ----warning=FALSE, message=FALSE---------------------------------------------
set.seed(12345677)
random.regions.A <- randomizeTransByOrder(regions_A = regions.A, 
random_length = 100)
width(regions.A)
width(random.regions.A)

## ----Fig5, fig.cap = "Features without isoform ambiguity.", echo=FALSE, out.width='.80\\linewidth'----
knitr::include_graphics("figures/section4.1.png")

## ----Fig6, fig.cap = "Features with isoform ambiguity.", echo=FALSE, out.width='.80\\linewidth'----
knitr::include_graphics("figures/section4.2.png")

## ----message=FALSE, warning=FALSE---------------------------------------------
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
trans.ids<- c("170", "782", "974", "1364", "1387")
RS1 <-  randomizeTx(txdb, trans.ids, random_num = 100, random_length = 100)

## ----warning=FALSE, message=FALSE---------------------------------------------
randomResults <- randomizeFeaturesTx(RS1, txdb, type = "mature")

## ----warning=FALSE, message=FALSE---------------------------------------------
randomResults <- randomizeFeaturesTx(RS1, txdb, type = "CDS")

## ----warning=FALSE, message=FALSE---------------------------------------------
randomResults <- randomizeFeaturesTx(RS1, txdb, N = 5)

## ----message=FALSE, warning=FALSE---------------------------------------------
file <- system.file(package="RgnTX", "extdata", "m6A_sites_data.rds")
m6A_sites_data <- readRDS(file)
m6A_sites_data[1:5]

## ----message=FALSE, warning=FALSE---------------------------------------------
randomResults <- randomizeFeaturesTxIA(RS = m6A_sites_data[1:5], 
                                       txdb, type = "mature", N = 1)
length(randomResults)

## ----eval = FALSE-------------------------------------------------------------
#  set.seed(12345677)
#  library(TxDb.Hsapiens.UCSC.hg19.knownGene)
#  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#  exons.tx0 <- exonsBy(txdb)
#  trans.ids <- sample(names(exons.tx0), 50)
#  
#  A <- randomizeTx(txdb, trans.ids, random_num = 100, random_length = 100)
#  B <- c(randomizeTx(txdb, trans.ids, random_num = 50, random_length = 100),
#         A[1:50])
#  
#  permTestTx_results <- permTestTx(RS1 = A,
#                                   RS2 = B,
#                                   txdb = txdb,
#                                   type = "mature",
#                                   ntimes = 50,
#                                   ev_function_1 = overlapCountsTx,
#                                   ev_function_2 = overlapCountsTx)

## ----echo = FALSE-------------------------------------------------------------
file <- system.file(package="RgnTX", "extdata", "permTestTx_results2.rds")
permTestTx_results <- readRDS(file)

## ----warning=FALSE, message=FALSE---------------------------------------------
names(permTestTx_results)

## ----Fig7, fig.cap = "Association between two region sets.", warning=FALSE, message=FALSE----
p1 <- plotPermResults(permTestTx_results, binwidth = 1)
p1

## ----Fig8, fig.cap = "Overlap counting ways of two kinds of features with the customPick regions.", echo=FALSE----
knitr::include_graphics("figures/section5.2.png")

## ----warning=FALSE, message=FALSE---------------------------------------------
getCDS = function(trans_ids, txdb,...){
  cds.tx0 <- cdsBy(txdb, use.names=FALSE)
  cds.names <- as.character(intersect(names(cds.tx0), trans_ids))
  cds = cds.tx0[cds.names]
  return(cds)
}

## ----eval = FALSE-------------------------------------------------------------
#  set.seed(12345677)
#  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#  RS1 <- randomizeTx(txdb, "all", random_num = 100, random_length = 200,
#                     type = "CDS")
#  
#  permTestTx_results <- permTestTx_customPick(RS1 = RS1,
#                                              txdb = txdb,
#                                              customPick_function = getCDS,
#                                              ntimes = 50,
#                                              ev_function_1 = overlapCountsTx,
#                                              ev_function_2 = overlapCountsTx)
#  p1 <- plotPermResults(permTestTx_results, binwidth = 1)
#  p1

## ----Fig9, echo = FALSE, fig.cap = " Association between some features with CDS regions.", warning=FALSE, message=FALSE----
file <- system.file(package="RgnTX", "extdata", "permTestTx_results3.rds")
permTestTx_results <- readRDS(file)
p1 <- plotPermResults(permTestTx_results, binwidth = 1)
p1

## ----eval = FALSE-------------------------------------------------------------
#  set.seed(12345677)
#  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#  file <- system.file(package="RgnTX", "extdata", "m6A_sites_data.rds")
#  m6A_sites_data <- readRDS(file)
#  RS1 <- m6A_sites_data[1:500]
#  
#  permTestTx_results <- permTestTxIA_customPick(RS1 = RS1,
#                                         txdb = txdb,
#                                         customPick_function = getStopCodon,
#                                         ntimes = 50,
#                                         ev_function_1 = overlapCountsTxIA,
#                                         ev_function_2 = overlapCountsTx)
#  p_a <- plotPermResults(permTestTx_results, binwidth = 1)
#  p_a

## ----Fig10, echo = FALSE, fig.cap = " Association between N6-Methyladenosine (500 sites) and stop codon regions.", warning=FALSE, message=FALSE----
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
file <- system.file(package="RgnTX", "extdata", "permTestTx_results.rds")
permTestTx_results <- readRDS(file)
p_a <- plotPermResults(permTestTx_results, binwidth = 1)
p_a

## ----eval = FALSE-------------------------------------------------------------
#  set.seed(12345677)
#  permTestTx_results <- permTestTxIA_customPick(RS1 = m6A_sites_data,
#                                         txdb = txdb,
#                                         customPick_function = getStopCodon,
#                                         ntimes = 50)
#  p_b <- plotPermResults(permTestTx_results)
#  p_b

## ----Fig11, fig.cap = " Association between N6-Methyladenosine and stop codon regions.", echo=FALSE----
knitr::include_graphics("figures/section5.5.png")

## ----warning=FALSE, message=FALSE---------------------------------------------
set.seed(12345677)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
trans.ids1<- c("170")
RS1 <- randomizeTx(txdb = txdb, trans_ids = trans.ids1,
                   random_num = 20, random_length = 100)
RS2 <- randomizeTx(txdb = txdb, trans_ids = trans.ids1,
                   random_num = 20, random_length = 100)
trans.ids2 <-  c("170", "782", "974", "1364", "1387")
RSL <- randomizeTx(txdb = txdb, trans_ids = trans.ids2,
                   random_num = 20, random_length = 100, N = 50)

## ----Fig12, fig.cap = "Association between two region sets picked from the same transcript .", warning=FALSE, message=FALSE----
permTestTx_results <- permTestTx_customAll(RSL = RSL, RS1 = RS1, RS2 = RS2)
p_3 <- plotPermResults(permTestTx_results)
p_3

## ----eval=FALSE---------------------------------------------------------------
#  set.seed(12345677)
#  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#  exons.tx0 <- exonsBy(txdb)
#  trans.ids <- sample(names(exons.tx0), 50)
#  
#  A <- randomizeTx(txdb, trans.ids, random_num = 100, random_length = 100)
#  B <- c(randomizeTx(txdb, trans.ids, random_num = 50, random_length = 100),
#         A[1:50])
#  
#  permTestTx_results <- permTestTx(RS1 = A,
#                                   RS2 = B,
#                                   txdb = txdb,
#                                   type = "mature",
#                                   ntimes = 50,
#                                   ev_function_1 = overlapCountsTx,
#                                   ev_function_2 = overlapCountsTx)
#  
#  p_a <- plotPermResults(permTestTx_results, binwidth = 1)
#  
#  p_b <- plotPermResults(permTestTx_results, test_type = "two-sided")
#  
#  p_c <- plotPermResults(permTestTx_results, alpha = 0.025)
#  
#  p_d <- plotPermResults(permTestTx_results, alpha = 0.1)
#  
#  set.seed(12345677)
#  permTestTx_results_e <- permTestTx( RS1 = A,
#                                      RS2 = B,
#                                      txdb = txdb,
#                                      type = "mature",
#                                      ntimes = 50,
#                                      ev_function_1 = overlapWidthTx,
#                                      ev_function_2 = overlapWidthTx)
#  p_e <- plotPermResults(permTestTx_results_e, binwidth = 150)
#  p_e
#  
#  set.seed(12345677)
#  permTestTx_results_f <- permTestTx( RS1 = A,
#                                      RS2 = B,
#                                      txdb = txdb,
#                                      type = "mature",
#                                      ntimes = 50,
#                                      ev_function_1 = distanceTx,
#                                      ev_function_2 = distanceTx, beta = 0.3)
#  p_f <- plotPermResults(permTestTx_results_f)
#  p_f
#  
#  set.seed(12345677)
#  permTestTx_results_g <- permTestTx(RS1 = A,
#                                   RS2 = B,
#                                   txdb = txdb,
#                                   type = "mature",
#                                   ntimes = 500,
#                                   ev_function_1 = overlapCountsTx,
#                                   ev_function_2 = overlapCountsTx)
#  
#  p_g <- plotPermResults(permTestTx_results_g, binwidth = 1)
#  p_g
#  
#  set.seed(12345677)
#  permTestTx_results_h <- permTestTx(RS1 = A,
#                                   RS2 = B,
#                                   txdb = txdb,
#                                   type = "mature",
#                                   ntimes = 2000,
#                                   ev_function_1 = overlapCountsTx,
#                                   ev_function_2 = overlapCountsTx)
#  
#  p_h <- plotPermResults(permTestTx_results_h, binwidth = 1)
#  p_h

## ----Fig13, echo=FALSE, warning=FALSE,  comment=FALSE, message= FALSE, fig.cap = "Test results of an association with different argument settings."----
knitr::include_graphics("figures/section7.png")

## ----eval=FALSE---------------------------------------------------------------
#  set.seed(12345677)
#  file <- system.file(package="RgnTX", "extdata", "m6A_sites_data.rds")
#  m6A_sites_data <- readRDS(file)
#  RS1 <- m6A_sites_data[1:500]
#  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#  
#  permTestTx_results <- permTestTxIA_customPick(RS1 = RS1,
#                                            txdb = txdb,
#                                            type = "mature",
#                                            customPick_function = getStopCodon,
#                                            ntimes = 50)
#  shiftedZScoreTx_results <- shiftedZScoreTx(permTestTx_results, txdb = txdb,
#                                             window = 2000,
#                                             step = 200,
#                                             ev_function_1 = overlapCountsTxIA)
#  p1 <- plotShiftedZScoreTx(shiftedZScoreTx_results)
#  p1

## ----Fig14, echo = FALSE, warning=FALSE, comment=FALSE, message= FALSE, fig.cap = "Shifted z-scores. Analysis of the association of m$^6$A and stop codon regions with window 2000 and step 200."----
file <- system.file(package="RgnTX", "extdata", "shiftedZScoreTx_results1.rds")
shiftedZScoreTx_results <- readRDS(file)
p1 <- plotShiftedZScoreTx(shiftedZScoreTx_results)
p1

## ----eval=FALSE---------------------------------------------------------------
#  set.seed(12345677)
#  shiftedZScoreTx_results <- shiftedZScoreTx(permTestTx_results,
#                                             window = 300,
#                                             step = 10, txdb = txdb,
#                                             ev_function_1 = overlapCountsTxIA)
#  p2 <- plotShiftedZScoreTx(shiftedZScoreTx_results)
#  p2

## ----Fig15, echo = FALSE, warning=FALSE, comment=FALSE, message= FALSE, fig.cap = "Shifted z-scores. Analysis of the association of m$^6$A and stop codon regions with window 300 and step 10."----
file <- system.file(package="RgnTX", "extdata", "shiftedZScoreTx_results2.rds")
shiftedZScoreTx_results <- readRDS(file)
p2 <- plotShiftedZScoreTx(shiftedZScoreTx_results)
p2

## ----message=FALSE, warning=FALSE---------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
# Five transcripts with positive strand.
trans.ids <- c("170", "782", "974", "1364", "1387")

# Take the CDS part of them.
cds.tx0 <- cdsBy(txdb, use.names = FALSE)
cds.p <- cds.tx0[trans.ids]

# The width of the region from each transcript to be picked is 200.
width <- 200

# The start vector 
start = as.numeric(max(end(cds.p)))

R.cds.last200 <- shiftTx(regions = cds.p, start = start, width = width, 
                         direction = "left", strand = "+")
R.cds.last200

## ----message=FALSE, warning=FALSE---------------------------------------------
R.cds.last200.list <- GRanges2GRangesList(R.cds.last200)
R.cds.last200.list

## ----message=FALSE, warning=FALSE---------------------------------------------
width(R.cds.last200.list)

## ----results = "asis", echo=FALSE---------------------------------------------
table1 <- matrix(c(100, 500, 1000, 2000, 5000, 10000,
         1.783995, 2.027494, 2.341810, 3.081001, 5.097162, 8.434085,
         1.786938, 2.024690, 2.367650, 3.006552, 5.028154, 8.959244,
         1.794495, 2.075795, 2.390498, 3.020262, 5.107760, 9.162599,
         1.761639, 2.046817, 2.585473, 3.302140, 5.187891, 9.148554), nrow = 6, ncol = 5)
table1 = data.frame(table1)
names(table1) <- c('Num/Length', 10,50, 100, 200)
knitr::kable(table1, "simple", caption = 'Randomization time of randomizeTx.')

## ----results = "asis", echo=FALSE---------------------------------------------
table2 <- matrix(c(100, 500, 1000, 2000, 5000, 10000,
         1.770159, 2.004670, 2.335991, 3.034632, 5.039495, 8.644615,
         1.801999, 2.024997, 2.322497, 3.042497, 5.194495, 8.500142,
         1.755498, 2.096545, 2.377498, 3.277249, 5.190499, 8.819886,
         1.771499, 2.026999, 2.359494, 3.291889, 5.235691, 9.216726), nrow = 6, ncol = 5)
table2 = data.frame(table2)
names(table2) <- c('Num/Length', 10,50, 100, 200)
knitr::kable(table2, "simple", caption = 'Randomization time of randomizeFeaturesTx.')

## ----sessionInfo--------------------------------------------------------------
sessionInfo()