## ----style, echo = FALSE, results = 'asis', message = FALSE-------------------
BiocStyle::markdown()
library(knitr)

## ----libraryLoad, message = FALSE---------------------------------------------
library(metagene2)

## ----minimalAnalysis----------------------------------------------------------
# metagene objects are created by calling metagene2$new and providing
# regions and bam files:
mg <- metagene2$new(regions = get_demo_regions(), 
                   bam_files = get_demo_bam_files(), 
                   assay='chipseq')

# We can then plot coverage over those regions across all bam files.
mg$produce_metagene(title = "Demo metagene plot")

## ----bamFiles_show------------------------------------------------------------
# We create a vector with paths to the bam files of interest.
bam_files <- get_demo_bam_files()
basename(bam_files)

## ----bamFiles_bai-------------------------------------------------------------
# List .bai matching our specified bam files.
basename(Sys.glob(gsub(".bam", ".ba*", bam_files)))

## ----bamFiles_autoname--------------------------------------------------------
mg <- metagene2$new(regions = get_demo_regions(), bam_files = bam_files)
names(mg$get_params()[["bam_files"]])

## ----bamFiles_explicitname----------------------------------------------------
names(bam_files) = c("a1_1", "a1_2", "a2_1", "a2_2", "ctrl")
mg <- metagene2$new(regions = get_demo_regions(), bam_files = bam_files)
names(mg$get_params()[["bam_files"]])

## ----regionsArgumentFilename--------------------------------------------------
regions <- get_demo_region_filenames()
regions

## ----regionsArgumentFilenameLoad----------------------------------------------
mg <- metagene2$new(regions = get_demo_region_filenames(),
                   bam_files = get_demo_bam_files())
mg$get_regions()

## ----grangeslist_chipseq_regions----------------------------------------------
regions <- get_demo_regions()
regions

## ----grangeslist_chipseq_load-------------------------------------------------
mg <- metagene2$new(regions = regions,
                   bam_files = get_demo_bam_files())
mg$get_regions()

## ----demo_rna_regions---------------------------------------------------------
regions <- get_demo_rna_regions()
regions

## ----demo_stitch_mode---------------------------------------------------------
mg <- metagene2$new(regions = regions,
                   bam_files = get_demo_rna_bam_files(),
                   region_mode="stitch")
mg$get_regions()

## ----genratePromoterGRange, eval=FALSE----------------------------------------
#      # First locate the TxDb package containing the geneset of interest.
#      # Some of the most common TxDb packages include:
#      #  - TxDb.Hsapiens.UCSC.hg38.knownGene
#      #  - TxDb.Hsapiens.UCSC.hg19.knownGene
#      #  - TxDb.Mmusculus.UCSC.mm10.knownGene
#      #  - TxDb.Mmusculus.UCSC.mm10.ensGene
#      library(TxDb.Hsapiens.UCSC.hg38.knownGene)
#  
#      # We'll use the GenomicFeatures package to obtain gene/TSS coordinates
#      # from the TxDb package.
#      library(GenomicFeatures)
#  
#      # The GenomicFeatures::genes function provides us with gene bodies.
#      all_gene_bodies = GenomicFeatures::genes(TxDb.Hsapiens.UCSC.hg38.knownGene)
#  
#      # The GenomicFeatures::promoters function gets a region flanking the TSS.
#      # By using it directly on TxDb.Hsapiens.UCSC.hg38.knownGene, we would get
#      # the TSSes of all transcripts. Here, we use it on the gene_bodies GRanges
#      # we've just created, and limit ourselves to one TSS per gene.
#      all_TSS = GenomicFeatures::promoters(all_gene_bodies,
#                                           upstream=2000, downstream=2000)

## ----design_definition--------------------------------------------------------
example_design <- data.frame(Samples = bam_files,
                             align1 = c(1,1,0,0,2),
                             align2 = c(0,0,1,1,2))
kable(example_design)

## ----design_plot--------------------------------------------------------------
# Initializing the metagene object.
mg <- metagene2$new(regions = get_demo_regions(),
                   bam_files = get_demo_bam_files(),
                   assay='chipseq')

# Plotting while grouping the bam files by design group
mg$produce_metagene(design=example_design)

## ----design_metadata----------------------------------------------------------
# Initializing the metagene object.
mg <- metagene2$new(regions = get_demo_regions(),
                   bam_files = get_demo_bam_files()[1:4],
                   assay='chipseq')

design_meta = data.frame(design=mg$get_design_group_names(),
                         Align=c("Align1", "Align1", "Align2", "Align2"),
                         Rep=c(1, 2, 1, 2))
                             
mg$produce_metagene(design_metadata=design_meta, facet_by=Align~Rep, group_by="region")

## ----group_region_grangeslist-------------------------------------------------
# Create a GRangesList of regions to be grouped together.
regions_grl <- get_demo_regions()

# We now have a named GRangesList with two set of 50 regions.
regions_grl
lapply(regions_grl, length)

# Initializing the metagene object.
mg <- metagene2$new(regions = regions_grl,
                   bam_files = get_demo_bam_files(),
                   assay='chipseq')

# When plotting the final metagene, our regions are grouped according to
# their membership in the initial GRangesList object.
mg$plot(facet_by=~region, group_by="design")

## ----group_region_metadata----------------------------------------------------
# First, we load the regions.
regions_gr <- unlist(get_demo_regions())

# We then define some metadata.
# The examples here are nonsensical. Real metadata could include factor
# binding status, differential expression, etc.
demo_metadata = data.frame(BedName=names(regions_gr),
                           EvenStart=ifelse((start(regions_gr) %% 2) == 0, "Even", "Odd"),
                           Strand=strand(regions_gr))
head(demo_metadata)

# Initializing the metagene object, passing in region metadata.
mg <- metagene2$new(regions = get_demo_regions(),
                   region_metadata=demo_metadata,
                   bam_files = get_demo_bam_files(),
                   assay='chipseq')

# When plotting the metagene, our regions are grouped according to
# the specified metadata columns.
mg$produce_metagene(split_by=c("EvenStart", "Strand"), facet_by=EvenStart~Strand, group_by="design")

## ----getParams----------------------------------------------------------------
mg <- get_demo_metagene()
names(mg$get_params())

mg$get_params()[["bin_count"]]

## ----setParamsConstructor-----------------------------------------------------
mg <- metagene2$new(regions=get_demo_regions(), 
                   bam_files=get_demo_bam_files(),
                   bin_count=50)
mg$produce_metagene(alpha=0.01, title="Set parameters on produce_metagene")

## ----changeSingleParamProduceMetagene-----------------------------------------
mg$produce_metagene(bin_count=100)

## ----showPlot-----------------------------------------------------------------
mg$plot(title = "Demo plot subset")

## ----getParams2---------------------------------------------------------------
mg <- get_demo_metagene()
names(mg$get_params())

mg$get_params()[c("bin_count", "alpha", "normalization")]

## ----getBamCount--------------------------------------------------------------
mg <- get_demo_metagene()
mg$get_bam_count(mg$get_params()[["bam_files"]][1])

## ----getRegions---------------------------------------------------------------
# Out demo regions are a GRangesList of two elements containing 50 ranges each.
get_demo_regions()

# When we initialize the metagene object, those regions will be pre-processed,
# flattening the list into a single GRanges object and adding a region_name
# column for tracking.
mg <- metagene2$new(regions = get_demo_regions(),
                   bam_files = get_demo_bam_files())

# get_regions allows us to see those post-processed regions.
mg$get_regions()

## ----getRawCoverages----------------------------------------------------------
coverages <- mg$get_raw_coverages()
coverages[[1]]
length(coverages)

## ----copyMetagene-------------------------------------------------------------
mg_copy <- mg$clone(deep=TRUE)

## ----plotMetagene-------------------------------------------------------------
mg1 <- metagene2$new(bam_files = get_demo_bam_files(), 
                     regions = get_demo_regions()[1])
mg2 <- metagene2$new(bam_files = get_demo_bam_files(),
                     regions = get_demo_regions()[2])
plot_metagene(rbind(mg1$add_metadata(), mg2$add_metadata()),
              facet_by=.~region_name)

## ----metagene_heatmap_default_order-------------------------------------------
mg <- get_demo_metagene()
metagene2_heatmap(mg)

## ----metagene_heatmap_decreasing_order----------------------------------------
mg <- get_demo_metagene()
metagene2_heatmap(mg, coverage_order(mg, "align1_rep1"))