## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
# Use bam file that exists in ORFik package
library(ORFik)
bam_file <- system.file("extdata/Danio_rerio_sample", "ribo-seq.bam", package = "ORFik")
footprints <- fimport(bam_file)
# This is identical to:
footprints <- readBam(bam_file)

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
# Use bam file that exists in ORFik package
ofst_file <- system.file("extdata/Danio_rerio_sample/ofst", "ribo-seq.ofst", package = "ORFik")
footprints <- fimport(ofst_file)
# This is identical to:
footprints <- import.ofst(ofst_file)

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
ofst_out_dir <- file.path(tempdir(), "ofst/")
convert_bam_to_ofst(NULL, bam_file, ofst_out_dir)
# Find the file again
ofst_file <- list.files(ofst_out_dir, full.names = TRUE)[1]
# Load it
fimport(ofst_file)

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
bigwig_out_dir <- file.path(tempdir(), "bigwig/")
convert_to_bigWig(NULL, bam_file, bigwig_out_dir, 
                  seq_info = seqinfo(BamFile(bam_file)))
# Find the file again
bigwig_file <- list.files(bigwig_out_dir, full.names = TRUE) 
# You see we have 2 files here, 1 for forward strand, 1 for reverse
# Load it
fimport(bigwig_file)

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
bigwig_file <- list.files(bigwig_out_dir, full.names = TRUE) 
# Let us access a location without loading the full file.
random_point <- GRangesList(GRanges("chr24", 22711508, "-"))
# Getting raw vector (Then you need to know which strand it is on:)
bigwig_for_random_point <- bigwig_file[2] # the reverse strand file
rtracklayer::import.bw(bigwig_for_random_point, as = "NumericList",
                              which = random_point[[1]])
# 4 reads were there

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------

dt <- coveragePerTiling(random_point, bigwig_file)
dt # Position is 1, because it is relative to first

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
library(ORFik)
organism <- "Saccharomyces cerevisiae" # Baker's yeast
# This is where you should usually store you annotation references ->
#output_dir <- file.path(ORFik::config()["ref"], gsub(" ", "_", organism))
output_dir <- tempdir()
annotation <- getGenomeAndAnnotation(
                    organism = organism,
                    output.dir = output_dir,
                    assembly_type = "toplevel"
                    )
genome <- annotation["genome"]
gtf <- annotation["gtf"]

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
# Index fasta genome
indexFa(genome)
# Make TxDb object for large speedup:
txdb <- makeTxdbFromGenome(gtf, genome, organism, optimize = TRUE, return = TRUE)

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
# Access a FaFile
fa <- findFa(genome)
# Get chromosome information
seqinfo(findFa(genome))
# Load a 10 first bases from chromosome 1
txSeqsFromFa(GRangesList(GRanges("I", 1:10, "+")), fa, TRUE)
# Load a 10 first bases from chromosome 1 and 2.
txSeqsFromFa(GRangesList(GRanges("I", 1:10, "+"), GRanges("II", 1:10, "+")), fa, TRUE)

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
  txdb_path <- paste0(gtf, ".db")
  txdb <- loadTxdb(txdb_path)

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
  loadRegion(txdb, "tx")[1]
  loadRegion(txdb, "mrna")[1]
  loadRegion(txdb, "leaders")[1]
  loadRegion(txdb, "cds")[1]
  loadRegion(txdb, "trailers")

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
  tx_to_keep <- filterTranscripts(txdb, minFiveUTR = 1, minCDS = 150, minThreeUTR = 0)
  loadRegion(txdb, "mrna")

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#    # This fails!
#    filterTranscripts(txdb, minFiveUTR = 10, minCDS = 150, minThreeUTR = 0)

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
  txdb <- makeTxdbFromGenome(gtf, genome, organism, optimize = TRUE, return = TRUE, 
                             pseudo_5UTRS_if_needed = 100)
  filterTranscripts(txdb, minFiveUTR = 10, minCDS = 150, minThreeUTR = 0)[1:3]

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
  filterTranscripts(txdb, minFiveUTR = 0, minCDS = 1, minThreeUTR = 0, 
                    longestPerGene = TRUE)[1:3]

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
  findUORFs_exp(txdb, findFa(genome), startCodon = "ATG|CTG", save_optimized = TRUE)
  loadRegion(txdb, "uorfs") # For later use, output like this