% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_vcf.R
\name{read_vcf}
\alias{read_vcf}
\title{Read in VCF file}
\source{
\code{
#### Benchmarking ####
library(VCFWrenchR)
library(VariantAnnotation)
path <- "https://gwas.mrcieu.ac.uk/files/ubm-a-2929/ubm-a-2929.vcf.gz"
vcf <- VariantAnnotation::readVcf(file = path)
N <- 1e5
vcf_sub <- vcf[1:N,]
res <- microbenchmark::microbenchmark(
    "vcf2df"={dat1 <- MungeSumstats:::vcf2df(vcf = vcf_sub)},
    "VCFWrenchR"= {dat2 <- as.data.frame(x = vcf_sub)},
    "VRanges"={dat3 <- data.table::as.data.table(
        methods::as(vcf_sub, "VRanges"))},
    times=1
)
}

\href{https://github.com/Bioconductor/VariantAnnotation/issues/57}{
Discussion on VariantAnnotation GitHub}

\href{https://github.com/Bioconductor/VariantAnnotation/issues/59}{
Discussion on VariantAnnotation GitHub}
}
\usage{
read_vcf(
  path,
  as_datatable = TRUE,
  save_path = NULL,
  tabix_index = FALSE,
  samples = 1,
  which = NULL,
  use_params = TRUE,
  sampled_rows = 10000L,
  download = TRUE,
  vcf_dir = tempdir(),
  download_method = "download.file",
  force_new = FALSE,
  mt_thresh = 100000L,
  nThread = 1,
  verbose = TRUE
)
}
\arguments{
\item{path}{Path to local or remote VCF file.}

\item{as_datatable}{Return the data as a
\link[data.table]{data.table} (default: \code{TRUE})
or a \link[VariantAnnotation]{VCF} (\code{FALSE}).}

\item{save_path}{File path to save formatted data. Defaults to
\code{tempfile(fileext=".tsv.gz")}.}

\item{tabix_index}{Index the formatted summary statistics with
\href{http://www.htslib.org/doc/tabix.html}{tabix} for fast querying.}

\item{samples}{Which samples to use:
\describe{
\item{1 : }{Only the first sample will be used (\emph{DEFAULT}).}
\item{NULL : }{All samples will be used.}
\item{c("<sample_id1>","<sample_id2>",...) : }{
Only user-selected samples will be used (case-insensitive).}
}}

\item{which}{Genomic ranges to be added if supplied. Default is NULL.}

\item{use_params}{When \code{TRUE} (default), increases the speed of reading in the VCF by
omitting columns that are empty based on the head of the VCF (NAs only).
NOTE that this requires the VCF to be sorted, bgzip-compressed, and
tabix-indexed, which \link[MungeSumstats]{read_vcf} will attempt to do.}

\item{sampled_rows}{First N rows to sample
when determining whether columns are empty.
Set \code{NULL} to use the full \code{sumstats_file}.}

\item{download}{Download the VCF (and its index file)
to a temp folder before reading it into R.
This is important to keep \code{TRUE} when \code{nThread>1} to avoid
making too many queries to remote file.}

\item{vcf_dir}{Where to download the original VCF from Open GWAS.
\emph{WARNING:} This is set to \code{tempdir()} by default.
This means the raw (pre-formatted) VCFs will be deleted upon ending the R session.
Change this to keep the raw VCF file on disk
(e.g. \code{vcf_dir="./raw_vcf"}).}

\item{download_method}{\code{"axel"} (multi-threaded) or
\code{"download.file"} (single-threaded).}

\item{force_new}{If a formatted file of the same name as \code{save_path}
exists, formatting will be skipped and this file will be imported instead
(default). Set \code{force_new=TRUE} to override this.}

\item{mt_thresh}{When the number of rows (variants) in the VCF is
\code{< mt_thresh}, only use single-threading for reading in the VCF.
This is because the overhead of parallelisation outweighs the speed benefits
when VCFs are small.}

\item{nThread}{Number of threads to use for parallel processes.}

\item{verbose}{Print messages.}
}
\value{
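The VCF file as a \link[data.table]{data.table} (default),
or as a \link[VariantAnnotation]{VCF} object when \code{as_datatable=FALSE}.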
}
\description{
Read in a VCF file as a \link[VariantAnnotation]{VCF} or a
\link[data.table]{data.table}.
Can optionally save the VCF/data.table as well.
}
\examples{
#### Local file ####
path <- system.file("extdata","ALSvcf.vcf", package="MungeSumstats")
sumstats_dt <- read_vcf(path = path)

#### Remote file ####
## Small GWAS (0.2Mb)
# path <- "https://gwas.mrcieu.ac.uk/files/ieu-a-298/ieu-a-298.vcf.gz"
# sumstats_dt2 <- read_vcf(path = path)

## Large GWAS (250Mb)
# path <- "https://gwas.mrcieu.ac.uk/files/ubm-a-2929/ubm-a-2929.vcf.gz"
# sumstats_dt3 <- read_vcf(path = path, nThread=11)

## Very large GWAS (500Mb)
# path <- "https://gwas.mrcieu.ac.uk/files/ieu-a-1124/ieu-a-1124.vcf.gz"
# sumstats_dt4 <- read_vcf(path = path, nThread=11)
}
