% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/validate_parameters.R
\name{validate_parameters}
\alias{validate_parameters}
\title{Ensure that the input parameters are logical}
\usage{
validate_parameters(
  path,
  ref_genome,
  convert_ref_genome,
  convert_small_p,
  es_is_beta,
  compute_z,
  compute_n,
  convert_n_int,
  analysis_trait,
  INFO_filter,
  FRQ_filter,
  pos_se,
  effect_columns_nonzero,
  N_std,
  N_dropNA,
  chr_style,
  rmv_chr,
  on_ref_genome,
  infer_eff_direction,
  eff_on_minor_alleles,
  strand_ambig_filter,
  allele_flip_check,
  allele_flip_drop,
  allele_flip_z,
  allele_flip_frq,
  bi_allelic_filter,
  flip_frq_as_biallelic,
  snp_ids_are_rs_ids,
  remove_multi_rs_snp,
  frq_is_maf,
  indels,
  drop_indels,
  check_dups,
  dbSNP,
  dbSNP_tarball,
  write_vcf,
  return_format,
  ldsc_format,
  save_format,
  imputation_ind,
  log_folder_ind,
  log_mungesumstats_msgs,
  mapping_file,
  tabix_index,
  chain_source,
  local_chain,
  drop_na_cols,
  rmv_chrPrefix
)
}
\arguments{
\item{path}{Filepath for the summary statistics file to be formatted. A
dataframe or datatable of the summary statistics file can also be passed
directly to MungeSumstats using the path parameter.}

\item{ref_genome}{name of the reference genome used for the GWAS ("GRCh37" or
"GRCh38"). Argument is case-insensitive. Default is NULL which infers the
reference genome from the data.}

\item{convert_ref_genome}{name of the reference genome to convert to
("GRCh37" or "GRCh38"). This will only occur if the current genome build does
not match. Default is not to convert the genome build (NULL).}

\item{convert_small_p}{Binary, should non-negative
p-values <= 5e-324 be converted to 0?
Small p-values pass the R limit and can cause errors with LDSC/MAGMA and
should be converted. Default is TRUE.}

\item{es_is_beta}{Binary, whether to map ES to BETA. We take BETA to be any
BETA-like value (including Effect Size). If this is not the case for your
sumstats, change this to FALSE. Default is TRUE.}

\item{compute_z}{Whether to compute Z-score column. Default is FALSE. This
can be computed from Beta and SE with (Beta/SE) or P
(Z:=sign(BETA)*sqrt(stats::qchisq(P,1,lower=FALSE))).
\strong{Note} that imputing the Z-score from P for every SNP will not be
perfectly correct and may result in a loss of power. This should only be done
as a last resort. Use 'BETA' to impute by BETA/SE and 'P' to impute by SNP
p-value.}

\item{compute_n}{Whether to impute N. Default of 0 won't impute, any other
integer will be imputed as the N (sample size) for every SNP in the dataset.
\strong{Note} that imputing the sample size for every SNP is not correct and
should only be done as a last resort. N can also be inputted with "ldsc",
"sum", "giant" or "metal" by passing one of these for this field or a vector
of multiple. Sum and an integer value creates an N column in the output
whereas giant, metal or ldsc create an Neff or effective sample size. If
multiples are passed, the formula used to derive it will be indicated.}

\item{convert_n_int}{Binary, if N (the number of samples) is not an integer,
should this be rounded? Default is TRUE.}

\item{analysis_trait}{If multiple traits were studied, name of the trait for
analysis from the GWAS. Default is NULL.}

\item{INFO_filter}{numeric The minimum value permissible of the imputation
information score (if present in sumstats file). Default 0.9.}

\item{FRQ_filter}{numeric The minimum value permissible of the frequency (FRQ)
of the SNP (i.e. Allele Frequency (AF)) (if present in sumstats file). By
default no filtering is done, i.e. value of 0.}

\item{pos_se}{Binary Should the standard Error (SE) column be checked to
ensure it is greater than 0? Those that are not are removed (if present in
sumstats file). Default TRUE.}

\item{effect_columns_nonzero}{Binary should the effect columns in the data
(BETA, OR (odds ratio), LOG_ODDS, SIGNED_SUMSTAT) be checked to ensure no SNP
has a value of 0? Those that do are removed (if present in sumstats file).
Default FALSE.}

\item{N_std}{numeric The number of standard deviations above the mean that a
SNP's N must be for the SNP to be removed. Default is 5.}

\item{N_dropNA}{Drop rows where N is missing. Default is TRUE.}

\item{chr_style}{Chromosome naming style to use in the formatted summary
statistics file ("NCBI", "UCSC", "dbSNP", or "Ensembl"). The NCBI and
Ensembl styles both code chromosomes as \verb{1-22, X, Y, MT}; the UCSC style is
\verb{chr1-chr22, chrX, chrY, chrM}; and the dbSNP style is
\verb{ch1-ch22, chX, chY, chMT}. Default is Ensembl.}

\item{rmv_chr}{Chromosomes to exclude from the formatted summary statistics
file. Use NULL if no filtering is necessary. Default is \code{c("X", "Y", "MT")}
which removes all non-autosomal SNPs.}

\item{on_ref_genome}{Binary Should a check take place that all SNPs are on
the reference genome by SNP ID. Default is TRUE.}

\item{infer_eff_direction}{Binary Should a check take place to ensure the
alleles match the effect direction? Default is TRUE.}

\item{eff_on_minor_alleles}{Binary Should MungeSumstats assume that the
effects are predominantly measured on the minor alleles? Default is FALSE as
this is an assumption that won't be appropriate in all cases. However, the
benefit is that if we know the majority of SNPs have their effects based on
the minor alleles, we can catch cases where the allele columns have been
mislabelled.}

\item{strand_ambig_filter}{Binary Should SNPs with strand-ambiguous alleles
be removed. Default is FALSE.}

\item{allele_flip_check}{Binary Should the allele columns be checked against
reference genome to infer if flipping is necessary. Default is TRUE.}

\item{allele_flip_drop}{Binary Should the SNPs for which neither their A1 nor
A2 base pair values match a reference genome be dropped. Default is TRUE.}

\item{allele_flip_z}{Binary should the Z-score be flipped along with effect
and FRQ columns like Beta? It is assumed to be calculated off the effect size
not the P-value and so will be flipped i.e. default TRUE.}

\item{allele_flip_frq}{Binary should the frequency (FRQ) column be flipped
along with effect and z-score columns like Beta? Default TRUE.}

\item{bi_allelic_filter}{Binary Should non-bi-allelic SNPs be removed.
Default is TRUE.}

\item{flip_frq_as_biallelic}{Binary Should non-bi-allelic SNPs frequency
values be flipped as 1-p despite there being other alternative alleles?
Default is FALSE but if set to TRUE, this allows non-bi-allelic SNPs to be
kept despite needing flipping.}

\item{snp_ids_are_rs_ids}{Binary Should the supplied SNP ID's be assumed to
be RSIDs. If not, imputation using the SNP ID for other columns like
base-pair position or chromosome will not be possible. If set to FALSE, the
SNP RS ID will be imputed from the reference genome if possible. Default is
TRUE.}

\item{remove_multi_rs_snp}{Binary Sometimes summary statistics can have
multiple RSIDs on one row (i.e. related to one SNP), for example
"rs5772025_rs397784053". This can cause an error so by default, the first
RS ID will be kept and the rest removed e.g. "rs5772025". If you want to just
remove these SNPs entirely, set it to TRUE. Default is FALSE.}

\item{frq_is_maf}{Conventionally the FRQ column is intended to show the
minor/effect allele frequency (MAF) but sometimes the major allele frequency
can be inferred as the FRQ column. This logical variable indicates that the
FRQ column should be renamed to MAJOR_ALLELE_FRQ if the frequency values
appear to relate to the major allele i.e. >0.5. By default this renaming
won't occur, i.e. frq_is_maf is TRUE.}

\item{indels}{Binary does your Sumstats file contain Indels? These don't
exist in our reference file so they will be excluded from checks if this
value is TRUE. Default is TRUE.}

\item{drop_indels}{Binary, should any indels found in the sumstats be
dropped? These can not be checked against a reference dataset and will have
the same RS ID and position as SNPs which can affect downstream analysis.
Default is FALSE.}

\item{check_dups}{whether to check for duplicates - if formatting QTL
datasets this should be set to FALSE otherwise keep as TRUE. Default is TRUE.}

\item{dbSNP}{version of dbSNP to be used for imputation (144 or 155). See
\code{dbSNP_tarball} for different versions of dbSNP (including newer releases).}

\item{dbSNP_tarball}{Pass local versions of dbSNP in tarball format. Default
of NULL uses the dbSNP version passed in \code{dbSNP} parameter. \code{dbSNP_tarball}
was enabled to help with dbSNP versions >=156, after the decision to no
longer provide dbSNP releases as bioconductor packages. dbSNP 156 tarball is
available here: \url{http://149.165.171.124/SNPlocs/}.}

\item{write_vcf}{Whether to write as VCF (TRUE) or tabular file (FALSE).}

\item{return_format}{If return_data is TRUE, the object type to be returned
("data.table","vranges","granges").}

\item{ldsc_format}{DEPRECATED, do not use. Use save_format="LDSC" instead.}

\item{save_format}{Output format of sumstats. Options are NULL - standardised
output format from MungeSumstats, LDSC - output format compatible with LDSC
and openGWAS - output compatible with openGWAS VCFs. Default is NULL.
\strong{NOTE} - If LDSC format is used, the naming convention of A1 as the
reference (genome build) allele and A2 as the effect allele will be reversed
to match LDSC (A1 will now be the effect allele). See more info on this
\href{https://groups.google.com/g/ldsc_users/c/S7FZK743w68}{here}. Note that any
effect columns (e.g. Z) will be in relation to A1 now instead of A2.}

\item{imputation_ind}{Binary Should a column be added for each imputation
step to show what SNPs have imputed values for differing fields. This
includes a field denoting SNP allele flipping (flipped). On the flipped
value, this denotes whether the alleles were switched based on
MungeSumstats initial choice of A1, A2 from the input column headers and thus
may not align with what the creator intended. \strong{Note} these columns will be
in the formatted summary statistics returned. Default is FALSE.}

\item{log_folder_ind}{Binary Should log files be stored containing all
filtered out SNPs (separate file per filter). The data is outputted in the
same format specified for the resulting sumstats file. The only exception to
this rule is if output is vcf, then log file saved as .tsv.gz. Default is
FALSE.}

\item{log_mungesumstats_msgs}{Binary Should a log be stored containing all
messages and errors printed by MungeSumstats in a run. Default is FALSE.}

\item{mapping_file}{MungeSumstats has a pre-defined column-name mapping file
which should cover the most common column headers and their interpretations.
However, if a column header that is in your file is missing or the mapping we
give is incorrect you can supply your own mapping file. Must be a 2 column
dataframe with column names "Uncorrected" and "Corrected". See
data(sumstatsColHeaders) for default mapping and necessary format.}

\item{tabix_index}{Index the formatted summary statistics with
\href{http://www.htslib.org/doc/tabix.html}{tabix} for fast querying.}

\item{chain_source}{source of the chain file to use in liftover, if converting
genome build ("ucsc" or "ensembl"). Note that the UCSC chain files require a
license for commercial use. The Ensembl chain is used by default ("ensembl").}

\item{local_chain}{Path to local chain file to use instead of downloading.
Default of NULL i.e. no local file to use. NOTE if passing a local chain file
make sure to specify the path to convert from and to the correct build like
GRCh37 to GRCh38. We can not sense check this for local files. The chain file
can be submitted as a gz file (as downloaded from source) or unzipped.}

\item{drop_na_cols}{A character vector of column names to be checked for
missing values. Rows with missing values in any of these columns (if present
in the dataset) will be dropped. If \code{NULL}, all columns will be checked for
missing values. Default columns are SNP, chromosome, position, allele 1,
allele 2, effect columns (frequency, beta, Z-score, standard error, log odds,
signed sumstats, odds ratio), p value and N columns.}

\item{rmv_chrPrefix}{Is now deprecated, do not use. Use chr_style instead -
chr_style = 'Ensembl' will give the same result as rmv_chrPrefix=TRUE used to
give.}
}
\value{
No return
}
\description{
Ensure that the input parameters are logical
}
\keyword{internal}
