% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{pp_tcga}
\alias{pp_tcga}
\title{Process tumour genomic data}
\usage{
pp_tcga(
  p_mut,
  p_cna,
  p_exprs,
  p_score,
  freq_thresh = 0.02,
  expr_thresh = 10,
  hypermut_thresh = 300
)
}
\arguments{
\item{p_mut}{path of mutation data, like "data_mutations_uniprot.txt" provided by cBioPortal.}

\item{p_cna}{path of copy number variation data, like "data_CNA.txt".}

\item{p_exprs}{path of normalized RNAseq expression data, like "data_RNA_Seq_v2_expression_median.txt".}

\item{p_score}{path of zscore data, like "data_RNA_Seq_v2_mRNA_median_Zscores.txt".}

\item{freq_thresh}{threshold to select recurrent mutations.}

\item{expr_thresh}{threshold to remove low expression genes.}

\item{hypermut_thresh}{threshold for hypermutations.}
}
\value{
Returns a list of mut_data, expr_data and zscore_data, where expr_data and zscore_data are matrices (entrez_id by patients) and
  mut_data is a data.table with two columns, "patientid" and "mut_entrez".
}
\description{
Preprocess mutation, cna, expression and zscore datasets in TCGA PanCancer Atlas by cBioPortal.
}
\details{
It is designed to process the TCGA data provided by cBioPortal. In mutation data, "Missense_Mutation", "Nonsense_Mutation", "Frame_Shift_Del",
  "Frame_Shift_Ins", "In_Frame_Del", "In_Frame_Ins" and "Nonstop_Mutation" are selected for the downstream analysis.
  In CNA data, genes with GISTIC value equal to -2 are used. Patients with hypermutations are removed.
  Low expression genes, or genes that are not detected in any patient, are filtered out.
}
\examples{
#- See vignette for more details.
if (FALSE) {
P_mut  <- "data_mutations_extended.txt"
P_cna  <- "data_CNA.txt"
P_expr <- "data_RNA_Seq_v2_expression_median.txt"
P_z    <- "data_RNA_Seq_v2_mRNA_median_Zscores.txt"
res    <- pp_tcga(P_mut, P_cna, P_expr, P_z)
saveRDS(res$mut_data, "mut_data.rds")
saveRDS(res$expr_data, "expr_data.rds")
saveRDS(res$zscore_data, "zscore_data.rds")
}
}
\references{
Cerami et al. The cBio Cancer Genomics Portal: An Open Platform for Exploring Multidimensional Cancer Genomics Data. Cancer Discovery. May 2012 2; 401.
  Gao et al. Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal. Sci. Signal. 6, pl1 (2013).
}
