% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/loading_helper_functions.r
\name{load_dataset_10Xdata}
\alias{load_dataset_10Xdata}
\title{load_dataset_10Xdata}
\usage{
load_dataset_10Xdata(dataset_path, dataset_genome, clustering_set,
  gene_id_cols_10X = c("ensembl_ID", "GeneSymbol"),
  id_to_use = gene_id_cols_10X[1])
}
\arguments{
\item{dataset_path}{Path to the directory of 10X data, as generated by the 
cellRanger pipeline (versions 2.1.0 and 2.0.1). The directory should have 
subdirectories \emph{analysis}, \emph{filtered_gene_bc_matrices} and
\emph{raw_gene_bc_matrices} (only the first two are read).}

\item{dataset_genome}{The genome that the reads were aligned against, 
e.g. GRCh38.  Check for this as a directory name under the 
\emph{filtered_gene_bc_matrices} subdirectory if unsure.}

\item{clustering_set}{The 10X cellRanger pipeline produces several 
different cluster definitions per dataset. Specify which one to use, e.g. 
kmeans_10_clusters. Find them as directory names under 
\emph{analysis/clustering/}.}

\item{gene_id_cols_10X}{Vector of the names of the columns in the gene 
description file (\emph{filtered_gene_bc_matrices/GRCh38/genes.csv}). The 
first element of this will become the ID. 
Default = c("ensembl_ID","GeneSymbol")}

\item{id_to_use}{Column from \bold{gene_id_cols_10X} that defines the gene 
identifier to use as 'ID' in the returned SummarizedExperiment object.
Many-to-one relationships between the assumed unique first element of 
\bold{gene_id_cols_10X} and \bold{id_to_use} will be handled gracefully by 
\code{\link{convert_se_gene_ids}}. 
Defaults to the first element of \bold{gene_id_cols_10X}.}
}
\value{
A SummarizedExperiment object containing the count data, cell info
and gene info.
}
\description{
Convenience function to create a SummarizedExperiment object (dataset_se) 
from the output of a 10X cellRanger pipeline run.
}
\details{
This function makes a SummarizedExperiment object in a form that
should work for celaref functions. Specifically, that means it will have an
'ID' field for genes (view with \code{rowData(dataset_se)}), and both
'cell_sample' and 'group' fields for cells (view with
\code{colData(dataset_se)}). See parameters for detail.
Additionally, the counts will be an integer matrix (not a
sparse matrix), and the \emph{group} field (but not \emph{cell_sample}
or \emph{ID}) will be a factor.

The clustering information can be read from whichever clustering set is
specified; usually there will be several choices.

This function is designed to work with output of version 2.0.1 of the 
cellRanger pipeline, and may not work with others (it will not work for 1.x).
}
\examples{
example_10X_dir <- system.file("extdata", "sim_cr_dataset", package = "celaref")
dataset_se <- load_dataset_10Xdata(example_10X_dir, dataset_genome="GRCh38", 
    clustering_set="kmeans_4_clusters", gene_id_cols_10X=c("gene")) 

\dontrun{
dataset_se <- load_dataset_10Xdata('~/path/to/data/10X_pbmc4k', 
    dataset_genome="GRCh38", 
    clustering_set="kmeans_7_clusters") 
} 

}
\seealso{
\href{https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html}{SummarizedExperiment} 
For general documentation on SummarizedExperiment objects.

\code{\link{convert_se_gene_ids}} describes method for 
converting IDs.

Other Data loading functions: \code{\link{contrast_each_group_to_the_rest_for_norm_ma_with_limma}},
  \code{\link{load_se_from_tables}}
}
\concept{Data loading functions}
