#' Class to model files that describe a single MS dataset.
#' 
#' @slot files named list of files generated by a signal processing tool. 
#' In most cases, this will be a single file named `input`. 
#' In some cases, multiple files are used, for example `MaxQuant` outputs 
#' `evidence` and `proteinGroups` files.
#' @slot type character: "MSstats" or "MSstatsTMT".
#' @slot tool character: name of the signal processing tool that generated the
#' output. Possible values are: DIANN, DIAUmpire, FragPipe, MaxQuant, 
#' Metamorpheus, OpenMS, OpenSWATH, Philosopher, Progenesis, 
#' ProteinProspector, ProteomeDiscoverer, Skyline, SpectroMine, Spectronaut.
#' @slot version description of a software version of the signal processing tool.
#' Not implemented yet.
#' @rdname MSstatsInputFiles
setClass(
    Class = "MSstatsInputFiles",
    slots = c(
        # named list of files
        files = "list",
        # "MSstats" or "MSstatsTMT"
        type = "character",
        # name of the signal processing tool that produced the files
        tool = "character",
        # description of the tool version; accepts any object
        version = "ANY"
    )
)

#' MSstatsDIAUmpireFiles: class for DIAUmpire files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsDIAUmpireFiles", contains = "MSstatsInputFiles")
#' MSstatsMaxQuantFiles: class for MaxQuant files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsMaxQuantFiles", contains = "MSstatsInputFiles")
#' MSstatsOpenMSFiles: class for OpenMS files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsOpenMSFiles", contains = "MSstatsInputFiles")
#' MSstatsOpenSWATHFiles: class for OpenSWATH files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsOpenSWATHFiles", contains = "MSstatsInputFiles")
#' MSstatsProgenesisFiles: class for Progenesis files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsProgenesisFiles", contains = "MSstatsInputFiles")
#' MSstatsProteomeDiscovererFiles: class for ProteomeDiscoverer files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsProteomeDiscovererFiles", contains = "MSstatsInputFiles")
#' MSstatsSkylineFiles: class for Skyline files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsSkylineFiles", contains = "MSstatsInputFiles")
#' MSstatsSpectroMineFiles: class for SpectroMine files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsSpectroMineFiles", contains = "MSstatsInputFiles")
#' MSstatsSpectronautFiles: class for Spectronaut files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsSpectronautFiles", contains = "MSstatsInputFiles")
#' MSstatsPhilosopherFiles: class for Philosopher files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsPhilosopherFiles", contains = "MSstatsInputFiles")
#' MSstatsDIANNFiles: class for DIA-NN files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsDIANNFiles", contains = "MSstatsInputFiles")
#' MSstatsFragPipeFiles: class for FragPipe files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsFragPipeFiles", contains = "MSstatsInputFiles")
#' MSstatsMetamorpheusFiles: class for Metamorpheus files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsMetamorpheusFiles", contains = "MSstatsInputFiles")
#' MSstatsProteinProspectorFiles: class for ProteinProspector files.
#' @rdname MSstatsInputFiles
#' @keywords internal
setClass("MSstatsProteinProspectorFiles", contains = "MSstatsInputFiles")


#' Get one of files contained in an instance of `MSstatsInputFiles` class.
#' @rdname getInputFile
#' @return data.table
#' @export
#' @examples
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv", 
#'                             package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv", 
#'                       package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#'                          "MSstats", "MaxQuant")
#' class(imported)
#' head(getInputFile(imported, "evidence"))
setGeneric(
    name = "getInputFile",
    def = function(msstats_object, file_type) standardGeneric("getInputFile"),
    # methods are selected on `msstats_object` only; `file_type` is excluded
    # from the dispatch signature
    signature = "msstats_object"
)
#' @param msstats_object object that inherits from `MSstatsInputFiles` class.
#' @param file_type character name of a type file. Usually equal to "input".
#' @return data.table
#' @export 
#' @rdname getInputFile
setMethod(
    f = "getInputFile",
    signature = "MSstatsInputFiles",
    definition = function(msstats_object, file_type = "input") {
        # look the requested entry up by name in the `files` slot
        stored_files = msstats_object@files
        stored_files[[file_type]]
    }
)
#' @param msstats_object object that inherits from `MSstatsPhilosopherFiles` class.
#' @param file_type character name of a type file. Usually equal to "input".
#' @return data.table
#' @rdname getInputFile
setMethod(
    f = "getInputFile",
    signature = "MSstatsPhilosopherFiles",
    definition = function(msstats_object, file_type = "input") {
        stored_files = msstats_object@files
        if (file_type != "annotation") {
            # any other request returns every non-annotation entry,
            # row-bound into a single table
            is_annotation = names(stored_files) == "annotation"
            return(data.table::rbindlist(stored_files[!is_annotation]))
        }
        # the annotation entry is returned as stored
        stored_files[["annotation"]]
    }
)


#' Get type of dataset from an MSstatsInputFiles object.
#' @rdname getDataType
#' @keywords internal
#' @export
#' @return character - label of a data type. Currently, "MSstats" or "MSstatsTMT"
#' @examples
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv", 
#'                             package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv", 
#'                       package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#'                          "MSstats", "MaxQuant")
#' class(imported)
#' getDataType(imported) # "MSstats"
#' 
setGeneric(
    name = "getDataType",
    def = function(msstats_object) standardGeneric("getDataType")
)
#' @param msstats_object object that inherits from `MSstatsInputFiles` class.
#' @return character "MSstats" or "MSstatsTMT".
#' @export
#' @rdname getDataType
setMethod(
    f = "getDataType",
    signature = "MSstatsInputFiles",
    definition = function(msstats_object) {
        # the data type label is returned exactly as stored in the `type` slot
        msstats_object@type
    }
)


#' Import files from signal processing tools.
#' 
#' @param input_files list of paths to input files or `data.frame` objects.
#' Interpretation of this parameter depends on values of parameters `type` and `tool`.
#' @param type chr, "MSstats" or "MSstatsTMT".
#' @param tool chr, name of a signal processing tool that generated input files.
#' @param tool_version not implemented yet. In the future, this parameter will allow
#' handling different versions of each signal processing tool.
#' @param ... optional additional parameters to `data.table::fread`.
#' 
#' @return an object of class `MSstatsInputFiles`.
#' @export
#' 
#' @examples 
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv", 
#'                             package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv", 
#'                       package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#'                          "MSstats", "MaxQuant")
#' class(imported)
#' head(getInputFile(imported, "evidence"))
#' 
MSstatsImport = function(input_files, type, tool, tool_version = NULL, ...) {
    checkmate::assertChoice(type, c("MSstats", "MSstatsTMT"))
    # `tool` is pasted into a class name below, so it has to be a single,
    # non-missing string; fail early with a readable message otherwise
    checkmate::assertString(tool)
    checkmate::assertTRUE(!is.null(names(input_files)))
    
    # every element is handed to .getDataTable together with the extra
    # arguments supplied through `...`
    input_files = lapply(as.list(input_files), .getDataTable, ...)
    
    msstats_object = methods::new("MSstatsInputFiles", files = input_files,
                                  type = type, tool = tool, 
                                  version = tool_version)
    # tool-specific subclass name, e.g. "MSstatsMaxQuantFiles"
    # (named `class_name` so that base::class is not shadowed)
    class_name = paste0("MSstats", tool, "Files")
    imported = methods::new(class_name, msstats_object)
    # report success only after the tool-specific object has been created,
    # so that a failed import is never logged as successful
    .logSuccess(tool, "import")
    imported
}

#' Clean files generated by a signal processing tool.
#' @param msstats_object object that inherits from `MSstatsInputFiles` class.
#' @param ... additional parameter to specific cleaning functions.
#' @rdname MSstatsClean
#' @export
#' @return data.table
#' 
#' @examples 
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv", 
#'                             package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv", 
#'                       package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#'                          "MSstats", "MaxQuant")
#' cleaned_data = MSstatsClean(imported, protein_id_col = "Proteins")
#' head(cleaned_data)
#' 
setGeneric(
    name = "MSstatsClean",
    def = function(msstats_object, ...) {
        standardGeneric("MSstatsClean")
    }
)
#' Clean DIAUmpire files
#' @include clean_DIAUmpire.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawDIAUmpire
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsDIAUmpireFiles", 
          .cleanRawDIAUmpire)
#' Clean MaxQuant files
#' @include clean_MaxQuant.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawMaxQuant
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsMaxQuantFiles", 
          .cleanRawMaxQuant)
#' Clean OpenMS files
#' @include clean_OpenMS.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawOpenMS
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsOpenMSFiles", 
          .cleanRawOpenMS)
#' Clean OpenSWATH files
#' @include clean_OpenSWATH.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawOpenSWATH
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsOpenSWATHFiles", 
          .cleanRawOpenSWATH)
#' Clean Progenesis files
#' @include clean_Progenesis.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawProgenesis
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsProgenesisFiles", 
          .cleanRawProgenesis)
#' Clean ProteomeDiscoverer files
#' @include clean_ProteomeDiscoverer.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawPD
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsProteomeDiscovererFiles", 
          .cleanRawPD)
#' Clean Skyline files
#' @include clean_Skyline.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawSkyline
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsSkylineFiles", 
          .cleanRawSkyline)
#' Clean SpectroMine files
#' @include clean_SpectroMine.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawSpectroMineTMT
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsSpectroMineFiles", 
          .cleanRawSpectroMineTMT)
#' Clean Spectronaut files
#' @include clean_Spectronaut.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawSpectronaut
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsSpectronautFiles", 
          .cleanRawSpectronaut)
#' Clean Philosopher files
#' @include clean_Philosopher.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawPhilosopher
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsPhilosopherFiles", 
          .cleanRawPhilosopher)
#' Clean DIA-NN files
#' @include clean_DIANN.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawDIANN
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsDIANNFiles", 
          .cleanRawDIANN)
#' Clean Metamorpheus files
#' @include clean_Metamorpheus.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawMetamorpheus
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsMetamorpheusFiles", 
          .cleanRawMetamorpheus)
#' Clean Protein Prospector files
#' @include clean_ProteinProspector.R
#' @rdname MSstatsClean
#' @inheritParams .cleanRawProteinProspector
#' @return data.table
setMethod("MSstatsClean", signature = "MSstatsProteinProspectorFiles", 
          .cleanRawProteinProspector)
# NOTE(review): the class `MSstatsFragPipeFiles` is declared in this file, but
# no `MSstatsClean` method for it is registered in this run of registrations -
# confirm that this is intentional or that the method is defined elsewhere.


#' Preprocess outputs from MS signal processing tools for analysis with MSstats
#' 
#' @param input data.table processed by the MSstatsClean function.
#' @param annotation annotation file generated by a signal processing tool.
#' @param feature_columns character vector of names of columns that 
#' define spectral features.
#' @param remove_shared_peptides logical, if TRUE shared peptides will be removed.
#' @param remove_single_feature_proteins logical, if TRUE, proteins that only have
#' one feature will be removed.
#' @param feature_cleaning named list with maximum two (for `MSstats` converters)
#' or three (for `MSstatsTMT` converter) elements. If `handle_few_measurements` is
#' set to "remove", feature with less than three measurements will be removed 
#' (otherwise it should be equal to "keep"). `summarize_multiple_psms` is a function
#' that will be used to aggregate multiple feature measurements in a run. It should
#' return a scalar and accept an `na.rm` parameter. For `MSstatsTMT` converters,
#' setting `remove_psms_with_any_missing` will remove features which have missing
#' values in a run from that run. 
#' @param score_filtering a list of named lists that specify filtering options.
#' Details are provided in the vignette.
#' @param exact_filtering a list of named lists that specify filtering options.
#' Details are provided in the vignette.
#' @param pattern_filtering a list of named lists that specify filtering options.
#' Details are provided in the vignette.
#' @param columns_to_fill a named list of scalars. If provided, columns with
#' names defined by the names of this list and values corresponding to its elements
#' will be added to the output `data.frame`.
#' @param aggregate_isotopic logical. If `TRUE`, isotopic peaks will be summed.
#' @param anomaly_metrics character vector of names of columns with quality metrics. Default is missing and is not required if anomaly model not run.
#' @param ... additional parameters to `data.table::fread`.
#' 
#' @return data.table
#' @export
#' 
#' @examples 
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv", 
#'                             package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv", 
#'                       package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#'                          "MSstats", "MaxQuant")
#' cleaned_data = MSstatsClean(imported, protein_id_col = "Proteins")
#' annot_path = system.file("tinytest/raw_data/MaxQuant/annotation.csv", 
#'                          package = "MSstatsConvert")
#' mq_annot = MSstatsMakeAnnotation(cleaned_data, read.csv(annot_path),
#'                                  Run = "Rawfile")
#'                                
#' # To filter M-peptides and oxidation peptides 
#' m_filter = list(col_name = "PeptideSequence", pattern = "M", 
#'                 filter = TRUE, drop_column = FALSE)
#' oxidation_filter = list(col_name = "Modifications", pattern = "Oxidation", 
#'                         filter = TRUE, drop_column = TRUE)
#' msstats_format = MSstatsPreprocess(
#' cleaned_data, mq_annot, 
#' feature_columns = c("PeptideSequence", "PrecursorCharge"),
#' columns_to_fill = list(FragmentIon = NA, ProductCharge = NA),
#' pattern_filtering = list(oxidation = oxidation_filter, m = m_filter)
#' )
#' # Output in the standard MSstats format
#' head(msstats_format)
#' 
MSstatsPreprocess = function(
    input, annotation, feature_columns, remove_shared_peptides = TRUE,
    remove_single_feature_proteins = TRUE,
    feature_cleaning = list(remove_features_with_few_measurements = TRUE,
                            summarize_multiple_psms = max),
    score_filtering = list(), exact_filtering = list(), 
    pattern_filtering = list(), columns_to_fill = list(), 
    aggregate_isotopic = FALSE, anomaly_metrics = c(), ...
) {
    # Validate the user-supplied options before any processing starts.
    .checkMSstatsParams(
        input, annotation, feature_columns,
        remove_shared_peptides, remove_single_feature_proteins,
        feature_cleaning
    )
    # Whether the input carries a "Channel" column is passed to the option
    # logger as its last argument.
    has_channel = "Channel" %in% colnames(input)
    .logConverterOptions(feature_columns, remove_shared_peptides,
                         remove_single_feature_proteins, feature_cleaning,
                         has_channel)
    
    # Processing pipeline: every step receives the table produced by the
    # previous one, so the order of the calls below must not be changed.
    input = .fixBasicColumns(input)
    input = .handleFiltering(
        input, score_filtering, exact_filtering, pattern_filtering
    )
    input = .handleIsotopicPeaks(input, aggregate_isotopic)
    input = .filterFewMeasurements(input, 1, FALSE)
    input = .handleSharedPeptides(input, remove_shared_peptides)
    input = .cleanByFeature(
        input, feature_columns, feature_cleaning, anomaly_metrics
    )
    input = .handleSingleFeaturePerProtein(
        input, remove_single_feature_proteins
    )
    input = .mergeAnnotation(input, annotation)
    # The results of the next two calls are deliberately not assigned.
    # NOTE(review): presumably both helpers update `input` by reference -
    # confirm against their definitions.
    .fillValues(input, columns_to_fill)
    .adjustIntensities(input)
    input
}


#' Creates balanced design by removing overlapping fractions and filling incomplete rows
#' 
#' @param input `data.table` processed by the `MSstatsPreprocess` function
#' @param feature_columns str, names of columns that define spectral features
#' @param fill_incomplete if TRUE (default), ensures that rows with missing data 
#' for specific features are added as NA. For example, if the y10 ion of 
#' peptideA is measured in the "disease" samples but entirely missing for the 
#' "healthy" samples, rows with NA values will be created for the y10 ion of 
#' peptideA in the "healthy" group. This process increases the number of 
#' rows to account for all possible feature-sample combinations.
#' @param handle_fractions if TRUE (default), overlapping fractions will be resolved
#' @param fix_missing str, optional. Defaults to NULL, which means no action.
#' If not NULL, must be one of the options: "zero_to_na" or "na_to_zero".
#' If "zero_to_na", Intensity values equal exactly to 0 will be converted to NA.
#' If "na_to_zero", missing values will be replaced by zeros.
#' @param remove_few lgl, if TRUE, features with one or two measurements 
#' across runs will be removed.
#' @param anomaly_metrics character vector of names of columns with quality metrics
#' 
#' @export
#' @return data.frame of class `MSstatsValidated`
#' 
#' @examples
#' unbalanced_data = system.file("tinytest/raw_data/unbalanced_data.csv", 
#'                               package = "MSstatsConvert")
#' unbalanced_data = data.table::as.data.table(read.csv(unbalanced_data))
#' balanced = MSstatsBalancedDesign(unbalanced_data, 
#'                                  c("PeptideSequence", "PrecursorCharge",
#'                                    "FragmentIon", "ProductCharge"))
#' dim(balanced) # Now balanced has additional rows (with Intensity = NA)
#' # for runs that were not included in the unbalanced_data table
#' 
MSstatsBalancedDesign = function(input, feature_columns, fill_incomplete = TRUE,
                                 handle_fractions = TRUE, fix_missing = NULL,
                                 remove_few = TRUE, anomaly_metrics = c()) {
    # Local binding for the column name used inside the data.table call below.
    feature = NULL
    # Pass one INFO message to the functions stored in the `MSstatsLog` and
    # `MSstatsMsg` options, in that order.
    log_info = function(text) {
        getOption("MSstatsLog")("INFO", text)
        getOption("MSstatsMsg")("INFO", text)
    }
    
    # Add (by reference) a `feature` column built from the feature columns.
    input[, feature := do.call(".combine", .SD), .SDcols = feature_columns]
    if (handle_fractions) {
        input = .handleFractions(input)
        input = .filterFewMeasurements(input, 1, remove_few, feature_columns)
        log_info("** Fractionation handled.")
    } 
    input = .makeBalancedDesign(input, fill_incomplete, anomaly_metrics)
    log_info(paste("** Updated quantification data to make balanced design.",
                   "Missing values are marked by NA"))
    input = .fixMissingValues(input, fix_missing)
    # Drop the helper columns "feature" and "isZero" from the result.
    input = input[, !(colnames(input) %in% c("feature", "isZero")), 
                  with = FALSE]
    
    # Only the log (not the message handler) receives the trailing newline.
    getOption("MSstatsLog")("INFO", "\n")
    .MSstatsFormat(input, anomaly_metrics)
}


#' Create annotation
#' 
#' @param input data.table preprocessed by the MSstatsClean function
#' @param annotation data.table 
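#' @param ... key-value pairs of the form `new_name = "current_name"`: each 
#' value is the name of an existing column of the annotation and each key is 
#' the name that column will be renamed to (e.g. `Run = "Rawfile"`).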
#' 
#' @return data.table
#' @export
#' 
#' @examples 
#' evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv", 
#'                             package = "MSstatsConvert")
#' pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv", 
#'                       package = "MSstatsConvert")
#' evidence = read.csv(evidence_path)
#' pg = read.csv(pg_path)
#' imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
#'                          "MSstats", "MaxQuant")
#' cleaned_data = MSstatsClean(imported, protein_id_col = "Proteins")
#' annot_path = system.file("tinytest/raw_data/MaxQuant/annotation.csv", 
#'                          package = "MSstatsConvert")
#' mq_annot = MSstatsMakeAnnotation(cleaned_data, read.csv(annot_path),
#'                                  Run = "Rawfile")
#' head(mq_annot)
#' 
MSstatsMakeAnnotation = function(input, annotation, ...) {
    # Pass one INFO message to the functions stored in the `MSstatsLog` and
    # `MSstatsMsg` options, in that order.
    log_info = function(text) {
        getOption("MSstatsLog")("INFO", text)
        getOption("MSstatsMsg")("INFO", text)
    }
    # Named vector built from `...`: names are the new column names, values
    # are the column names to be replaced.
    column_map = unlist(list(...))
    
    if (is.null(annotation)) {
        # No annotation supplied: take the unique combinations of the known
        # annotation columns that are present in the quantification data.
        candidate_cols = c("Run", "Channel", "Condition", "BioReplicate",
                           "TechReplicate", "Mixture", "TechRepMixture",
                           "Fraction", unname(column_map))
        present_cols = intersect(candidate_cols, colnames(input))
        annotation = unique(input[, present_cols, with = FALSE])
        log_info("** Using annotation extracted from quantification data.")
    } else {
        annotation = .getDataTable(annotation)
        log_info("** Using provided annotation.")
    }
    
    if (length(column_map) > 0) {
        # Rename in place; requested columns that do not exist are skipped.
        data.table::setnames(annotation,
                             old = unname(column_map),
                             new = names(column_map),
                             skip_absent = TRUE)
    }
    # Keep only the first occurrence of every column name.
    annotation = annotation[, !duplicated(colnames(annotation)), 
                            with = FALSE]
    .checkAnnotation(input, annotation)
    
    has_channel = is.element("Channel", colnames(annotation))
    if (has_channel) {
        annotation$Channel = .standardizeColnames(annotation$Channel)
    }
    annotation$Run = .standardizeColnames(annotation$Run)
    labels_msg = if (has_channel) "Run and Channel" else "Run"
    log_info(paste("**", labels_msg, "labels were standardized to remove",
                   "symbols such as '.' or '%'."))
    annotation
}

#' Run Anomaly Model
#' 
#' @param input data.table preprocessed by the MSstatsBalancedDesign function
#' @param quality_metrics character vector of quality metrics to use in the model
#' @param temporal_direction character vector of same length as quality_metrics indicating temporal feature to create.
#' @param missing_run_count numeric, maximum allowed fraction of missing runs per feature.
#' @param n_feat numeric, maximum number of features per protein to use in the model.
#' @param run_order data.frame with two columns: Run and Order. Order should be numeric and indicate the order of runs.
#' @param n_trees numeric, number of trees to use in the isolation forest model. Default is 100.
#' @param max_depth numeric or "auto", maximum depth of each tree. Default is "auto" which sets depth to log2(N) where N is the number of runs.
#' @param cores numeric, number of cores to use for parallel processing. Default is 1.
#' @useDynLib MSstatsConvert, .registration = TRUE
#' 
#' @return data.table
#' @export
MSstatsAnomalyScores = function(input, quality_metrics, temporal_direction,
                                missing_run_count, n_feat, run_order, 
                                n_trees = 100, max_depth = "auto", cores = 1) {
    # The defaults of `n_trees`, `max_depth` and `cores` are the values stated
    # in the parameter documentation of this function.
    
    input = .prepareSpectronautAnomalyInput(input, quality_metrics, 
                                            run_order, n_feat, 
                                            missing_run_count)
    # PSM identifier (peptide sequence followed by precursor charge); it is
    # used below as the `split_column` of the anomaly model.
    input$PSM = paste0(input$PeptideSequence, input$PrecursorCharge)
    
    # For every metric whose temporal direction is not FALSE, also pass the
    # column named "<metric>.<direction>" to the model.
    # NOTE(review): presumably these columns are created by
    # .prepareSpectronautAnomalyInput - confirm against its definition.
    for (i in seq_along(quality_metrics)) {
        if (temporal_direction[i] != FALSE) {
            quality_metrics = c(quality_metrics,
                                paste0(quality_metrics[i], ".",
                                       temporal_direction[i]))
        }
    }
    
    input = .runAnomalyModel(input, 
                             n_trees = n_trees, 
                             max_depth = max_depth, 
                             cores = cores,
                             split_column = "PSM",
                             quality_metrics = quality_metrics)
    
    subset_cols = c("Run", "ProteinName", "PeptideSequence", 
                    "PrecursorCharge", "FragmentIon", 
                    "ProductCharge", "IsotopeLabelType", 
                    "Condition", "BioReplicate", 
                    "Fraction", "Intensity", "AnomalyScores",
                    quality_metrics)
    
    # Keep only the requested columns that are present in the result.
    # `with = FALSE` selects them by name, as elsewhere in this file.
    subset_cols = subset_cols[subset_cols %in% names(input)]
    input[, subset_cols, with = FALSE]
}

#' Check the health of converted data
#' 
#' Takes as input the output of the SpectronauttoMSstatsFormat function and 
#' calculates various quality metrics to assess the health of the data. 
#' Requires Anomaly Detection model to be fit.
#' 
#' @param input MSstats input which is the output of Spectronaut converter
#' @return list of two data.tables
#' 
#' @export
CheckDataHealth = function(input){
    
    # Namespace-qualified like the other data.table calls in this file.
    input = data.table::as.data.table(input)
    
    # All intensity characteristics.
    # The return values of these two checks are not part of the output of
    # this function, so they are not stored.
    # NOTE(review): presumably they are called for the messages they emit -
    # confirm against their definitions.
    .checkMissing(input)
    .checkIntensityDistribution(input)
    
    # Feature specific characteristics
    input$Feature = paste(input$PeptideSequence,
                          input$PrecursorCharge,
                          input$FragmentIon,
                          input$ProductCharge, sep = "_")
    feature_data = .checkFeatureSD(input)
    # Only the first element returned by the outlier check is used; it
    # replaces the feature table before the coverage check.
    feature_data = .checkFeatureOutliers(input, feature_data)[[1]]
    feature_data = .checkFeatureCoverage(input, feature_data)
    
    skew_results = .checkAnomalySkew(input)
    
    list(feature_data, skew_results)
}