## ----load libs, message=F-----------------------------------------------------
# Load the libraries
library(XAItest)
library(ggplot2)
library(ggforce)
library(SummarizedExperiment)

## ----load classif simu data---------------------------------------------------
# Read the simulated classification data set shipped with XAItest as a
# SummarizedExperiment object
se_path <- system.file("extdata", "seClassif.rds", package="XAItest")
dataset_classif <- readRDS(se_path)

# Extract the count matrix and transpose it so rows are samples and
# columns are features, then retrieve the sample metadata
data_matrix <- assay(dataset_classif, "counts")
data_matrix <- t(data_matrix)
metadata <- as.data.frame(colData(dataset_classif))

# Combine features and outcome into a single data.frame; cbind() coerces
# everything to character, so the feature columns are converted back to numeric
df_simu_classif <- as.data.frame(cbind(data_matrix, y = metadata[['y']]))
for (col in names(df_simu_classif)) {
    if (col != 'y') {
        df_simu_classif[[col]] <- as.numeric(df_simu_classif[[col]])
    }
}
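
# Optional sanity check (not part of the original workflow): inspect the
# assembled data.frame before any model fitting
dim(df_simu_classif)       # number of samples x (features + outcome)
table(df_simu_classif$y)   # class balance of the outcome column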

## -----------------------------------------------------------------------------
# Custom feature importance function based on XGBoost and SHAP values,
# passed to XAI.test() through the customFeatImps argument
featureImportanceXGBoost <- function(df, y="y", ...){
    # Prepare the data: feature matrix and a 0/1 encoding of the two classes
    matX <- as.matrix(df[, colnames(df) != y])
    classes <- unique(as.character(df[[y]]))
    vecY <- as.numeric(as.character(df[[y]]) == classes[2])

    # Train the XGBoost model
    model <- xgboost::xgboost(data = matX, label = vecY,
                              nrounds = 10, verbose = FALSE)

    # Convert the predicted probabilities back to class labels
    modelPredictions <- predict(model, matX)
    modelPredictionsCat <- ifelse(modelPredictions < 0.5,
                                  classes[1], classes[2])

    # Specify phi_0, i.e. the expected prediction without any features
    p <- mean(vecY)

    # Compute the Shapley values with kernelSHAP, accounting for feature
    # dependence using the empirical (conditional) distribution approach
    # with its default bandwidth parameter sigma = 0.1
    explanation <- shapr::explain(
        model,
        approach = "empirical",
        x_explain = matX,
        x_train = matX,
        phi0 = p,
        iterative = TRUE,
        iterative_args = list(max_iter = 3)
    )

    # Feature importance = mean absolute Shapley value per feature;
    # the first two columns (explain_id and the "none" baseline) are dropped
    results <- colMeans(abs(explanation$shapley_values_est), na.rm = TRUE)
    results <- results[3:length(results)]

    list(featImps = results, model = model,
         modelPredictions = modelPredictionsCat)
}
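
# Optional standalone check (a sketch, assuming the xgboost and shapr packages
# are installed): run the custom importance function directly on the simulated
# data and inspect the top-ranked features
if (requireNamespace("xgboost", quietly = TRUE) &&
    requireNamespace("shapr", quietly = TRUE)) {
    xgb_imp <- featureImportanceXGBoost(df_simu_classif, y = "y")
    print(head(sort(xgb_imp$featImps, decreasing = TRUE)))
}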

## ----xai4, warning=FALSE------------------------------------------------------
# Run XAI.test with the default t-test and linear model methods plus the
# custom XGBoost/SHAP feature importance defined above
set.seed(123)
results <- XAI.test(dataset_classif, "y", simData = TRUE,
                    simPvalTarget = 0.001,
                    customFeatImps = list(
                        "XGB_SHAP_feat_imp" = featureImportanceXGBoost),
                    defaultMethods = c("ttest", "lm"))

## ----mapPvalImportance4-------------------------------------------------------
# Compare the feature importances against the reference column of adjusted
# t-test p-values, using 0.001 as the significance threshold
mpi <- mapPvalImportance(results, refPvalColumn = "ttest_adjPval",
                         refPval = 0.001)
head(mpi$df)
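
# Optional follow-up (a sketch, assuming mpi$df still carries the numeric
# "ttest_adjPval" column used as reference above): keep only the features
# that pass the reference adjusted p-value threshold
if ("ttest_adjPval" %in% colnames(mpi$df)) {
    print(subset(mpi$df, ttest_adjPval < 0.001))
}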

## ----mapPvalImportance5, eval=FALSE-------------------------------------------
# mpi$dt

## -----------------------------------------------------------------------------
# Plot the XGBoost-generated model for the features diff_distrib01 and biDistrib
plotModel(results, "XGB_SHAP_feat_imp", "diff_distrib01", "biDistrib")

## -----------------------------------------------------------------------------
# Record session information for reproducibility
sessionInfo()