## ----load libs, message=F----------------------------------------------------- # Load the libraries library(XAItest) library(ggplot2) library(ggforce) library(SummarizedExperiment) ## ----load classif simu data--------------------------------------------------- se_path <- system.file("extdata", "seClassif.rds", package="XAItest") dataset_classif <- readRDS(se_path) data_matrix <- assay(dataset_classif, "counts") data_matrix <- t(data_matrix) metadata <- as.data.frame(colData(dataset_classif)) df_simu_classif <- as.data.frame(cbind(data_matrix, y = metadata[['y']])) for (col in names(df_simu_classif)) { if (col != 'y') { df_simu_classif[[col]] <- as.numeric(df_simu_classif[[col]]) } } ## ----------------------------------------------------------------------------- featureImportanceXGBoost <- function(df, y="y", ...){ # Prepare data matX <- as.matrix(df[, colnames(df) != y]) vecY <- df[[y]] vecY <- as.character(vecY) vecY[vecY == unique(vecY)[1]] <- 0 vecY[vecY == unique(vecY)[2]] <- 1 vecY <- as.numeric(vecY) # Train the XGBoost model model <- xgboost::xgboost(data = matX, label = vecY, nrounds = 10, verbose = FALSE) modelPredictions <- predict(model, matX) modelPredictionsCat <- modelPredictions modelPredictionsCat[modelPredictions < 0.5] <- unique(as.character(df[[y]]))[1] modelPredictionsCat[modelPredictions >= 0.5] <- unique(as.character(df[[y]]))[2] # Specifying the phi_0, i.e. the expected prediction without any features p <- mean(vecY) # Computing the actual Shapley values with kernelSHAP accounting # for feature dependence using the empirical (conditional) # distribution approach with bandwidth parameter sigma = 0.1 (default) explanation <- shapr::explain( model, approach = "empirical", x_explain = matX, x_train = matX, phi0 = p, iterative = TRUE, iterative_args = list(max_iter = 3) ) results <- colMeans(abs(explanation$shapley_values_est), na.rm = TRUE) results <- results[3:length(results)] list(featImps = results, model = model, modelPredictions=modelPredictionsCat) } ## ----xai4, warning=FALSE------------------------------------------------------ set.seed(123) results <- XAI.test(dataset_classif,"y", simData = TRUE, simPvalTarget = 0.001, customFeatImps= list("XGB_SHAP_feat_imp"=featureImportanceXGBoost), defaultMethods = c("ttest", "lm") ) ## ----mapPvalImportance4------------------------------------------------------- mpi <- mapPvalImportance(results, refPvalColumn = "ttest_adjPval", refPval = 0.001) head(mpi$df) ## ----mapPvalImportance5, eval=FALSE------------------------------------------- # mpi$dt ## ----------------------------------------------------------------------------- # Plot of the XGboost generated model plotModel(results, "XGB_SHAP_feat_imp", "diff_distrib01", "biDistrib") ## ----------------------------------------------------------------------------- sessionInfo()