## ----include = FALSE----------------------------------------------------------
# Chunk-level knitr defaults: collapse source and output, prefix output "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
options(rmarkdown.html_vignette.check_title = FALSE)

## ----setup--------------------------------------------------------------------
library(tidytof)
library(stringr)

## -----------------------------------------------------------------------------
data(ddpr_metadata)

# Location of the sample-level data on the Nature Medicine website.
data_link <- "https://static-content.springer.com/esm/art%3A10.1038%2Fnm.4505/MediaObjects/41591_2018_BFnm4505_MOESM3_ESM.csv"

# Download the table, attach the clinical annotations by patient id, and drop
# every row whose patient id contains "Healthy".
ddpr_patients <-
  data_link |>
  readr::read_csv(skip = 2L, n_max = 78L, show_col_types = FALSE) |>
  dplyr::rename(patient_id = Patient_ID) |>
  dplyr::left_join(ddpr_metadata, by = "patient_id") |>
  dplyr::filter(str_detect(patient_id, "Healthy", negate = TRUE))

# Preview the metadata columns only, i.e. everything that is not numeric.
head(dplyr::select(ddpr_patients, !where(is.numeric)))

## -----------------------------------------------------------------------------
# Step 1: store relapse_status as a factor.
# Step 2: derive the two columns used for survival modeling. time_to_event
# takes time_to_relapse when relapse_status is "Yes" and ccr otherwise; event
# is 1 when relapse_status is "Yes" and 0 otherwise.
ddpr_patients <-
  ddpr_patients |>
  dplyr::mutate(relapse_status = as.factor(relapse_status)) |>
  dplyr::mutate(
    time_to_event =
      dplyr::if_else(relapse_status == "Yes", time_to_relapse, ccr),
    event = dplyr::if_else(relapse_status == "Yes", 1, 0)
  )

## -----------------------------------------------------------------------------
# Tally the patients by outcome.
dplyr::count(ddpr_patients, relapse_status)

## -----------------------------------------------------------------------------
# Set aside the patients with no recorded outcome, then keep only the
# annotated patients for modeling.
ddpr_patients_unannotated <-
  dplyr::filter(ddpr_patients, is.na(relapse_status))

ddpr_patients <-
  dplyr::filter(ddpr_patients, !is.na(relapse_status))

## -----------------------------------------------------------------------------
# Fix the RNG seed before splitting so the folds are reproducible.
set.seed(3000L)

training_split <-
  tof_split_data(
    ddpr_patients,
    split_method = "k-fold",
    num_cv_folds = 10,
    strata = relapse_status
  )

training_split

## -----------------------------------------------------------------------------
# Take the first resample out of the split object and print it.
my_resample <- training_split[["splits"]][[1L]]
print(my_resample)

## -----------------------------------------------------------------------------
# First rows of the training portion of that resample.
head(rsample::training(my_resample))

## -----------------------------------------------------------------------------
# First rows of the testing portion of that resample.
head(rsample::testing(my_resample))

## ----warning = FALSE----------------------------------------------------------
hyperparams <- tof_create_grid(mixture_values = 1)

# Two-class model of relapse_status, using the columns whose names contain
# "Pop2" as predictors.
class_mod <-
  tof_train_model(
    training_split,
    predictor_cols = c(contains("Pop2")),
    response_col = relapse_status,
    model_type = "two-class",
    hyperparameter_grid = hyperparams,
    impute_missing_predictors = TRUE,
    remove_zv_predictors = TRUE # often a smart decision
  )

## -----------------------------------------------------------------------------
print(class_mod)

## -----------------------------------------------------------------------------
# Assess the classifier without supplying any new data.
training_classifier_metrics <- tof_assess_model(class_mod)

## -----------------------------------------------------------------------------
training_classifier_metrics$confusion_matrix

## -----------------------------------------------------------------------------
tof_plot_model(class_mod)

## -----------------------------------------------------------------------------
# Repeat the assessment and the plot with new_data = "tuning".
cv_classifier_metrics <- tof_assess_model(class_mod, new_data = "tuning")

tof_plot_model(class_mod, new_data = "tuning")

## ----warning = FALSE----------------------------------------------------------
# The grid is rebuilt here so this chunk can run on its own.
hyperparams <- tof_create_grid(mixture_values = 1)

# Survival model on the same predictors, with time_to_event and event as the
# outcome columns.
survival_mod <-
  tof_train_model(
    training_split,
    predictor_cols = c(contains("Pop2")),
    time_col = time_to_event,
    event_col = event,
    model_type = "survival",
    hyperparameter_grid = hyperparams,
    impute_missing_predictors = TRUE,
    remove_zv_predictors = TRUE # often a smart decision
  )

print(survival_mod)

## -----------------------------------------------------------------------------
survival_metrics <- tof_assess_model(survival_mod)
survival_metrics

## -----------------------------------------------------------------------------
tof_plot_model(survival_mod)

## -----------------------------------------------------------------------------
cv_survival_metrics <- tof_assess_model(survival_mod, new_data = "tuning")

## -----------------------------------------------------------------------------
tof_plot_model(survival_mod, new_data = "tuning")

## -----------------------------------------------------------------------------
sessionInfo()