## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----echo=FALSE, message=FALSE, results='hide'--------------------------------
library(LimROTS, quietly = TRUE)
data("UPS1.Case4")

## ----echo=FALSE---------------------------------------------------------------
table(UPS1.Case4$fake.batch, UPS1.Case4$Conc.)

## ----message=FALSE------------------------------------------------------------
# Load necessary packages
library(LimROTS, quietly = TRUE)
library(BiocParallel, quietly = TRUE)
library(ggplot2, quietly = TRUE)
library(SummarizedExperiment, quietly = TRUE)

## -----------------------------------------------------------------------------
# Load the dataset
data("UPS1.Case4")
print(UPS1.Case4)

## -----------------------------------------------------------------------------
set.seed(1234, kind = "default" , sample.kind = "default")

## -----------------------------------------------------------------------------
# colData(se)$Conc. <- factor(c("low" , "low", "high", "high" ,  
# levels = c("high" , "low")))

## -----------------------------------------------------------------------------
# Set metadata and formula for LimROTS analysis
meta.info <- c("Conc.", "tool", "fake.batch")
niter <- 100 # Number of bootstrap samples
K <- 100 # Set the value for K based on the data size
K <- floor(K)
group.name <- "Conc."
formula.str <- "~ 0 + Conc. + tool + fake.batch" # Formula for group comparison

# Run LimROTS analysis with trend and robust settings enabled
UPS1.Case4 <- LimROTS(
    x = UPS1.Case4,
    niter = niter, K = K, meta.info = meta.info,
    BPPARAM  = NULL, group.name = group.name,
    formula.str = formula.str, trend = TRUE,
    robust = TRUE, permutating.group = FALSE
)

## -----------------------------------------------------------------------------
# BPPARAM  <- SnowParam(workers = 4)
# Commented out in the vignette to ensure the limit set by the
# R_CHECK_LIMIT_CORES environment variable is not exceeded.

## -----------------------------------------------------------------------------
# BPPARAM  <- MulticoreParam(workers = 4)
# Commented out in the vignette to ensure the limit set by the
# R_CHECK_LIMIT_CORES environment variable is not exceeded.

## -----------------------------------------------------------------------------
# Create a data frame from the LimROTS results

limrots.result.df <- data.frame(rowData(UPS1.Case4), 
                                row.names = rownames(UPS1.Case4))

# Mark proteins as true positives (HUMAN UPS1 proteins)
limrots.result.df$TP <- ifelse(grepl("HUMAN", limrots.result.df$GeneID),
    "HUMAN_TP", "ECOLI_FP"
)

# Create a volcano plot
ggplot(limrots.result.df, aes(
    x = corrected.logfc, y = -log10(qvalue),
    color = factor(TP)
)) +
    geom_point(alpha = 0.8) +
    theme_bw() +
    labs(
        title = "Volcano Plot", x = "Log Fold Change", y = "-Log10 q.value",
        color = "True Positive"
    ) +
    scale_color_manual(values = c("grey", "red")) +
    geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "blue")

## ----results="hide" , message=FALSE, warning=FALSE----------------------------
## Quality Control Plots

# Plot of q-values
plot(metadata(UPS1.Case4)[["q_values"]])

# Histogram of q-values
hist(metadata(UPS1.Case4)[["q_values"]])

# Summary of q-values
print(summary(metadata(UPS1.Case4)[["q_values"]]))

## ----eval=FALSE---------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
# }
# if (!require(limma)) {
#     BiocManager::install("limma")
# }
# if (!require(ROTS)) {
#     BiocManager::install("ROTS")
# }
# if (!require(caret)) {
#     install.packages(caret)
# }

## -----------------------------------------------------------------------------
library(ROTS)
groups <- as.numeric(UPS1.Case4$Conc.)
rots.result <- ROTS(
    data = assay(UPS1.Case4), groups = groups, B = niter,
    K = K, seed = 1234
)
rots.result <- data.frame(
    proteins = row.names(rots.result$data),
    logFC = rots.result$logfc, FDR = rots.result$FDR
)

## -----------------------------------------------------------------------------
library(limma)

design.matrix <- model.matrix(formula(formula.str), data = colData(UPS1.Case4))
fit <- lmFit(assay(UPS1.Case4), design.matrix)
cont_matrix <- makeContrasts("Conc.1-Conc.2", levels = design.matrix)
fit2 <- contrasts.fit(fit, cont_matrix)
fit.ebayes <- eBayes(fit2, trend = TRUE, robust = TRUE)
limma.result <- topTable(fit.ebayes, coef = "Conc.1-Conc.2", number = "Inf")

## -----------------------------------------------------------------------------
library(caret, quietly = TRUE, warn.conflicts = TRUE)


TP <- elementMetadata(UPS1.Case4)[["GeneID"]]
TP <- TP[grepl("HUMAN", TP)]


predictions_limrots <- elementMetadata(UPS1.Case4)[["qvalue"]] < 0.05
predictions_limrots <- factor(predictions_limrots, levels = c(TRUE, FALSE))
true_labels_limrots <- ifelse(rownames(UPS1.Case4) %in% TP,
    TRUE, FALSE
)
true_labels_limrots <- factor(true_labels_limrots, levels = c(TRUE, FALSE))
conf_matrix_limrots <- confusionMatrix(
    predictions_limrots,
    true_labels_limrots
)

predictions_rots <- rots.result$FDR < 0.05
predictions_rots <- factor(predictions_rots, levels = c(TRUE, FALSE))
true_labels_rots <- ifelse(rots.result$protein %in% TP, TRUE, FALSE)
true_labels_rots <- factor(true_labels_rots, levels = c(TRUE, FALSE))
conf_matrix_rots <- confusionMatrix(predictions_rots, true_labels_rots)


predictions_limma <- limma.result$adj.P.Val < 0.05
predictions_limma <- factor(predictions_limma, levels = c(TRUE, FALSE))
true_labels_limma <- ifelse(row.names(limma.result) %in% TP, TRUE, FALSE)
true_labels_limma <- factor(true_labels_limma, levels = c(TRUE, FALSE))
conf_matrix_limma <- confusionMatrix(predictions_limma, true_labels_limma)

## -----------------------------------------------------------------------------
library(ggplot2)


extract_metrics <- function(conf_matrix, method_name) {
    metrics <- c(
        conf_matrix$byClass["Sensitivity"],
        conf_matrix$byClass["Specificity"],
        conf_matrix$byClass["Pos Pred Value"],
        conf_matrix$byClass["Neg Pred Value"],
        conf_matrix$byClass["F1"],
        conf_matrix$byClass["Balanced Accuracy"]
    )

    data.frame(
        Method = method_name, Metric = names(metrics),
        Value = as.numeric(metrics)
    )
}

metrics_limrots <- extract_metrics(conf_matrix_limrots, "limROTS")
metrics_rots <- extract_metrics(conf_matrix_rots, "ROTS")
metrics_limma <- extract_metrics(conf_matrix_limma, "limma")


all_metrics <- do.call(rbind, list(
    metrics_limrots, metrics_rots,
    metrics_limma
))


ggplot(all_metrics, aes(x = Metric, y = Value, fill = Method)) +
    geom_bar(stat = "identity", position = "dodge") +
    theme_bw() +
    labs(
        title = "Comparison of Performance Case Study",
        y = "Value", x = "Metric"
    ) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    theme(
        plot.title = element_text(face = "bold", color = "black"),
        axis.title = element_text(face = "bold", color = "black"),
        axis.text = element_text(face = "bold", color = "black"),
        axis.ticks = element_line(color = "black")
    )

## -----------------------------------------------------------------------------
sessionInfo()