#' Test for equivalence of a metric #' #' This function performs a significance test based on the bootstrap results #' of cutpointr to test whether a chosen metric is equal between subgroups #' or between two cutpointr objects. The test statistic is calculated as #' the standardized difference of the metric between groups. If \code{x} #' contains subgroups, the test is run on all possible pairings of subgroups. #' An additional adjusted p-value is returned in that case. #' #' The variable name is looked up in the columns of the bootstrap results #' where the suffixes _b and _oob indicate in-bag and out-of-bag estimates, #' respectively (controlled via the \code{in_bag} argument). #' Possible values are optimal_cutpoint, AUC, #' acc, sensitivity, specificity, and the metric that was selected #' in \code{cutpointr}. Note that there is no "out-of-bag optimal cutpoint", so #' when selecting \code{variable = optimal_cutpoint} the test will be based on #' the in-bag data. #' #' The test statistic is calculated as z = (t1 - t2) / sd(t1 - t2) where t1 and #' t2 are the metric values on the full sample and sd(t1 - t2) is the standard #' deviation of the differences of the metric values per bootstrap repetition. #' The test is two-sided. #' #' If two cutpointr objects are compared and the numbers of bootstrap repetitions #' differ, the smaller number will be used. #' #' Since pairwise differences are calculated for this test, the test function #' does not support multiple optimal cutpoints, because it is unclear how the #' differences should be calculated in that case. #' #' @param x A cutpointr object with bootstrap results #' @param y If x does not contain subgroups another cutpointr object #' @param variable The variable for testing #' @param in_bag Whether the in-bag or out-of-bag results should be used for testing #' @param correction The type of correction for multiple testing. Possible #' values are as in p.adjust.methods #' #' @return A data.frame (a tibble) with the columns test_var, p, d, sd_d, z #' and in_bag. If a grouped cutpointr object was tested, the additional #' columns subgroup1, subgroup2 and p_adj are returned. #' #' @examples #' \dontrun{ #' library(cutpointr) #' library(dplyr) #' set.seed(734) #' cp_f <- cutpointr(suicide %>% filter(gender == "female"), dsi, suicide, #' boot_runs = 1000, boot_stratify = TRUE) #' set.seed(928) #' cp_m <- cutpointr(suicide %>% filter(gender == "male"), dsi, suicide, #' boot_runs = 1000, boot_stratify = TRUE) #' # No significant differences: #' boot_test(cp_f, cp_m, AUC, in_bag = TRUE) #' boot_test(cp_f, cp_m, sum_sens_spec, in_bag = FALSE) #' #' set.seed(135) #' cp <- cutpointr(suicide, dsi, suicide, gender, boot_runs = 1000, #' boot_stratify = TRUE) #' # Roughly same result as above: #' boot_test(cp, variable = AUC, in_bag = TRUE) #' boot_test(cp, variable = sum_sens_spec, in_bag = FALSE) #' } #' #' @export #' @family main cutpointr functions #' @source Robin, X., Turck, N., Hainard, A., Tiberti, N., Lisacek, F., #' Sanchez, J.-C., & Müller, M. (2011). pROC: An open-source package for R #' and S+ to analyze and compare ROC curves. BMC Bioinformatics, 12(1), 77. #' https://doi.org/10.1186/1471-2105-12-77 boot_test <- function(x, y = NULL, variable = "AUC", in_bag = TRUE, correction = "holm") { if (!has_boot_results(x)) { stop(paste("x does not contain bootstrap results.", "Was boot_runs > 0 in cutpointr?")) } stopifnot(inherits(x, "cutpointr")) if (!is.null(y)) { if (!has_boot_results(y)) { stop(paste("y does not contain bootstrap results.", "Was boot_runs > 0 in cutpointr?")) } stopifnot(inherits(y, "cutpointr")) } # No boot_test if multiple cutpoints multi_cp_x <- purrr::map_lgl(x$boot, function(b) is.list(b$optimal_cutpoint)) if (!is.null(y)) { multi_cp_y <- purrr::map_lgl(y$boot, function(b) is.list(b$optimal_cutpoint)) } else { multi_cp_y <- FALSE } if (any(multi_cp_x) | any(multi_cp_y)) { stop(paste("boot_test does not support multiple optimal cutpoints.", "See ?boot_test. Try setting break_ties = median", "in cutpointr.")) } variable <- rlang::enquo(variable) variable <- rlang::as_name(variable) # If y is also supplied, x and y should be ungrouped if ((!is.null(y)) & nrow(x) > 1) { stop(paste("When testing two cutpointr objects these objects must not", "contain subgroups.")) } if (in_bag) suffix <- "_b" else suffix <- "_oob" if (variable == "optimal_cutpoint") suffix <- "" if (is.null(y)) { # Test subgroups vs. each other combs <- utils::combn(x = x$subgroup, m = 2, simplify = FALSE) combs <- do.call(rbind, combs) combs <- data.frame(var1 = combs[, 1], var2 = combs[, 2], stringsAsFactors = FALSE) test_res <- purrr::map2_dfr(.x = combs$var1, .y = combs$var2, .f = function(var1, var2) { dat_var1 <- x %>% dplyr::filter(.data$subgroup == var1) %>% dplyr::select(.data$boot) %>% tidyr::unnest(.data$boot) %>% dplyr::select(paste0(variable, suffix)) %>% unlist dat_var2 <- x %>% dplyr::filter(.data$subgroup == var2) %>% dplyr::select(.data$boot) %>% tidyr::unnest(.data$boot) %>% dplyr::select(paste0(variable, suffix)) %>% unlist na_v1 <- sum(is.na(dat_var1)) na_v2 <- sum(is.na(dat_var2)) if (na_v1 > 0 | na_v2 > 0) { message(var1, " vs. ", var2, ": ", "Omitting missing values. Using ", nrow(x$boot[[1]]) - max(na_v1, na_v2), " observations.") } sdt <- stats::sd(dat_var1 - dat_var2, na.rm = TRUE) t1 <- x %>% dplyr::filter(.data$subgroup == var1) %>% dplyr::select(variable) %>% unlist t2 <- x %>% dplyr::filter(.data$subgroup == var2) %>% dplyr::select(variable) %>% unlist z <- (t1 - t2) / sdt if (t1 - t2 == 0 & is.nan(sdt)) z <- 0 p <- unname(stats::pnorm(-abs(z)) * 2) tibble::tibble(subgroup1 = var1, subgroup2 = var2, test_var = variable, p = p, p_adj = NA, d = t1 - t2, sd_d = sdt, z = z, in_bag = in_bag) }) test_res$p_adj <- unname(stats::p.adjust(p = test_res$p, method = correction)) } else if (!(is.null(y))) { # Test two ungrouped cutpointr objects vs. each other if (!has_boot_results(y)) { stop(paste("y does not contain bootstrap results.", "Was boot_runs > 0 in cutpointr?")) } if (nrow(y) > 1) { stop(paste("When testing two cutpointr objects these objects must not", "contain subgroups.")) } dat_var1 <- x %>% dplyr::select(.data$boot) %>% tidyr::unnest(.data$boot) %>% dplyr::select(paste0(variable, suffix)) %>% unlist dat_var2 <- y %>% dplyr::select(.data$boot) %>% tidyr::unnest(.data$boot) %>% dplyr::select(paste0(variable, suffix)) %>% unlist if (length(dat_var1) != length(dat_var2)) { warning(paste("Unequal number of boot_runs. Using the lower", "number of", min(length(dat_var1), length(dat_var2)), "bootstrap repeats.")) lower_runs <- min(length(dat_var1), length(dat_var2)) dat_var1 <- dat_var1[1:lower_runs] dat_var2 <- dat_var2[1:lower_runs] } na_v1 <- sum(is.na(dat_var1)) na_v2 <- sum(is.na(dat_var2)) if (na_v1 > 0 | na_v2 > 0) { message("Omitting missing values. Using ", nrow(x$boot[[1]]) - max(na_v1, na_v2), " observations.") } sdt <- stats::sd(dat_var1 - dat_var2, na.rm = TRUE) t1 <- x %>% dplyr::select(variable) %>% unlist t2 <- y %>% dplyr::select(variable) %>% unlist z <- (t1 - t2) / sdt if (t1 - t2 == 0 & sdt == 0) z <- 0 p <- stats::pnorm(-abs(z)) * 2 test_res <- tibble::tibble(test_var = variable, p = p, d = t1 - t2, sd_d = sdt, z = z, in_bag = in_bag) } class(test_res) <- c("boot_test", class(test_res)) return(test_res) }