find_metric_name <- function(object) { if ("subgroup" %in% colnames(object)) { return(colnames(object)[5]) } else { return(colnames(object)[4]) } } find_metric_name_boot <- function(object) { if ("subgroup" %in% colnames(object)) { return(colnames(object)[6]) } else { return(colnames(object)[5]) } } default_cols <- c("m", "subgroup", "direction", "optimal_cutpoint", "method", "acc", "sensitivity", "specificity", "AUC", "pos_class", "neg_class", "prevalence", "outcome", "predictor", "grouping", "data", "roc_curve", "boot", "tn", "fn", "tp", "fp", "tpr", "tnr", "fpr", "fnr") check_method_cols <- function(method_result) { cn <- colnames(method_result) n_col <- ncol(method_result) identified_cols <- 0 oc_col <- which(cn == "optimal_cutpoint") if (!is.null(oc_col)) identified_cols <- identified_cols + 1 if ("roc_curve" %in% cn) { roc_col <- which(cn == "roc_curve") identified_cols <- identified_cols + 1 } if (identified_cols == 1 & n_col == 2) { metric_col <- (1:n_col)[-oc_col] } else if (identified_cols < n_col) { stopifnot(exists("roc_col")) metric_col <- (1:n_col)[-c(oc_col, roc_col)] if (length(metric_col) >= 2) { stop(paste("method function returned too many columns.", "Should return optimal_cutpoint, roc_curve (optional)", "and a metric column (optional).")) } metric_name <- cn[metric_col] if (metric_name %in% default_cols) { colnames(method_result)[metric_col] <- paste0("metric_", metric_name) } } return(method_result) } check_metric_name <- function(met) { # Numeric vector if (!is.array(met) & is.numeric(met)) return(met) cn <- colnames(met) if (cn %in% default_cols) { colnames(met) <- paste0("metric_", cn) return(met) } else { return(met) } } check_method_name <- function(mod_name) { if (length(mod_name) > 1) { if (mod_name[1] %in% c("::", ":::") & length(mod_name) == 3) { mod_name <- mod_name[3] } else { stop("Could not parse method name") } } return(mod_name) } check_colnames <- function(cutpointr_object) { if ("subgroup" %in% colnames(cutpointr_object)) col_nr <- 4 else col_nr <- 3 metric_name <- colnames(cutpointr_object)[col_nr] if (metric_name %in% default_cols) { metric_name2 <- paste0("metric_", metric_name) colnames(cutpointr_object)[col_nr] <- metric_name2 cutpointr_object$metric_name <- metric_name2 } else { cutpointr_object$metric_name <- metric_name } return(cutpointr_object) } check_roc_curve <- function(object) { if (!("roc_cutpointr" %in% class(object$roc_curve[[1]]))) { stop(paste("roc_curve as returned by the method function is not an", "object of the class roc_cutpointr")) } } has_column <- function(x, colname) { if (colname %in% colnames(x) | colname %in% names(x)) { return(TRUE) } else { return(FALSE) } } has_boot_results <- function(x) { if (has_column(x, "boot")) { if (all(is.na(x[["boot"]]))) { return(FALSE) } else { return(TRUE) } } else { return(FALSE) } } ifel_pos_neg <- function(logi_vec, pos_class, neg_class) { predictions <- rep(neg_class, length(logi_vec)) predictions[logi_vec] <- pos_class return(predictions) } get_fnth <- function(x, n = 1) { x <- unlist(x) if (length(x) == 1) { return(x[1]) } else { return(x[n]) } stop("no conditions apply in get_fnth") } get_numeric_cols <- function(x, class_col) { cols <- colnames(x)[unlist(lapply(x, is.numeric))] cols <- cols[cols != class_col] cols } midpoint <- function(oc, x, direction) { sapply(oc, function(oc) { x <- c(oc, x) if (direction == ">=") { x <- sort(unique(x)) } else { x <- sort(unique(x), decreasing = TRUE) } if (direction == ">=") { mean(c(oc, x[utils::tail(which(x <= oc), 1) - 1])) } else if (direction == "<=") { mean(c(oc, x[utils::tail(which(x >= oc), 1) - 1])) } }) } apply_break_ties <- function(oc, f) { stopifnot(nrow(oc) == 1) optimal_cutpoint <- f(oc[["optimal_cutpoint"]][[1]]) if (length(optimal_cutpoint) > 1) { optimal_cutpoint <- list(optimal_cutpoint) } oc$optimal_cutpoint <- optimal_cutpoint return(oc) } get_opt_ind <- function(roc_curve, oc, direction) { stopifnot(is.numeric(oc) | is.na(oc)) sapply(oc, function(x) { if (direction == ">=") { opt_ind <- max(which(roc_curve$x.sorted >= x)) } else if (direction == "<=") { opt_ind <- max(which(roc_curve$x.sorted <= x)) } return(opt_ind) }) } summary_sd <- function(x) { x <- unlist(x) s <- summary(x)[1:6] result <- c(s[1], stats::quantile(x, 0.05, na.rm = TRUE), s[2:5], stats::quantile(x, 0.95, na.rm = TRUE), s[6], SD = stats::sd(x, na.rm = TRUE), NAs = sum(is.na(x))) return(result) } summary_sd_df <- function(x) { x <- unlist(x) s <- summary(x)[1:6] result <- c(s[1], stats::quantile(x, 0.05, na.rm = TRUE), s[2:5], stats::quantile(x, 0.95, na.rm = TRUE), s[6], SD = stats::sd(x, na.rm = TRUE), NAs = sum(is.na(x))) result <- data.frame(`Min.` = result[1], `5%` = result[2], `1st Qu.` = result[3], `Median` = result[4], `Mean` = result[5], `3rd Qu.` = result[6], `95%` = result[7], `Max.` = result[8], `SD` = result[9], `NAs` = result[10], check.names = FALSE, row.names = NULL) return(result) } # Printing function for data.frames without returning x invisibly and with # row.names = FALSE print_df_nodat <- function (x, ..., digits = NULL, quote = FALSE, right = TRUE, row.names = FALSE, max = NULL) { n <- length(row.names(x)) if (length(x) == 0L) { cat(sprintf(ngettext(n, "data frame with 0 columns and %d row", "data frame with 0 columns and %d rows"), n), "\n", sep = "") } else if (n == 0L) { print.default(names(x), quote = FALSE) cat(gettext("<0 rows> (or 0-length row.names)\n")) } else { if (is.null(max)) max <- getOption("max.print", 99999L) if (!is.finite(max)) stop("invalid 'max' / getOption(\"max.print\"): ", max) omit <- (n0 <- max%/%length(x)) < n m <- as.matrix(format.data.frame(if (omit) x[seq_len(n0), , drop = FALSE] else x, digits = digits, na.encode = FALSE)) if (!isTRUE(row.names)) dimnames(m)[[1L]] <- if (isFALSE(row.names)) rep.int("", if (omit) n0 else n) else row.names print(m, ..., quote = quote, right = right, max = max) if (omit) cat(" [ reached 'max' / getOption(\"max.print\") -- omitted", n - n0, "rows ]\n") } invisible(x) } # If the output of the metric function is no named matrix with one column, # convert it to one. Also run some checks. sanitize_metric <- function(m, m_name, n, silent = TRUE) { if ("data.frame" %in% class(m)) { m <- as.matrix(m) } if (!is.null(dim(m))) { if (dim(m)[2] == 1 & ("matrix" %in% class(m))) { res <- m if (is.null(colnames(res))) colnames(res) <- m_name } else { stop(paste("The metric function should return a numeric vector", "or a one-column matrix or data.frame.")) } } else if (is.numeric(m)) { res <- matrix(m, ncol = 1, dimnames = list(NULL, m_name)) } else { stop(paste("Can't process metric of type", class(m))) } finite_res <- is.finite(res) if (any(!finite_res)) { if (!silent) message("Converting non-finite metric values to NA") res[!finite_res] <- NA } if (nrow(res) != n) { stop("Number of returned metric values not equal to n") } colnames(res) <- make.names(colnames(res)) return(res) } only_one_unique <- function(x) { if (is.character(x) | is.factor(x)) { one_unique_char(x) } else { one_unique_num(x) } } which_cpp <- function(x, y) { if (is.numeric(x)) { return(which_are_num(x, y)) } else { return(which_are_char(x, y)) } } is_equal_cpp <- function(x, y) { if (is.numeric(x)) { return(is_equal_cpp_num(x, y)) } else { return(is_equal_cpp_char(x, as.character(y))) } } na_inf_omit <- function(x) { x <- stats::na.omit(x) x <- x[is.finite(x)] return(x) } .onUnload <- function (libpath) { library.dynam.unload("cutpointr", libpath) } add_list <- function(x, y, name) { if (length(y) > 1) { x[[name]] <- list(y) } else { x[[name]] <- y } return(x) } # rbind list elements that are tibbles with different column types (list or dbl, # may be necessary for bootstrap results, if only one bootstrap resulted in # multiple optimal cutpoints) # Convert non-list columns to list so that bind_rows doesn't complain prepare_bind_rows <- function(x) { stopifnot(inherits(x, "list")) if (length(x) < 2) { return(x) } else { list_cols <- purrr::map(x, function(df) { df %>% purrr::map(function(col) (is.list(col))) }) list_cols <- purrr::map(list_cols, function(coltypes) { which(unlist(coltypes)) }) list_cols <- unique(unlist(list_cols)) x <- purrr::map(x, function(x) { dplyr::mutate_at(.tbl = x, .vars = list_cols, .funs = function(x) { if (!is.list(x)) { purrr::map(x, function(x) x) } else { x } }) }) return(x) } } # Return indices for observations based on nonparametric bootstrap per class. # Preliminary tests suggested that per_class = FALSE leads to better # confidence intervals. simple_boot <- function(ind_pos = NULL, ind_neg = NULL, data = NULL, dep_var = NULL, stratify) { if (stratify) { b_ind_pos <- sample(ind_pos, size = length(ind_pos), replace = TRUE) b_ind_neg <- sample(ind_neg, size = length(ind_neg), replace = TRUE) return(c(b_ind_pos, b_ind_neg)) } else { b_ind <- sample(1:nrow(data), size = nrow(data), replace = TRUE) return(b_ind) } }