Content - 37ba19ea9f92af8c4e6a7d5ab575daff689e2698 - 8191cef/R/utils.R

visit type:
Tip revision: 4408233eb8624dea85ecf18e86d50c296165c3f2 authored by Christian Thiele on 13 April 2022, 17:12:29 UTC
version 1.1.2
Tip revision: 4408233
utils.R
find_metric_name <- function(object) {
    if ("subgroup" %in% colnames(object)) {
        return(colnames(object)[5])
    } else {
        return(colnames(object)[4])
    }
}

find_metric_name_boot <- function(object) {
    if ("subgroup" %in% colnames(object)) {
        return(colnames(object)[6])
    } else {
        return(colnames(object)[5])
    }
}

default_cols <- c("m", "subgroup", "direction", "optimal_cutpoint",
                  "method", "acc", "sensitivity", "specificity", "AUC",
                  "pos_class", "neg_class", "prevalence",
                  "outcome", "predictor", "grouping", "data", "roc_curve",
                  "boot", "tn", "fn", "tp", "fp", "tpr", "tnr", "fpr", "fnr")

check_method_cols <- function(method_result) {
    cn <- colnames(method_result)
    n_col <- ncol(method_result)
    identified_cols <- 0
    oc_col <- which(cn == "optimal_cutpoint")
    if (!is.null(oc_col)) identified_cols <- identified_cols + 1
    if ("roc_curve" %in% cn) {
        roc_col <- which(cn == "roc_curve")
        identified_cols <- identified_cols + 1
    }
    if (identified_cols == 1 & n_col == 2) {
        metric_col <- (1:n_col)[-oc_col]
    } else if (identified_cols < n_col) {
        stopifnot(exists("roc_col"))
        metric_col <- (1:n_col)[-c(oc_col, roc_col)]
        if (length(metric_col) >= 2) {
            stop(paste("method function returned too many columns.",
                       "Should return optimal_cutpoint, roc_curve (optional)",
                       "and a metric column (optional)."))
        }
        metric_name <- cn[metric_col]
        if (metric_name %in% default_cols) {
            colnames(method_result)[metric_col] <- paste0("metric_", metric_name)
        }
    }
    return(method_result)
}

check_metric_name <- function(met) {
    # Numeric vector
    if (!is.array(met) & is.numeric(met)) return(met)
    cn <- colnames(met)
    if (cn %in% default_cols) {
        colnames(met) <- paste0("metric_", cn)
        return(met)
    } else {
        return(met)
    }
}

check_method_name <- function(mod_name) {
    if (length(mod_name) > 1) {
        if (mod_name[1] %in% c("::", ":::") & length(mod_name) == 3) {
            mod_name <- mod_name[3]
        } else {
            stop("Could not parse method name")
        }
    }
    return(mod_name)
}

check_colnames <- function(cutpointr_object) {
    if ("subgroup" %in% colnames(cutpointr_object)) col_nr <- 4 else col_nr <- 3
    metric_name <- colnames(cutpointr_object)[col_nr]
    if (metric_name %in% default_cols) {
        metric_name2 <- paste0("metric_", metric_name)
        colnames(cutpointr_object)[col_nr] <- metric_name2
        cutpointr_object$metric_name <- metric_name2
    } else {
        cutpointr_object$metric_name <- metric_name
    }
    return(cutpointr_object)
}

check_roc_curve <- function(object) {
    if (!("roc_cutpointr" %in% class(object$roc_curve[[1]]))) {
        stop(paste("roc_curve as returned by the method function is not an",
                   "object of the class roc_cutpointr"))
    }
}

has_column <- function(x, colname) {
    if (colname %in% colnames(x) | colname %in% names(x)) {
        return(TRUE)
    } else {
        return(FALSE)
    }
}

has_boot_results <- function(x) {
    if (has_column(x, "boot")) {
        if (all(is.na(x[["boot"]]))) {
            return(FALSE)
        } else {
            return(TRUE)
        }
    } else {
        return(FALSE)
    }
}

ifel_pos_neg <- function(logi_vec, pos_class, neg_class) {
    predictions <- rep(neg_class, length(logi_vec))
    predictions[logi_vec] <- pos_class
    return(predictions)
}

get_fnth <- function(x, n = 1) {
    x <- unlist(x)
    if (length(x) == 1) {
        return(x[1])
    } else {
        return(x[n])
    }
    stop("no conditions apply in get_fnth")
}

get_numeric_cols <- function(x, class_col) {
    cols <- colnames(x)[unlist(lapply(x, is.numeric))]
    cols <- cols[cols != class_col]
    cols
}

midpoint <- function(oc, x, direction) {
    sapply(oc, function(oc) {
        x <- c(oc, x)
        if (direction == ">=") {
            x <- sort(unique(x))
        } else {
            x <- sort(unique(x), decreasing = TRUE)
        }
        if (direction == ">=") {
            mean(c(oc, x[utils::tail(which(x <= oc), 1) - 1]))
        } else if (direction == "<=") {
            mean(c(oc, x[utils::tail(which(x >= oc), 1) - 1]))
        }
    })
}

apply_break_ties <- function(oc, f) {
    stopifnot(nrow(oc) == 1)
    optimal_cutpoint <- f(oc[["optimal_cutpoint"]][[1]])
    if (length(optimal_cutpoint) > 1) {
        optimal_cutpoint <- list(optimal_cutpoint)
    }
    oc$optimal_cutpoint <- optimal_cutpoint
    return(oc)
}

get_opt_ind <- function(roc_curve, oc, direction) {
    stopifnot(is.numeric(oc) | is.na(oc))
    sapply(oc, function(x) {
        if (direction == ">=") {
            opt_ind <- max(which(roc_curve$x.sorted >= x))
        } else if (direction == "<=") {
            opt_ind <- max(which(roc_curve$x.sorted <= x))
        }
        return(opt_ind)
    })
}

summary_sd <- function(x) {
    x <- unlist(x)
    s <- summary(x)[1:6]
    result <- c(s[1],
                stats::quantile(x, 0.05, na.rm = TRUE),
                s[2:5],
                stats::quantile(x, 0.95, na.rm = TRUE),
                s[6],
                SD = stats::sd(x, na.rm = TRUE),
                NAs = sum(is.na(x)))
    return(result)
}

summary_sd_df <- function(x) {
    x <- unlist(x)
    s <- summary(x)[1:6]
    result <- c(s[1],
                stats::quantile(x, 0.05, na.rm = TRUE),
                s[2:5],
                stats::quantile(x, 0.95, na.rm = TRUE),
                s[6],
                SD = stats::sd(x, na.rm = TRUE),
                NAs = sum(is.na(x)))
    result <- data.frame(`Min.` = result[1],
                         `5%` = result[2],
                         `1st Qu.` = result[3],
                         `Median` = result[4],
                         `Mean` = result[5],
                         `3rd Qu.` = result[6],
                         `95%` = result[7],
                         `Max.` = result[8],
                         `SD` = result[9],
                         `NAs` = result[10],
                         check.names = FALSE,
                         row.names = NULL)
    return(result)
}

# Printing function for data.frames without returning x invisibly and with
# row.names = FALSE
print_df_nodat <- function (x, ..., digits = NULL, quote = FALSE, right = TRUE,
          row.names = FALSE, max = NULL) {
    n <- length(row.names(x))
    if (length(x) == 0L) {
        cat(sprintf(ngettext(n, "data frame with 0 columns and %d row",
                             "data frame with 0 columns and %d rows"), n),
            "\n", sep = "")
    }
    else if (n == 0L) {
        print.default(names(x), quote = FALSE)
        cat(gettext("<0 rows> (or 0-length row.names)\n"))
    }
    else {
        if (is.null(max))
            max <- getOption("max.print", 99999L)
        if (!is.finite(max))
            stop("invalid 'max' / getOption(\"max.print\"): ",
                 max)
        omit <- (n0 <- max%/%length(x)) < n
        m <- as.matrix(format.data.frame(if (omit)
            x[seq_len(n0), , drop = FALSE]
            else x, digits = digits, na.encode = FALSE))
        if (!isTRUE(row.names))
            dimnames(m)[[1L]] <- if (isFALSE(row.names))
                rep.int("", if (omit)
                    n0
                    else n)
        else row.names
        print(m, ..., quote = quote, right = right, max = max)
        if (omit)
            cat(" [ reached 'max' / getOption(\"max.print\") -- omitted",
                n - n0, "rows ]\n")
    }
    invisible(x)
}

# If the output of the metric function is no named matrix with one column,
# convert it to one. Also run some checks.
sanitize_metric <- function(m, m_name, n, silent = TRUE) {
    if ("data.frame" %in% class(m)) {
        m <- as.matrix(m)
    }
    if (!is.null(dim(m))) {
        if (dim(m)[2] == 1 & ("matrix" %in% class(m))) {
            res <- m
            if (is.null(colnames(res))) colnames(res) <- m_name
        } else {
            stop(paste("The metric function should return a numeric vector",
                       "or a one-column matrix or data.frame."))
        }
    } else if (is.numeric(m)) {
        res <- matrix(m, ncol = 1, dimnames = list(NULL, m_name))
    } else {
        stop(paste("Can't process metric of type", class(m)))
    }
    finite_res <- is.finite(res)
    if (any(!finite_res)) {
        if (!silent) message("Converting non-finite metric values to NA")
        res[!finite_res] <- NA
    }
    if (nrow(res) != n) {
        stop("Number of returned metric values not equal to n")
    }
    colnames(res) <- make.names(colnames(res))
    return(res)
}

only_one_unique <- function(x) {
    if (is.character(x) | is.factor(x)) {
        one_unique_char(x)
    } else {
        one_unique_num(x)
    }
}

which_cpp <- function(x, y) {
    if (is.numeric(x)) {
        return(which_are_num(x, y))
    } else {
        return(which_are_char(x, y))
    }
}

is_equal_cpp <- function(x, y) {
    if (is.numeric(x)) {
        return(is_equal_cpp_num(x, y))
    } else {
        return(is_equal_cpp_char(x, as.character(y)))
    }
}

na_inf_omit <- function(x) {
    x <- stats::na.omit(x)
    x <- x[is.finite(x)]
    return(x)
}

.onUnload <- function (libpath) {
    library.dynam.unload("cutpointr", libpath)
}

add_list <- function(x, y, name) {
    if (length(y) > 1) {
        x[[name]] <- list(y)
    } else {
        x[[name]] <- y
    }
    return(x)
}

# rbind list elements that are tibbles with different column types (list or dbl,
# may be necessary for bootstrap results, if only one bootstrap resulted in
# multiple optimal cutpoints)
# Convert non-list columns to list so that bind_rows doesn't complain
prepare_bind_rows <- function(x) {
    stopifnot(inherits(x, "list"))
    if (length(x) < 2) {
        return(x)
    } else {
        list_cols <- purrr::map(x, function(df) {
            df %>%
                purrr::map(function(col) (is.list(col)))
        })
        list_cols <- purrr::map(list_cols, function(coltypes) {
            which(unlist(coltypes))
        })
        list_cols <- unique(unlist(list_cols))
        x <- purrr::map(x, function(x) {
            dplyr::mutate_at(.tbl = x, .vars = list_cols, .funs = function(x) {
                if (!is.list(x)) {
                    purrr::map(x, function(x) x)
                } else {
                    x
                }
            })
        })
        return(x)
    }
}


# Return indices for observations based on nonparametric bootstrap per class.
# Preliminary tests suggested that per_class = FALSE leads to better
# confidence intervals.
simple_boot <- function(ind_pos = NULL, ind_neg = NULL,
                        data = NULL, dep_var = NULL,
                        stratify) {
    if (stratify) {
        b_ind_pos <- sample(ind_pos, size = length(ind_pos), replace = TRUE)
        b_ind_neg <- sample(ind_neg, size = length(ind_neg), replace = TRUE)
        return(c(b_ind_pos, b_ind_neg))
    } else {
        b_ind <- sample(1:nrow(data), size = nrow(data), replace = TRUE)
        return(b_ind)
    }
}
Browse the archive

https://github.com/cran/cutpointr