https://github.com/cran/precrec
Raw File
Tip revision: dac6628e3e9c6e5f5b05e3ee1a071e0f17b640aa authored by Takaya Saito on 24 June 2016, 07:30:42 UTC
version 0.4.0
Tip revision: dac6628
etc_utils_dataframe.R
#' Convert a curves and points object to a data frame
#'
#' The \code{as.data.frame} function converts an \code{S3} object generated by
#'   \code{\link{evalmod}} to a data frame.
#'
#' @param x An \code{S3} object generated by \code{\link{evalmod}}.
#'   The \code{as.data.frame} function takes one of the following \code{S3} objects.
#'
#' \enumerate{
#'
#'   \item ROC and Precision-Recall curves (mode = "rocprc")
#'
#'   \tabular{lll}{
#'     \strong{\code{S3} object}
#'     \tab \strong{# of models}
#'     \tab \strong{# of test datasets} \cr
#'
#'     sscurves \tab single   \tab single   \cr
#'     mscurves \tab multiple \tab single   \cr
#'     smcurves \tab single   \tab multiple \cr
#'     mmcurves \tab multiple \tab multiple
#'   }
#'
#'   \item Basic evaluation measures (mode = "basic")
#'
#'   \tabular{lll}{
#'     \strong{\code{S3} object}
#'     \tab \strong{# of models}
#'     \tab \strong{# of test datasets} \cr
#'
#'     sspoints \tab single   \tab single   \cr
#'     mspoints \tab multiple \tab single   \cr
#'     smpoints \tab single   \tab multiple \cr
#'     mmpoints \tab multiple \tab multiple
#'   }
#' }
#'
#' See the \strong{Value} section of \code{\link{evalmod}} for more details.
#'
#' @param raw_curves A Boolean value to specify whether raw curves are
#'   shown instead of the average curve. It is effective only
#'   when \code{raw_curves} is set to \code{TRUE}
#'   of the \code{\link{evalmod}} function.
#'
#' @param ... Not used by this method.
#'
#' @param row.names Not used by this method.
#'
#' @param optional Not used by this method.
#'
#' @return The \code{as.data.frame} function returns a data frame.
#'
#' @seealso \code{\link{evalmod}} for generating \code{S3} objects with
#'   performance evaluation measures.
#'
#' @examples
#'
#' #############################################################################
#' ### Single model & single test dataset
#' ###
#'
#' ## Load a dataset with 10 positives and 10 negatives
#' data(P10N10)
#'
#' ## Generate an sscurve object that contains ROC and Precision-Recall curves
#' sscurves <- evalmod(scores = P10N10$scores, labels = P10N10$labels)
#'
#' ## Convert sscurves to a data frame
#' sscurves.df <- as.data.frame(sscurves)
#'
#' ## Generate an sspoints object that contains basic evaluation measures
#' sspoints <- evalmod(mode = "basic", scores = P10N10$scores,
#'                     labels = P10N10$labels)
#' ## Convert sspoints to a data frame
#' sspoints.df <- as.data.frame(sspoints)
#'
#'
#' #############################################################################
#' ### Multiple models & single test dataset
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(1, 100, 100, "all")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#'                modnames = samps[["modnames"]])
#'
#' ## Generate an mscurve object that contains ROC and Precision-Recall curves
#' mscurves <- evalmod(mdat)
#'
#' ## Convert mscurves to a data frame
#' mscurves.df <- as.data.frame(mscurves)
#'
#' ## Generate an mspoints object that contains basic evaluation measures
#' mspoints <- evalmod(mdat, mode = "basic")
#'
#' ## Convert mspoints to a data frame
#' mspoints.df <- as.data.frame(mspoints)
#'
#'
#' #############################################################################
#' ### Single model & multiple test datasets
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(10, 100, 100, "good_er")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#'                modnames = samps[["modnames"]],
#'                dsids = samps[["dsids"]])
#'
#' ## Generate an smcurve object that contains ROC and Precision-Recall curves
#' smcurves <- evalmod(mdat, raw_curves = TRUE)
#'
#' ## Convert smcurves to a data frame
#' smcurves.df <- as.data.frame(smcurves)
#'
#'
#' ## Generate an smpoints object that contains basic evaluation measures
#' smpoints <- evalmod(mdat, mode = "basic")
#'
#' ## Convert smpoints to a data frame
#' smpoints.df <- as.data.frame(smpoints)
#'
#'
#' #############################################################################
#' ### Multiple models & multiple test datasets
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(10, 100, 100, "all")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#'                modnames = samps[["modnames"]],
#'                dsids = samps[["dsids"]])
#'
#' ## Generate an mscurve object that contains ROC and Precision-Recall curves
#' mmcurves <- evalmod(mdat, raw_curves = TRUE)
#'
#' ## Convert mmcurves to a data frame
#' mmcurves.df <- as.data.frame(mmcurves)
#'
#' ## Generate an mmpoints object that contains basic evaluation measures
#' mmpoints <- evalmod(mdat, mode = "basic")
#'
#' ## Convert mmpoints to a data frame
#' mmpoints.df <- as.data.frame(mmpoints)
#'
#' @name as.data.frame
NULL

#
# Make a dataframe for plotting
#
.dataframe_common <- function(obj, mode = "rocprc", raw_curves = TRUE,
                              check_ggplot = FALSE, ...) {
  # === Check package availability  ===
  if (check_ggplot) {
    .load_ggplot2()
  }


  # === Validate input arguments ===
  .validate(obj)
  new_mode <- .pmatch_mode(mode)
  .check_mode(new_mode, obj)
  .check_raw_curves(raw_curves, obj)

  # Prepare variables
  uniq_modnames <- attr(obj, "uniq_modnames")
  uniq_dsids <- attr(obj, "uniq_dsids")
  modnames <- attr(obj, "data_info")[["modnames"]]
  dsids <- attr(obj, "data_info")[["dsids"]]

  if (new_mode == "rocprc") {
    curvetype_names <- list(ROC = "rocs", PRC = "prcs")
  } else if (new_mode == "basic") {
    curvetype_names <- list(score = "score", label = "label", error = "err",
                            accuracy = "acc", specificity = "sp",
                            sensitivity = "sn", precision = "prec", mcc = "mcc",
                            fscore = "fscore")
  }

  # Make dsis-modname pairs
  i <- 1
  dsid_modnames <- vector(mode = "character",
                          length = length(uniq_modnames) * length(uniq_dsids))
  for (modname in uniq_modnames) {
    for (dsid in uniq_dsids) {
      dsid_modnames[i] <- paste(modname, dsid, sep = ":")
      i <- i + 1
    }
  }

  # Create curve_df
  if (raw_curves) {
    curve_df <- .dataframe_curve(obj, uniq_modnames, uniq_dsids, modnames,
                               dsids, dsid_modnames, curvetype_names)
  } else {
    curve_df <- .dataframe_curve_avg(obj, uniq_modnames, uniq_dsids, modnames,
                                   dsids, dsid_modnames, curvetype_names)
  }

  if (!check_ggplot) {
    if ("dsid_modname" %in% names(curve_df)) {
      curve_df <- with(curve_df, subset(curve_df, select = -dsid_modname))
    }
    colnum <- ncol(curve_df)
    names(curve_df) <- c(names(curve_df)[1:(colnum-1)], "type")
  }

  curve_df
}

#
# Make a dataframe for plotting with regular curves
#
.dataframe_curve <- function(obj, uniq_modnames, uniq_dsids, modnames, dsids,
                           dsid_modnames, curvetype_names) {

  curve_df <- NULL
  for (curvetype in names(curvetype_names)) {
    curves <- obj[[curvetype_names[[curvetype]]]]
    for (i in seq_along(curves)) {
      x <- curves[[i]][["x"]]
      y <- curves[[i]][["y"]]

      modname <- factor(rep(modnames[i], length(x)), levels = uniq_modnames)
      dsid <- factor(rep(dsids[i], length(x)), levels = uniq_dsids)
      dsid_modname <- factor(rep(paste(modnames[i], dsids[i], sep = ":"),
                                 length(x)),
                             levels = dsid_modnames)
      curvename <- factor(rep(curvetype, length(x)),
                          levels = names(curvetype_names))
      curve_df <- rbind(curve_df, data.frame(x = x, y = y, modname = modname,
                                             dsid = dsid,
                                             dsid_modname = dsid_modname,
                                             curvetype = curvename))
    }
  }

  curve_df
}

#
# Make a dataframe for plotting with average curves
#
.dataframe_curve_avg <- function(obj, uniq_modnames, uniq_dsids, modnames, dsids,
                               dsid_modnames, curvetype_names) {

  grp_avg <- attr(obj, "grp_avg")
  curve_df <- NULL
  for (curvetype in names(curvetype_names)) {
    avgcurves <- grp_avg[[curvetype_names[[curvetype]]]]

    for (i in seq_along(avgcurves)) {
      x <- avgcurves[[i]][["x"]]
      y <- avgcurves[[i]][["y_avg"]]
      ymin <- avgcurves[[i]][["y_ci_l"]]
      ymax <- avgcurves[[i]][["y_ci_h"]]

      modname <- factor(rep(uniq_modnames[i], length(x)),
                        levels = uniq_modnames)
      curvename <- factor(rep(curvetype, length(x)),
                          levels = names(curvetype_names))
      curve_df <- rbind(curve_df, data.frame(x = x, y = y,
                                             ymin = ymin, ymax = ymax,
                                             modname = modname,
                                             curvetype = curvename))
    }
  }

  curve_df
}
back to top