https://github.com/cran/precrec
Tip revision: 9c56b91df45d18e10abd76aa1359b1482955fbbc authored by Takaya Saito on 04 December 2015, 15:40:36 UTC
version 0.1.1
version 0.1.1
Tip revision: 9c56b91
etc_utils_fortify.R
#' Convert a curves and points object to a data frame for ggplot2
#'
#' The \code{fortify} function converts an S3 object generated by
#' \code{\link{evalmod}} to a data frame for
#' \code{\link[ggplot2]{ggplot2}}.
#'
#' @param model An S3 object generated by \code{\link{evalmod}}.
#' The \code{fortify} function takes one of the following S3 objects.
#'
#' \enumerate{
#'
#' \item ROC and Precision-Recall curves (mode = "rocprc")
#'
#' \tabular{lllll}{
#' \strong{S3 object}
#' \tab \tab \strong{# of models}
#' \tab \tab \strong{# of test datasets} \cr
#'
#' sscurves \tab \tab single \tab \tab single \cr
#' mscurves \tab \tab multiple \tab \tab single \cr
#' smcurves \tab \tab single \tab \tab multiple \cr
#' mmcurves \tab \tab multiple \tab \tab multiple
#' }
#'
#' \item Basic evaluation measures (mode = "basic")
#'
#' \tabular{lllll}{
#' \strong{S3 object}
#' \tab \tab \strong{# of models}
#' \tab \tab \strong{# of test datasets} \cr
#'
#' sspoints \tab \tab single \tab \tab single \cr
#' mspoints \tab \tab multiple \tab \tab single \cr
#' smpoints \tab \tab single \tab \tab multiple \cr
#' mmpoints \tab \tab multiple \tab \tab multiple
#' }
#' }
#'
#' @param raw_curves A Boolean value to specify whether raw curves are
#' shown instead of the average curve. It is effective only
#' when \code{raw_curves} is set to \code{TRUE}
#' of the \code{\link{evalmod}} function.
#'
#' @param ... Not used by this method.
#'
#' @return The \code{fortify} function returns a data frame for
#' \code{\link[ggplot2]{ggplot2}}.
#'
#' @seealso \code{\link{evalmod}} for generating S3 objects with performance
#' evaluation measures.
#' \code{\link{autoplot}} for plotting with \code{\link[ggplot2]{ggplot2}}.
#'
#' @examples
#'
#' ## Load ggplot2
#' library(ggplot2)
#'
#' #############################################################################
#' ### Single model & single test dataset
#' ###
#'
#' ## Load a dataset with 10 positives and 10 negatives
#' data(P10N10)
#'
#' ## Generate an sscurve object that contains ROC and Precision-Recall curves
#' sscurves <- evalmod(scores = P10N10$scores, labels = P10N10$labels)
#'
#' ## Fortify sscurves
#' ssdf <- fortify(sscurves)
#'
#' ## Plot a ROC curve
#' p_roc <- ggplot(subset(ssdf, curvetype == "ROC"), aes(x = x, y = y))
#' p_roc <- p_roc + geom_line()
#' \dontrun{p_roc}
#'
#' ## Plot a Precision-Recall curve
#' p_prc <- ggplot(subset(ssdf, curvetype == "PRC"), aes(x = x, y = y))
#' p_prc <- p_prc + geom_line()
#' \dontrun{p_prc}
#'
#' ## Generate an sspoints object that contains basic evaluation measures
#' sspoints <- evalmod(mode = "basic", scores = P10N10$scores,
#' labels = P10N10$labels)
#' ## Fortify sspoints
#' ssdf <- fortify(sscurves)
#'
#' ## Plot threshold vs. precision
#' p_prec <- ggplot(subset(ssdf, curvetype == "precision"), aes(x = x, y = y))
#' p_prec <- p_prc + geom_point()
#' \dontrun{p_prec}
#'
#'
#' #############################################################################
#' ### Multiple models & single test dataset
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(1, 100, 100, "all")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#' modnames = samps[["modnames"]])
#'
#' ## Generate an mscurve object that contains ROC and Precision-Recall curves
#' mscurves <- evalmod(mdat)
#'
#' ## Fortify mscurves
#' msdf <- fortify(mscurves)
#'
#' ## Plot ROC curve
#' df_roc <- subset(msdf, curvetype == "ROC")
#' p_roc <- ggplot(df_roc, aes(x = x, y = y, color = modname))
#' p_roc <- p_roc + geom_line()
#' \dontrun{p_roc}
#'
#' ## Fortified data frame can be used for plotting a Precision-Recall curve
#' df_prc <- subset(msdf, curvetype == "PRC")
#' p_prc <- ggplot(df_prc, aes(x = x, y = y, color = modname))
#' p_prc <- p_prc + geom_line()
#' \dontrun{p_prc}
#'
#' ## Generate an mspoints object that contains basic evaluation measures
#' mspoints <- evalmod(mdat, mode = "basic")
#'
#' ## Fortify mspoints
#' msdf <- fortify(mscurves)
#'
#' ## Plot threshold vs. precision
#' df_prec <- subset(msdf, curvetype == "precision")
#' p_prec <- ggplot(df_prec, aes(x = x, y = y, color = modname))
#' p_prec <- p_prc + geom_point()
#' \dontrun{p_prec}
#'
#'
#' #############################################################################
#' ### Single model & multiple test datasets
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(10, 100, 100, "good_er")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#' modnames = samps[["modnames"]],
#' dsids = samps[["dsids"]])
#'
#' ## Generate an smcurve object that contains ROC and Precision-Recall curves
#' smcurves <- evalmod(mdat, raw_curves = TRUE)
#'
#' ## Fortify smcurves
#' smdf <- fortify(smcurves)
#'
#' ## Plot average ROC curve
#' df_roc <- subset(smdf, curvetype == "ROC")
#' p_roc <- ggplot(df_roc, aes(x = x, y = y, ymin = ymin, ymax = ymax))
#' p_roc <- p_roc + geom_smooth(stat = "identity")
#' \dontrun{p_roc}
#'
#' ## Plot average Precision-Recall curve
#' df_prc <- subset(smdf, curvetype == "PRC")
#' p_prc <- ggplot(df_prc, aes(x = x, y = y, ymin = ymin, ymax = ymax))
#' p_prc <- p_prc + geom_smooth(stat = "identity")
#' \dontrun{p_prc}
#'
#' ## Generate an smpoints object that contains basic evaluation measures
#' smpoints <- evalmod(mdat, mode = "basic")
#'
#' ## Fortify smpoints
#' smdf <- fortify(smpoints)
#'
#' ## Plot threshold vs. precision
#' df_prec <- subset(smdf, curvetype == "precision")
#' p_prec <- ggplot(df_prec, aes(x = x, y = y, ymin = ymin, ymax = ymax))
#' p_prec <- p_prec + geom_ribbon(aes(min = ymin, ymax = ymax),
#' stat = "identity", alpha = 0.25,
#' fill = "grey25")
#' p_prec <- p_prec + geom_point(aes(x = x, y = y))
#' \dontrun{p_prec}
#'
#'
#' #############################################################################
#' ### Multiple models & multiple test datasets
#' ###
#'
#' ## Create sample datasets with 100 positives and 100 negatives
#' samps <- create_sim_samples(10, 100, 100, "all")
#' mdat <- mmdata(samps[["scores"]], samps[["labels"]],
#' modnames = samps[["modnames"]],
#' dsids = samps[["dsids"]])
#'
#' ## Generate an mscurve object that contains ROC and Precision-Recall curves
#' mmcurves <- evalmod(mdat, raw_curves = TRUE)
#'
#' ## Fortify mmcurves
#' mmdf <- fortify(mmcurves)
#'
#' ## Plot average ROC curve
#' df_roc <- subset(mmdf, curvetype == "ROC")
#' p_roc <- ggplot(df_roc, aes(x = x, y = y, ymin = ymin, ymax = ymax))
#' p_roc <- p_roc + geom_smooth(aes(color = modname), stat = "identity")
#' \dontrun{p_roc}
#'
#' ## Plot average Precision-Recall curve
#' df_prc <- subset(mmdf, curvetype == "PRC")
#' p_prc <- ggplot(df_prc, aes(x = x, y = y, ymin = ymin, ymax = ymax))
#' p_prc <- p_prc + geom_smooth(aes(color = modname), stat = "identity")
#' \dontrun{p_prc}
#'
#' ## Generate an mmpoints object that contains basic evaluation measures
#' mmpoints <- evalmod(mdat, mode = "basic")
#'
#' ## Fortify mmpoints
#' mmdf <- fortify(mmpoints)
#'
#' ## Plot threshold vs. precision
#' df_prec <- subset(smdf, curvetype == "precision")
#' p_prec <- ggplot(df_prec, aes(x = x, y = y, ymin = ymin, ymax = ymax))
#' p_prec <- p_prec + geom_ribbon(aes(min = ymin, ymax = ymax),
#' stat = "identity", alpha = 0.25,
#' fill = "grey25")
#' p_prec <- p_prec + geom_point(aes(x = x, y = y, color = modname))
#' \dontrun{p_prec}
#'
#' @name fortify
NULL
#
# Make a dataframe for plotting
#
.fortify_common <- function(obj, mode = "rocprc", raw_curves = TRUE, ...) {
# === Check package availability ===
.load_ggplot2()
# === Validate input arguments ===
.validate(obj)
new_mode <- .pmatch_mode(mode)
.check_mode(new_mode, obj)
.check_raw_curves(raw_curves, obj)
# Prepare variables
uniq_modnames <- attr(obj, "uniq_modnames")
uniq_dsids <- attr(obj, "uniq_dsids")
modnames <- attr(obj, "data_info")[["modnames"]]
dsids <- attr(obj, "data_info")[["dsids"]]
if (new_mode == "rocprc") {
curvetype_names <- list(ROC = "rocs", PRC = "prcs")
} else if (new_mode == "basic") {
curvetype_names <- list(error = "err", accuracy = "acc",
specificity = "sp", sensitivity = "sn",
precision = "prec")
}
# Make dsis-modname pairs
i <- 1
dsid_modnames <- vector(mode = "character",
length = length(uniq_modnames) * length(uniq_dsids))
for (modname in uniq_modnames) {
for (dsid in uniq_dsids) {
dsid_modnames[i] <- paste(modname, dsid, sep = ":")
i <- i + 1
}
}
# Create curve_df
if (raw_curves) {
curve_df <- .fortify_curve(obj, uniq_modnames, uniq_dsids, modnames,
dsids, dsid_modnames, curvetype_names)
} else {
curve_df <- .fortify_curve_avg(obj, uniq_modnames, uniq_dsids, modnames,
dsids, dsid_modnames, curvetype_names)
}
curve_df
}
#
# Make a dataframe for plotting with regular curves
#
.fortify_curve <- function(obj, uniq_modnames, uniq_dsids, modnames, dsids,
dsid_modnames, curvetype_names) {
curve_df <- NULL
for (curvetype in names(curvetype_names)) {
curves <- obj[[curvetype_names[[curvetype]]]]
for (i in seq_along(curves)) {
x <- curves[[i]][["x"]]
y <- curves[[i]][["y"]]
modname <- factor(rep(modnames[i], length(x)), levels = uniq_modnames)
dsid <- factor(rep(dsids[i], length(x)), levels = uniq_dsids)
dsid_modname <- factor(rep(paste(modnames[i], dsids[i], sep = ":"),
length(x)),
levels = dsid_modnames)
curvename <- factor(rep(curvetype, length(x)),
levels = names(curvetype_names))
curve_df <- rbind(curve_df, data.frame(x = x, y = y, modname = modname,
dsid = dsid,
dsid_modname = dsid_modname,
curvetype = curvename))
}
}
curve_df
}
#
# Make a dataframe for plotting with average curves
#
.fortify_curve_avg <- function(obj, uniq_modnames, uniq_dsids, modnames, dsids,
dsid_modnames, curvetype_names) {
grp_avg <- attr(obj, "grp_avg")
curve_df <- NULL
for (curvetype in names(curvetype_names)) {
avgcurves <- grp_avg[[curvetype_names[[curvetype]]]]
for (i in seq_along(avgcurves)) {
x <- avgcurves[[i]][["x"]]
y <- avgcurves[[i]][["y_avg"]]
ymin <- avgcurves[[i]][["y_ci_l"]]
ymax <- avgcurves[[i]][["y_ci_h"]]
modname <- factor(rep(uniq_modnames[i], length(x)),
levels = uniq_modnames)
curvename <- factor(rep(curvetype, length(x)),
levels = names(curvetype_names))
curve_df <- rbind(curve_df, data.frame(x = x, y = y,
ymin = ymin, ymax = ymax,
modname = modname,
curvetype = curvename))
}
}
curve_df
}