maximize_spline_metric.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/optimize_metric.R
\name{maximize_spline_metric}
\alias{maximize_spline_metric}
\alias{minimize_spline_metric}
\title{Optimize a metric function in binary classification after spline smoothing}
\usage{
maximize_spline_metric(data, x, class, metric_func = youden,
pos_class = NULL, neg_class = NULL, direction, w = NULL, df = NULL,
spar = 1, nknots = cutpoint_knots, df_offset = NULL, penalty = 1,
control_spar = list(), tol_metric, use_midpoints, ...)
minimize_spline_metric(data, x, class, metric_func = youden,
pos_class = NULL, neg_class = NULL, direction, w = NULL, df = NULL,
spar = 1, nknots = cutpoint_knots, df_offset = NULL, penalty = 1,
control_spar = list(), tol_metric, use_midpoints, ...)
}
\arguments{
\item{data}{A data frame or tibble in which the columns that are given in x
and class can be found.}
\item{x}{(character) The variable name to be used for classification,
e.g. predictions or test values.}
\item{class}{(character) The variable name indicating class membership.}
\item{metric_func}{(function) A function that computes a
metric to be optimized. See description.}
\item{pos_class}{The value of class that indicates the positive class.}
\item{neg_class}{The value of class that indicates the negative class.}
\item{direction}{(character) Use ">=" or "<=" to select whether an x value
>= or <= the cutoff predicts the positive class.}
\item{w}{Optional vector of weights of the same length as x; defaults to all 1.}
\item{df}{The desired equivalent number of degrees of freedom
(trace of the smoother matrix). Must be in (1, nx], where nx is the
number of unique x values.}
\item{spar}{Smoothing parameter, typically (but not necessarily) in (0,1].
When spar is specified, the coefficient lambda of the integral of the squared
second derivative in the fit (penalized log likelihood) criterion is a
monotone function of spar.}
\item{nknots}{Integer or function giving the number of knots. The function
should accept data and x (the name of the predictor variable) as inputs.
By default, \code{nknots = 0.1 * log(n_dat / n_cut) * n_cut}, where n_dat
is the number of observations and n_cut the number of unique predictor
values.}
\item{df_offset}{Allows the degrees of freedom to be increased by df_offset
in the GCV criterion.}
\item{penalty}{The coefficient of the penalty for degrees of freedom in the
GCV criterion.}
\item{control_spar}{Optional list with named components controlling the root
finding when the smoothing parameter spar is computed, i.e., when spar is
set to NULL. See help("smooth.spline") for further information.}
\item{tol_metric}{All cutpoints will be returned that lead to a metric
value in the interval [m_max - tol_metric, m_max + tol_metric] where
m_max is the maximum achievable metric value. This can be used to return
multiple decent cutpoints and to avoid floating-point problems.}
\item{use_midpoints}{(logical) If TRUE (default FALSE), the returned optimal
cutpoint will be the mean of the optimal cutpoint and the next highest
observation (for direction = ">=") or the next lowest observation
(for direction = "<="), which avoids biasing the optimal cutpoint.}
\item{...}{Further arguments that will be passed to metric_func.}
}
\value{
A tibble with the columns \code{optimal_cutpoint}, the corresponding metric
value, and \code{roc_curve}, a nested tibble that includes all possible
cutoffs, the corresponding numbers of true and false positives / negatives,
and all corresponding metric values.
}
\description{
Given a function for computing a metric in \code{metric_func}, this function
smooths the metric values over all possible cutpoints using smoothing
splines and then selects the cutpoint with the optimal smoothed metric
value. For further details on the smoothing spline see
\code{?stats::smooth.spline}.
The \code{metric} function should accept the following inputs:
\itemize{
\item \code{tp}: vector of number of true positives
\item \code{fp}: vector of number of false positives
\item \code{tn}: vector of number of true negatives
\item \code{fn}: vector of number of false negatives
}
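A minimal sketch of such a function (a user-defined metric, here a
hypothetical \code{informedness} helper equivalent to the Youden index)
could look like this:
\preformatted{
informedness <- function(tp, fp, tn, fn, ...) {
    sensitivity <- tp / (tp + fn)
    specificity <- tn / (tn + fp)
    # return a one-column matrix; the column name labels the metric
    matrix(sensitivity + specificity - 1, ncol = 1,
           dimnames = list(NULL, "informedness"))
}
}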
}
\details{
The above inputs are arrived at by using all unique values in \code{x}, Inf,
and -Inf as possible cutpoints for classifying the variable in \code{class}.
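For a single cutpoint and direction = ">=", these counts could be obtained
along the lines of the following sketch (illustrative only, not the
package's internal code; \code{cutpoint} is a placeholder value):
\preformatted{
predicted_pos <- data[[x]] >= cutpoint
tp <- sum(predicted_pos  & data[[class]] == pos_class)
fp <- sum(predicted_pos  & data[[class]] == neg_class)
fn <- sum(!predicted_pos & data[[class]] == pos_class)
tn <- sum(!predicted_pos & data[[class]] == neg_class)
}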
}
\examples{
oc <- cutpointr(suicide, dsi, suicide, gender, method = maximize_spline_metric,
df = 5, metric = accuracy)
plot_metric(oc)
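## A further sketch (illustrative, not part of the package's examples):
## a user-defined metric that counts misclassifications is minimized via
## minimize_spline_metric; nknots is supplied as a function of data and x,
## here one knot per unique predictor value.
count_errors <- function(tp, fp, tn, fn, ...) {
    # one-column matrix; the column name labels the metric in the output
    matrix(fp + fn, ncol = 1, dimnames = list(NULL, "count_errors"))
}
knots_per_value <- function(data, x) length(unique(data[[x]]))
oc_min <- cutpointr(suicide, dsi, suicide, method = minimize_spline_metric,
                    df = 5, nknots = knots_per_value, metric = count_errors)
plot_metric(oc_min)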
}
\seealso{
Other method functions: \code{\link{maximize_boot_metric}},
\code{\link{maximize_gam_metric}},
\code{\link{maximize_loess_metric}},
\code{\link{maximize_metric}}, \code{\link{oc_manual}},
\code{\link{oc_mean}}, \code{\link{oc_median}},
\code{\link{oc_youden_kernel}},
\code{\link{oc_youden_normal}}
}