1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
optimize_metric <- function(data, x, class, metric_func = youden,
                            pos_class = NULL, neg_class = NULL, minmax,
                            direction, metric_name = "metric", loess = FALSE,
                            spline = FALSE, gam = FALSE, return_roc = TRUE,
                            tol_metric, use_midpoints, ...) {
    args <- list(...)
    metric_name_call <- as.character(substitute(metric_func))
    if (metric_name_call != "metric_func") metric_name <- metric_name_call
    roccurve <- roc(data = data, x = !!x, class = !!class, pos_class = pos_class,
                    neg_class = neg_class, direction = direction)
    m <- metric_func(tp = roccurve[["tp"]], fp = roccurve[["fp"]],
                     tn = roccurve[["tn"]], fn = roccurve[["fn"]], ...)
    m <- sanitize_metric(m, m_name = metric_name, n = nrow(roccurve))
    roccurve$m <- as.numeric(m)
    if (!is.null(colnames(m))) metric_name <- colnames(m)
    if (loess) {
        roccurve$m_unsmoothed <- roccurve$m
        finite_x <- is.finite(roccurve$x.sorted)
        finite_m <- is.finite(roccurve$m)
        finite_roc <- (finite_x + finite_m) == 2
        mod <- fANCOVA::loess.as(x = roccurve$x.sorted[finite_roc],
                        y = roccurve$m[finite_roc],
                        criterion = args$criterion, degree = args$degree,
                        family = args$family, user.span = args$user.span)
        roccurve$m <- NA
        roccurve$m[finite_roc] <- mod$fitted
    }
    if (spline) {
        roccurve$m_unsmoothed <- roccurve$m
        finite_x <- is.finite(roccurve$x.sorted)
        finite_m <- is.finite(roccurve$m)
        finite_roc <- (finite_x + finite_m) == 2
        if (is.function(args$nknots)) {
            nknots <- args$nknots(data = data, x = x)
            message(paste("nknots:", nknots))
        } else if (is.numeric(args$nknots)) {
            nknots <- args$nknots
        }
        if (!is.null(args[["df"]])) {
            mod <- stats::smooth.spline(x = roccurve$x.sorted[finite_roc],
                                        y = roccurve$m[finite_roc],
                                        w = args[["w"]], spar = args[["spar"]],
                                        cv = FALSE, df = args[["df"]],
                                        nknots = nknots, df.offset = args[["df_offset"]],
                                        penalty = args[["penalty"]],
                                        control.spar = args[["control_spar"]],
                                        tol = 1e-100)
        } else {
            mod <- stats::smooth.spline(x = roccurve$x.sorted[finite_roc],
                                        y = roccurve$m[finite_roc],
                                        w = args[["w"]], spar = args[["spar"]],
                                        cv = FALSE,
                                        nknots = nknots, df.offset = args[["df_offset"]],
                                        penalty = args[["penalty"]],
                                        control.spar = args[["control_spar"]],
                                        tol = 1e-100)
        }
        # smooth.spline builds bins of x values that are closer to each other
        # than tol, so not all original x values may be returned
        if (length(mod$x) != sum(finite_roc)) {
            stop("Not all original x values returned by smooth.spline")
        }
        roccurve$m <- NA
        roccurve$m[finite_roc] <- rev(mod$y)
    }
    if (gam) {
        roccurve$m_unsmoothed <- roccurve$m
        finite_x <- is.finite(roccurve$x.sorted)
        finite_m <- is.finite(roccurve$m)
        finite_roc <- (finite_x + finite_m) == 2
        mod <- mgcv::gam(stats::as.formula(paste(paste0(args$formula[2], " ~"),
                                                 args$formula[3])),
                         data = roccurve[finite_roc, ],
                         optimizer = args[["optimizer"]])
        roccurve$m <- NA
        roccurve$m[finite_roc] <- mod$fitted.values
    }
    if (minmax == "max") {
        m_vals <- stats::na.omit(roccurve$m)
        if (length(m_vals) == 0) stop("All metric values are missing.")
        max_m <- max(m_vals)
        opt <- which((roccurve$m >= (max_m - tol_metric)) &
                         (roccurve$m <= (max_m + tol_metric)))
        oc <- roccurve[["x.sorted"]][opt]
        m_oc <- max_m
    } else if (minmax == "min") {
        m_vals <- stats::na.omit(roccurve$m)
        if (length(m_vals) == 0) stop("All metric values are missing.")
        min_m <- min(m_vals)
        opt <- which((roccurve$m >= (min_m - tol_metric)) &
                         (roccurve$m <= (min_m + tol_metric)))
        oc <- roccurve[["x.sorted"]][opt]
        m_oc <- min_m
    }
    if (length(oc) > 1) {
        res <- tibble::tibble(optimal_cutpoint = list(oc))
        if (use_midpoints) {
            res$optimal_cutpoint <- list(midpoint(oc = unlist(res$optimal_cutpoint),
                                                  x = unlist(data[, x], use.names = FALSE),
                                                  direction = direction))
        }
    } else {
        res <- tibble::tibble(optimal_cutpoint = oc)
        if (use_midpoints) {
            res$optimal_cutpoint <- midpoint(oc = unlist(res$optimal_cutpoint),
                                             x = unlist(data[, x], use.names = FALSE),
                                             direction = direction)
        }
    }
    if (return_roc) {
        res$roc_curve <- list(roccurve)
    }
    if (loess) metric_name <- paste0("loess_", metric_name)
    if (spline) metric_name <- paste0("spline_", metric_name)
    if (gam) metric_name <- paste0("gam_", metric_name)
    res[, metric_name] <- m_oc
    return(res)
}

#' Optimize a metric function in binary classification
#'
#' Given a function for computing a metric in \code{metric_func}, these functions
#' maximize or minimize that metric by selecting an optimal cutpoint.
#' The metric function should accept the following inputs:
#' \itemize{
#'  \item \code{tp}: vector of number of true positives
#'  \item \code{fp}: vector of number of false positives
#'  \item \code{tn}: vector of number of true negatives
#'  \item \code{fn}: vector of number of false negatives
#' }
#'
#' The above inputs are arrived at by using all unique values in \code{x}, Inf, or
#' -Inf as possible cutpoints for classifying the variable in class.
#'
#' @return A tibble with the columns \code{optimal_cutpoint}, the corresponding metric
#' value and \code{roc_curve}, a nested tibble that includes all possible cutoffs
#' and the corresponding numbers of true and false positives / negatives and
#' all corresponding metric values.
#'
#' @inheritParams oc_youden_normal
#' @param metric_func (function) A function that computes a
#' metric to be maximized. See description.
#' @param tol_metric All cutpoints will be returned that lead to a metric
#' value in the interval [m_max - tol_metric, m_max + tol_metric] where
#' m_max is the maximum achievable metric value. This can be used to return
#' multiple decent cutpoints and to avoid floating-point problems.
#' @param use_midpoints (logical) If TRUE (default FALSE) the returned optimal
#' cutpoint will be the mean of the optimal cutpoint and the next highest
#' observation (for direction = ">") or the next lowest observation
#' (for direction = "<") which avoids biasing the optimal cutpoint.
#' @param ... Further arguments that will be passed to \code{metric_func}.
#' @examples
#' cutpointr(suicide, dsi, suicide, method = maximize_metric, metric = accuracy)
#' @name maximize_metric
#' @family method functions
#' @export
maximize_metric <- function(data, x, class, metric_func = youden,
                            pos_class = NULL, neg_class = NULL,
                            direction, tol_metric,
                            use_midpoints, ...) {
    metric_name <- as.character(substitute(metric_func))
    optimize_metric(data = data, x = x, class = class, metric_func = metric_func,
                    pos_class = pos_class, neg_class = neg_class, minmax = "max",
                    direction = direction, metric_name = metric_name,
                    tol_metric = tol_metric, use_midpoints = use_midpoints, ...)
}

#' @examples
#' cutpointr(suicide, dsi, suicide, method = minimize_metric, metric = abs_d_sens_spec)
#' @rdname maximize_metric
#' @export
minimize_metric <- function(data, x, class, metric_func = youden,
                            pos_class = NULL, neg_class = NULL,
                            direction, tol_metric, use_midpoints, ...) {
    metric_name <- as.character(substitute(metric_func))
    optimize_metric(data = data, x = x, class = class, metric_func = metric_func,
                    pos_class = pos_class, neg_class = neg_class, minmax = "min",
                    direction = direction, metric_name = metric_name,
                    tol_metric = tol_metric, use_midpoints = use_midpoints, ...)
}

#' Optimize a metric function in binary classification after LOESS smoothing
#'
#' Given a function for computing a metric in \code{metric_func}, these functions
#' smooth the function of metric value per cutpoint using LOESS, then
#' maximize or minimize the metric by selecting an optimal cutpoint. For further details
#' on the LOESS smoothing see \code{?fANCOVA::loess.as}.
#' The \code{metric} function should accept the following inputs:
#' \itemize{
#'  \item \code{tp}: vector of number of true positives
#'  \item \code{fp}: vector of number of false positives
#'  \item \code{tn}: vector of number of true negatives
#'  \item \code{fn}: vector of number of false negatives
#' }
#'
#' The above inputs are arrived at by using all unique values in \code{x}, Inf, and
#' -Inf as possible cutpoints for classifying the variable in class.
#'
#' @return A tibble with the columns \code{optimal_cutpoint}, the corresponding metric
#' value and \code{roc_curve}, a nested tibble that includes all possible cutoffs
#' and the corresponding numbers of true and false positives / negatives and
#' all corresponding metric values.
#'
#' @inheritParams oc_youden_normal
#' @param metric_func (function) A function that computes a
#' metric to be maximized. See description.
#' @param criterion the criterion for automatic smoothing parameter selection:
#'  "aicc" denotes bias-corrected AIC criterion, "gcv" denotes generalized
#'  cross-validation.
#' @param degree the degree of the local polynomials to be used. It can be
#'  0, 1 or 2.
#' @param family if "gaussian" fitting is by least-squares, and if "symmetric"
#'  a re-descending M estimator is used with Tukey's biweight function.
#' @param user.span The user-defined parameter which controls the degree of
#' smoothing
#' @param tol_metric All cutpoints will be returned that lead to a metric
#' value in the interval [m_max - tol_metric, m_max + tol_metric] where
#' m_max is the maximum achievable metric value. This can be used to return
#' multiple decent cutpoints and to avoid floating-point problems.
#' @param use_midpoints (logical) If TRUE (default FALSE) the returned optimal
#' cutpoint will be the mean of the optimal cutpoint and the next highest
#' observation (for direction = ">") or the next lowest observation
#' (for direction = "<") which avoids biasing the optimal cutpoint.
#' @param ... Further arguments that will be passed to metric_func or the
#' loess smoother.
#'
#' @source Xiao-Feng Wang (2010). fANCOVA: Nonparametric Analysis of Covariance.
#'  https://CRAN.R-project.org/package=fANCOVA
#' @source Leeflang, M. M., Moons, K. G., Reitsma, J. B., & Zwinderman, A. H.
#' (2008). Bias in sensitivity and specificity caused by data-driven selection
#' of optimal cutoff values: mechanisms, magnitude, and solutions.
#' Clinical Chemistry, (4), 729–738.
#' @examples
#' oc <- cutpointr(suicide, dsi, suicide, gender, method = maximize_loess_metric,
#' criterion = "aicc", family = "symmetric", degree = 2, user.span = 0.7,
#' metric = accuracy)
#' plot_metric(oc)
#' @name maximize_loess_metric
#' @family method functions
#' @export
maximize_loess_metric <- function(data, x, class, metric_func = youden,
                            pos_class = NULL, neg_class = NULL, direction,
                            criterion = "aicc", degree = 1, family = "symmetric",
                            user.span = NULL, tol_metric, use_midpoints, ...) {
    if (!requireNamespace("fANCOVA", quietly = TRUE)) {
        stop("fANCOVA package has to be installed to use LOESS smoothing.")
    }
    metric_name <- as.character(substitute(metric_func))
    if (is.function(user.span)) {
        us <- user.span(data = data, x = x)
    } else {
        us <- user.span
    }
    optimize_metric(data = data, x = x, class = class, metric_func = metric_func,
                    pos_class = pos_class, neg_class = neg_class, minmax = "max",
                    direction = direction, metric_name = metric_name,
                    criterion = criterion, degree = degree, family = family,
                    user.span = us, loess = TRUE,
                    tol_metric = tol_metric, use_midpoints = use_midpoints, ...)
}

#' @examples
#' oc <- cutpointr(suicide, dsi, suicide, gender, method = minimize_loess_metric,
#' criterion = "aicc", family = "symmetric", degree = 2, user.span = 0.7,
#' metric = misclassification_cost, cost_fp = 1, cost_fn = 10)
#' plot_metric(oc)
#' @rdname maximize_loess_metric
#' @export
minimize_loess_metric <- function(data, x, class, metric_func = youden,
                            pos_class = NULL, neg_class = NULL, direction,
                            criterion = "aicc", degree = 1, family = "symmetric",
                            user.span = NULL, tol_metric, use_midpoints, ...) {
    if (!requireNamespace("fANCOVA", quietly = TRUE)) {
        stop("fANCOVA package has to be installed to use LOESS smoothing.")
    }
    metric_name <- as.character(substitute(metric_func))
    if (is.function(user.span)) {
        us <- user.span(data = data, x = x)
    } else {
        us <- user.span
    }
    optimize_metric(data = data, x = x, class = class, metric_func = metric_func,
                    pos_class = pos_class, neg_class = neg_class, minmax = "min",
                    direction = direction, metric_name = metric_name,
                    criterion = criterion, degree = degree, family = family,
                    loess = TRUE, user.span = us,
                    tol_metric = tol_metric, use_midpoints = use_midpoints, ...)
}

#' Calculate bandwidth for LOESS smoothing of metric functions by rule of thumb
#'
#' This function implements a rule of thumb for selecting the bandwidth when
#' smoothing a function of metric values per cutpoint value, particularly
#' in \code{maximize_loess_metric} and \code{minimize_loess_metric}.
#'
#' The function used for calculating the bandwidth is 0.1 * xsd / sqrt(xn),
#' where xsd is the standard deviation of the unique values of the predictor
#' variable (i.e. all cutpoints) and xn is the number of unique predictor values.
#'
#' @param data A data frame
#' @param x The predictor variable
user_span_cutpointr <- function(data, x) {
    finite_x <- is.finite(data[[x]])
    xsd <- stats::sd(data[[x]][finite_x], na.rm = TRUE)
    xn <- length(unique(data[[x]][finite_x]))

    print(0.1 * xsd / sqrt(xn))
    return(0.1 * xsd / sqrt(xn))
}

#' Optimize a metric function in binary classification after bootstrapping
#'
#' Given a function for computing a metric in \code{metric_func}, these functions
#' bootstrap the data \code{boot_cut} times and
#' maximize or minimize the metric by selecting an optimal cutpoint. The returned
#' optimal cutpoint is the result of applying \code{summary_func}, e.g. the mean,
#' to all optimal cutpoints that were determined in the bootstrap samples.
#' The \code{metric} function should accept the following inputs:
#' \itemize{
#'  \item \code{tp}: vector of number of true positives
#'  \item \code{fp}: vector of number of false positives
#'  \item \code{tn}: vector of number of true negatives
#'  \item \code{fn}: vector of number of false negatives
#' }
#'
#' The above inputs are arrived at by using all unique values in \code{x}, Inf, and
#' -Inf as possible cutpoints for classifying the variable in class.
#' The reported metric represents the usual in-sample performance of the
#' determined cutpoint.
#'
#' @return A tibble with the column \code{optimal_cutpoint}
#'
#' @inheritParams oc_youden_normal
#' @param metric_func (function) A function that computes a single number
#' metric to be maximized. See description.
#' @param summary_func (function) After obtaining the bootstrapped optimal
#' cutpoints this function, e.g. mean or median, is applied to arrive at a single cutpoint.
#' @param boot_cut (numeric) Number of bootstrap repetitions over which the mean
#' optimal cutpoint is calculated.
#' @param inf_rm (logical) whether to remove infinite cutpoints before
#' calculating the summary.
#' @param tol_metric All cutpoints will be passed to \code{summary_func}
#' that lead to a metric
#' value in the interval [m_max - tol_metric, m_max + tol_metric] where
#' m_max is the maximum achievable metric value. This can be used to return
#' multiple decent cutpoints and to avoid floating-point problems.
#' @param use_midpoints (logical) If TRUE (default FALSE) the returned optimal
#' cutpoint will be the mean of the optimal cutpoint and the next highest
#' observation (for direction = ">") or the next lowest observation
#' (for direction = "<") which avoids biasing the optimal cutpoint.
#'
#' @examples
#' set.seed(100)
#' cutpointr(suicide, dsi, suicide, method = maximize_boot_metric,
#'           metric = accuracy, boot_cut = 30)
#' @name maximize_boot_metric
#' @family method functions
#' @export
maximize_boot_metric <- function(data, x, class, metric_func = youden,
                            pos_class = NULL, neg_class = NULL, direction,
                            summary_func = mean, boot_cut = 50, inf_rm = TRUE,
                            tol_metric, use_midpoints, ...) {
    metric_name <- as.character(substitute(metric_func))
    optimal_cutpoints <- purrr::map(1:boot_cut, function(i) {
        b_ind <- simple_boot(data = data, dep_var = class, stratify = FALSE)
        opt_cut <- optimize_metric(data = data[b_ind, ],
                                   x = x, class = class,
                                   metric_func = metric_func,
                                   pos_class = pos_class, neg_class = neg_class,
                                   minmax = "max", direction = direction,
                                   metric_name = metric_name, return_roc = FALSE,
                                   tol_metric = tol_metric,
                                   use_midpoints = use_midpoints, ...)
        return(unlist(opt_cut$optimal_cutpoint))
    })
    optimal_cutpoints <- unlist(optimal_cutpoints)
    if (inf_rm) optimal_cutpoints <- optimal_cutpoints[is.finite(optimal_cutpoints)]
    return(data.frame(optimal_cutpoint = summary_func(optimal_cutpoints)))
}

#' @rdname maximize_boot_metric
#' @examples
#' set.seed(100)
#' cutpointr(suicide, dsi, suicide, method = minimize_boot_metric,
#'           metric = abs_d_sens_spec, boot_cut = 30)
#' @export
minimize_boot_metric <- function(data, x, class, metric_func = youden,
                            pos_class = NULL, neg_class = NULL, direction,
                            summary_func = mean, boot_cut = 50, inf_rm = TRUE,
                            tol_metric, use_midpoints, ...) {
    metric_name <- as.character(substitute(metric_func))
    optimal_cutpoints <- purrr::map(1:boot_cut, function(i) {
        b_ind <- simple_boot(data = data, dep_var = class, stratify = FALSE)
        opt_cut <- optimize_metric(data = data[b_ind, ],
                                   x = x, class = class,
                                   metric_func = metric_func,
                                   pos_class = pos_class, neg_class = neg_class,
                                   minmax = "min", direction = direction,
                                   metric_name = metric_name, return_roc = FALSE,
                                   tol_metric = tol_metric,
                                   use_midpoints = use_midpoints, ...)
        return(unlist(opt_cut$optimal_cutpoint))
    })
    optimal_cutpoints <- unlist(optimal_cutpoints)
    if (inf_rm) optimal_cutpoints <- optimal_cutpoints[is.finite(optimal_cutpoints)]
    return(data.frame(optimal_cutpoint = summary_func(optimal_cutpoints)))
}



#' Optimize a metric function in binary classification after spline smoothing
#'
#' Given a function for computing a metric in \code{metric_func}, this function
#' smoothes the function of metric value per cutpoint using smoothing splines. Then it
#' optimizes the metric by selecting an optimal cutpoint. For further details
#' on the smoothing spline see \code{?stats::smooth.spline}.
#' The \code{metric} function should accept the following inputs:
#' \itemize{
#'  \item \code{tp}: vector of number of true positives
#'  \item \code{fp}: vector of number of false positives
#'  \item \code{tn}: vector of number of true negatives
#'  \item \code{fn}: vector of number of false negatives
#' }
#'
#' The above inputs are arrived at by using all unique values in \code{x}, Inf, and
#' -Inf as possible cutpoints for classifying the variable in class.
#'
#' @return A tibble with the columns \code{optimal_cutpoint}, the corresponding metric
#' value and \code{roc_curve}, a nested tibble that includes all possible cutoffs
#' and the corresponding numbers of true and false positives / negatives and
#' all corresponding metric values.
#'
#' @inheritParams oc_youden_normal
#' @param metric_func (function) A function that computes a
#' metric to be optimized. See description.
#' @param w Optional vector of weights of the same length as x; defaults to all 1.
#' @param df The desired equivalent number of degrees of freedom
#' (trace of the smoother matrix). Must be in (1,nx], nx the number of
#' unique x values.
#' @param spar Smoothing parameter, typically (but not necessarily) in (0,1].
#' When spar is specified, the coefficient lambda of the integral of the squared
#' second derivative in the fit (penalized log likelihood) criterion is a
#' monotone function of spar.
#' @param nknots Integer or function giving the number of knots. The function
#' should accept data and x (the name of the predictor variable) as inputs.
#' By default nknots = 0.1 * log(n_dat / n_cut) * n_cut where n_dat is the
#' number of observations and n_cut the number of unique predictor values.
#' @param df_offset Allows the degrees of freedom to be increased by df_offset
#' in the GCV criterion.
#' @param penalty The coefficient of the penalty for degrees of freedom in the
#' GCV criterion.
#' @param control_spar Optional list with named components controlling the root
#' finding when the smoothing parameter spar is computed, i.e., NULL. See
#' help("smooth.spline") for further information.
#' @param tol_metric All cutpoints will be returned that lead to a metric
#' value in the interval [m_max - tol_metric, m_max + tol_metric] where
#' m_max is the maximum achievable metric value. This can be used to return
#' multiple decent cutpoints and to avoid floating-point problems.
#' @param use_midpoints (logical) If TRUE (default FALSE) the returned optimal
#' cutpoint will be the mean of the optimal cutpoint and the next highest
#' observation (for direction = ">") or the next lowest observation
#' (for direction = "<") which avoids biasing the optimal cutpoint.
#' @param ... Further arguments that will be passed to metric_func.
#'
#' @examples
#' oc <- cutpointr(suicide, dsi, suicide, gender, method = maximize_spline_metric,
#' df = 5, metric = accuracy)
#' plot_metric(oc)
#' @export
#' @family method functions
#' @name maximize_spline_metric
maximize_spline_metric <- function(data, x, class, metric_func = youden,
                                   pos_class = NULL, neg_class = NULL, direction,
                                   w = NULL, df = NULL, spar = 1,
                                   nknots = cutpoint_knots, df_offset = NULL,
                                   penalty = 1, control_spar = list(),
                                   tol_metric, use_midpoints, ...) {
    metric_name <- as.character(substitute(metric_func))
    optimize_metric(data = data, x = x, class = class, metric_func = metric_func,
                    pos_class = pos_class, neg_class = neg_class, minmax = "max",
                    direction = direction, metric_name = metric_name,
                    w = w, df = df, spar = spar, nknots = nknots,
                    df_offset = df_offset, penalty = penalty,
                    control_spar = control_spar, spline = TRUE,
                    tol_metric = tol_metric, use_midpoints = use_midpoints, ...)
}

#' @rdname maximize_spline_metric
#' @export
minimize_spline_metric <- function(data, x, class, metric_func = youden,
                                   pos_class = NULL, neg_class = NULL, direction,
                                   w = NULL, df = NULL, spar = 1,
                                   nknots = cutpoint_knots,
                                   df_offset = NULL, penalty = 1,
                                   control_spar = list(), tol_metric,
                                   use_midpoints, ...) {
    metric_name <- as.character(substitute(metric_func))
    optimize_metric(data = data, x = x, class = class, metric_func = metric_func,
                    pos_class = pos_class, neg_class = neg_class, minmax = "min",
                    direction = direction, metric_name = metric_name,
                    w = w, df = df, spar = spar, nknots = nknots,
                    df_offset = df_offset, penalty = penalty,
                    control_spar = control_spar, spline = TRUE,
                    tol_metric = tol_metric, use_midpoints = use_midpoints, ...)
}

#' Calculate number of knots to use in spline smoothing
#'
#' This function calculates the number of knots
#' when using smoothing splines for smoothing a function of metric values per
#' cutpoint value. The function for calculating the number of knots is equal
#' to \code{stats::.nknots_smspl} but uses the number of unique cutpoints
#' in the data as n.
#'
#' @param data A data frame
#' @param x (character) The name of the predictor variable
#' @examples
#' cutpoint_knots(suicide, "dsi")
#' @export
cutpoint_knots <- function(data, x) {
    n_cut <- length(unique(data[[x]]))
    n_knots <- stats::.nknots.smspl(n_cut)
    return(n_knots)
}

#' Optimize a metric function in binary classification after smoothing via
#' generalized additive models
#'
#' Given a function for computing a metric in \code{metric_func}, these functions
#' smooth the function of metric value per cutpoint using generalized additive
#' models (as implemented in \pkg{mgcv}), then
#' maximize or minimize the metric by selecting an optimal cutpoint. For further details
#' on the GAM smoothing see \code{?mgcv::gam}.
#' The \code{metric} function should accept the following inputs:
#' \itemize{
#'  \item \code{tp}: vector of number of true positives
#'  \item \code{fp}: vector of number of false positives
#'  \item \code{tn}: vector of number of true negatives
#'  \item \code{fn}: vector of number of false negatives
#' }
#'
#' The above inputs are arrived at by using all unique values in \code{x}, Inf, and
#' -Inf as possible cutpoints for classifying the variable in class.
#'
#' @return A tibble with the columns \code{optimal_cutpoint}, the corresponding metric
#' value and \code{roc_curve}, a nested tibble that includes all possible cutoffs
#' and the corresponding numbers of true and false positives / negatives and
#' all corresponding metric values.
#'
#' @inheritParams oc_youden_normal
#' @param metric_func (function) A function that computes a
#' metric to be maximized. See description.
#' @param formula A GAM formula. See \code{help("gam", package = "mgcv")} for
#' details.
#' @param optimizer An array specifying the numerical optimization method to
#' use to optimize the smoothing parameter estimation criterion (given by method).
#' See \code{help("gam", package = "mgcv")} for details.
#' @param tol_metric All cutpoints will be returned that lead to a metric
#' value in the interval [m_max - tol_metric, m_max + tol_metric] where
#' m_max is the maximum achievable metric value. This can be used to return
#' multiple decent cutpoints and to avoid floating-point problems.
#' @param use_midpoints (logical) If TRUE (default FALSE) the returned optimal
#' cutpoint will be the mean of the optimal cutpoint and the next highest
#' observation (for direction = ">") or the next lowest observation
#' (for direction = "<") which avoids biasing the optimal cutpoint.
#' @param ... Further arguments that will be passed to metric_func or the
#' GAM smoother.
#'
#' @examples
#' oc <- cutpointr(suicide, dsi, suicide, gender, method = maximize_gam_metric,
#' metric = accuracy)
#' plot_metric(oc)
#' @name maximize_gam_metric
#' @family method functions
#' @export
maximize_gam_metric <- function(data, x, class, metric_func = youden,
                                pos_class = NULL, neg_class = NULL, direction,
                                formula = m ~ s(x.sorted),
                                optimizer = c("outer", "newton"),
                                tol_metric, use_midpoints, ...) {
    if (!requireNamespace("mgcv", quietly = TRUE)) {
        stop("mgcv package has to be installed to use GAM smoothing.")
    }
    formula <- as.character(formula)
    metric_name <- as.character(substitute(metric_func))
    optimize_metric(data = data, x = x, class = class, metric_func = metric_func,
                    pos_class = pos_class, neg_class = neg_class, minmax = "max",
                    direction = direction, metric_name = metric_name,
                    formula = formula, optimizer = optimizer, gam = TRUE,
                    tol_metric = tol_metric, use_midpoints = use_midpoints, ...)
}

#' @examples
#' oc <- cutpointr(suicide, dsi, suicide, gender, method = minimize_gam_metric,
#' metric = abs_d_sens_spec)
#' plot_metric(oc)
#' @rdname maximize_gam_metric
#' @export
minimize_gam_metric <- function(data, x, class, metric_func = youden,
                                pos_class = NULL, neg_class = NULL, direction,
                                formula = m ~ s(x.sorted),
                                optimizer = c("outer", "newton"),
                                tol_metric, use_midpoints, ...) {
    if (!requireNamespace("mgcv", quietly = TRUE)) {
        stop("mgcv package has to be installed to use GAM smoothing.")
    }
    metric_name <- as.character(substitute(metric_func))
    optimize_metric(data = data, x = x, class = class, metric_func = metric_func,
                    pos_class = pos_class, neg_class = neg_class, minmax = "min",
                    direction = direction, metric_name = metric_name,
                    formula = formula, optimizer = optimizer, gam = TRUE,
                    tol_metric = tol_metric, use_midpoints = use_midpoints, ...)
}