https://github.com/cran/cutpointr
Raw File
Tip revision: 4408233eb8624dea85ecf18e86d50c296165c3f2 authored by Christian Thiele on 13 April 2022, 17:12:29 UTC
version 1.1.2
Tip revision: 4408233
cutpointr_benchmarks.html
<!DOCTYPE html>

<html lang="en">

<head>

<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />

<meta name="viewport" content="width=device-width, initial-scale=1" />

<meta name="author" content="Christian Thiele" />

<meta name="date" content="2022-04-13" />

<title>cutpointr benchmarks</title>

<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
// Once the DOM is parsed, strip every attribute from the heading that opens
// each section div, so that only the wrapping div keeps them.
document.addEventListener('DOMContentLoaded', function() {
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched; any other first child is left alone.
    if (!/^h[1-6]$/i.test(node.tagName)) return;
    var attrs = node.attributes;
    // `attributes` is a live collection, so keep removing its first entry
    // until nothing is left.
    while (attrs.length > 0) node.removeAttribute(attrs[0].name);
  });
});
</script>
<script>// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
// v0.0.1
// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.

// After the DOM is ready, hide the empty line-anchor <a> elements inside
// highlighted code from assistive technology and from the keyboard tab order.
document.addEventListener('DOMContentLoaded', function() {
  // A single selector query visits each anchor exactly once. The nested
  // div, pre and code containers all carry the "sourceCode" class, so walking
  // every container separately would process the same anchors several times.
  var anchors = document.querySelectorAll('.sourceCode a');
  for (var i = 0; i < anchors.length; i++) {
    // Anchors with any content are left untouched.
    if (anchors[i].innerHTML !== "") continue;
    anchors[i].setAttribute('aria-hidden', 'true');
    // aria-hidden alone does not remove an element from the tab order, and
    // these anchors carry an href, so take them out of it explicitly.
    anchors[i].setAttribute('tabindex', '-1');
  }
});
</script>

<style type="text/css">
  /* Generic helper rules: wrapping for code, plus small-caps, underline,
     column, hanging-indent and task-list classes. */
  code{white-space: pre-wrap;}
  span.smallcaps{font-variant: small-caps;}
  span.underline{text-decoration: underline;}
  div.column{display: inline-block; vertical-align: top; width: 50%;} /* inline blocks at half width, aligned at the top */
  div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} /* first line is outdented by the same amount the block is indented */
  ul.task-list{list-style: none;} /* no list markers on task lists */
    </style>


<style type="text/css">
  /* Keep whitespace inside code verbatim, with no line wrapping. */
  code {
    white-space: pre;
  }
  /* Let elements with the sourceCode class overflow rather than clip. */
  .sourceCode {
    overflow: visible;
  }
</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
    color: #aaaaaa;
  }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
div.sourceCode
  {   }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */

</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function() {
  // Moves colour styling declared for `div.sourceCode` in the pandoc
  // stylesheet onto `pre.sourceCode` by replacing the rule in place.
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    // Only the sheet whose owner element carries data-origin="pandoc" is
    // rewritten. ownerNode is null for sheets loaded through @import, so
    // guard the lookup instead of letting a TypeError abort the whole pass.
    var owner = sheets[i].ownerNode;
    if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
    // Reading cssRules may throw; such sheets are skipped.
    try { var rules = sheets[i].cssRules; } catch (e) { continue; }
    for (var j = 0; j < rules.length; j++) {
      var rule = rules[j];
      // check if there is a div.sourceCode rule
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue;
      var style = rule.style.cssText;
      // check if color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') continue;
      // replace div.sourceCode by a pre.sourceCode rule at the same index,
      // so the position of the rule within the sheet is preserved
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>




<style type="text/css">body {
background-color: #fff;
margin: 1em auto;
max-width: 700px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.35;
}
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 400px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
/* Horizontal rule: a single 1px grey line on top and no other borders.
   The former `border-style: solid` declaration was dropped because the
   `border: none` shorthand that followed it reset the style anyway. */
hr {
border: none;
border-top: 1px solid #777;
margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap; 
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 85%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
/* Images: white padded frame with a thin, rounded grey border.
   Only the #CCCCCC border is kept; the earlier #DDDDDD border declaration
   was always overridden by it. */
img {
background-color: #FFFFFF;
padding: 2px;
border: 1px solid #CCCCCC;
border-radius: 3px;
margin: 0 5px;
}
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }

code > span.kw { color: #555; font-weight: bold; } 
code > span.dt { color: #902000; } 
code > span.dv { color: #40a070; } 
code > span.bn { color: #d14; } 
code > span.fl { color: #d14; } 
code > span.ch { color: #d14; } 
code > span.st { color: #d14; } 
code > span.co { color: #888888; font-style: italic; } 
code > span.ot { color: #007020; } 
code > span.al { color: #ff0000; font-weight: bold; } 
code > span.fu { color: #900; font-weight: bold; } 
code > span.er { color: #a61717; background-color: #e3d2d2; } 
</style>




</head>

<body>




<h1 class="title toc-ignore">cutpointr benchmarks</h1>
<h4 class="author">Christian Thiele</h4>
<h4 class="date">2022-04-13</h4>



<p>To offer a comparison to established solutions, <strong>cutpointr</strong> will be benchmarked against <code>optimal.cutpoints</code> from the <strong>OptimalCutpoints</strong> package, <strong>ThresholdROC</strong> and custom functions based on the <strong>ROCR</strong> and <strong>pROC</strong> packages. By generating data of different sizes the benchmarks will offer a comparison of the scalability of the different solutions.</p>
<p>Using <code>prediction</code> and <code>performance</code> from the <strong>ROCR</strong> package and <code>roc</code> from the <strong>pROC</strong> package, we can write functions for computing the cutpoint that maximizes the sum of sensitivity and specificity. <strong>pROC</strong> has a built-in function to optimize a few metrics:</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a><span class="co"># Return cutpoint that maximizes the sum of sensitivity and specificity</span></span>
<span id="cb1-2"><a href="#cb1-2"></a><span class="co"># ROCR package</span></span>
<span id="cb1-3"><a href="#cb1-3"></a>rocr_sensspec &lt;-<span class="st"> </span><span class="cf">function</span>(x, class) {</span>
<span id="cb1-4"><a href="#cb1-4"></a>    pred &lt;-<span class="st"> </span>ROCR<span class="op">::</span><span class="kw">prediction</span>(x, class)</span>
<span id="cb1-5"><a href="#cb1-5"></a>    perf &lt;-<span class="st"> </span>ROCR<span class="op">::</span><span class="kw">performance</span>(pred, <span class="st">&quot;sens&quot;</span>, <span class="st">&quot;spec&quot;</span>)</span>
<span id="cb1-6"><a href="#cb1-6"></a>    sens &lt;-<span class="st"> </span><span class="kw">slot</span>(perf, <span class="st">&quot;y.values&quot;</span>)[[<span class="dv">1</span>]]</span>
<span id="cb1-7"><a href="#cb1-7"></a>    spec &lt;-<span class="st"> </span><span class="kw">slot</span>(perf, <span class="st">&quot;x.values&quot;</span>)[[<span class="dv">1</span>]]</span>
<span id="cb1-8"><a href="#cb1-8"></a>    cut &lt;-<span class="st"> </span><span class="kw">slot</span>(perf, <span class="st">&quot;alpha.values&quot;</span>)[[<span class="dv">1</span>]]</span>
<span id="cb1-9"><a href="#cb1-9"></a>    cut[<span class="kw">which.max</span>(sens <span class="op">+</span><span class="st"> </span>spec)]</span>
<span id="cb1-10"><a href="#cb1-10"></a>}</span>
<span id="cb1-11"><a href="#cb1-11"></a></span>
<span id="cb1-12"><a href="#cb1-12"></a><span class="co"># pROC package</span></span>
<span id="cb1-13"><a href="#cb1-13"></a>proc_sensspec &lt;-<span class="st"> </span><span class="cf">function</span>(x, class) {</span>
<span id="cb1-14"><a href="#cb1-14"></a>    r &lt;-<span class="st"> </span>pROC<span class="op">::</span><span class="kw">roc</span>(class, x, <span class="dt">algorithm =</span> <span class="dv">2</span>, <span class="dt">levels =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="dv">1</span>), <span class="dt">direction =</span> <span class="st">&quot;&lt;&quot;</span>)</span>
<span id="cb1-15"><a href="#cb1-15"></a>    pROC<span class="op">::</span><span class="kw">coords</span>(r, <span class="st">&quot;best&quot;</span>, <span class="dt">ret=</span><span class="st">&quot;threshold&quot;</span>, <span class="dt">transpose =</span> <span class="ot">FALSE</span>)[<span class="dv">1</span>]</span>
<span id="cb1-16"><a href="#cb1-16"></a>}</span></code></pre></div>
<p>The benchmarking will be carried out using the <strong>microbenchmark</strong> package and randomly generated data. The values of the <code>x</code> predictor variable are drawn from a normal distribution which leads to a lot more unique values than were encountered before in the <code>suicide</code> data. Accordingly, the search for an optimal cutpoint is much more demanding, if all possible cutpoints are evaluated.</p>
<p>Benchmarks are run for sample sizes of 100, 1000, 1e4, 1e5, 1e6, and 1e7. For small sample sizes <strong>cutpointr</strong> is slower than the other solutions. While this should be of low practical importance, <strong>cutpointr</strong> scales more favorably with increasing sample size. The speed disadvantage in small samples that leads to the lower limit of around 25ms is mainly due to the nesting of the original data and the results that makes the compact output of <code>cutpointr</code> possible. This observation is emphasized by the fact that <code>cutpointr::roc</code> is also quite fast in small samples. For sample sizes &gt; 1e5 <strong>cutpointr</strong> is a little faster than the functions based on <strong>ROCR</strong> and <strong>pROC</strong>. Both of these solutions are generally faster than <strong>OptimalCutpoints</strong> and <strong>ThresholdROC</strong> with the exception of small samples. <strong>OptimalCutpoints</strong> and <strong>ThresholdROC</strong> had to be excluded from benchmarks with more than 1e4 observations due to high memory requirements and/or excessive run times, rendering the use of these packages in larger samples impractical.</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a><span class="co"># ROCR package</span></span>
<span id="cb2-2"><a href="#cb2-2"></a>rocr_roc &lt;-<span class="st"> </span><span class="cf">function</span>(x, class) {</span>
<span id="cb2-3"><a href="#cb2-3"></a>    pred &lt;-<span class="st"> </span>ROCR<span class="op">::</span><span class="kw">prediction</span>(x, class)</span>
<span id="cb2-4"><a href="#cb2-4"></a>    perf &lt;-<span class="st"> </span>ROCR<span class="op">::</span><span class="kw">performance</span>(pred, <span class="st">&quot;sens&quot;</span>, <span class="st">&quot;spec&quot;</span>)</span>
<span id="cb2-5"><a href="#cb2-5"></a>    <span class="kw">return</span>(<span class="ot">NULL</span>)</span>
<span id="cb2-6"><a href="#cb2-6"></a>}</span>
<span id="cb2-7"><a href="#cb2-7"></a></span>
<span id="cb2-8"><a href="#cb2-8"></a><span class="co"># pROC package</span></span>
<span id="cb2-9"><a href="#cb2-9"></a>proc_roc &lt;-<span class="st"> </span><span class="cf">function</span>(x, class) {</span>
<span id="cb2-10"><a href="#cb2-10"></a>    r &lt;-<span class="st"> </span>pROC<span class="op">::</span><span class="kw">roc</span>(class, x, <span class="dt">algorithm =</span> <span class="dv">2</span>, <span class="dt">levels =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="dv">1</span>), <span class="dt">direction =</span> <span class="st">&quot;&lt;&quot;</span>)</span>
<span id="cb2-11"><a href="#cb2-11"></a>    <span class="kw">return</span>(<span class="ot">NULL</span>)</span>
<span id="cb2-12"><a href="#cb2-12"></a>}</span></code></pre></div>
<p><img src="" alt="Plot of benchmark run times by sample size for each package" style="display: block; margin: auto;" /></p>
<table>
<thead>
<tr class="header">
<th align="right">n</th>
<th align="left">task</th>
<th align="right">OptimalCutpoints</th>
<th align="right">ROCR</th>
<th align="right">ThresholdROC</th>
<th align="right">cutpointr</th>
<th align="right">pROC</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="right">1e+02</td>
<td align="left">Cutpoint Estimation</td>
<td align="right">2.288702</td>
<td align="right">1.812802</td>
<td align="right">1.194301</td>
<td align="right">4.5018015</td>
<td align="right">0.662101</td>
</tr>
<tr class="even">
<td align="right">1e+03</td>
<td align="left">Cutpoint Estimation</td>
<td align="right">45.056801</td>
<td align="right">2.176401</td>
<td align="right">36.239852</td>
<td align="right">4.8394010</td>
<td align="right">0.981001</td>
</tr>
<tr class="odd">
<td align="right">1e+04</td>
<td align="left">Cutpoint Estimation</td>
<td align="right">2538.612001</td>
<td align="right">5.667101</td>
<td align="right">2503.801251</td>
<td align="right">8.5662515</td>
<td align="right">4.031701</td>
</tr>
<tr class="even">
<td align="right">1e+05</td>
<td align="left">Cutpoint Estimation</td>
<td align="right">NA</td>
<td align="right">43.118751</td>
<td align="right">NA</td>
<td align="right">45.3845010</td>
<td align="right">37.150151</td>
</tr>
<tr class="odd">
<td align="right">1e+06</td>
<td align="left">Cutpoint Estimation</td>
<td align="right">NA</td>
<td align="right">607.023851</td>
<td align="right">NA</td>
<td align="right">465.0032010</td>
<td align="right">583.095000</td>
</tr>
<tr class="even">
<td align="right">1e+07</td>
<td align="left">Cutpoint Estimation</td>
<td align="right">NA</td>
<td align="right">7850.258700</td>
<td align="right">NA</td>
<td align="right">5467.3328010</td>
<td align="right">7339.356101</td>
</tr>
<tr class="odd">
<td align="right">1e+02</td>
<td align="left">ROC curve calculation</td>
<td align="right">NA</td>
<td align="right">1.732651</td>
<td align="right">NA</td>
<td align="right">0.7973505</td>
<td align="right">0.447701</td>
</tr>
<tr class="even">
<td align="right">1e+03</td>
<td align="left">ROC curve calculation</td>
<td align="right">NA</td>
<td align="right">2.035852</td>
<td align="right">NA</td>
<td align="right">0.8593010</td>
<td align="right">0.694802</td>
</tr>
<tr class="odd">
<td align="right">1e+04</td>
<td align="left">ROC curve calculation</td>
<td align="right">NA</td>
<td align="right">5.662151</td>
<td align="right">NA</td>
<td align="right">1.8781510</td>
<td align="right">3.658050</td>
</tr>
<tr class="even">
<td align="right">1e+05</td>
<td align="left">ROC curve calculation</td>
<td align="right">NA</td>
<td align="right">42.820852</td>
<td align="right">NA</td>
<td align="right">11.0992510</td>
<td align="right">35.329301</td>
</tr>
<tr class="odd">
<td align="right">1e+06</td>
<td align="left">ROC curve calculation</td>
<td align="right">NA</td>
<td align="right">612.471901</td>
<td align="right">NA</td>
<td align="right">159.8100505</td>
<td align="right">610.433700</td>
</tr>
<tr class="even">
<td align="right">1e+07</td>
<td align="left">ROC curve calculation</td>
<td align="right">NA</td>
<td align="right">7806.385452</td>
<td align="right">NA</td>
<td align="right">2032.6935510</td>
<td align="right">7081.897251</td>
</tr>
</tbody>
</table>



<!-- code folding -->


<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Build a <script> element for MathJax at run time and attach it to <head>.
  (function () {
    var head = document.getElementsByTagName("head")[0];
    var loader = document.createElement("script");
    loader.type = "text/javascript";
    loader.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    head.appendChild(loader);
  })();
</script>

</body>
</html>
back to top