https://github.com/cran/bayestestR
Revision 23ea3229abe72a5f23dcf3a4cfcd3478d744b536 authored by Dominique Makowski on 20 June 2019, 11:50:03 UTC, committed by cran-robot on 20 June 2019, 11:50:03 UTC
1 parent 9985109
Raw File
Tip revision: 23ea3229abe72a5f23dcf3a4cfcd3478d744b536 authored by Dominique Makowski on 20 June 2019, 11:50:03 UTC
version 0.2.2
Tip revision: 23ea322
probability_of_direction.html
<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml">

<head>

<meta charset="utf-8">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />
<meta name="viewport" content="width=device-width, initial-scale=1">

<style type="text/css">
@font-face {
font-family: octicons-link;
src: url(data:font/woff;charset=utf-8;base64,d09GRgABAAAAAAZwABAAAAAACFQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEU0lHAAAGaAAAAAgAAAAIAAAAAUdTVUIAAAZcAAAACgAAAAoAAQAAT1MvMgAAAyQAAABJAAAAYFYEU3RjbWFwAAADcAAAAEUAAACAAJThvmN2dCAAAATkAAAABAAAAAQAAAAAZnBnbQAAA7gAAACyAAABCUM+8IhnYXNwAAAGTAAAABAAAAAQABoAI2dseWYAAAFsAAABPAAAAZwcEq9taGVhZAAAAsgAAAA0AAAANgh4a91oaGVhAAADCAAAABoAAAAkCA8DRGhtdHgAAAL8AAAADAAAAAwGAACfbG9jYQAAAsAAAAAIAAAACABiATBtYXhwAAACqAAAABgAAAAgAA8ASm5hbWUAAAToAAABQgAAAlXu73sOcG9zdAAABiwAAAAeAAAAME3QpOBwcmVwAAAEbAAAAHYAAAB/aFGpk3jaTY6xa8JAGMW/O62BDi0tJLYQincXEypYIiGJjSgHniQ6umTsUEyLm5BV6NDBP8Tpts6F0v+k/0an2i+itHDw3v2+9+DBKTzsJNnWJNTgHEy4BgG3EMI9DCEDOGEXzDADU5hBKMIgNPZqoD3SilVaXZCER3/I7AtxEJLtzzuZfI+VVkprxTlXShWKb3TBecG11rwoNlmmn1P2WYcJczl32etSpKnziC7lQyWe1smVPy/Lt7Kc+0vWY/gAgIIEqAN9we0pwKXreiMasxvabDQMM4riO+qxM2ogwDGOZTXxwxDiycQIcoYFBLj5K3EIaSctAq2kTYiw+ymhce7vwM9jSqO8JyVd5RH9gyTt2+J/yUmYlIR0s04n6+7Vm1ozezUeLEaUjhaDSuXHwVRgvLJn1tQ7xiuVv/ocTRF42mNgZGBgYGbwZOBiAAFGJBIMAAizAFoAAABiAGIAznjaY2BkYGAA4in8zwXi+W2+MjCzMIDApSwvXzC97Z4Ig8N/BxYGZgcgl52BCSQKAA3jCV8CAABfAAAAAAQAAEB42mNgZGBg4f3vACQZQABIMjKgAmYAKEgBXgAAeNpjYGY6wTiBgZWBg2kmUxoDA4MPhGZMYzBi1AHygVLYQUCaawqDA4PChxhmh/8ODDEsvAwHgMKMIDnGL0x7gJQCAwMAJd4MFwAAAHjaY2BgYGaA4DAGRgYQkAHyGMF8NgYrIM3JIAGVYYDT+AEjAwuDFpBmA9KMDEwMCh9i/v8H8sH0/4dQc1iAmAkALaUKLgAAAHjaTY9LDsIgEIbtgqHUPpDi3gPoBVyRTmTddOmqTXThEXqrob2gQ1FjwpDvfwCBdmdXC5AVKFu3e5MfNFJ29KTQT48Ob9/lqYwOGZxeUelN2U2R6+cArgtCJpauW7UQBqnFkUsjAY/kOU1cP+DAgvxwn1chZDwUbd6CFimGXwzwF6tPbFIcjEl+vvmM/byA48e6tWrKArm4ZJlCbdsrxksL1AwWn/yBSJKpYbq8AXaaTb8AAHja28jAwOC00ZrBeQNDQOWO//sdBBgYGRiYWYAEELEwMTE4uzo5Zzo5b2BxdnFOcALxNjA6b2ByTswC8jYwg0VlNuoCTWAMqNzMzsoK1rEhNqByEyerg5PMJlYuVueETKcd/89uBpnpvIEVomeHLoMsAAe1Id4AAAAAAAB42oWQT07CQBTGv0JBhagk7HQzKxca2sJCE1hDt4QF+9JOS0nbaaYDCQfwCJ7Au3AHj+LO13FMmm6cl7785vven0kBjHCBhfpYuNa5Ph1c0e2Xu3jEvWG7UdPDLZ4N92nOm+EBXuAbHmIMSRMs+4aUEd4Nd3CHD8NdvOLTsA2GL8M9PODbcL+hD7C1xoaHeLJSEao0FEW14ckxC+TU8TxvsY6X0eLPmRhry2WVioLpkrbp84LLQPGI7c6sOiUzpWIWS5GzlSgUzzLBSikOPFTOXqly7rqx0Z1Q5BAIoZBSFihQYQOOBEdkCOgXTOHA07HAGjGWiIjaPZNW13/+lm6S9FT7rLHFJ6fQbkATOG1j2OFMucKJJsxIVfQORl+9Jyda6Sl1dUYhSCm1dyClfoeDve4qMYdLEbfqHf3O/AdDumsjAAB42mNgYoAAZQYjBmyAGYQZmdhL8zLdDEydARfoAqIAAAABAAMABwAKABMAB///AA8AAQAAAAAAAAAAAAAAAAABAAAAAA==) format('woff');
}
body {
-webkit-text-size-adjust: 100%;
text-size-adjust: 100%;
color: #333;
font-family: "Helvetica Neue", Helvetica, "Segoe UI", Arial, freesans, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
font-size: 16px;
line-height: 1.6;
word-wrap: break-word;
}
a {
background-color: transparent;
}
a:active,
a:hover {
outline: 0;
}
strong {
font-weight: bold;
}
h1 {
font-size: 2em;
margin: 0.67em 0;
}
img {
border: 0;
}
hr {
box-sizing: content-box;
height: 0;
}
pre {
overflow: auto;
}
code,
kbd,
pre {
font-family: monospace, monospace;
font-size: 1em;
}
input {
color: inherit;
font: inherit;
margin: 0;
}
html input[disabled] {
cursor: default;
}
input {
line-height: normal;
}
input[type="checkbox"] {
box-sizing: border-box;
padding: 0;
}
table {
border-collapse: collapse;
border-spacing: 0;
}
td,
th {
padding: 0;
}
* {
box-sizing: border-box;
}
input {
font: 13px / 1.4 Helvetica, arial, nimbussansl, liberationsans, freesans, clean, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
}
a {
color: #4078c0;
text-decoration: none;
}
a:hover,
a:active {
text-decoration: underline;
}
hr {
height: 0;
margin: 15px 0;
overflow: hidden;
background: transparent;
border: 0;
border-bottom: 1px solid #ddd;
}
hr:before {
display: table;
content: "";
}
hr:after {
display: table;
clear: both;
content: "";
}
h1,
h2,
h3,
h4,
h5,
h6 {
margin-top: 15px;
margin-bottom: 15px;
line-height: 1.1;
}
h1 {
font-size: 30px;
}
h2 {
font-size: 21px;
}
h3 {
font-size: 16px;
}
h4 {
font-size: 14px;
}
h5 {
font-size: 12px;
}
h6 {
font-size: 11px;
}
blockquote {
margin: 0;
}
ul,
ol {
padding: 0;
margin-top: 0;
margin-bottom: 0;
}
ol ol,
ul ol {
list-style-type: lower-roman;
}
ul ul ol,
ul ol ol,
ol ul ol,
ol ol ol {
list-style-type: lower-alpha;
}
dd {
margin-left: 0;
}
code {
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
font-size: 12px;
}
pre {
margin-top: 0;
margin-bottom: 0;
font: 12px Consolas, "Liberation Mono", Menlo, Courier, monospace;
}
.select::-ms-expand {
opacity: 0;
}
.octicon {
font: normal normal normal 16px/1 octicons-link;
display: inline-block;
text-decoration: none;
text-rendering: auto;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
-webkit-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
.octicon-link:before {
content: '\f05c';
}
.markdown-body:before {
display: table;
content: "";
}
.markdown-body:after {
display: table;
clear: both;
content: "";
}
.markdown-body>*:first-child {
margin-top: 0 !important;
}
.markdown-body>*:last-child {
margin-bottom: 0 !important;
}
a:not([href]) {
color: inherit;
text-decoration: none;
}
.anchor {
display: inline-block;
padding-right: 2px;
margin-left: -18px;
}
.anchor:focus {
outline: none;
}
h1,
h2,
h3,
h4,
h5,
h6 {
margin-top: 1em;
margin-bottom: 16px;
font-weight: bold;
line-height: 1.4;
}
h1 .octicon-link,
h2 .octicon-link,
h3 .octicon-link,
h4 .octicon-link,
h5 .octicon-link,
h6 .octicon-link {
color: #000;
vertical-align: middle;
visibility: hidden;
}
h1:hover .anchor,
h2:hover .anchor,
h3:hover .anchor,
h4:hover .anchor,
h5:hover .anchor,
h6:hover .anchor {
text-decoration: none;
}
h1:hover .anchor .octicon-link,
h2:hover .anchor .octicon-link,
h3:hover .anchor .octicon-link,
h4:hover .anchor .octicon-link,
h5:hover .anchor .octicon-link,
h6:hover .anchor .octicon-link {
visibility: visible;
}
h1 {
padding-bottom: 0.3em;
font-size: 2.25em;
line-height: 1.2;
border-bottom: 1px solid #eee;
}
h1 .anchor {
line-height: 1;
}
h2 {
padding-bottom: 0.3em;
font-size: 1.75em;
line-height: 1.225;
border-bottom: 1px solid #eee;
}
h2 .anchor {
line-height: 1;
}
h3 {
font-size: 1.5em;
line-height: 1.43;
}
h3 .anchor {
line-height: 1.2;
}
h4 {
font-size: 1.25em;
}
h4 .anchor {
line-height: 1.2;
}
h5 {
font-size: 1em;
}
h5 .anchor {
line-height: 1.1;
}
h6 {
font-size: 1em;
color: #777;
}
h6 .anchor {
line-height: 1.1;
}
p,
blockquote,
ul,
ol,
dl,
table,
pre {
margin-top: 0;
margin-bottom: 16px;
}
hr {
height: 4px;
padding: 0;
margin: 16px 0;
background-color: #e7e7e7;
border: 0 none;
}
ul,
ol {
padding-left: 2em;
}
ul ul,
ul ol,
ol ol,
ol ul {
margin-top: 0;
margin-bottom: 0;
}
li>p {
margin-top: 16px;
}
dl {
padding: 0;
}
dl dt {
padding: 0;
margin-top: 16px;
font-size: 1em;
font-style: italic;
font-weight: bold;
}
dl dd {
padding: 0 16px;
margin-bottom: 16px;
}
blockquote {
padding: 0 15px;
color: #777;
border-left: 4px solid #ddd;
}
blockquote>:first-child {
margin-top: 0;
}
blockquote>:last-child {
margin-bottom: 0;
}
table {
display: block;
width: 100%;
overflow: auto;
word-break: normal;
word-break: keep-all;
}
table th {
font-weight: bold;
}
table th,
table td {
padding: 6px 13px;
border: 1px solid #ddd;
}
table tr {
background-color: #fff;
border-top: 1px solid #ccc;
}
table tr:nth-child(2n) {
background-color: #f8f8f8;
}
img {
max-width: 100%;
box-sizing: content-box;
background-color: #fff;
}
code {
padding: 0;
padding-top: 0.2em;
padding-bottom: 0.2em;
margin: 0;
font-size: 85%;
background-color: rgba(0,0,0,0.04);
border-radius: 3px;
}
code:before,
code:after {
letter-spacing: -0.2em;
content: "\00a0";
}
pre>code {
padding: 0;
margin: 0;
font-size: 100%;
word-break: normal;
white-space: pre;
background: transparent;
border: 0;
}
.highlight {
margin-bottom: 16px;
}
.highlight pre,
pre {
padding: 16px;
overflow: auto;
font-size: 85%;
line-height: 1.45;
background-color: #f7f7f7;
border-radius: 3px;
}
.highlight pre {
margin-bottom: 0;
word-break: normal;
}
pre {
word-wrap: normal;
}
pre code {
display: inline;
max-width: initial;
padding: 0;
margin: 0;
overflow: initial;
line-height: inherit;
word-wrap: normal;
background-color: transparent;
border: 0;
}
pre code:before,
pre code:after {
content: normal;
}
kbd {
display: inline-block;
padding: 3px 5px;
font-size: 11px;
line-height: 10px;
color: #555;
vertical-align: middle;
background-color: #fcfcfc;
border: solid 1px #ccc;
border-bottom-color: #bbb;
border-radius: 3px;
box-shadow: inset 0 -1px 0 #bbb;
}
.pl-c {
color: #969896;
}
.pl-c1,
.pl-s .pl-v {
color: #0086b3;
}
.pl-e,
.pl-en {
color: #795da3;
}
.pl-s .pl-s1,
.pl-smi {
color: #333;
}
.pl-ent {
color: #63a35c;
}
.pl-k {
color: #a71d5d;
}
.pl-pds,
.pl-s,
.pl-s .pl-pse .pl-s1,
.pl-sr,
.pl-sr .pl-cce,
.pl-sr .pl-sra,
.pl-sr .pl-sre {
color: #183691;
}
.pl-v {
color: #ed6a43;
}
.pl-id {
color: #b52a1d;
}
.pl-ii {
background-color: #b52a1d;
color: #f8f8f8;
}
.pl-sr .pl-cce {
color: #63a35c;
font-weight: bold;
}
.pl-ml {
color: #693a17;
}
.pl-mh,
.pl-mh .pl-en,
.pl-ms {
color: #1d3e81;
font-weight: bold;
}
.pl-mq {
color: #008080;
}
.pl-mi {
color: #333;
font-style: italic;
}
.pl-mb {
color: #333;
font-weight: bold;
}
.pl-md {
background-color: #ffecec;
color: #bd2c00;
}
.pl-mi1 {
background-color: #eaffea;
color: #55a532;
}
.pl-mdr {
color: #795da3;
font-weight: bold;
}
.pl-mo {
color: #1d3e81;
}
kbd {
display: inline-block;
padding: 3px 5px;
font: 11px Consolas, "Liberation Mono", Menlo, Courier, monospace;
line-height: 10px;
color: #555;
vertical-align: middle;
background-color: #fcfcfc;
border: solid 1px #ccc;
border-bottom-color: #bbb;
border-radius: 3px;
box-shadow: inset 0 -1px 0 #bbb;
}
.task-list-item {
list-style-type: none;
}
.task-list-item+.task-list-item {
margin-top: 3px;
}
.task-list-item input {
margin: 0 0.35em 0.25em -1.6em;
vertical-align: middle;
}
:checked+.radio-label {
z-index: 1;
position: relative;
border-color: #4078c0;
}
.sourceLine {
display: inline-block;
}
code .kw { color: #000000; }
code .dt { color: #ed6a43; }
code .dv { color: #009999; }
code .bn { color: #009999; }
code .fl { color: #009999; }
code .ch { color: #009999; }
code .st { color: #183691; }
code .co { color: #969896; }
code .ot { color: #0086b3; }
code .al { color: #a61717; }
code .fu { color: #63a35c; }
code .er { color: #a61717; background-color: #e3d2d2; }
code .wa { color: #000000; }
code .cn { color: #008080; }
code .sc { color: #008080; }
code .vs { color: #183691; }
code .ss { color: #183691; }
code .im { color: #000000; }
code .va {color: #008080; }
code .cf { color: #000000; }
code .op { color: #000000; }
code .bu { color: #000000; }
code .ex { color: #000000; }
code .pp { color: #999999; }
code .at { color: #008080; }
code .do { color: #969896; }
code .an { color: #008080; }
code .cv { color: #008080; }
code .in { color: #008080; }
</style>
<style>
body {
  box-sizing: border-box;
  min-width: 200px;
  max-width: 980px;
  margin: 0 auto;
  padding: 45px;
  padding-top: 0px;
}
</style>

</head>

<body>

<h1 id="probability-of-direction-pd">Probability of Direction (pd)</h1>
<ul>
<li><a href="#what-is-the-pd">What is the <em>pd?</em></a></li>
<li><a href="#relationship-with-the-p-value">Relationship with the <em>p</em>-value</a></li>
<li><a href="#methods-of-computation">Methods of computation</a></li>
<li><a href="#methods-comparison">Methods comparison</a>
<ul>
<li><a href="#correlation">Correlation</a></li>
<li><a href="#accuracy">Accuracy</a></li>
</ul></li>
</ul>
<p>This vignette can be referred to by citing the package:</p>
<ul>
<li>Makowski, D., Ben-Shachar M. S. &amp; Lüdecke, D. (2019). <em>Understand and Describe Bayesian Models and Posterior Distributions using bayestestR</em>. Available from <a href="https://github.com/easystats/bayestestR">https://github.com/easystats/bayestestR</a>. DOI: <a href="https://zenodo.org/record/2556486">10.5281/zenodo.2556486</a>.</li>
</ul>
<hr />
<h1 id="what-is-the-pd">What is the <em>pd?</em></h1>
<p>The <strong>Probability of Direction (pd)</strong> is an index of <strong>effect existence</strong>, ranging from 50% to 100%, representing the certainty with which an effect goes in a particular direction (<em>i.e.</em>, is positive or negative).</p>
<p>Beyond its <strong>simplicity of interpretation, understanding and computation</strong>, this index also presents other interesting properties:</p>
<ul>
<li>It is <strong>independent from the model</strong>: It is solely based on the posterior distributions and does not require any additional information from the data or the model.</li>
<li>It is <strong>robust</strong> to the scale of both the response variable and the predictors.</li>
<li>It is strongly correlated with the frequentist <strong><em>p</em>-value</strong>, and can thus be used to draw parallels and give some reference to readers non-familiar with Bayesian statistics.</li>
</ul>
<p>However, this index is not relevant to assess the magnitude and importance of an effect (the meaning of “significance”), which is better achieved through other indices such as the <a href="https://easystats.github.io/bayestestR/articles/region_of_practical_equivalence.html">ROPE percentage</a>. In fact, indices of significance and existence are totally independent. You can have an effect with a <em>pd</em> of <strong>99.99%</strong>, for which the whole posterior distribution is concentrated in the <code>[0.0001, 0.002]</code> range. In this case, the effect is <strong>positive with a high certainty</strong>, but also <strong>not significant</strong> (<em>i.e.</em>, very small).</p>
<p>Indices of effect existence, such as the <em>pd</em>, are particularly useful in exploratory research or clinical studies, for which the focus is to make sure that the effect of interest is not in the opposite direction (for clinical studies, that a treatment is not harmful). However, once the effect’s direction is confirmed, the focus should shift toward its significance, including a precise estimation of its magnitude, relevance and importance.</p>
<h1 id="relationship-with-the-p-value">Relationship with the <em>p</em>-value</h1>
<p>In most cases, it seems that the <em>pd</em> has a direct correspondance with the frequentist <strong>one-sided <em>p</em>-value</strong> through the formula: [p_{one sided} = 1-p_d/100] Similarly, the <strong>two-sided <em>p</em>-value</strong> (the most commonly reported one) is equivalent through the formula: [p_{two sided} = 2*(1-p_d/100)] Thus, the two-sided <em>p</em>-value of respectively <strong>.1</strong>, <strong>.05</strong>, <strong>.01</strong> and <strong>.001</strong> would correspond approximately to a <em>pd</em> of <strong>95%</strong>, <strong>97.5%</strong>, <strong>99.5%</strong> and <strong>99.95%</strong> .</p>
<img src="" title="Correlation between the frequentist p-value and the probability of direction (pd)" alt="Correlation between the frequentist p-value and the probability of direction (pd)" style="display: block; margin: auto;" />

<blockquote>
<p><strong>But if it’s like the <em>p</em>-value, it must be bad because the <em>p</em>-value is bad [<em>insert reference to the reproducibility crisis</em>].</strong></p>
</blockquote>
<p>In fact, this aspect of the reproducibility crisis might have been misunderstood. Indeed, it is not that the <em>p</em>-value is an intrinsically bad or wrong index. Rather, it is its <strong>misuse</strong>, <strong>misunderstanding</strong> and <strong>misinterpretation</strong> that fuels the decay of the situation. For instance, the fact that the <strong>pd</strong> is highly correlated with the <em>p</em>-value suggests that the latter is more an index of effect <em>existence</em> than <em>significance</em> (<em>i.e.</em>, “worth of interest”). The Bayesian version, the <strong>pd</strong>, has an intuitive meaning and makes obvious the fact that <strong>all thresholds are arbitrary</strong>. Nevertheless, the <strong>mathematical and interpretative transparency</strong> of the <strong>pd</strong>, and its reconceptualisation as an index of effect existence, offers a valuable insight into the characterization of Bayesian results. Moreover, its concomittant proximity with the frequentist <em>p</em>-value makes it a perfect metric to ease the transition of psychological research into the adoption of the Bayesian framework.</p>
<h1 id="methods-of-computation">Methods of computation</h1>
<p>The most <strong>simple and direct</strong> way to compute the <strong>pd</strong> is to 1) look at the median’s sign, 2) select the portion of the posterior of the same sign and 3) compute the percentage that this portion represents. This “simple” method is the most straigtfoward, but its precision is directly tied to the number of posterior draws.</p>
<p>The second approach relies on <a href="https://easystats.github.io/bayestestR/reference/estimate_density.html"><strong>density estimation</strong></a>. It starts by estimating the density function (for which many methods are available), and then computing the <a href="https://easystats.github.io/bayestestR/reference/area_under_curve.html"><strong>area under the curve</strong></a> (AUC) of the density curve on the other side of 0. The density-based method could hypothetically be considered as more precise, but strongly depends on the method used to estimate the density function.</p>
<h1 id="methods-comparison">Methods comparison</h1>
<p>Let’s compare the 4 available methods, the <strong>direct</strong> method and 3 <strong>density-based</strong> methods differing by their density estimation algorithm (see <a href="https://easystats.github.io/bayestestR/reference/estimate_density.html"><code>estimate_density</code></a>).</p>
<h2 id="correlation">Correlation</h2>
<p>Let’s start by testing the proximity and similarity of the results obtained by different methods.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb1-1" title="1"><span class="kw">library</span>(bayestestR)</a>
<a class="sourceLine" id="cb1-2" title="2"><span class="kw">library</span>(logspline)</a>
<a class="sourceLine" id="cb1-3" title="3"><span class="kw">library</span>(KernSmooth)</a>
<a class="sourceLine" id="cb1-4" title="4"></a>
<a class="sourceLine" id="cb1-5" title="5"><span class="co"># Compute the correlations</span></a>
<a class="sourceLine" id="cb1-6" title="6">data &lt;-<span class="st"> </span><span class="kw">data.frame</span>()</a>
<a class="sourceLine" id="cb1-7" title="7"><span class="cf">for</span>(the_mean <span class="cf">in</span> <span class="kw">runif</span>(<span class="dv">25</span>, <span class="dv">0</span>, <span class="dv">4</span>)){</a>
<a class="sourceLine" id="cb1-8" title="8">  <span class="cf">for</span>(the_sd <span class="cf">in</span> <span class="kw">runif</span>(<span class="dv">25</span>, <span class="fl">0.5</span>, <span class="dv">4</span>)){</a>
<a class="sourceLine" id="cb1-9" title="9">    x &lt;-<span class="st"> </span><span class="kw">rnorm</span>(<span class="dv">100</span>, the_mean, <span class="kw">abs</span>(the_sd))</a>
<a class="sourceLine" id="cb1-10" title="10">    data &lt;-<span class="st"> </span><span class="kw">rbind</span>(data,</a>
<a class="sourceLine" id="cb1-11" title="11">      <span class="kw">data.frame</span>(<span class="st">&quot;direct&quot;</span> =<span class="st"> </span><span class="kw">pd</span>(x),</a>
<a class="sourceLine" id="cb1-12" title="12">                 <span class="st">&quot;kernel&quot;</span> =<span class="st"> </span><span class="kw">pd</span>(x, <span class="dt">method=</span><span class="st">&quot;kernel&quot;</span>),</a>
<a class="sourceLine" id="cb1-13" title="13">                 <span class="st">&quot;logspline&quot;</span> =<span class="st"> </span><span class="kw">pd</span>(x, <span class="dt">method=</span><span class="st">&quot;logspline&quot;</span>),</a>
<a class="sourceLine" id="cb1-14" title="14">                 <span class="st">&quot;KernSmooth&quot;</span> =<span class="st"> </span><span class="kw">pd</span>(x, <span class="dt">method=</span><span class="st">&quot;KernSmooth&quot;</span>)</a>
<a class="sourceLine" id="cb1-15" title="15">                 ))</a>
<a class="sourceLine" id="cb1-16" title="16">  }</a>
<a class="sourceLine" id="cb1-17" title="17">}</a>
<a class="sourceLine" id="cb1-18" title="18">data &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(<span class="kw">sapply</span>(data, as.numeric))</a>
<a class="sourceLine" id="cb1-19" title="19"></a>
<a class="sourceLine" id="cb1-20" title="20"><span class="co"># Visualize the correlations</span></a>
<a class="sourceLine" id="cb1-21" title="21"><span class="kw">library</span>(ggplot2)</a>
<a class="sourceLine" id="cb1-22" title="22"><span class="kw">library</span>(GGally)</a>
<a class="sourceLine" id="cb1-23" title="23"></a>
<a class="sourceLine" id="cb1-24" title="24">GGally<span class="op">::</span><span class="kw">ggpairs</span>(data) <span class="op">+</span></a>
<a class="sourceLine" id="cb1-25" title="25"><span class="st">  </span><span class="kw">theme_classic</span>()</a></code></pre></div>
<img src="" style="display: block; margin: auto;" />

<p>All methods give are highly correlated and give very similar results. That means that the method choice is not a drastic game changer and cannot be used to tweak the results too much.</p>
<h2 id="accuracy">Accuracy</h2>
<p>To test the accuracy of each methods, we will start by computing the <strong>direct <em>pd</em></strong> from a very dense distribution (with a large amount of observations). This will be our baseline, or “true” <em>pd</em>. Then, we will iteratively draw smaller samples from this parent distribution, and we will compute the <em>pd</em> with different methods. The closer this estimate is from the reference one, the better.</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb2-1" title="1">data &lt;-<span class="st"> </span><span class="kw">data.frame</span>()</a>
<a class="sourceLine" id="cb2-2" title="2"><span class="cf">for</span>(i <span class="cf">in</span> <span class="dv">1</span><span class="op">:</span><span class="dv">25</span>){</a>
<a class="sourceLine" id="cb2-3" title="3">  the_mean &lt;-<span class="st"> </span><span class="kw">runif</span>(<span class="dv">1</span>, <span class="dv">0</span>, <span class="dv">4</span>)</a>
<a class="sourceLine" id="cb2-4" title="4">  the_sd &lt;-<span class="st"> </span><span class="kw">abs</span>(<span class="kw">runif</span>(<span class="dv">1</span>, <span class="fl">0.5</span>, <span class="dv">4</span>))</a>
<a class="sourceLine" id="cb2-5" title="5">  parent_distribution &lt;-<span class="st"> </span><span class="kw">rnorm</span>(<span class="dv">100000</span>, the_mean, the_sd)</a>
<a class="sourceLine" id="cb2-6" title="6">  true_pd &lt;-<span class="st"> </span><span class="kw">pd</span>(parent_distribution)</a>
<a class="sourceLine" id="cb2-7" title="7">  </a>
<a class="sourceLine" id="cb2-8" title="8">  <span class="cf">for</span>(j <span class="cf">in</span> <span class="dv">1</span><span class="op">:</span><span class="dv">25</span>){</a>
<a class="sourceLine" id="cb2-9" title="9">    sample_size &lt;-<span class="st"> </span><span class="kw">round</span>(<span class="kw">runif</span>(<span class="dv">1</span>, <span class="dv">25</span>, <span class="dv">5000</span>))</a>
<a class="sourceLine" id="cb2-10" title="10">    subsample &lt;-<span class="st"> </span><span class="kw">sample</span>(parent_distribution, sample_size)</a>
<a class="sourceLine" id="cb2-11" title="11">    data &lt;-<span class="st"> </span><span class="kw">rbind</span>(data,</a>
<a class="sourceLine" id="cb2-12" title="12">      <span class="kw">data.frame</span>(<span class="st">&quot;sample_size&quot;</span> =<span class="st"> </span>sample_size, </a>
<a class="sourceLine" id="cb2-13" title="13">                 <span class="st">&quot;true&quot;</span> =<span class="st"> </span>true_pd,</a>
<a class="sourceLine" id="cb2-14" title="14">                 <span class="st">&quot;direct&quot;</span> =<span class="st"> </span><span class="kw">pd</span>(subsample) <span class="op">-</span><span class="st"> </span>true_pd,</a>
<a class="sourceLine" id="cb2-15" title="15">                 <span class="st">&quot;kernel&quot;</span> =<span class="st"> </span><span class="kw">pd</span>(subsample, <span class="dt">method=</span><span class="st">&quot;kernel&quot;</span>)<span class="op">-</span><span class="st"> </span>true_pd,</a>
<a class="sourceLine" id="cb2-16" title="16">                 <span class="st">&quot;logspline&quot;</span> =<span class="st"> </span><span class="kw">pd</span>(subsample, <span class="dt">method=</span><span class="st">&quot;logspline&quot;</span>) <span class="op">-</span><span class="st"> </span>true_pd,</a>
<a class="sourceLine" id="cb2-17" title="17">                 <span class="st">&quot;KernSmooth&quot;</span> =<span class="st"> </span><span class="kw">pd</span>(subsample, <span class="dt">method=</span><span class="st">&quot;KernSmooth&quot;</span>) <span class="op">-</span><span class="st"> </span>true_pd</a>
<a class="sourceLine" id="cb2-18" title="18">                 ))</a>
<a class="sourceLine" id="cb2-19" title="19">  }</a>
<a class="sourceLine" id="cb2-20" title="20">}</a>
<a class="sourceLine" id="cb2-21" title="21">data &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(<span class="kw">sapply</span>(data, as.numeric))</a></code></pre></div>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb3-1" title="1"><span class="kw">library</span>(tidyr)</a>
<a class="sourceLine" id="cb3-2" title="2"><span class="kw">library</span>(dplyr)</a>
<a class="sourceLine" id="cb3-3" title="3"></a>
<a class="sourceLine" id="cb3-4" title="4">data <span class="op">%&gt;%</span><span class="st"> </span></a>
<a class="sourceLine" id="cb3-5" title="5"><span class="st">  </span>tidyr<span class="op">::</span><span class="kw">gather</span>(Method, Distance, <span class="op">-</span>sample_size, <span class="op">-</span>true) <span class="op">%&gt;%</span><span class="st"> </span></a>
<a class="sourceLine" id="cb3-6" title="6"><span class="st">  </span><span class="kw">ggplot</span>(<span class="kw">aes</span>(<span class="dt">x=</span>sample_size, <span class="dt">y =</span> Distance, <span class="dt">color =</span> Method, <span class="dt">fill=</span> Method)) <span class="op">+</span></a>
<a class="sourceLine" id="cb3-7" title="7"><span class="st">  </span><span class="kw">geom_point</span>(<span class="dt">alpha=</span><span class="fl">0.3</span>, <span class="dt">stroke=</span><span class="dv">0</span>, <span class="dt">shape=</span><span class="dv">16</span>) <span class="op">+</span></a>
<a class="sourceLine" id="cb3-8" title="8"><span class="st">  </span><span class="kw">geom_smooth</span>(<span class="dt">alpha=</span><span class="fl">0.2</span>) <span class="op">+</span></a>
<a class="sourceLine" id="cb3-9" title="9"><span class="st">  </span><span class="kw">geom_hline</span>(<span class="dt">yintercept=</span><span class="dv">0</span>) <span class="op">+</span></a>
<a class="sourceLine" id="cb3-10" title="10"><span class="st">  </span><span class="kw">theme_classic</span>() <span class="op">+</span></a>
<a class="sourceLine" id="cb3-11" title="11"><span class="st">  </span><span class="kw">xlab</span>(<span class="st">&quot;</span><span class="ch">\n</span><span class="st">Distribution Size&quot;</span>)</a></code></pre></div>
<img src="" style="display: block; margin: auto;" />

<p>The “Kernel” based density methods seems to consistently underestimate the <em>pd</em>. Interestingly, the “direct” method appears as being the more reliable, even in the case of small number of posterior draws.</p>

</body>
</html>
back to top