{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Useful msprime docs:\n", "https://github.com/jeromekelleher/spg-chapter/blob/master/jupyter/msprime-chapter-examples.ipynb" ] }, { "cell_type": "code", "execution_count": 289, "metadata": { "scrolled": true }, "outputs": [], "source": [ "%matplotlib inline\n", "from __future__ import print_function\n", "import msprime\n", "import matplotlib.pyplot as plt\n", "import ipyrad.analysis as ipa\n", "import numpy as np\n", "import pandas as pd\n", "import ipyparallel as ipp\n", "import datetime\n", "import time\n", "import sys\n", "import os\n", "from tempfile import TemporaryFile\n", "from IPython.display import SVG, display\n", "\n", "simout = \"simout\"" ] }, { "cell_type": "code", "execution_count": 290, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " [####################] 100% done " ] } ], "source": [ "def progressbar(njobs, finished, msg=\"\", spacer=\" \"):\n", " \"\"\" prints a progress bar \"\"\"\n", " if njobs:\n", " progress = 100*(finished / float(njobs))\n", " else:\n", " progress = 100\n", "\n", " hashes = '#'*int(progress/5.)\n", " nohash = ' '*int(20-len(hashes))\n", "\n", " args = [spacer, hashes+nohash, int(progress), msg]\n", " print(\"\\r{}[{}] {:>3}% {} \".format(*args), end=\"\")\n", " sys.stdout.flush()\n", "\n", "for i in range(100):\n", " progressbar(100, i, \"watdo\")\n", "progressbar(100, 100, \"done\")" ] }, { "cell_type": "code", "execution_count": 282, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INFO: # PCs < # samples. Forcing # PCs = 8\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 282, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAFgCAYAAADuCe0ZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHwRJREFUeJzt3Xt0VeWd//HPNwkeICi3xCKBmjOYCIgXICDWdgZMabW1aqvTVrBj26mX36oTFMW26dBOS83SUlHzm479eWlnykVr661Vi0Vqq04n2qBVQYQWE5VbTRgMNKVHEr6/P85JPMTgAcnJfpLzfq3FWsneJ3t/uOWTZ+/n7MfcXQAAhCYv6gAAAHSHggIABImCAgAEiYICAASJggIABImCAgAEiYICAASJggIABImCAgAEqSDqAIeiqKjIS0tLo44BoA9bs2ZNs7sXR50DmfWpgiotLVV9fX3UMQD0YWb2atQZcHC4xAcACBIFBQAIEgUFAAhSn7oHBQDZsGbNmqMLCgrukDRJ/ODem/ZJWtvW1valqVOnvtF1JwUFIOcVFBTcMWrUqAnFxcU78/LyWCSvl+zbt8+ampombt++/Q5J53Tdz08KACBNKi4u3kU59a68vDwvLi5uUXLk+s79vZwHAEKURzlFI/Xn3m0XUVAAgCBRUABwiDasf/mIy8+9MH7xqWeWX37uhfEN618+IupM/VFOF1RjQ4PmX3SJLpv1Sc2/6BI1NjREHQlA4Dasf/mIr3x0bvkpP982Yubv9x55ys+3jfjKR+eW91RJ/fjHPx5mZlOfe+65gZK0YcOGI8rKyk6QpIceeujIWbNmHdcT5zlc6bmyJWcLqrGhQQtmz1H58j9qxm93qXz5H7Vg9hxKCsC7uumr3yo58/UjYzHLlyTFLF9nvn5k7KavfqukJ45/9913j5gyZcpfli5dOqInjteX5WxB1S6sUeWmmNL/kVVuiql2YU3EyQCEbM/2nQM6vm90iFm+9vx554DDPXZLS0tefX39kB/96EeN999///BD/fqHH354yPjx4yeOHz9+4oQJEybu3Lkzr6WlJe+0004rnzhx4oTy8vKJy5YtGyYlR0DxePyEz3zmM8eWlZWdcM4558QfeOCBI6dMmTL+2GOPnfT4448PlqT58+ePPu+88+IzZswoP/bYYyfdeOONRV3P29bWpssuu2zMpEmTJpSXl09cvHhxkSS9+uqrAyoqKo4fP378xLKyshNWrlw55FB+Pzn7PqjWLc3q7h9Z69YdESUC0BcMGjV8b8K37ff9I+HtGvS+o/ce7rGXL18+bObMmS0nnXRSYtiwYe1PPfXU4OLi4raD/fobb7xxVG1t7asf+chHWltaWvIGDx68T5IefvjhP40YMWLftm3bCk499dTxc+bMeVOSXn/99YE/+clPXpk6deqrJ5100oTly5ePrK+vf3nFihXDrrvuumNmzZq1SZLWr18/aM2aNet3796dP3ny5Innn39+S/p5b7755qKhQ4e2r127dv2ePXts2rRp4z/xiU/suuuuu4ZXVla23HDDDdvb2tq0e/fuQxoU5ewIqrCkSAlv329bwttVOHpkRIkA9AVXXf/NLSvH7k50fP9IeLtWjt2duOr6b2453GPfc889Iy688MKdknT++ef/76Fe5psxY8ZfrrnmmrHf+c53jm5ubs4fMGCA9u3bZ1deeeWY8vLyibNmzSp/4403jti8eXOBJJWUlCSmT5++Jz8/X+Xl5XvOOOOMXXl5eZoyZcpfN2/eHOs47llnnfXmkCFD/Jhjjmk77bTTdj355JOF6ed97LHHjrrnnntGjh8/fuLkyZMn7Ny5s+Cll14aOGPGjNa77rqraP78+aOfeeaZQcOHD993KL+fnB1BVS2q1oK6OZ2X+RLertXjElq8qDrqaAACdvyE8W/d8OjyjTd99Vsle/68c8Cg9x2994brf7Dl+Anj3zqc427fvj2/rq7uqI0bNw664oor1N7ebmbmV1111TseAXQgNTU1288777yWBx98cOgHPvCBCStXrtz45JNPFu7YsaPgxRdfXB+LxbykpOTEPXv25EnSEUcc0fner7y8PA0cONAlKT8/X+3t7daxz8z2O0/Xz93dbrzxxtfOP//8XV0zPfHEExvuvffeoZ///OfjVVVVf77iiisO+jJVzhZUaTyuxatWqHZhjVq37lDh6JFavKhapfF41NEABO74CePf+sGDd/XojKqlS5cO/9SnPrVjxYoVnetVTZs27fjGxsaDnh24bt262PTp0/dMnz59z9NPP124du3agS0tLflFRUV7Y7GY/+IXvzhy69athzzb8Je//OWw6667btuuXbvy6urqjrzpppu2JBKJzpaaPXt2y6233lp89tln747FYv7CCy/ESktL927fvr0gHo+/dfXVVze3trbmPfvss4MlUVAHozQe15Jlt0cdAwD005/+dOS11167LX3bueeeu7OmpuaYgz3Gd7/73aN/97vfHZWXl+fl5eV7LrjggpY333wz/6yzzjpu0qRJE0444YS/xuPxvx1qtsmTJ7dWVlaWbd269YhrrrlmW2lp6d4NGzZ0Ft1VV13V3NjYGDvxxBMnuLuNGDFi7yOPPLLp0UcfPbK2tnZUQUGBDx48uH358uWHVOrm3nee7lFRUeGsqAvgcJjZGnevSN/2/PPPN5588snNUWUK2fz580cPGTKk/dvf/vafs3WO559/vujkk08u7bo9ZydJAADCltOX+ACgL7rllltG3nrrre9L3zZt2rS/LF269LWePteSJUu29vQxDxYFBQDSvn379llfeaL5vHnzdsybN69fvGlz3759puTChe/AJT4AkNY2NTUNTX2zRC9JLVg4VNLa7vYzggKQ89ra2r60ffv2O7Zv386S772rc8n37nZSUABy3tSpU99QN0uOI1r8pAAACBIFBQAIEgUFAAhSv7wH1djQkHzG3pZmFZYUqYpn7AFAn9PvCqpjpdy3n1K+Uwvq5mjxqhWUFAD0If3uEh8r5QJA/9DvCoqVcgGgf+h3BcVKuQDQP/S7gqpaVK3V4xJKX4559biEqlgpFwD6lH5XUB0r5W6cW6a6mUO1cW4ZEyQAoA/qd7P4JFbKBYD+oN+NoAAA/QMFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACFLkBWVm+Wb2nJk9FHUWAEA4Ii8oSfMkrY86BAAgLJEWlJmNkfRxSXdEmQMAEJ6oR1A3S7pW0r6IcwAAAhNZQZnZ2ZLecPc1GV53qZnVm1l9U1NTL6UDAEQtyhHU6ZLOMbNGSXdLOsPMlnV9kbvf5u4V7l5RXFzc2xkBABGJrKDc/WvuPsbdSyV9VtKv3f2iqPIAAMIS9T0oAAC6FcSS7+7+G0m/iTgGACAgjKAAAEGioAAAQaKgAABBoqAAAEGioAAAQaKgAABBoqAAAEGioAAAQaKgAABBoqAAAEGioAAAQaKgAABBoqAAAEGioAAAQQpiuQ0AOFSNDQ2qXVij1i3NKiwpUtWiapXG41HHQg+ioAD0OY0NDVowe44qN8UUs3wlfKcW1M3R4lUrKKl+hEt8APqc2oU1neUkSTHLV+WmmGoX1kScDD2JggLQ57Ruae4spw4xy1fr1h0RJUI2UFAA+pzCkiIlvH2/bQlvV+HokRElQjZQUAD6nKpF1Vo9LtFZUglv1+pxCVUtqo44GXoSBQWgzymNx7V41QptnFumuplDtXFuGRMk+iFm8QHok0rjcS1ZdnvUMZBFjKAAAEGioAAAQaKgAABBoqAAAEGioAAAQaKgAABBoqAAAEGioAAAQaKgAABBoqAAAEGioAAAQaKgAABBoqAAAEGioAAAQaKgAABBoqAAAEGioAAAQaKgAABBoqAAAEGioAAAQaKgAABBoqAAAEGioAAAQYqsoMxsrJk9bmbrzWydmc2LKgsAIDwFEZ67TdLV7v6smR0paY2ZrXL3lyLMBAAIRGQjKHff5u7Ppj7eLWm9pJKo8gAAwhLEPSgzK5U0WdLT0SYBAIQi8oIysyGS7pV0pbvv6mb/pWZWb2b1TU1NvR8QABCJSAvKzAYoWU7L3f2+7l7j7re5e4W7VxQXF/duQABAZKKcxWeS7pS03t2XRJUDABCmKEdQp0v6nKQzzOwPqV8fizAPACAgkU0zd/enJFlU5wcAhC3ySRIAAHSHggIABImCAgAEiYICAASJggIABImCAgAEiYICAASJggIABImCAgAEiYICAASJggIABImCAgAEiYICAATpkArKzArNLD9bYQAA6PCuBWVmeWY2x8weNrM3JL0saZuZrTOzxWZW1jsxAQC5JtMI6nFJ4yR9TdIodx/r7kdL+pCkOknXm9lFWc4IAMhBmRYs/LC77+260d3/V9K9ku41swFZSQYAyGnvWlBdy8nMBkq6SNIgSSvcfUd3BQYAwOE61Fl8t0jKl/Q3SQ/0fBwAAJIyTZJYYWbj0jaNkLRc0l2ShmczGAAgt2W6B/Wvkr5jZlslLZL0PUk/lzRQ0r9lNxoAIJdlugf1iqQ5ZvZBST+R9LCk2e7e3hvhAAC5K9MlvuFm9mVJEyV9WlKLpEfN7OzeCAcAyF2ZJkk8ICmh5CW9pe7+Y0mfkDTVzH6e7XAAgNyV6R7USEkrlJxW/k+S5O57JH3LzI7JcjYAQA7LVFDfkLRKUrukr6bvcPdt2QoFAECmSRL3Sbqvl7IAANDpXQvKzPIkXSzpfEljJbVJ+qOkH7j7b7KeDgCQszJd4rtT0quSrpd0gaRdkp6U9K9mdqK7/98s5wMA5KhMBTXV3b+Q+vgpM6tz92+Y2ROS/iCJggIAZEWmaeZ7Ox51ZGZTJL0lSe6ekORZzgYAyGGZRlALJD1uZn+TNEDSZyXJzIolPZTlbACAHJZpFt+vzexYSSPdvTlte5Oka7MdDgCQuzIut+FJzV23m9mo7EQCAODQ14NKd2ePpQAAoIv3XFDu/vGeDAIAQLpMkyQkdU6KGKPkG3Ub3P0vWU0FAMh5mZ4kMVFSraRSSe+X9Jyko83st5LmuXtL1hMCAHJSpkt8P5T0ZXc/TtIHJb3s7nFJ/y3uQQEAsihTQQ1y9w2S5O7PSDox9fHtSi5iCABAVmS6B7XJzBZKWi3pU0o+3khmNuAgvhYAgPcs0wjqi5KOlFSt5Mq681LbByu1gCEAANmQ6UkSb6qbJ0akJkfUZSsUAADv+X1QZnZbTwYBACBdpmnmIw60S9LHej4OAABJmSY6NCm5YKGlbfPU50dnKxQAAJkK6hVJle7+WtcdZvb64Z7czM6UdIukfEl3uPv1h3tMAED/kOke1M2Shh9g33cP58Rmli/p+5LOUvI9VRemnlwBAEDGWXzff5d9h7vc+3RJf3L3VyTJzO6WdK6klw7zuACAfuBdR1Bm9sEM+48ys0nv8dwlktIvE25Obet6jkvNrN7M6puamt7jqQAAfU2me1Dnm9l3Ja2UtEbJSRMDJR0naZakYyVd/R7Pbd1s83dscL9N0m2SVFFR8Y79AID+KdMlvqvMbLikCyT9o6RjJO2RtF7S/3P3pw7j3JsljU37fIykrYdxPABAP5LxeXruvlPS7alfPen3ksrMLC5pi6TPSprTw+cAAPRRkT3w1d3bzOwKSY8qOc38h+6+Lqo8AICwRPpEcnd/RNIjUWYAAITpPT+LDwCAbMpYUKmp5OO62X5SdiIBAJD5fVCflvSypHvNbJ2ZTUvb/Z/ZDAYAyG2ZRlDVkqa6+ymSviBpqZl9KrWvu/cxAQDQIzJNksh3922S5O7PmNksSQ+Z2Rh186ZaAAB6SqYR1O70+0+pspqp5DPzTshiLgBAjss0gvo/6nIpz913p5bJ+HTWUgEAcl6mEVSrpPd1s32GpLqejwMAQNLBrAe1u5vte1L7AADIikwFVeruL3Td6O71kkqzkggAAGUuqIHvsm9QTwYBACBdpoL6vZld0nWjmf2zkutDAQCQFZlm8V0p6X4zm6u3C6lC0hGSPpnNYACA3JZpwcI/S/pA6g26HUu7P+zuv856MgBATnvXgjKzgZIuV3KJ9xcl3enubb0RDACQ2zLdg/ovJS/pvSjpLEnfy3oiAACU+R7URHc/UZLM7E5Jz2Q/EgAAmUdQezs+4NIeAKA3ZRpBnWxmu1Ifm6RBqc9Nkrv7UVlNBwDIWZlm8eX3VhCEqbGhQbULa9S6pVmFJUWqWlSt0ng86lgAckCmERRyWGNDgxbMnqPKTTHFLF8J36kFdXO0eNUKSgpA1mW6B4UcVruwprOcJClm+arcFFPtwpqIkwHIBRQUDqh1S3NnOXWIWb5at+6IKBGAXEJB4YAKS4qU8Pb9tiW8XYWjR0aUCEAuoaBwQFWLqrV6XKKzpBLertXjEqpaVB1xMgC5gILCAZXG41q8aoU2zi1T3cyh2ji3rE9MkGhsaND8iy7RZbM+qfkXXaLGhoaoIwF4D8zdo85w0CoqKry+vj7qGAjYO2ceJkd9faFY0TvMbI27V0SdA5kxgkK/wsxDoP+goNCvMPMQ6D8oKPQrzDwE+g8KCv0KMw+B/oOCQr/SV2ceAngnnsWHfqc0HteSZbdHHQPAYWIEBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACFIkBWVmi83sZTN7wczuN7NhUeQAAIQrqhHUKkmT3P0kSRslfS2iHACAQEVSUO7+K3dvS31aJ2lMFDkAAOEK4R7UFyX98kA7zexSM6s3s/qmpqZejAUAiFLWFiw0s8ckjepm19fd/cHUa74uqU3S8gMdx91vk3SbJFVUVHgWogIAApS1gnL3D7/bfjO7WNLZkirdneIBAOwnkiXfzexMSV+R9A/u/tcoMiA6jQ0Nql1Yo9YtzSosKVLVomqVxuNRxwIQmEgKStK/S4pJWmVmklTn7pdHlAW9qLGhQQtmz1Hlpphilq+E79SCujlavGoFJQVgP1HN4jvO3ce6+ympX5RTjqhdWNNZTpIUs3xVboqpdmFNxMkAhCaEWXzIIa1bmjvLqUPM8tW6dUdEiQCEioJCryosKVLC2/fblvB2FY4eGVEiAKGioNCrqhZVa/W4RGdJJbxdq8clVLWoOuJkAEJDQaFXlcbjWrxqhTbOLVPdzKHaOLeMCRIAuhXVLD7ksNJ4XEuW3R51DACBYwQFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFAAgSBQUACBIFBQAIUkHUARCGxoYG1S6sUeuWZhWWFKlqUbVK4/GoYwHIYRQU1NjQoAWz56hyU0wxy1fCd2pB3RwtXrWCkgIQGS7xQbULazrLSZJilq/KTTHVLqyJOBmAXEZBQa1bmjvLqUPM8tW6dUdEiQAg4oIys2vMzM2sKMocua6wpEgJb99vW8LbVTh6ZESJACDCgjKzsZJmS3otqgxIqlpUrdXjEp0llfB2rR6XUNWi6oiTAchlUY6gbpJ0rSSPMAMklcbjWrxqhTbOLVPdzKHaOLeMCRIAIhfJLD4zO0fSFnd/3swyvfZSSZdK0vvf//5eSJebSuNxLVl2e9QxAKBT1grKzB6TNKqbXV+XVC3pIwdzHHe/TdJtklRRUcFoCwByRNYKyt0/3N12MztRUlxSx+hpjKRnzWy6u2/PVh4AQN/S65f43P1FSUd3fG5mjZIq3L25t7MAAMLF+6AAAEGK/FFH7l4adQYAQHgYQQEAgkRBAQCCREEBAIJEQQEAgkRBAQCCREEBAIJEQQEAgkRBAQCCREEBAIJEQQEAgkRBAQCCREEBAIJEQQEAgkRBAQCCFPlyG6FpbGhQ7cIatW5pVmFJkaoWVas0Ho86FgDkHAoqTWNDgxbMnqPKTTHFLF8J36kFdXO0eNUKSgoAehmX+NLULqzpLCdJilm+KjfFVLuwJuJkAJB7KKg0rVuaO8upQ8zy1bp1R0SJACB3UVBpCkuKlPD2/bYlvF2Fo0dGlAgAchcFlaZqUbVWj0t0llTC27V6XEJVi6ojTgYAuYeCSlMaj2vxqhXaOLdMdTOHauPcMiZIAEBEmMXXRWk8riXLbo86BgDkPEZQAIAgUVAAgCBRUACAIFFQAIAgUVAAgCBRUACAIFFQAIAgUVAAgCBRUACAIJm7R53hoJlZk6RXs3DoIknNWTju4SBTZqHlkch0MKLOc6y7F0d4fhykPlVQ2WJm9e5eEXWOdGTKLLQ8EpkORmh5EC4u8QEAgkRBAQCCREEl3RZ1gG6QKbPQ8khkOhih5UGguAcFAAgSIygAQJAoKABAkCioLszsGjNzMysKIMtiM3vZzF4ws/vNbFhEOc40sw1m9icz+2oUGbrkGWtmj5vZejNbZ2bzos4kSWaWb2bPmdlDUWeRJDMbZmY/S/0bWm9mpwWQ6arU39laM7vLzAZGnQnhoqDSmNlYSbMlvRZ1lpRVkia5+0mSNkr6Wm8HMLN8Sd+XdJakiZIuNLOJvZ2jizZJV7v7BEkzJH05gEySNE/S+qhDpLlF0kp3Hy/pZEWczcxKJFVJqnD3SZLyJX02ykwIGwW1v5skXSspiJkj7v4rd29LfVonaUwEMaZL+pO7v+Lub0m6W9K5EeTo5O7b3P3Z1Me7lfzGWxJlJjMbI+njku6IMkcHMztK0t9LulOS3P0td38z2lSSpAJJg8ysQNJgSVsjzoOAUVApZnaOpC3u/nzUWQ7gi5J+GcF5SyS9nvb5ZkVcBunMrFTSZElPR5tENyv5w82+iHN0+DtJTZJ+lLrseIeZFUYZyN23SPqeklcotklqcfdfRZkJYcupgjKzx1LXvrv+OlfS1yV9I7BMHa/5upKXtZb3dj5J1s22IEaYZjZE0r2SrnT3XRHmOFvSG+6+JqoM3SiQNEXSre4+WVKrpEjvH5rZcCVH33FJoyUVmtlFUWZC2AqiDtCb3P3D3W03sxOV/E/zvJlJyUtpz5rZdHffHkWmtGwXSzpbUqVH86a1zZLGpn0+RgFcljGzAUqW03J3vy/iOKdLOsfMPiZpoKSjzGyZu0f5zXezpM3u3jGy/JkiLihJH5bU4O5NkmRm90n6gKRlkaZCsHJqBHUg7v6iux/t7qXuXqrkf+4p2S6nTMzsTElfkXSOu/81ohi/l1RmZnEzO0LJm9o/jyiLJMmSP0XcKWm9uy+JMoskufvX3H1M6t/OZyX9OuJyUurf7utmdnxqU6WklyKMJCUv7c0ws8Gpv8NKhTWpBIHJqRFUH/TvkmKSVqVGdnXufnlvBnD3NjO7QtKjSs66+qG7r+vNDN04XdLnJL1oZn9Ibat290cizBSif5G0PPWDxSuSvhBlGHd/2sx+JulZJS9ZPycee4R3waOOAABB4hIfACBIFBQAIEgUFAAgSBQUACBIFBQAIEgUFLLCzNrN7A+pp2L81MwGp7aPMrO7zWyTmb1kZo+YWXlq30ozezPT08DN7GYz+/vUx8tTT1pfa2Y/TL2Bt+N1M1MZ1pnZbw9wrLiZPW1mfzSzn6SmZMvM/iV1zEfStn3QzJakfW2xma083D8rAN2joJAte9z9lNRTq9+SdHnqzZn3S/qNu49z94mSqiW9L/U1i5V8f9MBmdkISTPc/YnUpuWSxks6UdIgSV9KvW6YpP9Q8k3OJ0j6xwMc8gZJN7l7maSdkv45tf1Lkk5S8r06H01lXyhpUccXpp6IsM3MTj+YPxAAh4aCQm94UtJxkmZJ2uvuP+jY4e5/cPcnUx+vlrQ7w7EukNQ5anH3RzxF0jN6+4nvcyTd5+6vpV73RtcDpUrnDCUfAyRJ/yXpvLSXDFDyidt7lSzOR9x9Z5fDPCBpbobMAN4DCgpZlVpW4SxJL0qaJOlwH6h6enfHSF3a+5zeLq9yScPN7DdmtsbM/qmbY42U9GbakibpT2r/npJLnBRL+m9JFys5IuuqXtKH3uPvBcC7oKCQLYNSjyGqV/IZbHf20HGPUXIZia7+Q9ITHaMxJR/jNVXJNZo+Kmlhx72uNAd8Uru7L3X3yaln6s2XVCvprNQKtTeZWcf/nTeUfDI3gB7Gs/iQLXvc/ZT0DWa2TslLdId1XCWfGJ5+3G8qOdK5LG3zZknN7t4qqdXMnlByVdmNaa9pljTMzApSo6h3PKndzEZLmubu3zKzZySdJuk6JR90uiqVZc9h/p4AdIMRFHrTryXFzOySjg1mNs3M/uEQjrFeyftZHV//JSVHSBe6e/pigQ9K+pCZFaRmEJ6qLk/OTt23elxvl+bFqa9Lt0jJyRFSchKGK7ko4eDUtnJJaw8hP4CDREGh16QK4ZOSZqemma+T9G9KjVrM7ElJP5VUaWabzeyj3RzmYUkz0z7/gZKzAP8nNaX8G6lzrVfyftQLSk6euMPd16bO80hqZCQllzOZb2Z/UvKeVOelSDObnDrWc6lNdyp5L22K3r7XNSuVCUAP42nm6HPM7ClJZ7v7mwFkeULSud3M7gNwmCgo9DlmdqqS97heiDhHsaTT3f2BKHMA/RUFBQAIEvegAABBoqAAAEGioAAAQaKgAABBoqAAAEH6/0Sc7jypEfkTAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "local_Ne = 1e5\n", "g = 0\n", "split_time = 2e5\n", "pop1_samples = 8\n", "pop2_samples = 8\n", "nloci = 100\n", "length=150\n", "\n", "migmat = [[0, 0], [0, 0]]\n", "\n", "def simulate_nloci(n=1, split_time=2e5, pop1_samples=8, pop2_samples=8, nloci=100, migmat=[[0, 0], [0, 0]]):\n", " ts_list = []\n", " for i in xrange(n):\n", " pop1 = msprime.PopulationConfiguration(sample_size=pop1_samples,\n", " initial_size=local_Ne,\n", " growth_rate=g)\n", "\n", " pop2 = msprime.PopulationConfiguration(sample_size=pop2_samples,\n", " initial_size=local_Ne,\n", " growth_rate=g)\n", "\n", " split_event = msprime.MassMigration(time=split_time,\n", " source=0,\n", " destination=1,\n", " proportion=1)\n", "\n", " debug = msprime.DemographyDebugger(population_configurations=[pop1, pop2],\n", " demographic_events=[split_event],\n", " migration_matrix=migmat)\n", "\n", " tree_sequence = msprime.simulate(length=length,\\\n", " migration_matrix=migmat,\\\n", " mutation_rate=1e-8, \\\n", " population_configurations=[pop1, pop2],\\\n", " demographic_events=[split_event])\n", " ts_list.append(tree_sequence)\n", "\n", " return ts_list\n", "\n", "def plot_tree_sequence(tree_sequence):\n", " tree = tree_sequence.first()\n", " colour_map = {0:\"red\", 1:\"blue\"}\n", " node_colours = {u: colour_map[tree.population(u)] for u in tree.nodes()}\n", " display(SVG(tree.draw(width=600, height=400, node_colours=node_colours)))\n", "\n", "def write_vcf(ts_list, outfile, unlinked=False):\n", " ## get the header\n", " head = []\n", " with TemporaryFile() as outtmp:\n", " ts_list[0].write_vcf(outtmp, ploidy=2, contig_id='1')\n", " outtmp.seek(0)\n", " head = outtmp.readlines()[:6]\n", "\n", " with open(outfile, 'w') as output:\n", " output.write(\"\".join(head))\n", " for i, ts in enumerate(ts_list):\n", " with TemporaryFile() as outtmp:\n", " ## the +2 here is because enumerate starts at 0, and we\n", " ## want our first locus of this list to start at 2\n", " ts.write_vcf(outtmp, ploidy=2, contig_id=str(i+2))\n", " outtmp.seek(0)\n", " ## Get rid of the 6 lines of vcf header\n", " dat = outtmp.readlines()[6:]\n", " ## Add the AA field\n", " new_dat = [x[:x.rfind(\".\")] + \"AA=A\" + x[x.rfind(\".\")+1:] for x in dat]\n", " dat = new_dat\n", " if unlinked:\n", " try:\n", " dat = np.random.choice(dat, size=1)\n", " except:\n", " ## ignore loci with no snps\n", " pass\n", " output.write(\"\".join(dat))\n", "\n", "def simulate_missing(infile, outfile=None, avg_missing=0.3, std_missing=0.05):\n", " if outfile == None:\n", " outfile = infile\n", " dat = ''\n", " header = ''\n", " with open(infile) as infile:\n", " dat = infile.readlines()\n", " header = dat[:6]\n", " dat = pd.DataFrame([x.split() for x in dat[6:]], columns=header[-1].split())\n", " samps = [x for x in dat.columns if \"msp\" in x]\n", " missingness = np.random.normal(avg_missing, std_missing, len(samps))\n", " for samp, miss in zip(samps, missingness):\n", " #print(samp, miss, len(dat[samp]))\n", " draws = np.random.uniform(size=len(dat[samp]))\n", " dat[samp] = np.where(draws < miss, \".|.\", dat[samp])\n", " with open(outfile, 'w') as outfile:\n", " outfile.write(\"\".join(header))\n", " dat.to_csv(outfile, sep=\"\\t\", index=False, header=False)\n", "\n", "## 100 loci is almost instant\n", "## 1000 loci ~5 seconds\n", "migmat = [[0, 1e-5], [1e-5, 0]]\n", "ts_list = simulate_nloci(n=nloci, migmat=migmat)\n", "#plot_tree_sequence(ts_list[0])\n", "write_vcf(ts_list, \"./{}/tmp.vcf\".format(simout), unlinked=True)\n", "simulate_missing(\"{}/tmp.vcf\".format(simout), avg_missing=0.5)\n", "pca = ipa.pca(\"./{}/tmp.vcf\".format(simout))\n", "pca.plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Simulate vcf files serially" ] }, { "cell_type": "code", "execution_count": 230, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Doing no_migration 0\n", "Doing no_migration 1\n", "Doing no_migration 2\n", "Doing no_migration 3\n", "Doing no_migration 4\n", "Doing symmetric_migration 0\n", "Doing symmetric_migration 1\n", "Doing symmetric_migration 2\n", "Doing symmetric_migration 3\n", "Doing symmetric_migration 4\n", "Doing asymmetric_migration 0\n", "Doing asymmetric_migration 1\n", "Doing asymmetric_migration 2\n", "Doing asymmetric_migration 3\n", "Doing asymmetric_migration 4\n" ] } ], "source": [ "nsims = 5\n", "models = {\"no_migration\":[[0, 0], [0, 0]],\n", " \"symmetric_migration\":[[0, 1e-5], [1e-5, 0]],\n", " \"asymmetric_migration\":[[0, 1e-5], [1e-6, 0]]}\n", "\n", "for model, migmat in models.items():\n", " outdir = \"./{}/{}\".format(simout, model)\n", " if not os.path.exists(outdir):\n", " os.mkdir(outdir)\n", " for i in xrange(nsims):\n", " print(\"Doing {} {}\".format(model, i))\n", " ts_list = simulate_nloci(nloci, migmat=migmat)\n", " write_vcf(ts_list, \"./{}/{}-sim{}.vcf\".format(outdir, model, i), unlinked=False)\n", " simulate_missing(\"{}/{}-sim{}.vcf\".format(outdir, model, i))\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Simulate vcf files in parallel\n", "Broken" ] }, { "cell_type": "code", "execution_count": 192, "metadata": {}, "outputs": [], "source": [ "## Define a function so we can parallelize the above cell\n", "def sim_vcf(model, nloci, missingness, migmat, outdir, rep):\n", " ts_list = simulate_nloci(nloci, migmat=migmat)\n", " write_vcf(ts_list, \"./{}/{}-sim{}.vcf\".format(outdir, model, rep), unlinked=False)\n", " simulate_missing(\"{}/{}-sim{}.vcf\".format(outdir, model, rep), avg_missing=missingness)\n" ] }, { "cell_type": "code", "execution_count": 242, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5\n" ] } ], "source": [ "## Run this in a python2.7 env and wait for clients to attach\n", "#!ipcluster start -n 4 --cluster-id=ipp2 --daemonize\n", "ipyclient = ipp.Client(cluster_id=\"ipp2\")\n", "print(len(ipyclient.ids))" ] }, { "cell_type": "code", "execution_count": 244, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " [####################] 100% Simulating vcf files | 0:00:01 \n", " [####################] 100% Simulating vcf files | 0:00:01 \n", " [####################] 100% Simulating vcf files | 0:00:01 \n" ] } ], "source": [ "nsims = 5\n", "nloci = 1000\n", "missingness = .3\n", "models = {\"no_migration\":[[0, 0], [0, 0]],\n", " \"symmetric_migration\":[[0, 1e-5], [1e-5, 0]],\n", " \"asymmetric_migration\":[[0, 1e-5], [1e-6, 0]]}\n", "\n", "ipyclient[:].use_dill()\n", "ipyclient[:].push(dict(simulate_nloci=simulate_nloci, write_vcf=write_vcf, simulate_missing=simulate_missing))\n", "lbview = ipyclient.load_balanced_view()\n", "parallel_jobs = {}\n", "\n", "start = time.time()\n", "printstr = \" Simulating vcf files | {}\"\n", "for model, migmat in models.items():\n", " outdir = \"./{}/{}\".format(simout, model)\n", " if not os.path.exists(outdir):\n", " os.mkdir(outdir)\n", " for rep in xrange(nsims):\n", " parallel_jobs[rep] = lbview.apply(sim_vcf, *(model, nloci, missingness, migmat, outdir, rep))\n", " while 1:\n", " fin = [i.ready() for i in parallel_jobs.values()]\n", " elapsed = datetime.timedelta(seconds=int(time.time()-start))\n", " progressbar(len(fin), sum(fin),\n", " printstr.format(elapsed))\n", " time.sleep(0.1)\n", " if len(fin) == sum(fin):\n", " print(\"\")\n", " break" ] }, { "cell_type": "code", "execution_count": 250, "metadata": {}, "outputs": [ { "ename": "RemoteError", "evalue": "NameError(global name 'simulate_nloci' is not defined)", "output_type": "error", "traceback": [ "\u001b[0;31m\u001b[0m\u001b[0;31mNameError\u001b[0mTraceback (most recent call last)\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m", "\u001b[0;32m\u001b[0m in \u001b[0;36msim_vcf\u001b[0;34m(model, nloci, missingness, migmat, outdir, rep)\u001b[0m", "\u001b[0;31mNameError\u001b[0m: global name 'simulate_nloci' is not defined" ] } ], "source": [ "#parallel_jobs[2].result()\n", "ipyclient[0].push(dict(simulate_nloci=simulate_nloci, write_vcf=write_vcf, simulate_missing=simulate_missing))\n", "wat = ipyclient[0].apply(sim_vcf, *(model, nloci, .3, migmat, \"/tmp/\", 0))\n", "print(wat.result())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Crap below here" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.3044425 0.29402902 0.2848071 0.28072836 0.32856761 0.2867087\n", " 0.28905754 0.28444419]\n", "[0.13055066 0.27542815 0.31284536 0.08255794 0.0181362 0.28084621\n", " 0.25152279 0.01150193]\n", "6\t60\t.\tA\tT\t.\tPASS\t.\tGT\t1|0\t0|0\t0|0\t1|0\t0|0\t0|0\t0|0\t0|0\n", "\n" ] } ], "source": [ "avg_missing = 0.3\n", "std_missing = 0.05\n", "\n", "missingness = np.random.normal(avg_missing, std_missing, 8)\n", "print(missingness)\n", "draw = np.random.uniform(size=8)\n", "print(draw)\n", "ts = ts_list[1]\n", "with TemporaryFile() as outtmp:\n", " ts.write_vcf(outtmp, ploidy=2, contig_id=str(i+2))\n", " outtmp.seek(0)\n", " ## Get rid of the 6 lines of vcf header\n", " dat = outtmp.readlines()[6:]\n", " print(dat[0])" ] }, { "cell_type": "code", "execution_count": 288, "metadata": {}, "outputs": [], "source": [ "\n", "ts_list = simulate_nloci(1000)\n", "write_vcf(ts_list, \"./1000loci.vcf\", unlinked=True)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.15" } }, "nbformat": 4, "nbformat_minor": 2 }