{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "ename": "SyntaxError",
     "evalue": "invalid syntax (demography.py, line 369)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;36m  File \u001b[0;32m\"/home/isaac/miniconda2/lib/python2.7/site-packages/momi-2.1.15-py2.7-linux-x86_64.egg/momi/demography.py\"\u001b[0;36m, line \u001b[0;32m369\u001b[0m\n\u001b[0;31m    print(chrom_name, 0, length, sep=\"\\t\", file=bed_f)\u001b[0m\n\u001b[0m                                    ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from itertools import product\n",
    "from collections import Counter, OrderedDict\n",
    "import json\n",
    "import momi"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read the dadi sfs\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "global name 'momi' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-103-ac630a0ea97a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     79\u001b[0m     \u001b[0msfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     80\u001b[0m \u001b[0;31m#dadi_to_momi(infile, verbose=True)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 81\u001b[0;31m \u001b[0mdadi_to_momi\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/home/isaac/easySFS/jupyter-notebooks/1000_folded/dadi/pop1-pop2.sfs\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     82\u001b[0m \u001b[0mdadi_to_momi\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/home/isaac/easySFS/jupyter-notebooks/1000_unfolded/dadi/pop1-pop2.sfs\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-103-ac630a0ea97a>\u001b[0m in \u001b[0;36mdadi_to_momi\u001b[0;34m(infile, outfile, verbose)\u001b[0m\n\u001b[1;32m     74\u001b[0m         \u001b[0mout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"{}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmomi_sfs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     75\u001b[0m     \u001b[0;31m## make it pretty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 76\u001b[0;31m     \u001b[0msfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmomi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     77\u001b[0m     \u001b[0;31m## Fold if unfolded\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     78\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mfolded\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0msfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: global name 'momi' is not defined"
     ]
    }
   ],
   "source": [
    "def dadi_to_momi(infile, outfile=None, verbose=False):\n",
    "    if outfile == None:\n",
    "        outfile = infile.split(\".sfs\")[0] + \"_momi.sfs\"\n",
    "    dat = open(infile).readlines()\n",
    "    ## Get rid of comment lines\n",
    "    dat = [x.strip() for x in dat if not x.startswith(\"#\")]\n",
    "    if not len(dat) == 3:\n",
    "        raise Exception(\"Malformed dadi sfs {}.\\n  Must have 3 lines, yours has {}\".format(infile, len(dat)))\n",
    "\n",
    "    ## Parse the info line into nsamps per pop (list of ints), folding flag, and pop names list (if they are given)\n",
    "    info = dat[0].split()\n",
    "    nsamps = []\n",
    "    ## Keep carving off values as long as they cast successfully as int\n",
    "    for i in info:\n",
    "        try:\n",
    "            nsamps.append(int(i))\n",
    "        except:\n",
    "            pass\n",
    "    nsamps = np.array(nsamps)\n",
    "    pops = [x.replace('\"', '') for x in info[len(nsamps)+1:]]\n",
    "    folded = info[len(nsamps)]\n",
    "    folded = False if \"un\" in folded else True\n",
    "    if not len(pops) == len(nsamps):\n",
    "        print(\"Number of populations doesn't agree with number of pop names, using generic names.\")\n",
    "        pops = [\"pop\"+x for x in range(len(nsamps))]\n",
    "    if verbose: print(\"Info nsamps={} folded={} pops={}\".format(nsamps, folded, pops))\n",
    "\n",
    "    ## Get mask\n",
    "    mask = list(map(int, dat[2].split()))\n",
    "    if verbose: print(mask)\n",
    "\n",
    "    ## Get sfs, and reshape based on sample sizes\n",
    "    sfs = list(map(float, dat[1].split()))\n",
    "    if verbose: print(sfs)\n",
    "    length = np.ma.array(sfs, mask=mask).sum()\n",
    "    sfs = np.array(sfs).reshape(nsamps)\n",
    "    if verbose: print(\"length {}\".format(length))\n",
    "    if verbose: print(sfs)\n",
    "\n",
    "    ## Get counts per sfs bin\n",
    "    counts = Counter()\n",
    "    for sfsbin in product(*[range(y) for y in [x for x in nsamps]]):\n",
    "        ## Ignore monomorphic snps\n",
    "        ## nsamps minus 1 here because of the off by one diff between number\n",
    "        ## of bins in the sfs and actual number of samples\n",
    "        if sfsbin == tuple(nsamps-1) or sfsbin == tuple([0] * len(nsamps)):\n",
    "            continue\n",
    "        ## ignore zero bin counts\n",
    "        if sfs[sfsbin] == 0:\n",
    "            continue\n",
    "        if verbose: print(sfsbin, sfs[sfsbin]),\n",
    "        counts.update({sfsbin:sfs[sfsbin]})\n",
    "    if verbose: print(\"nbins {}\".format(len(counts)))\n",
    "\n",
    "    ## Convert SFS bin style into momi config style\n",
    "    configs = pd.DataFrame(index=range(len(counts)), columns=pops)\n",
    "    \n",
    "    locus_info = []\n",
    "    for i, c in enumerate(counts):\n",
    "        ## (n-1) here because nbins in dadi sfs is n+1\n",
    "        cfg = np.array([[(n-1)-x, x] for x,n in zip(c, nsamps)])\n",
    "        configs.iloc[i] = [list(map(int, list(x))) for x in cfg]\n",
    "        locus_info.append([0, i, counts[c]])\n",
    "    if verbose: print(\"n_snps {}\".format(np.sum([x[2] for x in locus_info])))\n",
    "\n",
    "    ## Finally build the momi style sfs dictionary and write it out\n",
    "    momi_sfs = {\"sampled_pops\":pops,\n",
    "        \"folded\":folded,\n",
    "        \"length\":int(length),\n",
    "        \"configs\":configs.values.tolist(),\n",
    "        \"(locus,config_id,count)\":locus_info}\n",
    "\n",
    "    with open(outfile, 'w') as out:\n",
    "        out.write(\"{}\".format(json.dumps(momi_sfs)))\n",
    "    ## make it pretty\n",
    "    sfs = momi.Sfs.load(outfile)\n",
    "    ## Fold if unfolded\n",
    "    if folded: sfs = sfs.fold()\n",
    "    sfs.dump(outfile)\n",
    "#dadi_to_momi(infile, verbose=True)\n",
    "dadi_to_momi(\"/home/isaac/easySFS/jupyter-notebooks/1000_folded/dadi/pop1-pop2.sfs\", verbose=False)\n",
    "dadi_to_momi(\"/home/isaac/easySFS/jupyter-notebooks/1000_unfolded/dadi/pop1-pop2.sfs\", verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0, array([list([7, 1]), list([5, 3])], dtype=object))\n",
      "(1, array([list([5, 3]), list([8, 0])], dtype=object))\n",
      "(2, array([list([8, 0]), list([0, 8])], dtype=object))\n",
      "(3, array([list([8, 0]), list([1, 7])], dtype=object))\n",
      "(4, array([list([6, 2]), list([2, 6])], dtype=object))\n",
      "(5, array([list([7, 1]), list([2, 6])], dtype=object))\n",
      "(6, array([list([5, 3]), list([1, 7])], dtype=object))\n",
      "(7, array([list([6, 2]), list([3, 5])], dtype=object))\n",
      "(8, array([list([8, 0]), list([5, 3])], dtype=object))\n",
      "(9, array([list([4, 4]), list([8, 0])], dtype=object))\n",
      "(10, array([list([5, 3]), list([5, 3])], dtype=object))\n",
      "(11, array([list([8, 0]), list([2, 6])], dtype=object))\n",
      "(12, array([list([7, 1]), list([3, 5])], dtype=object))\n",
      "(13, array([list([5, 3]), list([0, 8])], dtype=object))\n",
      "(14, array([list([6, 2]), list([6, 2])], dtype=object))\n",
      "(15, array([list([5, 3]), list([3, 5])], dtype=object))\n",
      "(16, array([list([4, 4]), list([7, 1])], dtype=object))\n",
      "(17, array([list([7, 1]), list([7, 1])], dtype=object))\n",
      "(18, array([list([6, 2]), list([2, 6])], dtype=object))\n",
      "(19, array([list([8, 0]), list([4, 4])], dtype=object))\n",
      "(20, array([list([6, 2]), list([0, 8])], dtype=object))\n",
      "(21, array([list([7, 1]), list([4, 4])], dtype=object))\n",
      "(22, array([list([8, 0]), list([3, 5])], dtype=object))\n",
      "(23, array([list([6, 2]), list([7, 1])], dtype=object))\n",
      "(24, array([list([4, 4]), list([6, 2])], dtype=object))\n",
      "(25, array([list([7, 1]), list([8, 0])], dtype=object))\n",
      "(26, array([list([8, 0]), list([0, 8])], dtype=object))\n",
      "(27, array([list([5, 3]), list([3, 5])], dtype=object))\n",
      "(28, array([list([8, 0]), list([7, 1])], dtype=object))\n",
      "(29, array([list([7, 1]), list([0, 8])], dtype=object))\n",
      "(30, array([list([5, 3]), list([2, 6])], dtype=object))\n",
      "(31, array([list([6, 2]), list([1, 7])], dtype=object))\n",
      "(32, array([list([8, 0]), list([6, 2])], dtype=object))\n",
      "(33, array([list([6, 2]), list([8, 0])], dtype=object))\n",
      "(34, array([list([5, 3]), list([4, 4])], dtype=object))\n",
      "(35, array([list([6, 2]), list([4, 4])], dtype=object))\n"
     ]
    }
   ],
   "source": [
    "d = OrderedDict([('[[7 1]\\n [5 3]]', 2), ('[[5 3]\\n [8 0]]', 50), ('[[8 0]\\n [0 8]]', 74), ('[[8 0]\\n [1 7]]', 18), ('[[6 2]\\n [2 6]]', 2), ('[[7 1]\\n [2 6]]', 1), ('[[5 3]\\n [1 7]]', 1), ('[[6 2]\\n [3 5]]', 2), ('[[8 0]\\n [5 3]]', 48), ('[[4 4]\\n [8 0]]', 32), ('[[5 3]\\n [5 3]]', 1), ('[[8 0]\\n [2 6]]', 23), ('[[7 1]\\n [3 5]]', 4), ('[[5 3]\\n [0 8]]', 38), ('[[6 2]\\n [6 2]]', 1), ('[[5 3]\\n [3 5]]', 2), ('[[4 4]\\n [7 1]]', 2), ('[[7 1]\\n [7 1]]', 2), ('[[8 0]\\n [4 4]]', 32), ('[[6 2]\\n [0 8]]', 23), ('[[7 1]\\n [4 4]]', 3), ('[[8 0]\\n [3 5]]', 28), ('[[6 2]\\n [7 1]]', 1), ('[[4 4]\\n [6 2]]', 1), ('[[7 1]\\n [8 0]]', 185), ('[[8 0]\\n [7 1]]', 185), ('[[7 1]\\n [0 8]]', 17), ('[[5 3]\\n [2 6]]', 2), ('[[6 2]\\n [1 7]]', 4), ('[[8 0]\\n [6 2]]', 66), ('[[6 2]\\n [8 0]]', 87), ('[[5 3]\\n [4 4]]', 2), ('[[6 2]\\n [4 4]]', 2)])\n",
    "for i, row in configs.iterrows():\n",
    "    print(i, row.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['5 5 unfolded \"nuttalli\" \"pugetensis\"',\n",
       " '776.1460592424144 115.1222178069079 24.96239616420053 6.218025728002017 1.073392240447087 107.7264883754097 40.66889634511905 18.65179284371549 6.329399414025897 1.065189279571691 26.17961131785413 19.70143248690903 12.07926204967499 4.544540090166469 0.7711291771526296 6.954173763250236 7.96663510183704 5.391325472681568 2.026077574941607 0.3284864310491528 1.410459329911039 1.890674276442391 1.269396064688684 0.4543005907520786 0.06863883286683978',\n",
       " '1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dat = open(infile).readlines()\n",
    "## Get rid of comment lines\n",
    "dat = [x.strip() for x in dat if not x.startswith(\"#\")]\n",
    "if not len(dat) == 3:\n",
    "    print(\"Malformed dadi sfs. Must have 3 lines, yours has {}\".format(len(dat)))\n",
    "dat\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(array([5, 5]), 'unfolded', ['\"nuttalli\"', '\"pugetensis\"'])\n",
      "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]\n",
      "[776, 115, 25, 6, 1, 108, 41, 19, 6, 1, 26, 20, 12, 5, 1, 7, 8, 5, 2, 0, 1, 2, 1, 0, 0]\n",
      "length 412\n",
      "[[776 115  25   6   1]\n",
      " [108  41  19   6   1]\n",
      " [ 26  20  12   5   1]\n",
      " [  7   8   5   2   0]\n",
      " [  1   2   1   0   0]]\n"
     ]
    }
   ],
   "source": [
    "## Get nsamps\n",
    "info = dat[0].split()\n",
    "nsamps = []\n",
    "for i in info:\n",
    "    try:\n",
    "        nsamps.append(int(i))\n",
    "    except:\n",
    "        pass\n",
    "nsamps = np.array(nsamps)\n",
    "pops = info[len(nsamps)+1:]\n",
    "folded = info[len(nsamps)]\n",
    "if not len(pops) == len(nsamps):\n",
    "    print(\"Number of populations doesn't agree with number of pop names, using generic names.\")\n",
    "    pops = [\"pop\"+str(x) for x in range(len(nsamps))]\n",
    "print(nsamps, folded, pops)\n",
    "\n",
    "## Get mask\n",
    "mask = map(int, dat[2].split())\n",
    "print(mask)\n",
    "if np.sum(mask) > 2:\n",
    "    print(\"momi sfs conversion requires unfolded sfs. Folded sfs will 'work' but with unknown results.\")\n",
    "\n",
    "## Get sfs\n",
    "sfs = map(int, map(np.round, map(float, dat[1].split())))\n",
    "print(sfs)\n",
    "length = np.ma.array(sfs, mask=mask).sum()\n",
    "sfs = np.array(sfs).reshape(nsamps)\n",
    "print(\"length {}\".format(length))\n",
    "print(sfs)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((0, 1), 115)\n",
      "((0, 2), 25)\n",
      "((0, 3), 6)\n",
      "((0, 4), 1)\n",
      "((1, 0), 108)\n",
      "((1, 1), 41)\n",
      "((1, 2), 19)\n",
      "((1, 3), 6)\n",
      "((1, 4), 1)\n",
      "((2, 0), 26)\n",
      "((2, 1), 20)\n",
      "((2, 2), 12)\n",
      "((2, 3), 5)\n",
      "((2, 4), 1)\n",
      "((3, 0), 7)\n",
      "((3, 1), 8)\n",
      "((3, 2), 5)\n",
      "((3, 3), 2)\n",
      "((4, 0), 1)\n",
      "((4, 1), 2)\n",
      "((4, 2), 1)\n"
     ]
    }
   ],
   "source": [
    "from collections import Counter\n",
    "counts = Counter()\n",
    "for sfsbin in product(*[range(y) for y in [x for x in nsamps]]):\n",
    "    ## Ignore monomorphic snps\n",
    "    ## nsamps minus 1 here because of the off by one diff between number\n",
    "    ## of bins in the sfs and actual number of samples\n",
    "    if sfsbin == tuple(nsamps-1) or sfsbin == tuple([0] * len(nsamps)):\n",
    "        continue\n",
    "    ## ignore zero bin counts\n",
    "    if sfs[sfsbin] == 0:\n",
    "        continue\n",
    "    print(sfsbin, sfs[sfsbin])\n",
    "    counts.update({sfsbin:sfs[sfsbin]})\n",
    "counts\n",
    "configs = pd.DataFrame(index=range(len(counts)), columns=pops)\n",
    "\n",
    "#print(configs)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[0, 0, 1],\n",
       " [1, 0, 1],\n",
       " [2, 0, 1],\n",
       " [3, 0, 1],\n",
       " [4, 0, 1],\n",
       " [5, 0, 1],\n",
       " [6, 0, 1],\n",
       " [7, 0, 1],\n",
       " [8, 0, 1],\n",
       " [9, 0, 1],\n",
       " [10, 0, 1],\n",
       " [11, 0, 1],\n",
       " [12, 0, 1],\n",
       " [13, 0, 1],\n",
       " [14, 0, 1],\n",
       " [15, 0, 1],\n",
       " [16, 0, 1],\n",
       " [17, 0, 1],\n",
       " [18, 0, 1],\n",
       " [19, 0, 1],\n",
       " [20, 0, 1],\n",
       " [21, 0, 1],\n",
       " [22, 0, 1],\n",
       " [23, 0, 1],\n",
       " [24, 0, 1],\n",
       " [25, 0, 1],\n",
       " [26, 0, 1],\n",
       " [27, 0, 1],\n",
       " [28, 0, 1],\n",
       " [29, 0, 1],\n",
       " [30, 0, 1],\n",
       " [31, 0, 1],\n",
       " [32, 0, 1],\n",
       " [33, 0, 1],\n",
       " [34, 0, 1],\n",
       " [35, 0, 1],\n",
       " [36, 0, 1],\n",
       " [37, 0, 1],\n",
       " [38, 0, 1],\n",
       " [39, 0, 1],\n",
       " [40, 0, 1],\n",
       " [41, 0, 1],\n",
       " [42, 0, 1],\n",
       " [43, 0, 1],\n",
       " [44, 0, 1],\n",
       " [45, 0, 1],\n",
       " [46, 0, 1],\n",
       " [47, 0, 1],\n",
       " [48, 0, 1],\n",
       " [49, 0, 1],\n",
       " [50, 0, 1],\n",
       " [51, 0, 1],\n",
       " [52, 0, 1],\n",
       " [53, 0, 1],\n",
       " [54, 0, 1],\n",
       " [55, 0, 1],\n",
       " [56, 0, 1],\n",
       " [57, 0, 1],\n",
       " [58, 0, 1],\n",
       " [59, 0, 1],\n",
       " [60, 0, 1],\n",
       " [61, 0, 1],\n",
       " [62, 0, 1],\n",
       " [63, 0, 1],\n",
       " [64, 0, 1],\n",
       " [65, 0, 1],\n",
       " [66, 0, 1],\n",
       " [67, 0, 1],\n",
       " [68, 0, 1],\n",
       " [69, 0, 1],\n",
       " [70, 0, 1],\n",
       " [71, 0, 1],\n",
       " [72, 0, 1],\n",
       " [73, 0, 1],\n",
       " [74, 0, 1],\n",
       " [75, 0, 1],\n",
       " [76, 0, 1],\n",
       " [77, 0, 1],\n",
       " [78, 0, 1],\n",
       " [79, 0, 1],\n",
       " [80, 0, 1],\n",
       " [81, 0, 1],\n",
       " [82, 0, 1],\n",
       " [83, 0, 1],\n",
       " [84, 0, 1],\n",
       " [85, 0, 1],\n",
       " [86, 0, 1],\n",
       " [87, 0, 1],\n",
       " [88, 0, 1],\n",
       " [89, 0, 1],\n",
       " [90, 0, 1],\n",
       " [91, 0, 1],\n",
       " [92, 0, 1],\n",
       " [93, 0, 1],\n",
       " [94, 0, 1],\n",
       " [95, 0, 1],\n",
       " [96, 0, 1],\n",
       " [97, 0, 1],\n",
       " [98, 0, 1],\n",
       " [99, 0, 1],\n",
       " [100, 0, 1],\n",
       " [101, 0, 1],\n",
       " [102, 0, 1],\n",
       " [103, 0, 1],\n",
       " [104, 0, 1],\n",
       " [105, 0, 1],\n",
       " [106, 0, 1],\n",
       " [107, 0, 1],\n",
       " [108, 0, 1],\n",
       " [109, 0, 1],\n",
       " [110, 0, 1],\n",
       " [111, 0, 1],\n",
       " [112, 0, 1],\n",
       " [113, 0, 1],\n",
       " [114, 0, 1],\n",
       " [115, 1, 1],\n",
       " [116, 1, 1],\n",
       " [117, 1, 1],\n",
       " [118, 1, 1],\n",
       " [119, 1, 1],\n",
       " [120, 1, 1],\n",
       " [121, 1, 1],\n",
       " [122, 1, 1],\n",
       " [123, 1, 1],\n",
       " [124, 1, 1],\n",
       " [125, 1, 1],\n",
       " [126, 1, 1],\n",
       " [127, 1, 1],\n",
       " [128, 1, 1],\n",
       " [129, 1, 1],\n",
       " [130, 1, 1],\n",
       " [131, 1, 1],\n",
       " [132, 1, 1],\n",
       " [133, 1, 1],\n",
       " [134, 2, 1],\n",
       " [135, 2, 1],\n",
       " [136, 2, 1],\n",
       " [137, 2, 1],\n",
       " [138, 2, 1],\n",
       " [139, 3, 1],\n",
       " [140, 3, 1],\n",
       " [141, 3, 1],\n",
       " [142, 3, 1],\n",
       " [143, 3, 1],\n",
       " [144, 3, 1],\n",
       " [145, 4, 1],\n",
       " [146, 4, 1],\n",
       " [147, 4, 1],\n",
       " [148, 4, 1],\n",
       " [149, 4, 1],\n",
       " [150, 4, 1],\n",
       " [151, 4, 1],\n",
       " [152, 4, 1],\n",
       " [153, 5, 1],\n",
       " [154, 5, 1],\n",
       " [155, 6, 1],\n",
       " [156, 6, 1],\n",
       " [157, 6, 1],\n",
       " [158, 6, 1],\n",
       " [159, 6, 1],\n",
       " [160, 6, 1],\n",
       " [161, 6, 1],\n",
       " [162, 7, 1],\n",
       " [163, 8, 1],\n",
       " [164, 8, 1],\n",
       " [165, 8, 1],\n",
       " [166, 8, 1],\n",
       " [167, 8, 1],\n",
       " [168, 8, 1],\n",
       " [169, 8, 1],\n",
       " [170, 8, 1],\n",
       " [171, 8, 1],\n",
       " [172, 8, 1],\n",
       " [173, 8, 1],\n",
       " [174, 8, 1],\n",
       " [175, 9, 1],\n",
       " [176, 10, 1],\n",
       " [177, 10, 1],\n",
       " [178, 10, 1],\n",
       " [179, 10, 1],\n",
       " [180, 10, 1],\n",
       " [181, 10, 1],\n",
       " [182, 10, 1],\n",
       " [183, 10, 1],\n",
       " [184, 10, 1],\n",
       " [185, 10, 1],\n",
       " [186, 10, 1],\n",
       " [187, 10, 1],\n",
       " [188, 10, 1],\n",
       " [189, 10, 1],\n",
       " [190, 10, 1],\n",
       " [191, 10, 1],\n",
       " [192, 10, 1],\n",
       " [193, 10, 1],\n",
       " [194, 10, 1],\n",
       " [195, 10, 1],\n",
       " [196, 10, 1],\n",
       " [197, 10, 1],\n",
       " [198, 10, 1],\n",
       " [199, 10, 1],\n",
       " [200, 10, 1],\n",
       " [201, 11, 1],\n",
       " [202, 11, 1],\n",
       " [203, 11, 1],\n",
       " [204, 11, 1],\n",
       " [205, 11, 1],\n",
       " [206, 11, 1],\n",
       " [207, 11, 1],\n",
       " [208, 11, 1],\n",
       " [209, 11, 1],\n",
       " [210, 11, 1],\n",
       " [211, 11, 1],\n",
       " [212, 11, 1],\n",
       " [213, 11, 1],\n",
       " [214, 11, 1],\n",
       " [215, 11, 1],\n",
       " [216, 11, 1],\n",
       " [217, 11, 1],\n",
       " [218, 11, 1],\n",
       " [219, 11, 1],\n",
       " [220, 11, 1],\n",
       " [221, 11, 1],\n",
       " [222, 11, 1],\n",
       " [223, 11, 1],\n",
       " [224, 11, 1],\n",
       " [225, 11, 1],\n",
       " [226, 11, 1],\n",
       " [227, 12, 1],\n",
       " [228, 12, 1],\n",
       " [229, 12, 1],\n",
       " [230, 12, 1],\n",
       " [231, 12, 1],\n",
       " [232, 13, 1],\n",
       " [233, 13, 1],\n",
       " [234, 13, 1],\n",
       " [235, 13, 1],\n",
       " [236, 13, 1],\n",
       " [237, 13, 1],\n",
       " [238, 13, 1],\n",
       " [239, 13, 1],\n",
       " [240, 13, 1],\n",
       " [241, 13, 1],\n",
       " [242, 13, 1],\n",
       " [243, 13, 1],\n",
       " [244, 13, 1],\n",
       " [245, 13, 1],\n",
       " [246, 13, 1],\n",
       " [247, 13, 1],\n",
       " [248, 13, 1],\n",
       " [249, 13, 1],\n",
       " [250, 13, 1],\n",
       " [251, 13, 1],\n",
       " [252, 14, 1],\n",
       " [253, 15, 1],\n",
       " [254, 15, 1],\n",
       " [255, 15, 1],\n",
       " [256, 15, 1],\n",
       " [257, 15, 1],\n",
       " [258, 15, 1],\n",
       " [259, 15, 1],\n",
       " [260, 15, 1],\n",
       " [261, 15, 1],\n",
       " [262, 15, 1],\n",
       " [263, 15, 1],\n",
       " [264, 15, 1],\n",
       " [265, 15, 1],\n",
       " [266, 15, 1],\n",
       " [267, 15, 1],\n",
       " [268, 15, 1],\n",
       " [269, 15, 1],\n",
       " [270, 15, 1],\n",
       " [271, 15, 1],\n",
       " [272, 15, 1],\n",
       " [273, 15, 1],\n",
       " [274, 15, 1],\n",
       " [275, 15, 1],\n",
       " [276, 15, 1],\n",
       " [277, 15, 1],\n",
       " [278, 15, 1],\n",
       " [279, 15, 1],\n",
       " [280, 15, 1],\n",
       " [281, 15, 1],\n",
       " [282, 15, 1],\n",
       " [283, 15, 1],\n",
       " [284, 15, 1],\n",
       " [285, 15, 1],\n",
       " [286, 15, 1],\n",
       " [287, 15, 1],\n",
       " [288, 15, 1],\n",
       " [289, 15, 1],\n",
       " [290, 15, 1],\n",
       " [291, 15, 1],\n",
       " [292, 15, 1],\n",
       " [293, 15, 1],\n",
       " [294, 15, 1],\n",
       " [295, 15, 1],\n",
       " [296, 15, 1],\n",
       " [297, 15, 1],\n",
       " [298, 15, 1],\n",
       " [299, 15, 1],\n",
       " [300, 15, 1],\n",
       " [301, 15, 1],\n",
       " [302, 15, 1],\n",
       " [303, 15, 1],\n",
       " [304, 15, 1],\n",
       " [305, 15, 1],\n",
       " [306, 15, 1],\n",
       " [307, 15, 1],\n",
       " [308, 15, 1],\n",
       " [309, 15, 1],\n",
       " [310, 15, 1],\n",
       " [311, 15, 1],\n",
       " [312, 15, 1],\n",
       " [313, 15, 1],\n",
       " [314, 15, 1],\n",
       " [315, 15, 1],\n",
       " [316, 15, 1],\n",
       " [317, 15, 1],\n",
       " [318, 15, 1],\n",
       " [319, 15, 1],\n",
       " [320, 15, 1],\n",
       " [321, 15, 1],\n",
       " [322, 15, 1],\n",
       " [323, 15, 1],\n",
       " [324, 15, 1],\n",
       " [325, 15, 1],\n",
       " [326, 15, 1],\n",
       " [327, 15, 1],\n",
       " [328, 15, 1],\n",
       " [329, 15, 1],\n",
       " [330, 15, 1],\n",
       " [331, 15, 1],\n",
       " [332, 15, 1],\n",
       " [333, 15, 1],\n",
       " [334, 15, 1],\n",
       " [335, 15, 1],\n",
       " [336, 15, 1],\n",
       " [337, 15, 1],\n",
       " [338, 15, 1],\n",
       " [339, 15, 1],\n",
       " [340, 15, 1],\n",
       " [341, 15, 1],\n",
       " [342, 15, 1],\n",
       " [343, 15, 1],\n",
       " [344, 15, 1],\n",
       " [345, 15, 1],\n",
       " [346, 15, 1],\n",
       " [347, 15, 1],\n",
       " [348, 15, 1],\n",
       " [349, 15, 1],\n",
       " [350, 15, 1],\n",
       " [351, 15, 1],\n",
       " [352, 15, 1],\n",
       " [353, 15, 1],\n",
       " [354, 15, 1],\n",
       " [355, 15, 1],\n",
       " [356, 15, 1],\n",
       " [357, 15, 1],\n",
       " [358, 15, 1],\n",
       " [359, 15, 1],\n",
       " [360, 15, 1],\n",
       " [361, 16, 1],\n",
       " [362, 17, 1],\n",
       " [363, 17, 1],\n",
       " [364, 17, 1],\n",
       " [365, 17, 1],\n",
       " [366, 17, 1],\n",
       " [367, 17, 1],\n",
       " [368, 18, 1],\n",
       " [369, 18, 1],\n",
       " [370, 19, 1],\n",
       " [371, 19, 1],\n",
       " [372, 19, 1],\n",
       " [373, 19, 1],\n",
       " [374, 19, 1],\n",
       " [375, 19, 1],\n",
       " [376, 19, 1],\n",
       " [377, 19, 1],\n",
       " [378, 19, 1],\n",
       " [379, 19, 1],\n",
       " [380, 19, 1],\n",
       " [381, 19, 1],\n",
       " [382, 19, 1],\n",
       " [383, 19, 1],\n",
       " [384, 19, 1],\n",
       " [385, 19, 1],\n",
       " [386, 19, 1],\n",
       " [387, 19, 1],\n",
       " [388, 19, 1],\n",
       " [389, 19, 1],\n",
       " [390, 19, 1],\n",
       " [391, 19, 1],\n",
       " [392, 19, 1],\n",
       " [393, 19, 1],\n",
       " [394, 19, 1],\n",
       " [395, 19, 1],\n",
       " [396, 19, 1],\n",
       " [397, 19, 1],\n",
       " [398, 19, 1],\n",
       " [399, 19, 1],\n",
       " [400, 19, 1],\n",
       " [401, 19, 1],\n",
       " [402, 19, 1],\n",
       " [403, 19, 1],\n",
       " [404, 19, 1],\n",
       " [405, 19, 1],\n",
       " [406, 19, 1],\n",
       " [407, 19, 1],\n",
       " [408, 19, 1],\n",
       " [409, 19, 1],\n",
       " [410, 19, 1],\n",
       " [411, 20, 1]]"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "locus_count = 0\n",
    "locus_info = []\n",
    "for i, c in enumerate(counts):\n",
    "    ## (n-1) here because nbins in dadi sfs is n+1\n",
    "    configs.iloc[i] = [[(n-1)-x, x] for x,n in zip(c, nsamps)]\n",
    "    locus_info.extend([x, i, 1] for x in range(locus_count, locus_count+counts[c]))\n",
    "    locus_count += counts[c]\n",
    "configs\n",
    "locus_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "momi_sfs = {\"sampled_pops\":pops,\n",
    "        \"folded\":folded,\n",
    "        \"length\":length,\n",
    "        \"configs\":configs.values.tolist(),\n",
    "        \"(locus,config_id,count)\":locus_info}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"/tmp/watdo.sfs\", 'w') as outfile:\n",
    "    outfile.write(\"{}\".format(momi_sfs))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Crap below here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/isaac/easySFS/jupyter-notebooks\n",
      "Processing 2 populations - ['pop1', 'pop2']\n",
      "  Sampling one snp per locus\n",
      "Doing 1D sfs - pop1\n",
      "Doing 1D sfs - pop2\n",
      "Doing 2D sfs - ('pop1', 'pop2')\n",
      "Doing multiSFS for all pops\n",
      "Processing 2 populations - ['pop1', 'pop2']\n",
      "  Sampling one snp per locus\n",
      "Doing 1D sfs - pop1\n",
      "Doing 1D sfs - pop2\n",
      "Doing 2D sfs - ('pop1', 'pop2')\n",
      "Doing multiSFS for all pops\n",
      "momi sfs conversion requires unfolded sfs. Folded sfs will 'work' but with unknown results.\n"
     ]
    }
   ],
   "source": [
    "!pwd\n",
    "!../easySFS.py -i 25loci.vcf -p simout/simpops.txt --proj 8,8 -o 25_folded -f\n",
    "!../easySFS.py -i 25loci.vcf -p simout/simpops.txt --proj 8,8 -o 25_unfolded --unfolded -f\n",
    "dadi_to_momi(\"./25_folded/dadi/pop1-pop2.sfs\")\n",
    "dadi_to_momi(\"./25_unfolded/dadi/pop1-pop2.sfs\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/isaac/easySFS/jupyter-notebooks\n",
      "Processing 2 populations - ['pop1', 'pop2']\n",
      "  Sampling one snp per locus\n",
      "Doing 1D sfs - pop1\n",
      "Doing 1D sfs - pop2\n",
      "Doing 2D sfs - ('pop1', 'pop2')\n",
      "Doing multiSFS for all pops\n",
      "Processing 2 populations - ['pop1', 'pop2']\n",
      "  Sampling one snp per locus\n",
      "Doing 1D sfs - pop1\n",
      "Doing 1D sfs - pop2\n",
      "Doing 2D sfs - ('pop1', 'pop2')\n",
      "Doing multiSFS for all pops\n",
      "momi sfs conversion requires unfolded sfs. Folded sfs will 'work' but with unknown results.\n"
     ]
    }
   ],
   "source": [
    "!pwd\n",
    "!../easySFS.py -i 100loci.vcf -p simout/simpops.txt --proj 8,8 -o 100_folded -f\n",
    "!../easySFS.py -i 100loci.vcf -p simout/simpops.txt --proj 8,8 -o 100_unfolded --unfolded -f\n",
    "dadi_to_momi(\"./100_folded/dadi/pop1-pop2.sfs\")\n",
    "dadi_to_momi(\"./100_unfolded/dadi/pop1-pop2.sfs\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/isaac/easySFS/jupyter-notebooks\n",
      "Processing 2 populations - ['pop1', 'pop2']\n",
      "  Sampling one snp per locus\n",
      "Doing 1D sfs - pop1\n",
      "Doing 1D sfs - pop2\n",
      "Doing 2D sfs - ('pop1', 'pop2')\n",
      "Doing multiSFS for all pops\n",
      "Processing 2 populations - ['pop1', 'pop2']\n",
      "  Sampling one snp per locus\n",
      "Doing 1D sfs - pop1\n",
      "Doing 1D sfs - pop2\n",
      "Doing 2D sfs - ('pop1', 'pop2')\n",
      "Doing multiSFS for all pops\n",
      "momi sfs conversion requires unfolded sfs. Folded sfs will 'work' but with unknown results.\n"
     ]
    }
   ],
   "source": [
    "!pwd\n",
    "!../easySFS.py -i 1000loci.vcf -p simout/simpops.txt --proj 8,8 -o 1000_folded -f\n",
    "!../easySFS.py -i 1000loci.vcf -p simout/simpops.txt --proj 8,8 -o 1000_unfolded --unfolded -f\n",
    "dadi_to_momi(\"./1000_folded/dadi/pop1-pop2.sfs\")\n",
    "dadi_to_momi(\"./1000_unfolded/dadi/pop1-pop2.sfs\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}