{
"cells": [
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (demography.py, line 369)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"/home/isaac/miniconda2/lib/python2.7/site-packages/momi-2.1.15-py2.7-linux-x86_64.egg/momi/demography.py\"\u001b[0;36m, line \u001b[0;32m369\u001b[0m\n\u001b[0;31m print(chrom_name, 0, length, sep=\"\\t\", file=bed_f)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from itertools import product\n",
"from collections import Counter, OrderedDict\n",
"import json\n",
"import momi"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read the dadi sfs\n"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "global name 'momi' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-103-ac630a0ea97a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0msfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0;31m#dadi_to_momi(infile, verbose=True)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 81\u001b[0;31m \u001b[0mdadi_to_momi\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/home/isaac/easySFS/jupyter-notebooks/1000_folded/dadi/pop1-pop2.sfs\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 82\u001b[0m \u001b[0mdadi_to_momi\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/home/isaac/easySFS/jupyter-notebooks/1000_unfolded/dadi/pop1-pop2.sfs\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-103-ac630a0ea97a>\u001b[0m in \u001b[0;36mdadi_to_momi\u001b[0;34m(infile, outfile, verbose)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"{}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmomi_sfs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;31m## make it pretty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 76\u001b[0;31m \u001b[0msfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmomi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 77\u001b[0m \u001b[0;31m## Fold if unfolded\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfolded\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0msfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: global name 'momi' is not defined"
]
}
],
"source": [
"def dadi_to_momi(infile, outfile=None, verbose=False):\n",
" if outfile == None:\n",
" outfile = infile.split(\".sfs\")[0] + \"_momi.sfs\"\n",
" dat = open(infile).readlines()\n",
" ## Get rid of comment lines\n",
" dat = [x.strip() for x in dat if not x.startswith(\"#\")]\n",
" if not len(dat) == 3:\n",
" raise Exception(\"Malformed dadi sfs {}.\\n Must have 3 lines, yours has {}\".format(infile, len(dat)))\n",
"\n",
" ## Parse the info line into nsamps per pop (list of ints), folding flag, and pop names list (if they are given)\n",
" info = dat[0].split()\n",
" nsamps = []\n",
" ## Keep carving off values as long as they cast successfully as int\n",
" for i in info:\n",
" try:\n",
" nsamps.append(int(i))\n",
" except:\n",
" pass\n",
" nsamps = np.array(nsamps)\n",
" pops = [x.replace('\"', '') for x in info[len(nsamps)+1:]]\n",
" folded = info[len(nsamps)]\n",
" folded = False if \"un\" in folded else True\n",
" if not len(pops) == len(nsamps):\n",
" print(\"Number of populations doesn't agree with number of pop names, using generic names.\")\n",
" pops = [\"pop\"+x for x in range(len(nsamps))]\n",
" if verbose: print(\"Info nsamps={} folded={} pops={}\".format(nsamps, folded, pops))\n",
"\n",
" ## Get mask\n",
" mask = list(map(int, dat[2].split()))\n",
" if verbose: print(mask)\n",
"\n",
" ## Get sfs, and reshape based on sample sizes\n",
" sfs = list(map(float, dat[1].split()))\n",
" if verbose: print(sfs)\n",
" length = np.ma.array(sfs, mask=mask).sum()\n",
" sfs = np.array(sfs).reshape(nsamps)\n",
" if verbose: print(\"length {}\".format(length))\n",
" if verbose: print(sfs)\n",
"\n",
" ## Get counts per sfs bin\n",
" counts = Counter()\n",
" for sfsbin in product(*[range(y) for y in [x for x in nsamps]]):\n",
" ## Ignore monomorphic snps\n",
" ## nsamps minus 1 here because of the off by one diff between number\n",
" ## of bins in the sfs and actual number of samples\n",
" if sfsbin == tuple(nsamps-1) or sfsbin == tuple([0] * len(nsamps)):\n",
" continue\n",
" ## ignore zero bin counts\n",
" if sfs[sfsbin] == 0:\n",
" continue\n",
" if verbose: print(sfsbin, sfs[sfsbin]),\n",
" counts.update({sfsbin:sfs[sfsbin]})\n",
" if verbose: print(\"nbins {}\".format(len(counts)))\n",
"\n",
" ## Convert SFS bin style into momi config style\n",
" configs = pd.DataFrame(index=range(len(counts)), columns=pops)\n",
" \n",
" locus_info = []\n",
" for i, c in enumerate(counts):\n",
" ## (n-1) here because nbins in dadi sfs is n+1\n",
" cfg = np.array([[(n-1)-x, x] for x,n in zip(c, nsamps)])\n",
" configs.iloc[i] = [list(map(int, list(x))) for x in cfg]\n",
" locus_info.append([0, i, counts[c]])\n",
" if verbose: print(\"n_snps {}\".format(np.sum([x[2] for x in locus_info])))\n",
"\n",
" ## Finally build the momi style sfs dictionary and write it out\n",
" momi_sfs = {\"sampled_pops\":pops,\n",
" \"folded\":folded,\n",
" \"length\":int(length),\n",
" \"configs\":configs.values.tolist(),\n",
" \"(locus,config_id,count)\":locus_info}\n",
"\n",
" with open(outfile, 'w') as out:\n",
" out.write(\"{}\".format(json.dumps(momi_sfs)))\n",
" ## make it pretty\n",
" sfs = momi.Sfs.load(outfile)\n",
" ## Fold if unfolded\n",
" if folded: sfs = sfs.fold()\n",
" sfs.dump(outfile)\n",
"#dadi_to_momi(infile, verbose=True)\n",
"dadi_to_momi(\"/home/isaac/easySFS/jupyter-notebooks/1000_folded/dadi/pop1-pop2.sfs\", verbose=False)\n",
"dadi_to_momi(\"/home/isaac/easySFS/jupyter-notebooks/1000_unfolded/dadi/pop1-pop2.sfs\", verbose=False)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(0, array([list([7, 1]), list([5, 3])], dtype=object))\n",
"(1, array([list([5, 3]), list([8, 0])], dtype=object))\n",
"(2, array([list([8, 0]), list([0, 8])], dtype=object))\n",
"(3, array([list([8, 0]), list([1, 7])], dtype=object))\n",
"(4, array([list([6, 2]), list([2, 6])], dtype=object))\n",
"(5, array([list([7, 1]), list([2, 6])], dtype=object))\n",
"(6, array([list([5, 3]), list([1, 7])], dtype=object))\n",
"(7, array([list([6, 2]), list([3, 5])], dtype=object))\n",
"(8, array([list([8, 0]), list([5, 3])], dtype=object))\n",
"(9, array([list([4, 4]), list([8, 0])], dtype=object))\n",
"(10, array([list([5, 3]), list([5, 3])], dtype=object))\n",
"(11, array([list([8, 0]), list([2, 6])], dtype=object))\n",
"(12, array([list([7, 1]), list([3, 5])], dtype=object))\n",
"(13, array([list([5, 3]), list([0, 8])], dtype=object))\n",
"(14, array([list([6, 2]), list([6, 2])], dtype=object))\n",
"(15, array([list([5, 3]), list([3, 5])], dtype=object))\n",
"(16, array([list([4, 4]), list([7, 1])], dtype=object))\n",
"(17, array([list([7, 1]), list([7, 1])], dtype=object))\n",
"(18, array([list([6, 2]), list([2, 6])], dtype=object))\n",
"(19, array([list([8, 0]), list([4, 4])], dtype=object))\n",
"(20, array([list([6, 2]), list([0, 8])], dtype=object))\n",
"(21, array([list([7, 1]), list([4, 4])], dtype=object))\n",
"(22, array([list([8, 0]), list([3, 5])], dtype=object))\n",
"(23, array([list([6, 2]), list([7, 1])], dtype=object))\n",
"(24, array([list([4, 4]), list([6, 2])], dtype=object))\n",
"(25, array([list([7, 1]), list([8, 0])], dtype=object))\n",
"(26, array([list([8, 0]), list([0, 8])], dtype=object))\n",
"(27, array([list([5, 3]), list([3, 5])], dtype=object))\n",
"(28, array([list([8, 0]), list([7, 1])], dtype=object))\n",
"(29, array([list([7, 1]), list([0, 8])], dtype=object))\n",
"(30, array([list([5, 3]), list([2, 6])], dtype=object))\n",
"(31, array([list([6, 2]), list([1, 7])], dtype=object))\n",
"(32, array([list([8, 0]), list([6, 2])], dtype=object))\n",
"(33, array([list([6, 2]), list([8, 0])], dtype=object))\n",
"(34, array([list([5, 3]), list([4, 4])], dtype=object))\n",
"(35, array([list([6, 2]), list([4, 4])], dtype=object))\n"
]
}
],
"source": [
"d = OrderedDict([('[[7 1]\\n [5 3]]', 2), ('[[5 3]\\n [8 0]]', 50), ('[[8 0]\\n [0 8]]', 74), ('[[8 0]\\n [1 7]]', 18), ('[[6 2]\\n [2 6]]', 2), ('[[7 1]\\n [2 6]]', 1), ('[[5 3]\\n [1 7]]', 1), ('[[6 2]\\n [3 5]]', 2), ('[[8 0]\\n [5 3]]', 48), ('[[4 4]\\n [8 0]]', 32), ('[[5 3]\\n [5 3]]', 1), ('[[8 0]\\n [2 6]]', 23), ('[[7 1]\\n [3 5]]', 4), ('[[5 3]\\n [0 8]]', 38), ('[[6 2]\\n [6 2]]', 1), ('[[5 3]\\n [3 5]]', 2), ('[[4 4]\\n [7 1]]', 2), ('[[7 1]\\n [7 1]]', 2), ('[[8 0]\\n [4 4]]', 32), ('[[6 2]\\n [0 8]]', 23), ('[[7 1]\\n [4 4]]', 3), ('[[8 0]\\n [3 5]]', 28), ('[[6 2]\\n [7 1]]', 1), ('[[4 4]\\n [6 2]]', 1), ('[[7 1]\\n [8 0]]', 185), ('[[8 0]\\n [7 1]]', 185), ('[[7 1]\\n [0 8]]', 17), ('[[5 3]\\n [2 6]]', 2), ('[[6 2]\\n [1 7]]', 4), ('[[8 0]\\n [6 2]]', 66), ('[[6 2]\\n [8 0]]', 87), ('[[5 3]\\n [4 4]]', 2), ('[[6 2]\\n [4 4]]', 2)])\n",
"for i, row in configs.iterrows():\n",
" print(i, row.values)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['5 5 unfolded \"nuttalli\" \"pugetensis\"',\n",
" '776.1460592424144 115.1222178069079 24.96239616420053 6.218025728002017 1.073392240447087 107.7264883754097 40.66889634511905 18.65179284371549 6.329399414025897 1.065189279571691 26.17961131785413 19.70143248690903 12.07926204967499 4.544540090166469 0.7711291771526296 6.954173763250236 7.96663510183704 5.391325472681568 2.026077574941607 0.3284864310491528 1.410459329911039 1.890674276442391 1.269396064688684 0.4543005907520786 0.06863883286683978',\n",
" '1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dat = open(infile).readlines()\n",
"## Get rid of comment lines\n",
"dat = [x.strip() for x in dat if not x.startswith(\"#\")]\n",
"if not len(dat) == 3:\n",
" print(\"Malformed dadi sfs. Must have 3 lines, yours has {}\".format(len(dat)))\n",
"dat\n"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(array([5, 5]), 'unfolded', ['\"nuttalli\"', '\"pugetensis\"'])\n",
"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]\n",
"[776, 115, 25, 6, 1, 108, 41, 19, 6, 1, 26, 20, 12, 5, 1, 7, 8, 5, 2, 0, 1, 2, 1, 0, 0]\n",
"length 412\n",
"[[776 115 25 6 1]\n",
" [108 41 19 6 1]\n",
" [ 26 20 12 5 1]\n",
" [ 7 8 5 2 0]\n",
" [ 1 2 1 0 0]]\n"
]
}
],
"source": [
"## Get nsamps\n",
"info = dat[0].split()\n",
"nsamps = []\n",
"for i in info:\n",
" try:\n",
" nsamps.append(int(i))\n",
" except:\n",
" pass\n",
"nsamps = np.array(nsamps)\n",
"pops = info[len(nsamps)+1:]\n",
"folded = info[len(nsamps)]\n",
"if not len(pops) == len(nsamps):\n",
" print(\"Number of populations doesn't agree with number of pop names, using generic names.\")\n",
" pops = [\"pop\"+str(x) for x in range(len(nsamps))]\n",
"print(nsamps, folded, pops)\n",
"\n",
"## Get mask\n",
"mask = map(int, dat[2].split())\n",
"print(mask)\n",
"if np.sum(mask) > 2:\n",
" print(\"momi sfs conversion requires unfolded sfs. Folded sfs will 'work' but with unknown results.\")\n",
"\n",
"## Get sfs\n",
"sfs = map(int, map(np.round, map(float, dat[1].split())))\n",
"print(sfs)\n",
"length = np.ma.array(sfs, mask=mask).sum()\n",
"sfs = np.array(sfs).reshape(nsamps)\n",
"print(\"length {}\".format(length))\n",
"print(sfs)\n"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"((0, 1), 115)\n",
"((0, 2), 25)\n",
"((0, 3), 6)\n",
"((0, 4), 1)\n",
"((1, 0), 108)\n",
"((1, 1), 41)\n",
"((1, 2), 19)\n",
"((1, 3), 6)\n",
"((1, 4), 1)\n",
"((2, 0), 26)\n",
"((2, 1), 20)\n",
"((2, 2), 12)\n",
"((2, 3), 5)\n",
"((2, 4), 1)\n",
"((3, 0), 7)\n",
"((3, 1), 8)\n",
"((3, 2), 5)\n",
"((3, 3), 2)\n",
"((4, 0), 1)\n",
"((4, 1), 2)\n",
"((4, 2), 1)\n"
]
}
],
"source": [
"from collections import Counter\n",
"counts = Counter()\n",
"for sfsbin in product(*[range(y) for y in [x for x in nsamps]]):\n",
" ## Ignore monomorphic snps\n",
" ## nsamps minus 1 here because of the off by one diff between number\n",
" ## of bins in the sfs and actual number of samples\n",
" if sfsbin == tuple(nsamps-1) or sfsbin == tuple([0] * len(nsamps)):\n",
" continue\n",
" ## ignore zero bin counts\n",
" if sfs[sfsbin] == 0:\n",
" continue\n",
" print(sfsbin, sfs[sfsbin])\n",
" counts.update({sfsbin:sfs[sfsbin]})\n",
"counts\n",
"configs = pd.DataFrame(index=range(len(counts)), columns=pops)\n",
"\n",
"#print(configs)\n"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[0, 0, 1],\n",
" [1, 0, 1],\n",
" [2, 0, 1],\n",
" [3, 0, 1],\n",
" [4, 0, 1],\n",
" [5, 0, 1],\n",
" [6, 0, 1],\n",
" [7, 0, 1],\n",
" [8, 0, 1],\n",
" [9, 0, 1],\n",
" [10, 0, 1],\n",
" [11, 0, 1],\n",
" [12, 0, 1],\n",
" [13, 0, 1],\n",
" [14, 0, 1],\n",
" [15, 0, 1],\n",
" [16, 0, 1],\n",
" [17, 0, 1],\n",
" [18, 0, 1],\n",
" [19, 0, 1],\n",
" [20, 0, 1],\n",
" [21, 0, 1],\n",
" [22, 0, 1],\n",
" [23, 0, 1],\n",
" [24, 0, 1],\n",
" [25, 0, 1],\n",
" [26, 0, 1],\n",
" [27, 0, 1],\n",
" [28, 0, 1],\n",
" [29, 0, 1],\n",
" [30, 0, 1],\n",
" [31, 0, 1],\n",
" [32, 0, 1],\n",
" [33, 0, 1],\n",
" [34, 0, 1],\n",
" [35, 0, 1],\n",
" [36, 0, 1],\n",
" [37, 0, 1],\n",
" [38, 0, 1],\n",
" [39, 0, 1],\n",
" [40, 0, 1],\n",
" [41, 0, 1],\n",
" [42, 0, 1],\n",
" [43, 0, 1],\n",
" [44, 0, 1],\n",
" [45, 0, 1],\n",
" [46, 0, 1],\n",
" [47, 0, 1],\n",
" [48, 0, 1],\n",
" [49, 0, 1],\n",
" [50, 0, 1],\n",
" [51, 0, 1],\n",
" [52, 0, 1],\n",
" [53, 0, 1],\n",
" [54, 0, 1],\n",
" [55, 0, 1],\n",
" [56, 0, 1],\n",
" [57, 0, 1],\n",
" [58, 0, 1],\n",
" [59, 0, 1],\n",
" [60, 0, 1],\n",
" [61, 0, 1],\n",
" [62, 0, 1],\n",
" [63, 0, 1],\n",
" [64, 0, 1],\n",
" [65, 0, 1],\n",
" [66, 0, 1],\n",
" [67, 0, 1],\n",
" [68, 0, 1],\n",
" [69, 0, 1],\n",
" [70, 0, 1],\n",
" [71, 0, 1],\n",
" [72, 0, 1],\n",
" [73, 0, 1],\n",
" [74, 0, 1],\n",
" [75, 0, 1],\n",
" [76, 0, 1],\n",
" [77, 0, 1],\n",
" [78, 0, 1],\n",
" [79, 0, 1],\n",
" [80, 0, 1],\n",
" [81, 0, 1],\n",
" [82, 0, 1],\n",
" [83, 0, 1],\n",
" [84, 0, 1],\n",
" [85, 0, 1],\n",
" [86, 0, 1],\n",
" [87, 0, 1],\n",
" [88, 0, 1],\n",
" [89, 0, 1],\n",
" [90, 0, 1],\n",
" [91, 0, 1],\n",
" [92, 0, 1],\n",
" [93, 0, 1],\n",
" [94, 0, 1],\n",
" [95, 0, 1],\n",
" [96, 0, 1],\n",
" [97, 0, 1],\n",
" [98, 0, 1],\n",
" [99, 0, 1],\n",
" [100, 0, 1],\n",
" [101, 0, 1],\n",
" [102, 0, 1],\n",
" [103, 0, 1],\n",
" [104, 0, 1],\n",
" [105, 0, 1],\n",
" [106, 0, 1],\n",
" [107, 0, 1],\n",
" [108, 0, 1],\n",
" [109, 0, 1],\n",
" [110, 0, 1],\n",
" [111, 0, 1],\n",
" [112, 0, 1],\n",
" [113, 0, 1],\n",
" [114, 0, 1],\n",
" [115, 1, 1],\n",
" [116, 1, 1],\n",
" [117, 1, 1],\n",
" [118, 1, 1],\n",
" [119, 1, 1],\n",
" [120, 1, 1],\n",
" [121, 1, 1],\n",
" [122, 1, 1],\n",
" [123, 1, 1],\n",
" [124, 1, 1],\n",
" [125, 1, 1],\n",
" [126, 1, 1],\n",
" [127, 1, 1],\n",
" [128, 1, 1],\n",
" [129, 1, 1],\n",
" [130, 1, 1],\n",
" [131, 1, 1],\n",
" [132, 1, 1],\n",
" [133, 1, 1],\n",
" [134, 2, 1],\n",
" [135, 2, 1],\n",
" [136, 2, 1],\n",
" [137, 2, 1],\n",
" [138, 2, 1],\n",
" [139, 3, 1],\n",
" [140, 3, 1],\n",
" [141, 3, 1],\n",
" [142, 3, 1],\n",
" [143, 3, 1],\n",
" [144, 3, 1],\n",
" [145, 4, 1],\n",
" [146, 4, 1],\n",
" [147, 4, 1],\n",
" [148, 4, 1],\n",
" [149, 4, 1],\n",
" [150, 4, 1],\n",
" [151, 4, 1],\n",
" [152, 4, 1],\n",
" [153, 5, 1],\n",
" [154, 5, 1],\n",
" [155, 6, 1],\n",
" [156, 6, 1],\n",
" [157, 6, 1],\n",
" [158, 6, 1],\n",
" [159, 6, 1],\n",
" [160, 6, 1],\n",
" [161, 6, 1],\n",
" [162, 7, 1],\n",
" [163, 8, 1],\n",
" [164, 8, 1],\n",
" [165, 8, 1],\n",
" [166, 8, 1],\n",
" [167, 8, 1],\n",
" [168, 8, 1],\n",
" [169, 8, 1],\n",
" [170, 8, 1],\n",
" [171, 8, 1],\n",
" [172, 8, 1],\n",
" [173, 8, 1],\n",
" [174, 8, 1],\n",
" [175, 9, 1],\n",
" [176, 10, 1],\n",
" [177, 10, 1],\n",
" [178, 10, 1],\n",
" [179, 10, 1],\n",
" [180, 10, 1],\n",
" [181, 10, 1],\n",
" [182, 10, 1],\n",
" [183, 10, 1],\n",
" [184, 10, 1],\n",
" [185, 10, 1],\n",
" [186, 10, 1],\n",
" [187, 10, 1],\n",
" [188, 10, 1],\n",
" [189, 10, 1],\n",
" [190, 10, 1],\n",
" [191, 10, 1],\n",
" [192, 10, 1],\n",
" [193, 10, 1],\n",
" [194, 10, 1],\n",
" [195, 10, 1],\n",
" [196, 10, 1],\n",
" [197, 10, 1],\n",
" [198, 10, 1],\n",
" [199, 10, 1],\n",
" [200, 10, 1],\n",
" [201, 11, 1],\n",
" [202, 11, 1],\n",
" [203, 11, 1],\n",
" [204, 11, 1],\n",
" [205, 11, 1],\n",
" [206, 11, 1],\n",
" [207, 11, 1],\n",
" [208, 11, 1],\n",
" [209, 11, 1],\n",
" [210, 11, 1],\n",
" [211, 11, 1],\n",
" [212, 11, 1],\n",
" [213, 11, 1],\n",
" [214, 11, 1],\n",
" [215, 11, 1],\n",
" [216, 11, 1],\n",
" [217, 11, 1],\n",
" [218, 11, 1],\n",
" [219, 11, 1],\n",
" [220, 11, 1],\n",
" [221, 11, 1],\n",
" [222, 11, 1],\n",
" [223, 11, 1],\n",
" [224, 11, 1],\n",
" [225, 11, 1],\n",
" [226, 11, 1],\n",
" [227, 12, 1],\n",
" [228, 12, 1],\n",
" [229, 12, 1],\n",
" [230, 12, 1],\n",
" [231, 12, 1],\n",
" [232, 13, 1],\n",
" [233, 13, 1],\n",
" [234, 13, 1],\n",
" [235, 13, 1],\n",
" [236, 13, 1],\n",
" [237, 13, 1],\n",
" [238, 13, 1],\n",
" [239, 13, 1],\n",
" [240, 13, 1],\n",
" [241, 13, 1],\n",
" [242, 13, 1],\n",
" [243, 13, 1],\n",
" [244, 13, 1],\n",
" [245, 13, 1],\n",
" [246, 13, 1],\n",
" [247, 13, 1],\n",
" [248, 13, 1],\n",
" [249, 13, 1],\n",
" [250, 13, 1],\n",
" [251, 13, 1],\n",
" [252, 14, 1],\n",
" [253, 15, 1],\n",
" [254, 15, 1],\n",
" [255, 15, 1],\n",
" [256, 15, 1],\n",
" [257, 15, 1],\n",
" [258, 15, 1],\n",
" [259, 15, 1],\n",
" [260, 15, 1],\n",
" [261, 15, 1],\n",
" [262, 15, 1],\n",
" [263, 15, 1],\n",
" [264, 15, 1],\n",
" [265, 15, 1],\n",
" [266, 15, 1],\n",
" [267, 15, 1],\n",
" [268, 15, 1],\n",
" [269, 15, 1],\n",
" [270, 15, 1],\n",
" [271, 15, 1],\n",
" [272, 15, 1],\n",
" [273, 15, 1],\n",
" [274, 15, 1],\n",
" [275, 15, 1],\n",
" [276, 15, 1],\n",
" [277, 15, 1],\n",
" [278, 15, 1],\n",
" [279, 15, 1],\n",
" [280, 15, 1],\n",
" [281, 15, 1],\n",
" [282, 15, 1],\n",
" [283, 15, 1],\n",
" [284, 15, 1],\n",
" [285, 15, 1],\n",
" [286, 15, 1],\n",
" [287, 15, 1],\n",
" [288, 15, 1],\n",
" [289, 15, 1],\n",
" [290, 15, 1],\n",
" [291, 15, 1],\n",
" [292, 15, 1],\n",
" [293, 15, 1],\n",
" [294, 15, 1],\n",
" [295, 15, 1],\n",
" [296, 15, 1],\n",
" [297, 15, 1],\n",
" [298, 15, 1],\n",
" [299, 15, 1],\n",
" [300, 15, 1],\n",
" [301, 15, 1],\n",
" [302, 15, 1],\n",
" [303, 15, 1],\n",
" [304, 15, 1],\n",
" [305, 15, 1],\n",
" [306, 15, 1],\n",
" [307, 15, 1],\n",
" [308, 15, 1],\n",
" [309, 15, 1],\n",
" [310, 15, 1],\n",
" [311, 15, 1],\n",
" [312, 15, 1],\n",
" [313, 15, 1],\n",
" [314, 15, 1],\n",
" [315, 15, 1],\n",
" [316, 15, 1],\n",
" [317, 15, 1],\n",
" [318, 15, 1],\n",
" [319, 15, 1],\n",
" [320, 15, 1],\n",
" [321, 15, 1],\n",
" [322, 15, 1],\n",
" [323, 15, 1],\n",
" [324, 15, 1],\n",
" [325, 15, 1],\n",
" [326, 15, 1],\n",
" [327, 15, 1],\n",
" [328, 15, 1],\n",
" [329, 15, 1],\n",
" [330, 15, 1],\n",
" [331, 15, 1],\n",
" [332, 15, 1],\n",
" [333, 15, 1],\n",
" [334, 15, 1],\n",
" [335, 15, 1],\n",
" [336, 15, 1],\n",
" [337, 15, 1],\n",
" [338, 15, 1],\n",
" [339, 15, 1],\n",
" [340, 15, 1],\n",
" [341, 15, 1],\n",
" [342, 15, 1],\n",
" [343, 15, 1],\n",
" [344, 15, 1],\n",
" [345, 15, 1],\n",
" [346, 15, 1],\n",
" [347, 15, 1],\n",
" [348, 15, 1],\n",
" [349, 15, 1],\n",
" [350, 15, 1],\n",
" [351, 15, 1],\n",
" [352, 15, 1],\n",
" [353, 15, 1],\n",
" [354, 15, 1],\n",
" [355, 15, 1],\n",
" [356, 15, 1],\n",
" [357, 15, 1],\n",
" [358, 15, 1],\n",
" [359, 15, 1],\n",
" [360, 15, 1],\n",
" [361, 16, 1],\n",
" [362, 17, 1],\n",
" [363, 17, 1],\n",
" [364, 17, 1],\n",
" [365, 17, 1],\n",
" [366, 17, 1],\n",
" [367, 17, 1],\n",
" [368, 18, 1],\n",
" [369, 18, 1],\n",
" [370, 19, 1],\n",
" [371, 19, 1],\n",
" [372, 19, 1],\n",
" [373, 19, 1],\n",
" [374, 19, 1],\n",
" [375, 19, 1],\n",
" [376, 19, 1],\n",
" [377, 19, 1],\n",
" [378, 19, 1],\n",
" [379, 19, 1],\n",
" [380, 19, 1],\n",
" [381, 19, 1],\n",
" [382, 19, 1],\n",
" [383, 19, 1],\n",
" [384, 19, 1],\n",
" [385, 19, 1],\n",
" [386, 19, 1],\n",
" [387, 19, 1],\n",
" [388, 19, 1],\n",
" [389, 19, 1],\n",
" [390, 19, 1],\n",
" [391, 19, 1],\n",
" [392, 19, 1],\n",
" [393, 19, 1],\n",
" [394, 19, 1],\n",
" [395, 19, 1],\n",
" [396, 19, 1],\n",
" [397, 19, 1],\n",
" [398, 19, 1],\n",
" [399, 19, 1],\n",
" [400, 19, 1],\n",
" [401, 19, 1],\n",
" [402, 19, 1],\n",
" [403, 19, 1],\n",
" [404, 19, 1],\n",
" [405, 19, 1],\n",
" [406, 19, 1],\n",
" [407, 19, 1],\n",
" [408, 19, 1],\n",
" [409, 19, 1],\n",
" [410, 19, 1],\n",
" [411, 20, 1]]"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"locus_count = 0\n",
"locus_info = []\n",
"for i, c in enumerate(counts):\n",
" ## (n-1) here because nbins in dadi sfs is n+1\n",
" configs.iloc[i] = [[(n-1)-x, x] for x,n in zip(c, nsamps)]\n",
" locus_info.extend([x, i, 1] for x in range(locus_count, locus_count+counts[c]))\n",
" locus_count += counts[c]\n",
"configs\n",
"locus_info"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"momi_sfs = {\"sampled_pops\":pops,\n",
" \"folded\":folded,\n",
" \"length\":length,\n",
" \"configs\":configs.values.tolist(),\n",
" \"(locus,config_id,count)\":locus_info}\n"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
"with open(\"/tmp/watdo.sfs\", 'w') as outfile:\n",
" outfile.write(\"{}\".format(momi_sfs))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Crap below here"
]
},
{
"cell_type": "code",
"execution_count": 171,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/isaac/easySFS/jupyter-notebooks\n",
"Processing 2 populations - ['pop1', 'pop2']\n",
" Sampling one snp per locus\n",
"Doing 1D sfs - pop1\n",
"Doing 1D sfs - pop2\n",
"Doing 2D sfs - ('pop1', 'pop2')\n",
"Doing multiSFS for all pops\n",
"Processing 2 populations - ['pop1', 'pop2']\n",
" Sampling one snp per locus\n",
"Doing 1D sfs - pop1\n",
"Doing 1D sfs - pop2\n",
"Doing 2D sfs - ('pop1', 'pop2')\n",
"Doing multiSFS for all pops\n",
"momi sfs conversion requires unfolded sfs. Folded sfs will 'work' but with unknown results.\n"
]
}
],
"source": [
"!pwd\n",
"!../easySFS.py -i 25loci.vcf -p simout/simpops.txt --proj 8,8 -o 25_folded -f\n",
"!../easySFS.py -i 25loci.vcf -p simout/simpops.txt --proj 8,8 -o 25_unfolded --unfolded -f\n",
"dadi_to_momi(\"./25_folded/dadi/pop1-pop2.sfs\")\n",
"dadi_to_momi(\"./25_unfolded/dadi/pop1-pop2.sfs\")\n"
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/isaac/easySFS/jupyter-notebooks\n",
"Processing 2 populations - ['pop1', 'pop2']\n",
" Sampling one snp per locus\n",
"Doing 1D sfs - pop1\n",
"Doing 1D sfs - pop2\n",
"Doing 2D sfs - ('pop1', 'pop2')\n",
"Doing multiSFS for all pops\n",
"Processing 2 populations - ['pop1', 'pop2']\n",
" Sampling one snp per locus\n",
"Doing 1D sfs - pop1\n",
"Doing 1D sfs - pop2\n",
"Doing 2D sfs - ('pop1', 'pop2')\n",
"Doing multiSFS for all pops\n",
"momi sfs conversion requires unfolded sfs. Folded sfs will 'work' but with unknown results.\n"
]
}
],
"source": [
"!pwd\n",
"!../easySFS.py -i 100loci.vcf -p simout/simpops.txt --proj 8,8 -o 100_folded -f\n",
"!../easySFS.py -i 100loci.vcf -p simout/simpops.txt --proj 8,8 -o 100_unfolded --unfolded -f\n",
"dadi_to_momi(\"./100_folded/dadi/pop1-pop2.sfs\")\n",
"dadi_to_momi(\"./100_unfolded/dadi/pop1-pop2.sfs\")\n"
]
},
{
"cell_type": "code",
"execution_count": 173,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/isaac/easySFS/jupyter-notebooks\n",
"Processing 2 populations - ['pop1', 'pop2']\n",
" Sampling one snp per locus\n",
"Doing 1D sfs - pop1\n",
"Doing 1D sfs - pop2\n",
"Doing 2D sfs - ('pop1', 'pop2')\n",
"Doing multiSFS for all pops\n",
"Processing 2 populations - ['pop1', 'pop2']\n",
" Sampling one snp per locus\n",
"Doing 1D sfs - pop1\n",
"Doing 1D sfs - pop2\n",
"Doing 2D sfs - ('pop1', 'pop2')\n",
"Doing multiSFS for all pops\n",
"momi sfs conversion requires unfolded sfs. Folded sfs will 'work' but with unknown results.\n"
]
}
],
"source": [
"!pwd\n",
"!../easySFS.py -i 1000loci.vcf -p simout/simpops.txt --proj 8,8 -o 1000_folded -f\n",
"!../easySFS.py -i 1000loci.vcf -p simout/simpops.txt --proj 8,8 -o 1000_unfolded --unfolded -f\n",
"dadi_to_momi(\"./1000_folded/dadi/pop1-pop2.sfs\")\n",
"dadi_to_momi(\"./1000_unfolded/dadi/pop1-pop2.sfs\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}