https://github.com/tskit-dev/msprime
Tip revision: a3da1b02836537cf35564c914938c75d8ed1acec authored by Jerome Kelleher on 20 July 2016, 15:58:14 UTC
Changelog for 0.3.2.
Changelog for 0.3.2.
Tip revision: a3da1b0
large-example.py
"""
Example in which we reproduce the simulations in the GQT paper,
"Efficient compression and analysis of large genetic variation datasets"
by Layer et al.
"""
from __future__ import print_function
from __future__ import division
import time
import msprime
def main():
    """
    Run the large msprime simulation, report how long it took and dump
    the resulting tree sequence to ``tmp__NOBACKUP__/large-example.hdf5``.
    """
    # Function-scope import: only needed here to prepare the output directory.
    import os

    output_path = "tmp__NOBACKUP__/large-example.hdf5"
    # Create the output directory up front, so that a missing directory is
    # noticed (or fixed) before the simulation runs rather than only when
    # we try to write the results.
    output_dir = os.path.dirname(output_path)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # time.clock() was deprecated in Python 3.3 and removed in Python 3.8.
    # Prefer time.perf_counter() (elapsed wall-clock time) and only fall
    # back to time.clock() on interpreters that predate it (Python 2).
    timer = getattr(time, "perf_counter", None)
    if timer is None:
        timer = time.clock

    before = timer()
    # Run the actual simulations
    tree_sequence = msprime.simulate(
        sample_size=10**5,
        length=100 * 10**6,
        Ne=1e4,
        recombination_rate=1e-8,
        mutation_rate=1e-8,
        random_seed=1  # Arbitrary - make this reproducible.
    )
    duration = timer() - before
    print("Simulated 100k genomes in {0:.3f} seconds.".format(duration))
    # Write the results to file, which is small and can be quickly reloaded
    # to avoid the cost of re-running the simulation. We can reload the
    # file in a few seconds using msprime.load(filename).
    tree_sequence.dump(output_path)
# Only run the simulation when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()