# Source: https://github.com/tskit-dev/msprime
# Raw File
# Tip revision: a3da1b02836537cf35564c914938c75d8ed1acec authored by Jerome Kelleher on 20 July 2016, 15:58:14 UTC
# Changelog for 0.3.2.
# Tip revision: a3da1b0
# File: large-example.py
"""
Example in which we reproduce the simulations in the GQT paper,
"Efficient compression and analysis of large genetic variation datasets"
by Layer et al.
"""
from __future__ import print_function
from __future__ import division

import time

import msprime


def main():
    """
    Run one large msprime simulation, report its run time and save it.

    Simulates a sample of size 10**5 over a sequence of length 100 * 10**6,
    prints how many seconds the simulation took, and dumps the resulting
    tree sequence to ``tmp__NOBACKUP__/large-example.hdf5``. The output
    directory is created first if it does not already exist.
    """
    # Imported locally so this function does not depend on a file-level
    # import being added.
    import os

    output_file = "tmp__NOBACKUP__/large-example.hdf5"
    # Make sure the output directory exists *before* starting the expensive
    # simulation, so that a missing directory cannot cost us the results.
    output_dir = os.path.dirname(output_file)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # time.clock() was deprecated in Python 3.3 and removed in 3.8. Prefer
    # time.process_time() (CPU time of this process, which is what
    # time.clock() reported on Unix) and only fall back to time.clock() on
    # interpreters that predate process_time().
    timer = getattr(time, "process_time", None)
    if timer is None:
        timer = time.clock

    before = timer()
    # Run the actual simulations
    tree_sequence = msprime.simulate(
        sample_size=10**5,
        length=100 * 10**6,
        Ne=1e4,
        recombination_rate=1e-8,
        mutation_rate=1e-8,
        random_seed=1  # Arbitrary - make this reproducible.
    )
    duration = timer() - before
    print("Simulated 100k genomes in {0:.3f} seconds.".format(duration))

    # Write the results to file, which is small and can be quickly reloaded
    # to avoid the cost of re-running the simulation. We can reload the
    # file in a few seconds using msprime.load(filename).
    tree_sequence.dump(output_file)


# Only run the simulation when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
# (end of file; "back to top" page footer from the web listing)