Raw File
simulate_genome.cpp
// ==========================================================================
//                         Mason - A Read Simulator
// ==========================================================================
// Copyright (c) 2006-2024, Knut Reinert, FU Berlin
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//     * Neither the name of Knut Reinert or the FU Berlin nor the names of
//       its contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// ==========================================================================
// Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
// ==========================================================================

#include "simulate_genome.h"
#include <random>

// ----------------------------------------------------------------------------
// Function simulateGenome()
// ----------------------------------------------------------------------------

// Simulate a genome given the simulation options.
//
// The resulting sequence is written to stream.

int simulateGenome(seqan2::SeqFileOut & stream, MasonSimulateGenomeOptions const & options)
{
    // Initialize std generator and distribution
    std::mt19937 generator(options.seed);
    std::uniform_real_distribution<double> distribution(0, 1);
    auto randomNumber = std::bind ( distribution, generator );

    seqan2::CharString id;
    seqan2::Dna5String contig;

    for (unsigned i = 0; i < length(options.contigLengths); ++i)
    {
        clear(id);
        clear(contig);

        std::stringstream ss;
        ss << (i + 1);
        id = ss.str();

        std::cerr << "contig " << id << " ...";

        for (int64_t j = 0; j < options.contigLengths[i];)
        {
            double x = randomNumber();
            if (x < 0.25)
                appendValue(contig, 'A');
            else if (x < 0.5)
                appendValue(contig, 'C');
            else if (x < 0.75)
                appendValue(contig, 'G');
            else if (x < 1.0)
                appendValue(contig, 'T');
            else
                continue;  // Redraw.
            ++j;
        }

        try
        {
            writeRecord(stream, id, contig);
        }
        catch (seqan2::IOError const & ioErr)
        {
            std::cerr << "\nERROR: Could not write contig " << id << " to output file.\n";
            return 1;
        }

        std::cerr << " DONE\n";
    }

    return 0;
}

// ----------------------------------------------------------------------------
// Function simulateGenome()
// ----------------------------------------------------------------------------

int simulateGenome(char const * filename, MasonSimulateGenomeOptions const & options)
{
    seqan2::SeqFileOut stream;
    if (!open(stream, filename))
    {
        std::cerr << "ERROR: Could not open " << filename << "for writing!\n";
        return 1;
    }

    return simulateGenome(stream, options);
}
back to top