https://github.com/AlexanderDilthey/MHC-PRG
Raw File
Tip revision: e59943adb8855532573a6c276651efad1e18a6b1 authored by Alexander Dilthey on 18 December 2018, 10:20:48 UTC
Update HLA-PRG.md
Tip revision: e59943a
MHC-PRG.cpp
//============================================================================
// Name        : MHC-PRG.cpp
// Author      : 
// Version     :
// Copyright   : Your copyright notice
// Description : Hello World in C++, Ansi-style
//============================================================================

#include <iostream>
#include <fstream>
#include <cstdlib>
#include <string>
#include <algorithm>
#include <vector>
#include <cmath>
#include <assert.h>
#include <omp.h>
#include <iomanip>
#include <boost/algorithm/string.hpp>

#include "MHC-PRG.h"
#include "LocusCodeAllocation.h"
#include "Data/HaplotypePanel.h"
#include "Data/GenotypePanel.h"
#include "Graph/Graph.h"
#include "Graph/Node.h"
#include "Graph/HMM.h"
#include "Graph/MultiHMM.h"
#include "Graph/AlphaHMM.h"
#include "NextGen/NextGen.h"
#include "NextGen/simulationSuite.h"

#include "NextGen/Validation.h"
#include "NextGen/HLAtypes.h"

#include "GraphAligner/GraphAligner.h"
#include "GraphAligner/AlignerTests.h"
#include "GraphAlignerUnique/UniqueAlignerTests.h"

#include "readFilter/readFilter.h"
#include "readFilter/filterLongOverlappingReads.h"

#include "Utilities.h"

using namespace std;

void testing();
Config CONFIG;
double epsilon = 1.0e-7;

struct NodePair {
	Node* n1;
	Node* n2;
};


int main(int argc, char *argv[])
{
	CONFIG.threads = 40;
	CONFIG.quiet = true;
	
	vector<string> arguments (argv + 1, argv + argc + !argc);
	if((arguments.size() > 0) && (arguments.at(0) != "domode"))
		errEx("Please go into domode!");
	
	if((arguments.size() > 0) && (arguments.at(1) == "loadGraph"))
	{

	}
	else if((arguments.size() > 0) && (arguments.at(1) == "simulateGraph"))
	{
		string graph_file = arguments.at(2);
		Graph* g = new Graph();
		g->readFromFile(graph_file);
		g->simulateHaplotypes(500000);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "gappifyGraph"))
	{
		string graph_file = arguments.at(2);
		Graph* g = new Graph();
		g->readFromFile(graph_file);
		g->makeEdgesGaps(0.1);
		g->writeToFile(graph_file+".gaps");
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "determineRequiredKMers"))
	{
		omp_set_num_threads(CONFIG.threads);

		string graph_file = arguments.at(2);
		string output_file = arguments.at(3);

		LargeGraph* kMerG = new LargeGraph();
		kMerG->readFromFile(graph_file);

		vector<string> requiredKMers = kMerG->requiredKMers();

		ofstream output;
		output.open (output_file.c_str(), ios::out | ios::trunc);
		if (output.is_open())
		{
			for(int i = 0; i < (int)requiredKMers.size(); i++)
			{
				output << requiredKMers.at(i);
				if(i != (requiredKMers.size()-1))
				{
					output << "\n";
				}
			}
		}
		else
		{
			errEx("Cannot open for writing: "+output_file);
		}
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "describekMerGraph"))
	{
		string graph_file = arguments.at(2);
		string temp_dir = arguments.at(3);
		string temp_label = arguments.at(4);

		bool output_kMer_levels = false;
		std::string referenceGenomeCortexGraph;

		int k = 31;
		
		for(unsigned int i = 5; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--output_kMer_levels")
			{
				output_kMer_levels = true;
			}
			if(arguments.at(i) == "--referenceGenomeCortexGraph")
			{
				referenceGenomeCortexGraph = arguments.at(i+1);
			}
			if(arguments.at(i) == "--k")
			{
				k = Utilities::StrtoI(arguments.at(i+1));
			}			
		}

		if(referenceGenomeCortexGraph.length())
		{
			if(! output_kMer_levels)
			{
				throw std::runtime_error("If you specify --referenceGenomeCortexGraph, please activate also --output_kMer_levels");
			}
		}

		if(k == 25)
		{
			describeGraph_25(graph_file, temp_dir, temp_label, output_kMer_levels, referenceGenomeCortexGraph);
		}
		else
		{
			assert(k == 31);
			describeGraph(graph_file, temp_dir, temp_label, output_kMer_levels, referenceGenomeCortexGraph);
		}
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "plotGraph"))
	{
		string graph_file = arguments.at(2);
		string start_level_str = arguments.at(3);
		string stop_level_str = arguments.at(4);
		string graphviz_output_file = arguments.at(5);

		int start_level = Utilities::StrtoI(start_level_str);
		int stop_level = Utilities::StrtoI(stop_level_str);
		assert(stop_level > start_level);

		Graph* g = new Graph();
		g->readFromFile(graph_file);
		g->graphViz(start_level, stop_level, graphviz_output_file);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "plotGraph2"))
	{
		string graph_file = arguments.at(2);
		string locus_string  = arguments.at(3);
		string graphviz_output_file = arguments.at(4);


		Graph* g = new Graph();
		g->readFromFile(graph_file);
		g->graphViz2(locus_string, graphviz_output_file);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "describeNucleotideGraph"))
	{
		string graph_file = arguments.at(2);
		string temp_dir = arguments.at(3);
		string temp_label = arguments.at(4);

		describeNucleotideGraph(graph_file, temp_dir, temp_label);
	}	
	else if((arguments.size() > 0) && (arguments.at(1) == "simulationSuite"))
	{
		int threads = 40;
		int genotypingMode = -1;
		bool error = true;

		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--threads")
			{
				threads = Utilities::StrtoI(arguments.at(i+1));
			}
			if(arguments.at(i) == "--genotypingMode")
			{
				genotypingMode = Utilities::StrtoI(arguments.at(i+1));
			}
			if(arguments.at(i) == "--error")
			{
				error = (bool)Utilities::StrtoI(arguments.at(i+1));
			}
		}

		omp_set_num_threads(threads);

		CONFIG.threads=threads;

		string graph_file = arguments.at(2);
		string temp_dir = arguments.at(3);
		string temp_label = arguments.at(4);

		std::string filename_qualityMatrix = "GraphAlignerUnique/predefinedQualityMatrices/I101_NA12878.txt";

		simulationSuite(graph_file, temp_dir, temp_label, genotypingMode, error, filename_qualityMatrix);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "testGraphAligner"))
	{
		//std::cout << "domode testGraphAligner!\n" << std::flush;
		//testGraphAligner();
		//testDiagonalInverseExtendAlignment();
		//testChainFindingAndExtension();
		//testExtensionStop();
		//testSeedAndExtend_Algorithm();
		//test_Simple_longRange_SeedAndExtend();
		//test_Simple_longRange_SeedAndExtend_2();
		//test_Simple_longRange_SeedAndExtend_3();
		//test_Simple_longRange_SeedAndExtend_4();
		// testSeedAndExtend();
		//GraphAlignerUnique::tests::testChains();
		//GraphAlignerUnique::tests::testSeedAndExtend();
		//GraphAlignerUnique::tests::testSeedAndExtend_local();
		GraphAlignerUnique::tests::testSeedAndExtend_short();
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "testGraphAligner_realGraph"))
	{
		string graph_file = arguments.at(2);

		std::string filename_qualityMatrix = "GraphAlignerUnique/predefinedQualityMatrices/I101_NA12878.txt";

		double insertSize_mean = 200;
		double insertSize_sd = 15;
		int read_length = 101;
		bool longBadReads = false;
		bool greedyLocalExtension = false;

		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--filename_qualityMatrix")
			{
				filename_qualityMatrix = arguments.at(i+1);
			}

			if(arguments.at(i) == "--insertSize_mean")
			{
				insertSize_mean = Utilities::StrtoD(arguments.at(i+1));
			}
			if(arguments.at(i) == "--insertSize_sd")
			{
				insertSize_sd = Utilities::StrtoD(arguments.at(i+1));
			}

			if(arguments.at(i) == "--readLength")
			{
				read_length = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--longBadReads")
			{
				longBadReads = true;
				assert(read_length == 250);
			}


			if(arguments.at(i) == "--greedyLocalExtension")
			{
				greedyLocalExtension = true;
			}
		}

		assert(insertSize_mean > 0);
		assert(insertSize_mean < 1000);
		assert(insertSize_sd >= 0);
		assert(insertSize_sd <= 20);

		GraphAlignerUnique::tests::testSeedAndExtend_local_realGraph(graph_file, read_length, insertSize_mean, insertSize_sd, filename_qualityMatrix, longBadReads, greedyLocalExtension);
	}

	else if((arguments.size() > 0) && (arguments.at(1) == "nextGenValidationTest"))
	{
		string viterbi_diploid_gS = "../tmp/kMerCount__GS_nextGen_varigraph_new_AA02O9Q_Z1_31_required.binaryCount.viterbiGenomeString";
		string deBruijnGraph = "Test/test_refgenome_31.ctx";
		string VCFfile = "Test/test_variants.vcf";
		string referenceGenomeFile = "Test/test_refgenome.fa";

		testValidation(viterbi_diploid_gS, deBruijnGraph, 31, VCFfile, referenceGenomeFile);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "nextGenValidation"))
	{
		bool labelOnly = false;
		omp_set_num_threads(40);
		CONFIG.threads=40;

		int cortex_height = 26;
		int cortex_width = 140;
		int kMer_size = 31;

		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--cortex_height")
			{
				cortex_height = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--cortex_width")
			{
				cortex_width = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--kmer")
			{
				kMer_size = Utilities::StrtoI(arguments.at(i+1));
			}
		}

		string graph_file = arguments.at(2);
		string sampleCount = arguments.at(3);
		int chromotypes_start = Utilities::StrtoI(arguments.at(4));
		int chromotypes_stop = Utilities::StrtoI(arguments.at(5));
		string vcfFile = arguments.at(6);
		int vcf_start = Utilities::StrtoI(arguments.at(7));
		int vcf_stop = Utilities::StrtoI(arguments.at(8));
		string referenceGenome = arguments.at(9);
		string deBruijnGraph = arguments.at(10);
		string outputDirectory = arguments.at(11);
		string contigsFile_fasta = arguments.at(12);
		std::string graphDir = arguments.at(13);

		std::cout << "Amended haplotypes versus VCF...\n==============================================================\n" << std::flush;
		
		string output_file_amendedHaplotypes = sampleCount+".amendedHaplotypes";
		string output_file_viterbi_genomeString = sampleCount+".viterbiGenomeString";

		validateAllChromotypesVsVCF(output_file_viterbi_genomeString, output_file_amendedHaplotypes, chromotypes_start, chromotypes_stop, vcfFile, vcf_start, vcf_stop, referenceGenome, deBruijnGraph, kMer_size, cortex_height, cortex_width, outputDirectory, graphDir);

		//		validateAmendedChromotypesVsVCF(output_file_amendedHaplotypes, chromotypes_start, chromotypes_stop, vcfFile, vcf_start, vcf_stop, referenceGenome, deBruijnGraph, kMer_size, cortex_height, cortex_width);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "validateCompleteVCF"))
	{
		bool labelOnly = false;
		omp_set_num_threads(40);
		CONFIG.threads=40;

		int cortex_height = 26;
		int cortex_width = 140;
		int kMer_size = 31;

		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--cortex_height")
			{
				cortex_height = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--cortex_width")
			{
				cortex_width = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--kmer")
			{
				kMer_size = Utilities::StrtoI(arguments.at(i+1));
			}
		}

		string vcfFile = arguments.at(2);
		string referenceGenome = arguments.at(3);
		string deBruijnGraph = arguments.at(4);
		string outputDirectory = arguments.at(5);

		validateCompleteVCF(vcfFile, referenceGenome, deBruijnGraph, kMer_size, cortex_height, cortex_width, outputDirectory);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "alignShortReadsToHLAGraph"))
	{
		bool labelOnly = false;
		// omp_set_num_threads(40);
		CONFIG.threads=40;


		std::string input_FASTQ;
		std::string graph_dir;
		std::string referenceGenome;
		std::string IS_mean;
		std::string IS_sd;
		
		bool debug = false;
		bool MiSeq250bp = false;
		
		for(unsigned int i = 0; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--input_FASTQ")
			{
				input_FASTQ = arguments.at(i+1);
			}

			if(arguments.at(i) == "--graphDir")
			{
				graph_dir = arguments.at(i+1);
			}

			if(arguments.at(i) == "--referenceGenome")
			{
				referenceGenome = arguments.at(i+1);
			}

			if(arguments.at(i) == "--debug")
			{
				debug = true;
			}
			
			if(arguments.at(i) == "--MiSeq250bp")
			{
				MiSeq250bp = true;
			}

			if(arguments.at(i) == "--IS_mean")
			{
				IS_mean = arguments.at(i+1);
			}

			if(arguments.at(i) == "--IS_sd")
			{
				IS_sd = arguments.at(i+1);
			}
		}

		assert(input_FASTQ.length());
		assert(graph_dir.length());
		assert(referenceGenome.length());

		std::vector<std::pair<double, double>> inserSize_mean_sd_perFile;
		
		if(debug)
		{
			assert(IS_mean.length());
		}
		
		if(IS_mean.length())
		{
			assert(IS_sd.length());
			
			double IS_mean_d = Utilities::StrtoD(IS_mean);
			double IS_sd_d = Utilities::StrtoD(IS_sd);
			
			std::vector<std::string> files = Utilities::split(input_FASTQ, ",");
			
			std::cout << "Use externally provided IS mean / sd estimates " << IS_mean_d << " / " << IS_sd_d << "\n";
			
			for(unsigned int i = 0; i < files.size(); i++)
			{
				inserSize_mean_sd_perFile.push_back(make_pair(IS_mean_d, IS_sd_d));
			} 
		}
		else
		{
			estimateInsertSizeFromGraph(input_FASTQ, graph_dir, inserSize_mean_sd_perFile, true);
		}
		
		alignShortReadsToHLAGraph_multipleAlignments(input_FASTQ, graph_dir, referenceGenome, inserSize_mean_sd_perFile, debug, MiSeq250bp);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "alignLongUnpairedReadsToHLAGraph"))
	{
		bool labelOnly = false;
		// omp_set_num_threads(40);
		CONFIG.threads=40;


		std::string input_FASTQ;
		std::string graph_dir;
		std::string referenceGenome;

		for(unsigned int i = 0; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--input_FASTQ")
			{
				input_FASTQ = arguments.at(i+1);
			}

			if(arguments.at(i) == "--graphDir")
			{
				graph_dir = arguments.at(i+1);
			}

			if(arguments.at(i) == "--referenceGenome")
			{
				referenceGenome = arguments.at(i+1);
			}
		}

		assert(input_FASTQ.length());
		assert(graph_dir.length());
		assert(referenceGenome.length());

		alignLongUnpairedReadsToHLAGraph(input_FASTQ, graph_dir, referenceGenome);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "simulateHLAreads"))
	{
		std::string graph_dir;
		std::string filename_qualityMatrix = "GraphAlignerUnique/predefinedQualityMatrices/I101_NA12878.txt";
		std::string outputDirectory;

		int nIndividuals = 10;
		double insertSize_mean = 200;
		double insertSize_sd = 15;
		double haploidCoverage = 15;

		int read_length = 101;
		bool readError = true;
		bool perturbHaplotypes = false;
		bool exon23 = true;
		
		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--graphDir")
			{
				graph_dir = arguments.at(i+1);
			}

			if(arguments.at(i) == "--filename_qualityMatrix")
			{
				filename_qualityMatrix = arguments.at(i+1);
			}

			if(arguments.at(i) == "--outputDirectory")
			{
				outputDirectory = arguments.at(i+1);
			}

			if(arguments.at(i) == "--nIndividuals")
			{
				nIndividuals = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--insertSize_mean")
			{
				insertSize_mean = Utilities::StrtoD(arguments.at(i+1));
			}
			if(arguments.at(i) == "--insertSize_sd")
			{
				insertSize_sd = Utilities::StrtoD(arguments.at(i+1));
			}

			if(arguments.at(i) == "--haploidCoverage")
			{
				haploidCoverage = Utilities::StrtoD(arguments.at(i+1));
			}

			if(arguments.at(i) == "--readLength")
			{
				read_length = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--readError")
			{
				readError = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--perturbHaplotypes")
			{
				perturbHaplotypes = Utilities::StrtoI(arguments.at(i+1));
			}
			
			if(arguments.at(i) == "--exon23")
			{
				exon23 = Utilities::StrtoI(arguments.at(i+1));
			}			
		}

		assert(graph_dir.length());
		assert(outputDirectory.length());

		assert(insertSize_mean > 0);
		assert(insertSize_mean < 1000);
		assert(insertSize_sd >= 0);
		assert(insertSize_sd <= 20);
		assert(nIndividuals > 0);

		simulateHLAreads(graph_dir, nIndividuals, exon23, perturbHaplotypes, readError, outputDirectory, filename_qualityMatrix, read_length, insertSize_mean, insertSize_sd, haploidCoverage);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "HLATypeInference"))
	{
		bool labelOnly = false;
		CONFIG.threads=32;
		omp_set_num_threads(CONFIG.threads);
				
		std::string input_alignedReads;
		std::string graph_dir;
		std::string sampleID;

		bool longUnpairedReads = false;
		bool MiSeq250bp = false;
		
		for(unsigned int i = 0; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--input_alignedReads")
			{
				input_alignedReads = arguments.at(i+1);
			}

			if(arguments.at(i) == "--graphDir")
			{
				graph_dir = arguments.at(i+1);
			}

			if(arguments.at(i) == "--sampleID")
			{
				sampleID = arguments.at(i+1);
			}
			
			if(arguments.at(i) == "--longUnpairedReads")
			{
				longUnpairedReads = true;
			}


			if(arguments.at(i) == "--MiSeq250bp")
			{
				MiSeq250bp = true;
			}
		}

		assert(input_alignedReads.length());
		assert(graph_dir.length());

 		std::string loci_string;
		std::string starting_haplotypes_perLocus_1_str;
		std::string starting_haplotypes_perLocus_2_str;

		// HLATypeInference(input_alignedReads, graph_dir, sampleID, true, loci_string, starting_haplotypes_perLocus_1_str, starting_haplotypes_perLocus_2_str);
  
 		// loci_string = "A,B,C,DQA1,DQB1,DRB1";
		// starting_haplotypes_perLocus_1_str = "A*02:124;A*02:81,B*39:29,C*03:04:04,DQA1*01:02:01:01;DQA1*01:02:01:02;DQA1*01:02:01:03;DQA1*01:02:01:04;DQA1*01:02:02;DQA1*01:02:03;DQA1*01:02:04;DQA1*01:08;DQA1*01:09;DQA1*01:11,DQB1*02:01:01;DQB1*02:01:08;DQB1*02:02:01;DQB1*02:02:02;DQB1*02:04;DQB1*02:06;DQB1*02:09;DQB1*02:10,DRB1*01:25";
		// starting_haplotypes_perLocus_2_str = "A*33:24,B*18:21,C*14:11,DQA1*01:01:01;DQA1*01:01:02;DQA1*01:04:01:01;DQA1*01:04:01:02;DQA1*01:04:02;DQA1*01:05;DQA1*01:12,DQB1*03:01:01:01;DQB1*03:01:01:02;DQB1*03:01:01:03;DQB1*03:01:04;DQB1*03:01:05;DQB1*03:01:09;DQB1*03:01:10;DQB1*03:01:11;DQB1*03:01:12;DQB1*03:01:20;DQB1*03:09;DQB1*03:19;DQB1*03:21;DQB1*03:22;DQB1*03:24;DQB1*03:29;DQB1*03:35;DQB1*03:42;DQB1*03:49;DQB1*03:50;DQB1*03:51;DQB1*03:84N;DQB1*03:94,DRB1*13:106";    
  
 		// loci_string = "A";
		// starting_haplotypes_perLocus_1_str = "A*02:03:01;A*02:264";
		// starting_haplotypes_perLocus_2_str = "A*24:02:01:01;A*24:02:01:02L;A*24:02:03Q;A*24:02:10;A*24:09N;A*24:11N";
			
		// HLAHaplotypeInference(input_alignedReads, graph_dir, sampleID, loci_string, starting_haplotypes_perLocus_1_str, starting_haplotypes_perLocus_2_str);
		
		// todo activate
		   
		HLATypeInference(input_alignedReads, graph_dir, sampleID, false, loci_string, starting_haplotypes_perLocus_1_str, starting_haplotypes_perLocus_2_str, longUnpairedReads, MiSeq250bp);
		
		// HLAHaplotypeInference(input_alignedReads, graph_dir, sampleID, loci_string, starting_haplotypes_perLocus_1_str, starting_haplotypes_perLocus_2_str, longUnpairedReads);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "nextGenContigValidation"))
	{
		bool labelOnly = false;
		omp_set_num_threads(40);
		CONFIG.threads=40;

		int cortex_height = 26;
		int cortex_width = 95;
		int kMer_size = 31;

		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--cortex_height")
			{
				cortex_height = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--cortex_width")
			{
				cortex_width = Utilities::StrtoI(arguments.at(i+1));
			}

			if(arguments.at(i) == "--kmer")
			{
				kMer_size = Utilities::StrtoI(arguments.at(i+1));
			}
		}

		string graph_file = arguments.at(2);
		string sampleCount = arguments.at(3);
		int chromotypes_start = Utilities::StrtoI(arguments.at(4));
		int chromotypes_stop = Utilities::StrtoI(arguments.at(5));
		string vcfFile = arguments.at(6);
		int vcf_start = Utilities::StrtoI(arguments.at(7));
		int vcf_stop = Utilities::StrtoI(arguments.at(8));
		string referenceGenome = arguments.at(9);
		string deBruijnGraph = arguments.at(10);
		string outputDir_contigs = arguments.at(11);
		string contigsFile_fasta = arguments.at(12);
		std::string graphDir = arguments.at(13);

		string output_file_amendedHaplotypes = sampleCount+".amendedHaplotypes";
		string output_file_viterbi_genomeString = sampleCount+".viterbiGenomeString";

		alignContigsToAllChromotypes(output_file_viterbi_genomeString, output_file_amendedHaplotypes, chromotypes_start, chromotypes_stop, vcfFile, vcf_start, vcf_stop, referenceGenome, deBruijnGraph, kMer_size, cortex_height, cortex_width, outputDir_contigs, contigsFile_fasta, graphDir);

		//		validateAmendedChromotypesVsVCF(output_file_amendedHaplotypes, chromotypes_start, chromotypes_stop, vcfFile, vcf_start, vcf_stop, referenceGenome, deBruijnGraph, kMer_size, cortex_height, cortex_width);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "nextGenInference"))
	{

		//omp_set_num_threads(CONFIG.threads);
		bool labelOnly = false;
		omp_set_num_threads(40);
		CONFIG.threads=40;

		/*
		map<int, int> edge1;
		map<int, int> edge2;
		edge1[1] = 3;

		edge2[1] = 1;
		edge2[2] = 1;
		edge2[3] = 1;

		map<int, int> thresholds;
		double P_above_all_edgePresent;
		double P_above_all_edgeNotPresent;

		computeThresholds(edge1, 0.05, 20, thresholds, P_above_all_edgePresent, P_above_all_edgeNotPresent);
		cout << "Edge1:\n";
		for(map<int, int>::iterator tIt = thresholds.begin(); tIt != thresholds.end(); tIt++)
		{
			cout << "\tThreshold for kMer " << tIt->first << ": " << tIt->second << "\n";
		}
		cout << "\tP_above_all_edgePresent: " << P_above_all_edgePresent << "\n";
		cout << "\tP_above_all_edgeNotPresent: " << P_above_all_edgeNotPresent << " => power: " << (1-P_above_all_edgeNotPresent) << "\n\n";

		computeThresholds(edge2, 0.05, 20, thresholds, P_above_all_edgePresent, P_above_all_edgeNotPresent);
		cout << "Edge2:\n";
		for(map<int, int>::iterator tIt = thresholds.begin(); tIt != thresholds.end(); tIt++)
		{
			cout << "\tThreshold for kMer " << tIt->first << ": " << tIt->second << "\n";
		}
		cout << "\tP_above_all_edgePresent: " << P_above_all_edgePresent << "\n";
		cout << "\tP_above_all_edgeNotPresent: " << P_above_all_edgeNotPresent << " => power: " << (1-P_above_all_edgeNotPresent) << "\n\n";

		errEx("OK");
		*/

		string graph_file = arguments.at(2);
		string wholeGenomeButGraphCount = arguments.at(3);
		string sampleCount = arguments.at(4);

		string test1 = "A||B";
		vector<string> test1_split = Utilities::split(test1, "||");
		assert(test1_split.size() == 2);
		assert(test1_split.at(0) == "A");
		assert(test1_split.at(1) == "B");

		vector<string> test1_split_2 = Utilities::split(test1, "*");
		assert(test1_split_2.size() == 1);


		string test2 = "A||B||C";
		vector<string> test2_split = Utilities::split(test2, "||");
		assert(test2_split.size() == 3);

		string test3 = "A||B||C||dtdtdtd|dmdmd||nd||a||dkdkd|ksks";
		vector<string> test3_split = Utilities::split(test3, "||");
		assert(test3_split.size() == 7);

		int genotypingMode = 5;
		for(unsigned int i = 5; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--labelonly")
			{
				labelOnly = (bool) Utilities::StrtoI(arguments.at(i));
			}

			if(arguments.at(i) == "--genotypingMode")
			{
				genotypingMode = Utilities::StrtoI(arguments.at(i+1));
			}
		}

		LargeGraph* kMerG = new LargeGraph();
		kMerG->readFromFile(graph_file);
		// kMerG->stats();

		/* more complicated model

		MultiGraph* multiG = multiBeautify(kMerG, wholeGenomeButGraphCount);
		multiG->stats();

		MultiHMM* mHMM = new MultiHMM(multiG);
		map<string, long long> estimatedEmissions = mHMM->estimateEmissions(sampleCount, wholeGenomeButGraphCount);
		mHMM->fillForwardBackwardTable(estimatedEmissions);

		*/



		MultiGraph* multiG = multiBeautifyForAlpha2(kMerG, "");
		set<int> emptySet;

		map<string, long long> estimatedEmissions = AlphaHMM::estimateEmissions(sampleCount, wholeGenomeButGraphCount, multiG->kMerSize);

		MultiGraph* multiGsimple = simplifyAccordingToCoverage(multiG, estimatedEmissions, emptySet);

		// multiG->stats();

		AlphaHMM* aHMM = new AlphaHMM(multiGsimple, genotypingMode);
		aHMM->fillForwardBackwardTable(estimatedEmissions);
		// aHMM->debug(estimatedEmissions);

		string output_file = sampleCount+".inference";
		string output_file_haplotypes = sampleCount+".haplotypes";
		string output_file_viterbi_haplotypes = sampleCount+".viterbiHaplotypes";
		string output_file_viterbi_genomeString = sampleCount+".viterbiGenomeString";

		vector<multiHaploLabelPair> samples;
		vector<multiHaploLabelPair> viterbiSamples;
		
		vector<diploidNucleotidePath> samplesAlignedHaplotypes;
		vector<diploidNucleotidePath> viterbiSamplesAlignedHaplotypes;

		multiHaplotypePair_and_P viterbiSample = aHMM->retrieveViterbiSample(labelOnly);
		diploidEdgePointerPath viterbiEdgePointers = viterbiSample.diploidEdgePointers;
		viterbiSamples.push_back(multiGsimple->edgePointerPathToLabels(viterbiEdgePointers));
		diploidEdgePointerPath viterbi_sample_lG = multiGsimple->MultiGraphEdgesToLargeGraphEdges(viterbiEdgePointers);
		diploidNucleotidePath viterbiNucleotidePath = kMerG->diploidPathToAlignedNucleotides(viterbi_sample_lG);
		viterbiSamplesAlignedHaplotypes.push_back(viterbiNucleotidePath);	
		
		diploidGenomeString viterbiGenomeString = kMerG->diploidPathToGenomeString(viterbi_sample_lG);
		diploidGenomeString viterbiGenomeString_compressed = compressGenomeString(viterbiGenomeString);
		storeGenomeStringInFile(viterbiGenomeString_compressed, output_file_viterbi_genomeString);   

		cout << "Viterbi P: " << viterbiSample.P << "\n";

		for(int sI = 0; sI < 10; sI++)
		{
			diploidEdgePointerPath sample_mG = aHMM->sampleFromPosterior(estimatedEmissions);
			samples.push_back(multiGsimple->edgePointerPathToLabels(sample_mG));

			diploidEdgePointerPath sample_lG = multiGsimple->MultiGraphEdgesToLargeGraphEdges(sample_mG);
			diploidNucleotidePath nucleotidePath = kMerG->diploidPathToAlignedNucleotides(sample_lG);
			samplesAlignedHaplotypes.push_back(nucleotidePath);
		}

		set<string> loci;
		vector< map<string, string> > h1_found;
		vector< map<string, string> > h2_found;

		for(int sample = 0; sample < (int)samples.size(); sample++ )
		{
			map<string, string> h1_localfound;
			map<string, string> h2_localfound;

			for(int samplePart = 0; samplePart < (int)samples.at(sample).h1.size(); samplePart++)
			{
				string sampleString_out =  samples.at(sample).h1.at(samplePart);
				// cout << "h1: " << sampleString_out << "\n";

				vector<string> parts = Utilities::split(sampleString_out, "||");
				for(int sI = 0; sI < (int)parts.size(); sI++)
				{
					string sampleString = parts.at(sI);

					int found=sampleString.find("*");
					if (found==(int)string::npos)
						errEx("Cannot determine locus in "+sampleString);

					//string locus = "HLA"+sampleString.substr(0,found);
					//string allele_colons = sampleString.substr(found+1);
					//assert(allele_colons.length() >= 5);
					//string allele = allele_colons.substr(0,2)+allele_colons.substr(3,2);

					string locus = sampleString.substr(0,found);
					string allele = sampleString.substr(found+1);
					int found_sep_allele = allele.find("||");
					if(found_sep_allele != (int)string::npos)
					{
						cout << "Allele has || symbol -- bad!\n\nallele: " << allele << "\n" << "parts.size(): " << parts.size() << "\nOriginal samplestring_out: " << sampleString_out << "\n";
					}
					assert(found_sep_allele == (int)string::npos);

					loci.insert(locus);
					if(h1_localfound.count(locus) > 0)
					{
						errEx("Same locus "+locus+" appears more than once in output?\nsampleString_out: "+sampleString_out+"\nlocus: "+locus+"\nallele: "+allele);
					}
					h1_localfound[locus] = allele;
				}
			}
			for(int samplePart = 0; samplePart < (int)samples.at(sample).h2.size(); samplePart++)
			{
				string sampleString_out =  samples.at(sample).h2.at(samplePart);
				// cout << "h2: " << sampleString_out << "\n";

				vector<string> parts = Utilities::split(sampleString_out, "||");
				for(int sI = 0; sI < (int)parts.size(); sI++)
				{
					string sampleString = parts.at(sI);

					int found=sampleString.find("*");
					if (found==(int)string::npos)
						errEx("Cannot determine locus in "+sampleString);

					//string locus = "HLA"+sampleString.substr(0,found);
					//string allele_colons = sampleString.substr(found+1);
					//assert(allele_colons.length() >= 5);
					//string allele = allele_colons.substr(0,2)+allele_colons.substr(3,2);

					string locus = sampleString.substr(0,found);
					string allele = sampleString.substr(found+1);
					int found_sep_allele = allele.find("||");
					if(found_sep_allele != (int)string::npos)
					{
						cout << "Allele has || symbol -- bad!\n\nallele: " << allele << "\n" << "parts.size(): " << parts.size() << "\nOriginal samplestring_out: " << sampleString_out << "\n";
					}
					assert(found_sep_allele == (int)string::npos);

					loci.insert(locus);
					if(h2_localfound.count(locus) > 0)
					{
						errEx("Same locus "+locus+" appears more than once in output?\nsampleString_out: "+sampleString_out+"\nlocus: "+locus+"\nallele: "+allele);
					}
					h2_localfound[locus] = allele;
				}
			}
			h1_found.push_back(h1_localfound);
			h2_found.push_back(h2_localfound);
		}

		vector<string> loci_in_order(loci.begin(), loci.end());

		ofstream output;
		output.open (output_file.c_str(), ios::out | ios::trunc);
		if (output.is_open())
		{
			output << "IndividualID Chromosome " << Utilities::join(loci_in_order, " ") << "\n";
			for(int sample = 0; sample < (int)samples.size(); sample++ )
			{
				string individualID = sampleCount+"-x-"+Utilities::ItoStr(sample+1);
				vector<string> fields_f1;
				vector<string> fields_f2;

				fields_f1.push_back(individualID);
				fields_f2.push_back(individualID);
				fields_f1.push_back(Utilities::ItoStr(1));
				fields_f2.push_back(Utilities::ItoStr(2));

				for(int fieldI = 0; fieldI < (int)loci_in_order.size(); fieldI++)
				{
					string fieldName = loci_in_order.at(fieldI);
					if(h1_found.at(sample).count(fieldName) > 0)
					{
						fields_f1.push_back(h1_found.at(sample)[fieldName]);
					}
					else
					{
						fields_f1.push_back("?");
					}

					if(h2_found.at(sample).count(fieldName) > 0)
					{
						fields_f2.push_back(h2_found.at(sample)[fieldName]);
					}
					else
					{
						fields_f2.push_back("?");
					}
				}

				assert(fields_f1.size() == fields_f2.size());

				output << Utilities::join(fields_f1, " ") << "\n";
				output << Utilities::join(fields_f2, " ") << "\n";
			}
		}
		else
		{
			errEx("Cannot open for writing: "+output_file);
		}

		ofstream output_haplotypes;
		output_haplotypes.open (output_file_haplotypes.c_str(), ios::out | ios::trunc);
		if (output_haplotypes.is_open())
		{
			for(int sample = 0; sample < (int)samples.size(); sample++ )
			{
				string individualID = sampleCount+"-x-"+Utilities::ItoStr(sample+1);
				vector<string> fields_f1;
				vector<string> fields_f2;

				fields_f1.push_back(individualID);
				fields_f2.push_back(individualID);
				fields_f1.push_back(Utilities::ItoStr(1));
				fields_f2.push_back(Utilities::ItoStr(2));

				for(int fieldI = 0; fieldI < (int)samplesAlignedHaplotypes.at(sample).h1.size(); fieldI++)
				{
					fields_f1.push_back(samplesAlignedHaplotypes.at(sample).h1.at(fieldI));
					fields_f2.push_back(samplesAlignedHaplotypes.at(sample).h2.at(fieldI));
				}

				assert(fields_f1.size() == fields_f2.size());

				output_haplotypes << Utilities::join(fields_f1, " ") << "\n";
				output_haplotypes << Utilities::join(fields_f2, " ") << "\n";
			}
		}
		else
		{
			errEx("Cannot open for writing: "+output_file);
		}
		
		ofstream output_viterbi_haplotypes;
		output_viterbi_haplotypes.open (output_file_viterbi_haplotypes.c_str(), ios::out | ios::trunc);
		if (output_viterbi_haplotypes.is_open())
		{
			for(int sample = 0; sample < (int)viterbiSamples.size(); sample++ )
			{
				string individualID = sampleCount+"-x-"+Utilities::ItoStr(sample+1);
				vector<string> fields_f1;
				vector<string> fields_f2;

				fields_f1.push_back(individualID);
				fields_f2.push_back(individualID);
				fields_f1.push_back(Utilities::ItoStr(1));
				fields_f2.push_back(Utilities::ItoStr(2));

				for(int fieldI = 0; fieldI < (int)viterbiSamplesAlignedHaplotypes.at(sample).h1.size(); fieldI++)
				{
					fields_f1.push_back(viterbiSamplesAlignedHaplotypes.at(sample).h1.at(fieldI));
					fields_f2.push_back(viterbiSamplesAlignedHaplotypes.at(sample).h2.at(fieldI));
				}

				assert(fields_f1.size() == fields_f2.size());

				output_viterbi_haplotypes << Utilities::join(fields_f1, " ") << "\n";
				output_viterbi_haplotypes << Utilities::join(fields_f2, " ") << "\n";
			}
		}
		else
		{
			errEx("Cannot open for writing: "+output_file_viterbi_haplotypes);
		}		

	}
	else if((arguments.size() > 0) && (arguments.at(1) == "loadKMerGraphHMM"))
	{
		string graph_file = arguments.at(2);

		LargeGraph* kMerG = new LargeGraph();
		kMerG->readFromFile(graph_file);

		kMerG->stats();
		errEx("OK");

		//LargeHMM* HMM = new LargeHMM(kMerG, true);

	}
	else if((arguments.size() > 0) && (arguments.at(1) == "kMerifyAndSimulate"))
	{
		string graph_file = arguments.at(2);
		Graph* g = new Graph();
		g->readFromFile(graph_file);
		LargeGraph* kMerG = kMerify(g, true);
		kMerG->simulateHaplotypes(500000);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "kMerify"))
	{
		string graph_file = arguments.at(2);

		int kMer_size = 55;
		bool protectPGF = true;
		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--kmer")
			{
				kMer_size = Utilities::StrtoI(arguments.at(i+1));
			}
			
			if(arguments.at(i) == "--noPGFprotection")
			{
				protectPGF = false;
			}
		}

		cout << "Using graph " << graph_file << ".\n";
		Graph* g = new Graph();
		g->readFromFile(graph_file);
		g->printComplexity(graph_file+".complexity");

		string kMerified_graph_file = graph_file+".kmers";
		kMerified_graph_file += ("_" + Utilities::ItoStr(kMer_size));
		LargeGraph* kMerG = kMerify(g, false, kMer_size, protectPGF);
		kMerG->writeToFile(kMerified_graph_file);

		kMerG->stats();

	}
	else if((arguments.size() > 0) && ((arguments.at(1) == "createConcatenatedVariationGraphs") || (arguments.at(1) == "createConcatenatedkMerGraphs")))
	{
		bool classicalPRG = (arguments.at(1) == "createConcatenatedVariationGraphs");

		omp_set_num_threads(CONFIG.threads);

		string haplotypes_files_dir = arguments.at(2);
		string positions_file = haplotypes_files_dir+"/positions.txt";
		string graph_output_file = haplotypes_files_dir+"/graph.txt";

		int kMer_size = 55;
		bool protectPGF = true;
		int suffixLength = 20;
		
		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--kmer")
			{
				kMer_size = Utilities::StrtoI(arguments.at(i+1));
			}
			if(arguments.at(i) == "--suffixLength")
			{
				suffixLength = Utilities::StrtoI(arguments.at(i+1));
			}			
			if(arguments.at(i) == "--noPGFprotection")
			{
				protectPGF = false;
			}			
		}

		string haplotypes_files_list = haplotypes_files_dir+"/segments.txt";

		if(classicalPRG)
		{
			cout << "Create variation graph based on haplotype specified in list files " << haplotypes_files_list << ", with positions" << positions_file << "\n\n" << flush;
			cout << "k = " << kMer_size << "\n\n" << flush;
			cout << "\t" << "suffixLength: " << suffixLength << "\n" << flush;
		}
		else
		{
			cout << "Create kMer graph based on haplotype specified in list files " << haplotypes_files_list << ", with positions" << positions_file << "\n\n" << flush;
			cout << "k = " << kMer_size << "\n\n" << flush;
		}

		Graph* combinedGraph = new Graph();
		int combinedLevel = -1;
		ifstream filesStream;
		filesStream.open (haplotypes_files_list.c_str(), ios::in);

		set<Node*> previousGraphLastNodes;
		if(filesStream.is_open())
		{
			string line;

			vector<string> files;

			while(filesStream.good())
			{
				getline (filesStream, line);
				Utilities::eraseNL(line);

				if(line.length() == 0)
					continue;

				files.push_back(line);
			}

			for(int fI = 0; fI < (int)files.size(); fI++)
			{
				string fN = files.at(fI);

				string haplotypes_file = haplotypes_files_dir+"/"+fN;

				std::cout << "Reading " << haplotypes_file << "\n" << std::flush;

				Graph* g = classicalPRG ? variationGraph(haplotypes_file, positions_file, protectPGF, suffixLength) : kMerGraph(haplotypes_file, positions_file, kMer_size);

				string output_file = haplotypes_file+".graph";
				g->writeToFile(output_file);

				if(fI < ((int)files.size() - 1))
				{
					g->CODE.removeLocus("END_PUFFER");
				}
				
				combinedGraph->CODE.takeLocusData(g->CODE);

				int levels = g->NodesPerLevel.size();
				int lastLevel = (fI == ((int)files.size() - 1)) ? g->NodesPerLevel.size() - 1 : g->NodesPerLevel.size() - 2;
				for(int l = 0; l <= lastLevel; l++)
				{
					for(set<Node*>::iterator nodeIt = g->NodesPerLevel.at(l).begin(); nodeIt != g->NodesPerLevel.at(l).end(); nodeIt++)
					{
						Node* n = *nodeIt;
						if(l == lastLevel)
						{
							//assert(n->Outgoing_Edges.size() == 0);
							if (fI != (files.size()-1))
							{
								previousGraphLastNodes.insert(n);
							}
							else
							{
								if(nodeIt == g->NodesPerLevel.at(l).begin())
									combinedLevel++;
								n->level = combinedLevel;
								n->Outgoing_Edges.clear();
								combinedGraph->registerNode(n, combinedLevel);
							}
						}
						else
						{
							if(nodeIt == g->NodesPerLevel.at(l).begin())
								combinedLevel++;

							n->level = combinedLevel;
							combinedGraph->registerNode(n, combinedLevel);

							for(set<Edge*>::iterator outgoingEdgesIt = n->Outgoing_Edges.begin(); outgoingEdgesIt != n->Outgoing_Edges.end(); outgoingEdgesIt++)
							{
								Edge* outgoingEdge = *outgoingEdgesIt;
								combinedGraph->registerEdge(outgoingEdge);
							}
						}
					}

					if(l == 0)
					{
						assert(g->NodesPerLevel.at(0).size() == 1);
						Node* n0 = *(g->NodesPerLevel.at(0).begin());
						for(set<Node*>::iterator lastNodeIt = previousGraphLastNodes.begin(); lastNodeIt != previousGraphLastNodes.end(); lastNodeIt++)
						{
							Node* lastLevelNode = *lastNodeIt;
							for(set<Edge*>::iterator incomingEdgesIt = lastLevelNode->Incoming_Edges.begin(); incomingEdgesIt != lastLevelNode->Incoming_Edges.end(); incomingEdgesIt++)
							{
								Edge* incomingEdge = *incomingEdgesIt;
								incomingEdge->To = n0;
								n0->Incoming_Edges.insert(incomingEdge);
							}
						}
						previousGraphLastNodes.clear();
					}
				}
			}
			filesStream.close();
		}

		combinedGraph->checkConsistency(false);

		combinedGraph->writeToFile(graph_output_file);
		combinedGraph->printComplexity(graph_output_file+".complexity");

		cout << "\tLevels: " << combinedGraph->NodesPerLevel.size() << "\n";
		cout << "\tNodes: " << combinedGraph->Nodes.size() << "\n";
		cout << "\tEdges: " << combinedGraph->Edges.size() << "\n";
		cout << flush;
		
		exit(0);

		string kMerified_graph_file = graph_output_file+".kmers";
		LargeGraph* kMerG = kMerify(combinedGraph, false, kMer_size);
		kMerG->writeToFile(kMerified_graph_file);

		kMerG->stats();
	}

	else if((arguments.size() > 0) && (arguments.at(1) == "createMHCVariationGraph"))
	{
		omp_set_num_threads(CONFIG.threads);


		string haplotypes_file = arguments.at(2);
		string positions_file = arguments.at(3);
		string graph_output_file = arguments.at(4);

		int kMer_size = 55;
		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--kmer")
			{
				kMer_size = Utilities::StrtoI(arguments.at(i+1));
			}
		}

		cout << "Create variation graph based on haplotypes from " << haplotypes_file << ", with positions" << positions_file << "\n\n" << flush;

		Graph* g = variationGraph(haplotypes_file, positions_file);
		g->writeToFile(graph_output_file);
		g->printComplexity(graph_output_file+".complexity");

		exit(0);

		string kMerified_graph_file = graph_output_file+".kmers";
		LargeGraph* kMerG = kMerify(g, false, kMer_size);
		kMerG->writeToFile(kMerified_graph_file);

		kMerG->stats();
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "createHLAAlleleshortAlignmentGraph"))
	{
		omp_set_num_threads(CONFIG.threads);

		string hla_allele_dir = arguments.at(2);
		string locus = arguments.at(3);
		string graph_output_file = arguments.at(4);

		int kMer_size = 55;
		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--kmer")
			{
				kMer_size = Utilities::StrtoI(arguments.at(i+1));
			}
		}

		cout << "Create graph for short HLA allele alignments for locus " << locus << ", using data from directory " << hla_allele_dir << "\n\n" << flush;

		Graph* g = HLAalignmentGraph(hla_allele_dir, locus);
		g->writeToFile(graph_output_file);

		string kMerified_graph_file = graph_output_file+".kmers";
		LargeGraph* kMerG = kMerify(g, false, kMer_size);
		kMerG->writeToFile(kMerified_graph_file);

		kMerG->stats();

		//LargeHMM* HMM = new LargeHMM(kMerG, true);
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "filterReads"))
	{
		std::string positiveFilter;
		std::string negativeFilter;

		std::string referenceGenome;

		double positiveThreshold = 0.3;
		double negativeThreshold = 0.45;

		vector<string> arguments (argv + 1, argv + argc + !argc);

		std::string input_BAM;
		std::string input_FASTQ;

		std::string output_FASTQ;

		int k = 25;

		int threads = 12;
		bool positiveUnique = false;
		bool negativePreserveUnique = false;
		bool HiSeq250bp = false;

		std::string uniqueness_base;
		std::string uniqueness_subtract;

		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--positiveFilter")
			{
				positiveFilter = arguments.at(i+1);
			}
			if(arguments.at(i) == "--negativeFilter")
			{
				negativeFilter = arguments.at(i+1);
			}
			if(arguments.at(i) == "--input_BAM")
			{
				input_BAM = arguments.at(i+1);
			}
			if(arguments.at(i) == "--input_FASTQ")
			{
				input_FASTQ = arguments.at(i+1);
			}
			if(arguments.at(i) == "--referenceGenome")
			{
				referenceGenome = arguments.at(i+1);
			}
			if(arguments.at(i) == "--output_FASTQ")
			{
				output_FASTQ = arguments.at(i+1);
			}
			if(arguments.at(i) == "--positiveThreshold")
			{
				positiveThreshold = Utilities::StrtoD(arguments.at(i+1));
			}
			if(arguments.at(i) == "--negativeThreshold")
			{
				negativeThreshold = Utilities::StrtoD(arguments.at(i+1));
			}
			if(arguments.at(i) == "--k")
			{
				k = Utilities::StrtoI(arguments.at(i+1));
			}
			if(arguments.at(i) == "--positiveUnique")
			{
				positiveUnique = true;
			}
			if(arguments.at(i) == "--negativePreserveUnique")
			{
				negativePreserveUnique = true;
			}
			if(arguments.at(i) == "--uniqueness_base")
			{
				uniqueness_base = arguments.at(i+1);
			}
			if(arguments.at(i) == "--uniqueness_subtract")
			{
				uniqueness_subtract = arguments.at(i+1);
			}
			if(arguments.at(i) == "--HiSeq250bp")
			{
				HiSeq250bp = true;
			}
			if(arguments.at(i) == "--threads")
			{
				threads = Utilities::StrtoD(arguments.at(i+1));
			}			
		}

		readFilter F;
		F.positiveFilter = positiveFilter;
		F.negativeFilter = negativeFilter;
		F.input_BAM = input_BAM;
		F.input_FASTQ = input_FASTQ;
		F.output_FASTQ = output_FASTQ;
		F.positiveThreshold = positiveThreshold;
		F.negativeThreshold = negativeThreshold;
		F.k = k;
		F.negativePreserveUnique = negativePreserveUnique;
		F.positiveUnique = positiveUnique;
		F.uniqueness_base = uniqueness_base;
		F.uniqueness_subtract = uniqueness_subtract;
		F.referenceGenomeFile = referenceGenome;
		F.HiSeq250bp = HiSeq250bp;
		F.threads = threads;
		
		F.doFilter();

	}
	else if((arguments.size() > 0) && (arguments.at(1) == "filterLongOverlappingReads"))
	{
		std::string referenceGenome;

		vector<string> arguments (argv + 1, argv + argc + !argc);

		std::string input_BAM;
		std::string output_FASTQ;
		std::string graphDir;

		int minimumOverlap = 0;

		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--input_BAM")
			{
				input_BAM = arguments.at(i+1);
			}
			if(arguments.at(i) == "--referenceGenome")
			{
				referenceGenome = arguments.at(i+1);
			}
			if(arguments.at(i) == "--output_FASTQ")
			{
				output_FASTQ = arguments.at(i+1);
			}
			if(arguments.at(i) == "--minimumOverlap")
			{
				minimumOverlap = Utilities::StrtoD(arguments.at(i+1));
			}

			if(arguments.at(i) == "--graphDir")
			{
				graphDir = arguments.at(i+1);
			}
		}

		filterLongOverlappingReads F;
		F.input_BAM = input_BAM;
		F.output_FASTQ = output_FASTQ;
		F.referenceGenomeFile = referenceGenome;
		F.graphDir = graphDir;
		F.minimumOverlap = minimumOverlap;

		F.doFilter();
	}
	else if((arguments.size() > 0) && (arguments.at(1) == "filterLongOverlappingReads2"))
	{
		std::string referenceGenome;

		vector<string> arguments (argv + 1, argv + argc + !argc);

		std::string input_BAM;
		std::string input_FASTQ;
		std::string output_FASTQ;
		std::string graphDir;

		int minimumOverlap = 0;

		for(unsigned int i = 2; i < arguments.size(); i++)
		{
			if(arguments.at(i) == "--input_BAM")
			{
				input_BAM = arguments.at(i+1);
			}
			if(arguments.at(i) == "--referenceGenome")
			{
				referenceGenome = arguments.at(i+1);
			}
			if(arguments.at(i) == "--input_FASTQ")
			{
				input_FASTQ = arguments.at(i+1);
			}			
			if(arguments.at(i) == "--output_FASTQ")
			{
				output_FASTQ = arguments.at(i+1);
			}
			if(arguments.at(i) == "--minimumOverlap")
			{
				minimumOverlap = Utilities::StrtoD(arguments.at(i+1));
			}

			if(arguments.at(i) == "--graphDir")
			{
				graphDir = arguments.at(i+1);
			}
		}

		filterLongOverlappingReads F;
		F.input_BAM = input_BAM;
		F.input_FASTQ = input_FASTQ;
		F.output_FASTQ = output_FASTQ;
		F.referenceGenomeFile = referenceGenome;
		F.graphDir = graphDir;
		F.minimumOverlap = minimumOverlap;

		F.doFilter2();

	}	
	else
	{
		errEx("Please specify valid mode.");
	}
	return 0;
}

void errEx(std::string message)
{
	cerr << message << "\n";
	exit(1);
}

void testing()
{
	LocusCodeAllocation CODE;
	unsigned char codedHLA = CODE.doCode("HLA", "0101");
	string originalAllele = CODE.deCode("HLA", codedHLA);
	// std::cout << "Coded HLA: " << codedHLA << " -- original allele: " << originalAllele << "\n";
	assert(originalAllele == "0101");

}
back to top