https://github.com/trvrb/PACT
Revision 9c04acffc876bb4791a0dcf61087c0d20bb1f9e9 authored by Trevor Bedford on 06 January 2010, 20:29:26 UTC, committed by Trevor Bedford on 06 January 2010, 20:29:26 UTC
1 parent b9a272f
Raw File
Tip revision: 9c04acffc876bb4791a0dcf61087c0d20bb1f9e9 authored by Trevor Bedford on 06 January 2010, 20:29:26 UTC
Error checking for 0 trees.
Tip revision: 9c04acf
coaltree.cpp
/* coaltree.cpp
Copyright 2009 Trevor Bedford <bedfordt@umich.edu>
Member function definitions for CoalescentTree class
*/

/*
This file is part of PACT.

PACT is free software: you can redistribute it and/or modify it under the terms of the GNU General 
Public License as published by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

PACT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the 
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
Public License for more details.

You should have received a copy of the GNU General Public License along with PACT.  If not, see 
<http://www.gnu.org/licenses/>.
*/

#include <iostream>
#include <sstream>
#include <fstream>
using std::ofstream;
using std::stringstream;
using std::cout;
using std::endl;
using std::flush;
using std::ios;
using std::fixed;

#include <string>
using std::string;

#include <set>
using std::set;

#include <vector>
using std::vector;

#include <stdexcept>
using std::runtime_error;
using std::out_of_range;

#include <cstdlib>
using std::atof;
using std::atoi;

#include <cmath>

#include "coaltree.h"
#include "node.h"
#include "tree.hh"

/* Constructor function to initialize private data */
/* Takes NEWICK parentheses tree as string input */
/* Tip names are replaced by consecutive numbers (starting at 1) and stored as leaf nodes; */
/* each closed pair of parentheses is then collapsed into a new parent node until none remain. */
/* Migration annotations of the form [&M 5 3:8.49916e-05] become intermediate nodes. */
/* Throws runtime_error if parentheses are unbalanced or if no tips could be read. */
CoalescentTree::CoalescentTree(string paren) {

	string::iterator is;
	tree<Node>:: iterator it, jt;
	
	// make sure that parentheses are matched, counting '(' and counting ')'
	int leftcount = 0;
	int rightcount = 0;
	for ( is=paren.begin(); is < paren.end(); ++is ) {
		if (*is == '(') { leftcount++; }
		if (*is == ')') { rightcount++; }
	}
	if (leftcount != rightcount) {
		throw runtime_error("unmatched parentheses in in.trees");
	}

	// STRIP PAREN STRING ///////////
	// strip spaces from paren string
	// strip & and following character, replace following : with =
	// assumes migration events follow the format [&M 5 3:8.49916e-05]
	is = paren.begin() ;
	bool mig = false;
	while (is < paren.end()) {
		if (*is == ' ')
			is = paren.erase(is);
		else if (*is == '&') {
			is = paren.erase(is);
			// a trailing '&' has no following character to remove
			if (is != paren.end()) {
				is = paren.erase(is);
			}
			mig = true;
		}
		else if (*is == ':' && mig) {
			*is = '=';
			mig = false;
		}
		else
			is++;
	}

	// GATHER TIPS ////////////////
	// read in node names, filling tips vector
	// names can only be 0-9 A-Z a-z
	// exported tree renames tips with consecutive numbering starting at 1
	// go through paren string and collect tips, at the same time replace names with matching numbers in paren string
	// if first character of name is a number and the second character is a letter, assume first character is label

	vector<Node> tipsList;
	int current = 1;
	int stringPos = 0;
	string thisString = "";
	
	while (stringPos < paren.length()) {
		
		char thisChar = paren[stringPos];
		
		if ( (thisChar >= 'A' && thisChar <= 'Z') || (thisChar >= 'a' && thisChar <= 'z') || (thisChar >= '0' && thisChar <= '9') ) {
			thisString += thisChar;
		}
		
		// a name is only recognised as a tip when it is directly followed by ':'
		else if (thisChar == ':' && thisString.length() > 0) {
			
			/* nodetree update */
			Node thisNode(current);
			thisNode.setName(thisString);
			
			// label is the first character of node string, incremented by 1
			// only attempt this if first character is number and second character is letter
			// otherwise the label is left at whatever Node's constructor assigns
			// (upper bound of the lowercase range was previously '1', which made the test always false)
			if ( (thisString[0] >= '0' && thisString[0] <= '9') &&
					( (thisString[1] >= 'A' && thisString[1] <= 'Z') || (thisString[1] >= 'a' && thisString[1] <= 'z') ) ) {
				thisNode.setLabel(atoi(thisString.substr(0,1).c_str()) + 1);
			}
			
			thisNode.setLeaf(true);
			
			tipsList.push_back(thisNode);

			/* replace name with number */
			stringstream out;
			out << current;
			paren = paren.substr(0,stringPos - thisString.size()) + out.str() + paren.substr(stringPos,paren.length());
			
			/* move counter back */
			/* need to take into acount the length of the digits */
			stringPos -= thisString.size() - (out.str()).length() + 1;
			
			thisString = "";
			current++;
		
		}
		
		else {
			thisString = "";
		}
		
		stringPos++;
		
	}
	
	// without at least one tip there is nothing to seed the tree with
	if (tipsList.empty()) {
		throw runtime_error("no tips found in tree string");
	}
	
	// STARTING TREE /////////////////
	// construct starting point for tree (multifurcation from root)
	it = nodetree.set_head(tipsList[0]);
	for(int i = 1; i < tipsList.size(); i++) {
		it = nodetree.insert_after(it, tipsList[i]);
	}
	
	// CONSTRUCT TREE /////////////////////
	// read parentheses string from left to right, stop when a close parenthesis is encountered
	// push the left and right nodes onto their own branch
	// replace parenthesis string with their parent node ((1,2),3)  --->  (4,3)
	
	// end when all parentheses have been eliminated
	while (paren.at(0) == '(') {
		
		// positions and values remembered while scanning; initialised so that nothing is read before it is set
		int left = 0, right = 0, from = 0, to = 0;
		int openParen = 0, closeParen = 0, openMig = 0, closeMig = 0;
		double leftLength = 0.0, rightLength = 0.0, migLength = 0.0;
		stringPos = 0;
		thisString = "";
		
		for ( is=paren.begin(); is < paren.end(); ++is ) {

			if (*is == '(') {
				openParen = stringPos;
				openMig = stringPos;
			}
			
			if ( (*is >= '0' && *is <= '9') || (*is >= 'A' && *is <= 'Z') || (*is >= 'a' && *is <= 'z') || *is == '.' || *is == '-' ) {
				thisString += *is;
			}
			
			else {
				
				if (thisString.length() > 0) {
					
					// branch length
					if (*is == '[' || *is == ',' || *is == ')') {
						leftLength = rightLength;
						rightLength = atof(thisString.c_str());
					}
					
					// node number
					if (*is == ':') {
						left = right;
						right = atoi(thisString.c_str());
					}
	
					if (*is == ',') {
						openMig = stringPos;
					}
					
					// MIGRATION EVENTS ////////////////////
					
					/* need to extend ctree here */
					/* can only deal with migration events that effect a tip node */
					/* this section is only called when brackets follow a tip node */
					if (*is == '=') {
						
						/* grabbing migration event */
						/* thisString holds two characters: dropping the first gives from, dropping the second gives to */
						string labelString = thisString;
						labelString.erase(0,1);
						from = atoi(labelString.c_str()) + 1;
						labelString = thisString;
						labelString.erase(1,1);
						to = atoi(labelString.c_str()) + 1;
						
					}
					
					if (*is == ']') {
						
						closeMig = stringPos;
						
						migLength = atof(thisString.c_str());
						
						// push child node back by distance equal to migLength
						it = findNode(right);
						(*it).setLength( rightLength - migLength );
			
						// create new intermediate node
						Node migNode(current);
						migNode.setLabel(to);
						migNode.setLength(migLength);
						
						// wrap this new node so that it inherits the old node
						nodetree.wrap(it,migNode);
						
						/* replace parenthesis with new node label */
						/* code is set up to deal with the situation of two labels before a parenthesis */
						stringstream out;
						out << current << ":" << migLength;
						string insert = out.str();
						paren.replace(openMig + 1,closeMig - openMig,insert);
						
						// paren was modified, so restart the scan from the beginning
						current++;
						break;
						
					}
					
				}
				
				thisString = "";
				
			}
			
			// COALESCENT EVENTS //////////////////
			
			if (*is == ')') {
				
				// have to have stored left and right nodes
				if (left != 0 && right != 0) {
					
					closeParen = stringPos;
					
					// append a new node
					// append this new node with two branches (left node and right node)
					
					tree<Node>:: iterator iterLeft, iterRight, iterNew;
					iterLeft = findNode(left);
					iterRight = findNode(right);
	
					(*iterLeft).setLength(leftLength);
					(*iterRight).setLength(rightLength);
					
					Node newNode(current);
					newNode.setLabel( (*iterLeft).getLabel() );
					
					iterNew = nodetree.wrap(iterLeft,newNode);
					nodetree.move_after(iterLeft,iterRight);
					
					/* replace parenthesis with new node label */
					stringstream out;
					out << current;
					string insert = out.str();
	
					// in-place replace rather than rebuilding the string each cycle
					paren.replace(openParen,closeParen-openParen+1,insert);
					
					// paren was modified, so restart the scan from the beginning
					current++;
					break;
					
				}
				
				// this will be encountered if the string is surrounded by an extra pair of parenthesis
				// LAMARC does this
				else {
					paren = ";";
					// the assignment invalidates the scan iterator, so stop scanning;
					// the outer loop ends because paren no longer starts with '('
					break;
				}
				
			}
	
			stringPos++;
			
		}
	
	}
	
	// adding branch length to the parent node's time to get the node's time
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		jt = nodetree.parent(it);
		if (nodetree.is_valid(jt)) {
			double t = (*jt).getTime() + (*it).getLength();
			(*it).setTime(t);
		}
	}
	
	/* go through tree and append to trunk set */
	/* only the last 1/100 of the time span is considered */
	double presentTime = getPresentTime();
	double trunkTime = presentTime / (double) 100;
	it = nodetree.begin();
	(*it).setTrunk(true);
	while(it != nodetree.end()) {
		/* find nodes at present */
		if ((*it).getTime() > presentTime - trunkTime) {
			jt = it;
			/* move up tree adding nodes to trunk set */
			while (nodetree.is_valid(jt)) {
				(*jt).setTrunk(true);
				jt = nodetree.parent(jt);
			}
		}
		++it;
	}
	
	/* pushing the most recent sample up to time = 0 */
	pushTimesBack(0);
	
}

/* push dates to agree with a most recent sample date at endTime */
void CoalescentTree::pushTimesBack(double endTime) {
		
	// need to adjust times by this amount
	double diff = endTime - getPresentTime();
		
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it) {
		double t = (*it).getTime();
		(*it).setTime(t + diff);
	}	
		  	
}

/* push dates to agree with a most recent sample date at endTime and oldest sample date at startTime */
/* when startTime < endTime, branch lengths are first rescaled so that the span between the oldest */
/* and the most recent leaf equals endTime - startTime; afterwards all times are shifted so that the */
/* most recent sample sits at endTime */
/* with contemporary samples (no span between oldest and newest leaf) the rescaling is skipped, */
/* since the scale factor would be a division by zero, and only the shift is applied */
void CoalescentTree::pushTimesBack(double startTime, double endTime) {
	
	tree<Node>::iterator it, jt;
	double presentTime = getPresentTime();
	
	if (startTime < endTime) {
	
		// STRETCH OR SHRINK //////////////
		
		// find oldest sample
		double oldestSample = presentTime;
		for (tree<Node>::leaf_iterator lit = nodetree.begin_leaf(); lit != nodetree.end_leaf(); ++lit) {
			if ((*lit).getTime() < oldestSample) {
				oldestSample = (*lit).getTime();
			}
		}
		
		// span covered by the samples; zero when all samples are contemporary
		double sampleSpan = presentTime - oldestSample;
		
		if (sampleSpan > 0.0) {
		
			double mp = (endTime - startTime) / sampleSpan;
			
			// go through tree and multiply lengths by mp
			for (it = nodetree.begin(); it != nodetree.end(); ++it) {
				double l = (*it).getLength();
				(*it).setLength(l * mp);
			}
			
			// update times in tree: each node sits one branch length after its parent
			for (it = nodetree.begin(); it != nodetree.end(); ++it) {
				jt = nodetree.parent(it);
				if (nodetree.is_valid(jt)) {
					double t = (*jt).getTime() + (*it).getLength();
					(*it).setTime(t);
				}
			}
		
		}
	
	}

	// PUSH BACK /////////////////////

	// need to adjust times by this amount (present time is re-read because rescaling may have moved it)
	double diff = endTime - getPresentTime();
	
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		double t = (*it).getTime();
		(*it).setTime(t + diff);
	}
	
}

/* old version of renewTrunk.  This peels back from all current nodes. */
//void CoalescentTree::renewTrunk(double t) {
//
//	/* go through tree and append to trunk set */
//	double presentTime = getPresentTime();
//	tree<Node>::iterator it, jt;
//	
//	for(it = nodetree.begin(); it != nodetree.end(); ++it) {
//		(*it).setTrunk(false);
//	}
//	
//	it = nodetree.begin();
//	(*it).setTrunk(true);
//	while(it != nodetree.end()) {
//		/* find nodes at present */
//		if ((*it).getTime() > presentTime - t) {
//			jt = it;
//			/* move up tree adding nodes to trunk set */
//			while (nodetree.is_valid(jt)) {
//				(*jt).setTrunk(true);
//				jt = nodetree.parent(jt);
//			}
//		}
//		++it;
//	}	
//				
//}

/* reduces a tree to just its trunk, takes a single random sample from "current" tips and works backward from this */
void CoalescentTree::renewTrunk(double t) {

	/* go through tree and append to trunk set */
	double presentTime = getPresentTime();
	tree<Node>::iterator it, jt;
	
	/* count tips and set every node as non-trunk */
	int count = 0;	
	for(it = nodetree.begin(); it != nodetree.end(); ++it) {
		(*it).setTrunk(false);
		if ((*it).getTime() > presentTime - t && (*it).getLeaf()) {
			count++;
		}
	}

	int selection = rgen.uniform(0,count);
	count = 0;
	
	it = nodetree.begin();
	(*it).setTrunk(true);
	while(it != nodetree.end()) {
		/* find nodes at present */
		if ((*it).getTime() > presentTime - t && (*it).getLeaf()) {
			if (selection == count) {
				jt = it;
				/* move up tree adding nodes to trunk set */
				while (nodetree.is_valid(jt)) {
					(*jt).setTrunk(true);
					jt = nodetree.parent(jt);
				}
				break;
			}
			count++;
		}
		++it;
	}	
	
}

/* reduces a tree to just its trunk, takes most recent sample and works backward from this */
void CoalescentTree::pruneToTrunk() {
	
	/* erase other nodes from the tree */	
	tree<Node>::iterator it;
	it = nodetree.begin();
	while(it != nodetree.end()) {
		if ( !(*it).getTrunk() ) {
			it = nodetree.erase(it);
		}
		else {
    		++it;
    	}
    }
            
	reduce();
			
}


/* reduces a tree to samples with a single label */
void CoalescentTree::pruneToLabel(int label) {

	/* start by finding all tips with this label */
	set<int> labelset; 
	tree<Node>::iterator it, jt;
	
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		if ( (*it).getLabel() == label && (*it).getLeaf() ) {
		
			/* move up tree adding nodes to label set */
			jt = it;
			while (nodetree.is_valid(jt)) {
				labelset.insert( (*jt).getNumber() );
				jt = nodetree.parent(jt);
			}
		
		}
	}
			
	/* erase other nodes from the tree */
	it = nodetree.begin();
	while(it != nodetree.end()) {
		if (labelset.end() == labelset.find( (*it).getNumber() )) {
			it = nodetree.erase(it);
		}
		else {
    		++it;
    	}
    }
        
   	peelBack();     
	reduce();
				
}


/* sets all labels in tree to 1 */
void CoalescentTree::collapseLabels() {

	tree<Node>::iterator it, jt;
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		(*it).setLabel(1);
	}
							
}

/* trims a tree at its edges 

   		   |-------	 			 |-----
from  ------			 	to	--
   		   |----------		 	 |-----

*/
void CoalescentTree::trimEnds(double start, double stop) {
			
	/* erase nodes from the tree where neither the node nor its parent are between start and stop */
	tree<Node>::iterator it, jt;
	it = nodetree.begin();
	while(it != nodetree.end()) {	
	
		jt = nodetree.parent(it);
	
		if (nodetree.is_valid(jt)) {
	
			/* if node > stop and parent < stop, erase children and prune node back to stop */
			/* this pruning causes an internal node to become an leaf node */
			if ((*it).getTime() > stop && (*jt).getTime() < stop) {
			
				(*it).setTime( stop );
				(*it).setLength( (*it).getTime() - (*jt).getTime() );
				//(*it).setLeaf(false);
				(*it).setLeaf(true);
				nodetree.erase_children(it);
				it = nodetree.begin();
			
			}
			
			/* if node > start and parent < start, push parent up to start */
			/* and reparent anc[node] to be a child of root */
			/* neither node nore anc[node] can be root */
			else if ((*it).getTime() > start && (*jt).getTime() < start) {
			
				(*jt).setTime(start);
				(*jt).setLength(0.0);
				(*jt).setInclude(false);
				nodetree.move_after(nodetree.begin(),jt);
				it = nodetree.begin();
			
			}
			
			else {
				++it;
			}
		
		}
		
		else {
    		++it;
    	}
    }
        
    /* second pass for nodes < start */
    it = nodetree.begin();   
	while(it != nodetree.end()) {	
		if ((*it).getTime() < start) {
			it = nodetree.erase(it);
		}
		else {
    		++it;
    	}
    }
        
	// go through tree and update lengths based on times
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		jt = nodetree.parent(it);
		if (nodetree.is_valid(jt)) {
			(*it).setLength( (*it).getTime() - (*jt).getTime() );
		}
	}	               
               
	reduce();
					
}

/* cuts up tree into multiple sections */
void CoalescentTree::sectionTree(double start, double window, double step) {

	tree<Node>::iterator it, jt;
	tree<Node> holdtree = nodetree;
	int current = 1;

	double rootTime = getRootTime();
	double presentTime = getPresentTime();

	/* newtree holds the growing tree structure */
	tree<Node> newtree;
	Node tempNode(-1);	
	newtree.set_head(tempNode);

	/* move window forward in time, make sure there are nodes in this window */
	for (double t = start; t < presentTime; t += step) {
		if (t > rootTime) {
		
			// operations all affect nodetree
			nodetree = holdtree;
			trimEnds(t,t + window);	
			current = renumber(current);			// need unique node numbers
			
			//	printTree();
					
			// need to move multiple sibling branches
			// need four sibling iterators, to1 to2 from1 from2
			tree<Node>::sibling_iterator to1, to2, from1, from2;
	
			from1 = nodetree.begin();
			from2 = nodetree.begin();
			while ( nodetree.is_valid(nodetree.next_sibling(from2)) ){
				from2 = nodetree.next_sibling(from2);
			}

			to1 = newtree.begin();
			to2 = newtree.begin();
			while ( newtree.is_valid(newtree.next_sibling(to2)) ){
				to2 = newtree.next_sibling(to2);
			}
					
			newtree.merge(to1,to2,from1,from2,true);
	
		}
	}

	nodetree = newtree;
	
}

/* Reduces tree to just the ancestors of a single slice in time */
/* Used to calcuate diversity, TMRCA and Tajima's D at a particular time */
/* Every branch that spans the slice (node later than slice, parent at or before slice) is cut at */
/* the slice; the cut nodes and their ancestors are kept, all other nodes are erased, and the */
/* result is passed through peelBack() and reduce() */
void CoalescentTree::timeSlice(double slice) {

	/* desire only nodes spanning the time slice */
	/* find these nodes and add them and their ancestors to a set */
	set<int> sliceset;
	tree<Node>::iterator it, jt;
	it = nodetree.begin();
	while(it != nodetree.end()) {
	
		jt = nodetree.parent(it);
	
		/* if node > slice and parent <= slice, erase children and prune node back to slice */
		/* this pruning causes an internal node to become a leaf node */
		/* a node without a valid parent spans no branch, and its parent must not be dereferenced */
		if (nodetree.is_valid(jt) && (*it).getTime() > slice && (*jt).getTime() <= slice) {
		
			// adjusting node
			(*it).setTime( slice );
			(*it).setLength( (*it).getTime() - (*jt).getTime() );
			(*it).setLeaf(true);
			nodetree.erase_children(it);
			
			// move up tree adding nodes to sliceset
			jt = it;
			while (nodetree.is_valid(jt)) {
				sliceset.insert( (*jt).getNumber() );
				jt = nodetree.parent(jt);
			}
			
			// the tree changed, so restart the scan from the beginning
			it = nodetree.begin();
		
		}
		
		else {
			++it;
		}
		
	}
	
	/* erase other nodes from the tree */
	it = nodetree.begin();
	while(it != nodetree.end()) {
		if (sliceset.end() == sliceset.find( (*it).getNumber() )) {
			it = nodetree.erase(it);
		}
		else {
			++it;
		}
	}
	
	peelBack();
	reduce();

}

/* pads the tree with extra nodes at coalescent time points */
/* original author's note: causing problems with migration tree */
/* new nodes are numbered from getMaxNumber() + 1 upwards and are inserted with nodetree.wrap() */
void CoalescentTree::padTree() { 

	int current = getMaxNumber() + 1;

	tree<Node>::iterator it, end, iterTemp, iterN;
	
	/* construct the ordered set of distinct node times */
	set<double>::const_iterator is;
	set<double> tset;
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		tset.insert( (*it).getTime() );
	}
	
	/* pad tree with extra nodes, make sure there is a node at each time slice correspoding to coalescent event */
	it = nodetree.begin();
	while(it != nodetree.end()) {
	
		/* finding what the correct depth of the node should be */
		/* newDepth ends as (number of distinct times earlier than this node's time) - 1 */
		int newDepth = -1;
		for (is = tset.begin(); is != tset.find( (*it).getTime() ); ++is) {
    		newDepth++;
    	}
    	
    	/* step to the next distinct time after this node's time */
    	/* NOTE(review): this is tset.end() when the node holds the latest time; *is is only read inside the branch below -- confirm that case cannot reach it */
    	is++;
	
		if (newDepth > nodetree.depth(it)) {
		
			/* padding with number of nodes equal to the difference in depth levels */
			/* NOTE(review): it is reset to nodetree.begin() inside this loop, so from the second pass on the loop bound and the wrap target refer to the first node of the tree -- confirm this is intended */
			for(int i = 0; i < newDepth - nodetree.depth(it); i++) {

				/* the new node copies the label of it and takes the time *is */
				Node newNode(current);
				newNode.setLabel( (*it).getLabel() );
				newNode.setTime( *is );
				newNode.setLength( *is - (*it).getTime() );
				
				nodetree.wrap(it,newNode);
	
				current++;
				it = nodetree.begin();
				
			}
	
		}
		
		++it;
	
	}
			  	
}

/* Print indented tree */
void CoalescentTree::printTree() { 

	int rootdepth = nodetree.depth(nodetree.begin());
		
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it) {
		for(int i=0; i<nodetree.depth(it)-rootdepth; ++i) 
			cout << "  ";
		cout << (*it).getNumber();
		if ((*it).getName() != "") { 
			cout << " " << (*it).getName();
		}
		cout << " (" << (*it).getTime() << ")";
		cout << " [" << (*it).getLabel() << "]";			
		cout << " {" << (*it).getLength() << "}";		
		if ( !(*it).getInclude()) { 
			cout << " *";
		}		
		cout << endl << flush;
	}
		
}

/* print tree in Mathematica suitable format
Output is:
	leaf list
	trunk list
	tree rules
	label rules
	coordinate rules 
	tip name rules
*/	
void CoalescentTree::printRuleList(string outputFile) {

	/* initializing output stream */
	ofstream outStream;
	outStream.open( outputFile.c_str(),ios::app);

	/* setting up y-axis ordering, x-axis is date */
	adjustCoords();
	
	tree<Node>::iterator it, jt;
	
	/* print leaf nodes */
	/* a node may be a leaf on the current tree, but not a leaf on the original tree */
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		if ((*it).getLeaf()) {
			outStream << (*it).getNumber() << " ";
		}
	}
	outStream << endl;	
	
	/* print trunk nodes */
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		if ((*it).getTrunk()) {
			outStream << (*it).getNumber() << " ";
		}
	}
	outStream << endl;	
			
	/* print the tree in rule list (Mathematica-ready) format */
	/* print only upward links */
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {		// increment past root
		jt = nodetree.parent(it);
		if (nodetree.is_valid(jt)) {
			outStream << (*it).getNumber() << "->" << (*jt).getNumber() << " ";
		}
	}
	outStream << endl;
	
	
	/* print mapping of nodes to labels */
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {	
		outStream << (*it).getNumber() << "->" << (*it).getLabel() << " ";
	}
	outStream << endl;
	
	/* print mapping of nodes to coordinates */
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		outStream << (*it).getNumber() << "->{" << fixed << (*it).getTime() << "," << (*it).getCoord() << "} ";	
	}
	outStream << endl;
		  	  	
	/* print mapping of nodes to names */
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {	
		if ((*it).getName() != "")
			outStream << (*it).getNumber() << "->\"" << (*it).getName() << "\" ";
	}
	outStream << endl;  	  	
	  	  	
	outStream.close();
	  	  	
}

/* Print parentheses tree */
void CoalescentTree::printParen() { 

	tree<Node>::post_order_iterator it;
	it = nodetree.begin_post();
   	
	int currentDepth = nodetree.depth(it);
	for (int i = 0; i < currentDepth; i++) { 
		cout << "("; 
	} 
	cout << (*it).getNumber() << ":" << (*it).getLength(); 
	++it;
	
	/* need to add a '(' whenever the depth increases and a ')' whenever the depth decreases */
	/* only print leaf nodes */
	while(it != nodetree.end_post()) {
		if (nodetree.depth(it) > currentDepth) { 
			cout << ", ("; 
			for (int i = 0; i < nodetree.depth(it) - currentDepth - 1; i++) { 
				cout << "("; 
			}
			if (nodetree.number_of_children(it) == 0) { 
				cout << (*it).getNumber() << ":" << (*it).getLength(); 
			}
		}
		if (nodetree.depth(it) == currentDepth) { 
			if (nodetree.number_of_children(it) == 0) { 
				cout << ", " << (*it).getNumber() << ":" << (*it).getLength(); ; 
			}
		}
		if (nodetree.depth(it) < currentDepth) {
			if (nodetree.number_of_children(it) == 0) { 
				cout << (*it).getNumber() << ":" << (*it).getLength(); 
				cout << ")";		
			}
			else {
				cout << ")";	
				cout << ":" << (*it).getLength();
			}
		}
		currentDepth = nodetree.depth(it);
		++it;
		
	}
	
	cout << endl;

	
}


/* most recent node in tree, will always be a leaf */
double CoalescentTree::getPresentTime() {
	
	double t = (*nodetree.begin()).getTime();
	for (tree<Node>::leaf_iterator it = nodetree.begin_leaf(); it != nodetree.end_leaf(); ++it) {
		if ((*it).getTime() > t) { 
			t = (*it).getTime(); 
		}
	}
	return t;

}

/* most ancient node in tree */
double CoalescentTree::getRootTime() {

	double t = (*nodetree.begin()).getTime();
	for (tree<Node>::leaf_iterator it = nodetree.begin_leaf(); it != nodetree.end_leaf(); ++it) {
		if ((*it).getTime() < t) { 
			t = (*it).getTime(); 
		}
	}
	return t;
}

/* time span of the tree: present time minus root time */
double CoalescentTree::getTMRCA() {
	const double newest = getPresentTime();
	const double oldest = getRootTime();
	return newest - oldest;
}

/* number of labels 1 to n */
int CoalescentTree::getMaxLabel() {

	double n = 0;
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it ) {
		if ( (*it).getLabel() > n ) {
			n = (*it).getLabel();
		}
	}	
	return n;

}

/* number of nodes whose leaf flag is set */
int CoalescentTree::getLeafCount() {

	// integer counter: the result is a count, so no floating-point accumulator or narrowing on return
	int n = 0;
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it ) {
		if ( (*it).getLeaf() ) {
			n++;
		}
	}
	return n;

}

/* number of nodes currently stored in the tree */
int CoalescentTree::getNodeCount() {
	const int total = nodetree.size();
	return total;
}

/* total length of the tree */
double CoalescentTree::getLength() {

	double length = 0.0;
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it ) {
		if ( (*it).getInclude() ) {
			length += (*it).getLength();
		}
	}	
	return length;

}

/* length of the tree with label l */
double CoalescentTree::getLength(int l) {

	double length = 0.0;
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it ) {
		if ( (*it).getInclude() && (*it).getLabel() == l ) {
			length += (*it).getLength();
		}
	}	
	return length;

}

/* fraction of the total included tree length that carries label l */
double CoalescentTree::getLabelPro(int l) {

	const double labelled = getLength(l);
	const double whole = getLength();
	return labelled / whole;

}

/* proportion of tree that can trace its history forward to present day samples */
/* trunk traced back from the last 1/100 of the time width */
double CoalescentTree::getTrunkPro() { 

	double totalLength = getLength();

	double trunkLength = 0.0;
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it ) {
		if ( (*it).getInclude() && (*it).getTrunk()) {
			trunkLength += (*it).getLength();
		}
	}	
	
	return trunkLength / totalLength;
    	
}

/* returns the count of coalescent events */
int CoalescentTree::getCoalCount() {

	/* count coalescent events, these are nodes with two children */
	int count = 0;
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it) {
		if ((*it).getInclude() && nodetree.number_of_children(it) == 2) {		
			count++;
		}
	}
	return count;

}

/* returns the count of coalescent events with label */
int CoalescentTree::getCoalCount(int l) {

	/* count coalescent events, these are nodes with two children */
	int count = 0;
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it) {
		if ((*it).getInclude() && nodetree.number_of_children(it) == 2 && (*it).getLabel() == l ) {		
			count++;
		}
	}
	return count;

}

/* returns the opportunity for coalescence over the whole tree */
/* the span from root time to present time is cut into 1000 steps; at each step the number k of */
/* included branches crossing that time is counted and k(k-1)/2 * step is added to the weight */
/* returns 0 when the tree spans no time, since there is then no opportunity to coalesce */
/* running this with padTree() may be faster and more accurate */
double CoalescentTree::getCoalWeight() {

	// setting step to be 1/1000 of the total length of the tree
	double start = getRootTime();
	double stop = getPresentTime();
	double step = (stop - start) / (double) 1000;
	
	// a zero step would never advance t past stop, so the loop below would not terminate
	if ( !(step > 0.0) ) {
		return 0.0;
	}
	
	// step through tree counting concurrent lineages
	double weight = 0.0;
	for (double t = start; t <= stop; t += step) {
	
		int lineages = 0;
		tree<Node>::iterator it, jt;
		for (it = nodetree.begin(); it != nodetree.end(); ++it) {
			jt = nodetree.parent(it);
			// branch crosses t when the node is at or after t and its parent is before t
			if ( (*it).getInclude() && nodetree.is_valid(jt) && (*it).getTime() >= t && (*jt).getTime() < t) {
				lineages++;
			}
		}
		
		if (lineages > 0) {
			// number of lineage pairs; the product of consecutive integers is even, so the division is exact
			weight += ( ( lineages * (lineages - 1) ) / 2 ) * step;
		}
		
	}
	
	return weight;

}

/* returns the opportunity for coalescence for label l */
/* the span from root time to present time is cut into 1000 steps; at each step the number k of */
/* included branches with label l crossing that time is counted and k(k-1)/2 * step is added */
/* returns 0 when the tree spans no time, since there is then no opportunity to coalesce */
double CoalescentTree::getCoalWeight(int l) {

	// setting step to be 1/1000 of the total length of the tree
	double start = getRootTime();
	double stop = getPresentTime();
	double step = (stop - start) / (double) 1000;
	
	// a zero step would never advance t past stop, so the loop below would not terminate
	if ( !(step > 0.0) ) {
		return 0.0;
	}
	
	// step through tree counting concurrent lineages
	double weight = 0.0;
	for (double t = start; t <= stop; t += step) {
	
		int lineages = 0;
		tree<Node>::iterator it, jt;
		for (it = nodetree.begin(); it != nodetree.end(); ++it) {
			jt = nodetree.parent(it);
			// branch crosses t when the node is at or after t and its parent is before t
			if ( (*it).getInclude() && nodetree.is_valid(jt) && (*it).getTime() >= t && (*jt).getTime() < t && (*it).getLabel() == l ) {
				lineages++;
			}
		}
		
		if (lineages > 0) {
			// number of lineage pairs; the product of consecutive integers is even, so the division is exact
			weight += ( ( lineages * (lineages - 1) ) / 2 ) * step;
		}
		
	}
	
	return weight;

}

/* coalescent events per unit of coalescent opportunity over the whole tree */
double CoalescentTree::getCoalRate() {
	const int events = getCoalCount();
	const double opportunity = getCoalWeight();
	return events / opportunity;
}

/* coalescent events per unit of coalescent opportunity, restricted to label l */
double CoalescentTree::getCoalRate(int l) {
	const int events = getCoalCount(l);
	const double opportunity = getCoalWeight(l);
	return events / opportunity;
}

/* returns the count of migration events over entire tree */
int CoalescentTree::getMigCount() {

	/* count migration events, these are nodes in which the parent label differs from child label */
	tree<Node>::iterator it, jt;
	int count = 0;
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		jt = nodetree.parent(it);
		if (nodetree.is_valid(jt)) {		
			if ( (*it).getInclude() && (*jt).getInclude() && (*it).getLabel() != (*jt).getLabel() ) {		
				count++;
			}
		}
	}
	return count;

}

/* returns the count of migration events from label to label */
int CoalescentTree::getMigCount(int from, int to) {

	/* count migration events, these are nodes in which the parent label differs from child label */
	tree<Node>::iterator it, jt;
	int count = 0;
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		jt = nodetree.parent(it);
		if (nodetree.is_valid(jt)) {
			if ( (*it).getInclude() && (*jt).getInclude() && (*it).getLabel() == to && (*jt).getLabel() == from ) {		
				count++;
			}
		}
	}
	return count;

}

/* returns the overall rate of migration */
double CoalescentTree::getMigRate() {
	return getMigCount() / getLength();
}

/* rate of migration from label to label: getMigCount(from,to) / getLength(to) */
/* the rate is computed working backwards in time, i.e. the rate for 1->2 is the count of */
/* backward 2->1 events divided by the backward opportunity of 2 */
/* original author's caveat: this needs attention; empirical estimates seemed to match */
/* getMigCount(from,to) / getLength() instead, although that form seems wrong */
double CoalescentTree::getMigRate(int from, int to) {
	const int events = getMigCount(from,to);
	const double opportunity = getLength(to);
	return events / opportunity;
}

/* return mean of (2 * time to common ancestor) for every pair of leaf nodes */
double CoalescentTree::getDiversity() {

	double div = 0.0;
	int count = 0;

	/* iterating over every pair of leaf nodes */
	tree<Node>::leaf_iterator it, jt, kt;
	for (it = nodetree.begin_leaf(); it != nodetree.end_leaf(); ++it) {
		for (jt = it; jt != nodetree.end_leaf(); ++jt) {
			if ((*it).getInclude() && (*jt).getInclude() && it != jt) {
	
				/* find common ancestor and calculate time from it to jt via common ancestor */
				kt = commonAncestor(it,jt);
				div += ( (*it).getTime() - (*kt).getTime() ) + ( (*jt).getTime() - (*kt).getTime() );
				count++;
			
			}
		}
	}
	
	div /= (double) count;
	return div;
	
}

/* return mean of (2 * time to common ancestor) for pairs of leaf nodes with labels a and b */
double CoalescentTree::getDiversity(int l) {

	double div = 0.0;
	int count = 0;

	/* iterating over every pair of leaf nodes */
	tree<Node>::leaf_iterator it, jt, kt;
	for (it = nodetree.begin_leaf(); it != nodetree.end_leaf(); ++it) {
		for (jt = it; jt != nodetree.end_leaf(); ++jt) {
			if ((*it).getInclude() && (*jt).getInclude() && it != jt && (*it).getLabel() == l && (*jt).getLabel() == l ) {
	
				/* find common ancestor and calculate time from it to jt via common ancestor */
				kt = commonAncestor(it,jt);
				div += ( (*it).getTime() - (*kt).getTime() ) + ( (*jt).getTime() - (*kt).getTime() );
				count++;
			
			}
		}
	}
	
	div /= (double) count;
	return div;
	
}

/* return mean of (2 * time to common ancestor) for pairs of leaf nodes with identical labels */
double CoalescentTree::getDiversityWithin() {

	double div = 0.0;
	int count = 0;

	/* iterating over every pair of leaf nodes */
	tree<Node>::leaf_iterator it, jt, kt;
	for (it = nodetree.begin_leaf(); it != nodetree.end_leaf(); ++it) {
		for (jt = it; jt != nodetree.end_leaf(); ++jt) {
			if ((*it).getInclude() && (*jt).getInclude() && it != jt && (*it).getLabel() == (*jt).getLabel() ) {
	
				/* find common ancestor and calculate time from it to jt via common ancestor */
				kt = commonAncestor(it,jt);
				div += ( (*it).getTime() - (*kt).getTime() ) + ( (*jt).getTime() - (*kt).getTime() );
				count++;
			
			}
		}
	}
	
	div /= (double) count;
	return div;
	
}

/* return mean of (2 * time to common ancestor) for pairs of leaf nodes with different labels */
double CoalescentTree::getDiversityBetween() {

	double div = 0.0;
	int count = 0;

	/* iterating over every pair of leaf nodes */
	tree<Node>::leaf_iterator it, jt, kt;
	for (it = nodetree.begin_leaf(); it != nodetree.end_leaf(); ++it) {
		for (jt = it; jt != nodetree.end_leaf(); ++jt) {
			if ((*it).getInclude() && (*jt).getInclude() && it != jt && (*it).getLabel() != (*jt).getLabel() ) {
	
				/* find common ancestor and calculate time from it to jt via common ancestor */
				kt = commonAncestor(it,jt);
				div += ( (*it).getTime() - (*kt).getTime() ) + ( (*jt).getTime() - (*kt).getTime() );
				count++;
			
			}
		}
	}
	
	div /= (double) count;
	return div;
	
}

/* Population subdivision statistic: Fst = (between - within) / between, */
/* where within is getDiversityWithin() and between is getDiversityBetween() */
double CoalescentTree::getFst() {

	const double within = getDiversityWithin();
	const double between = getDiversityBetween();
	return (between - within) / between;

}

/* return D = (pi - S/a1) / sqrt(e1*S + e2*S*(S-1)), where pi is diversity (getDiversity()), */
/* S is the total tree length (getLength()) and a1, a2, e1, e2 are normalization factors that */
/* depend only on the leaf count n (getLeafCount()): a1 = sum of 1/i and a2 = sum of 1/i^2 */
/* for i = 1 .. n-1 */
/* expect D = 0 under neutrality */
/* NOTE: n < 2 leaves a1 at 0 and is not guarded against; the divisions by a1 are then by zero */
double CoalescentTree::getTajimaD() {

	double div = getDiversity();
	double S = getLength();

	int n = getLeafCount();

	/* every product involving n or i is formed in double: the int products i*i, n*n and */
	/* 9*n*(n-1) overflow (undefined behaviour) once the tree has enough leaves */
	const double dn = (double) n;

	double a1 = 0.0;
	double a2 = 0.0;
	for (int i = 1; i < n; i++) {
		const double di = (double) i;
		a1 += 1 / di;
		a2 += 1 / (di*di);
	}

	double e1 = (1.0/a1) * ( (dn+1.0) / (3.0*(dn-1.0)) - (1.0/a1) );
	double e2 = (1.0 / (a1*a1 + a2) ) * ( 2.0*(dn*dn+dn+3.0) / (9.0*dn*(dn-1.0)) - (dn+2.0) / (dn*a1) + a2/(a1*a1) );
	double denom = sqrt(e1*S + e2*S*(S-1));

	double tajima = (div - S/a1) / denom;
	return tajima;

}


/* returns vector of tip names */
vector<string> CoalescentTree::getTipNames() {

	vector<string> names;
	for (tree<Node>::leaf_iterator lit = nodetree.begin_leaf(); lit != nodetree.end_leaf(); ++lit) {
		names.push_back( (*lit).getName() );
	}
	return names;

}

double CoalescentTree::getTime(string name) {
	tree<Node>::iterator it = findNode(name);	
	return (*it).getTime();
}

int CoalescentTree::getLabel(string name) {
	tree<Node>::iterator it = findNode(name);	
	return (*it).getLabel();
}

/* time it takes for a named tip to coalesce with the trunk: the time of the named node minus */
/* the time of the first node, walking back through parents, that is flagged as trunk */
/* Throws runtime_error when no node is called `name`, or when the walk runs past the top of */
/* the tree without meeting a trunk node. */
double CoalescentTree::timeToTrunk(string name) {

	tree<Node>::iterator it, jt;
	it = findNode(name);

	/* findNode() returns nodetree.end() for an unknown name; that iterator must not be dereferenced */
	if (it == nodetree.end()) {
		throw runtime_error("timeToTrunk: no node named '" + name + "' in tree");
	}

	/* walk back from this node until trunk is reached */
	jt = it;
	while ( !(*jt).getTrunk() ) {
		jt = nodetree.parent(jt);
		/* stop before dereferencing an invalid parent when no ancestor is flagged as trunk */
		if ( !nodetree.is_valid(jt) ) {
			throw runtime_error("timeToTrunk: no trunk node found above '" + name + "'");
		}
	}

	return (*it).getTime() - (*jt).getTime();

}

/* removes extraneous nodes from tree */
void CoalescentTree::reduce() {

	tree<Node>::iterator it, jt, kt;

	/* removing pointless nodes, ie nodes that have no coalecent
	events or migration events associated with them */
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		jt = nodetree.parent(it);
		if (nodetree.is_valid(jt)) {
			if (nodetree.number_of_children(it) == 1) {								// no coalescence	
				kt = nodetree.child(it,0);
				if ((*kt).getLabel() == (*it).getLabel()) { 						// mo migration
	//				cout << "it = " << *it << ", kt = " << *kt << endl;
					(*kt).setLength( (*kt).getLength() + (*it).getLength() );	
					nodetree.reparent(jt,it);										// push child node up to be sibling of node
					nodetree.erase(it);												// erase node									
					it = nodetree.begin();
				}
			}
		}
	}

}

/* peels back trunk. works from root forward, stopping when first split is reached */
/* Every visited node that has a valid parent and exactly one child is spliced out: its length */
/* is added to its child, the child is reparented onto the node's parent, and the node is */
/* erased.  Afterwards, if the first node of the tree still has a single child, that child */
/* takes its place and gets length 0.0. */
void CoalescentTree::peelBack() {

	tree<Node>::iterator it, jt, kt;

	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
		jt = nodetree.parent(it);
		/* single-child node with a valid parent: merge its branch into the child and remove it */
		if ( nodetree.is_valid(jt) && nodetree.number_of_children(it) == 1) {	
			kt = nodetree.child(it,0);
			(*kt).setLength( (*kt).getLength() + (*it).getLength() );	
			nodetree.reparent(jt,it);								// push child node up to be sibling of node
			nodetree.erase(it);										// erase node									
			/* it was erased; restart from the first node (which is what the check below then inspects) */
			it = nodetree.begin();
		}
		/* first split reached: stop peeling */
		/* NOTE(review): only a node with exactly two children ends the scan; a node with no */
		/* children or more than two does not -- confirm trees are strictly bifurcating here */
		if (nodetree.number_of_children(it) == 2) {
			break;
		}
	}

	// adjust root    
	/* if the first node still has one child: move the node that follows it in traversal order */
	/* to sit directly after it (presumably as its next sibling, per tree.hh move_after -- confirm), */
	/* erase the old first node, and zero the length of the new first node */
	it = nodetree.begin();
	if (nodetree.number_of_children(it) == 1) {
		nodetree.move_after(nodetree.begin(),++nodetree.begin());
		nodetree.erase(nodetree.begin());
		(*nodetree.begin()).setLength(0.0);
	}
	
}	
	
void CoalescentTree::adjustCoords() {

	tree<Node>::iterator it, jt;

	/* reorder tree so that the bottom node of two sister nodes always has the most recent child more children */
	/* this combined with preorder traversal will insure the trunk follows a rough diagonal */
	it = nodetree.begin();
	while(it != nodetree.end()) {
		jt = nodetree.next_sibling(it);
		if (nodetree.is_valid(jt)) {
			int cit = nodetree.size(it);
			int cjt = nodetree.size(jt);
			if (cit > cjt) {
				nodetree.swap(jt,it);
				it = nodetree.begin();
			}
		}
		++it;
	}

	/* set coords of tips according to preorder traversal */
  	int count = 0;
	for (it = nodetree.begin(); it != nodetree.end(); ++it) {
	//	if (nodetree.depth(it) == 0) { count = 0; }		// this resets count for each subtree
		if ( (*it).getLeaf() ) {
			(*it).setCoord(count);	
			count++;
		}
	}
	
	/* revise coords of internal nodes according to postorder traversal */
  	tree<Node>::post_order_iterator post_it, post_jt, post_kt;
  	for (post_it = nodetree.begin_post(); post_it != nodetree.end_post(); post_it++) {
  		if (nodetree.number_of_children(post_it) == 1) {
  			post_jt = nodetree.child(post_it,0);
  			(*post_it).setCoord((*post_jt).getCoord());	
  		}  		  	
  		if (nodetree.number_of_children(post_it) == 2) {
  			post_jt = nodetree.child(post_it,0);
  			post_kt = nodetree.child(post_it,1);
  			double avg = ( (*post_jt).getCoord() + (*post_kt).getCoord() ) / (double) 2;
  			(*post_it).setCoord(avg);	
  		}
	}	

}	
	
/* returns maximium node associated with a node in the tree */
int CoalescentTree::getMaxNumber() {

	int n = 0;
	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it) {
		if ((*it).getNumber() > n) {
			n = (*it).getNumber();
		}
	}
	return n;

}

/* renumber tree via preorder traversal starting from n */
int CoalescentTree::renumber(int n) {

	for (tree<Node>::iterator it = nodetree.begin(); it != nodetree.end(); ++it) {
		(*it).setNumber(n);
		n++;
	}
	return n;

}

/* Looks up a node by number. */
/* Returns an iterator to the first node whose number equals n, or the end-of-tree iterator */
/* when there is no such node. */
tree<Node>::iterator CoalescentTree::findNode(int n) {

	tree<Node>::iterator node = nodetree.begin();
	while (node != nodetree.end() && (*node).getNumber() != n) {
		++node;
	}
	return node;

}

/* Looks up a node by name. */
/* Returns an iterator to the first node whose name equals `name`, or the end-of-tree iterator */
/* when there is no such node. */
tree<Node>::iterator CoalescentTree::findNode(string name) {

	for (tree<Node>::iterator node = nodetree.begin(); node != nodetree.end(); ++node) {
		if ((*node).getName() == name) {
			return node;
		}
	}
	return nodetree.end();

}

/* Given two iterators, returns an iterator to their most recent common ancestor. */
/* Nodes are recognised by their number, so this relies on node numbers being distinct. */
/* If the walk up from ib never meets a recorded number, the returned iterator is not valid. */
tree<Node>::iterator CoalescentTree::commonAncestor(tree<Node>::iterator ia, tree<Node>::iterator ib) {

	/* record the number of ia and of each of its ancestors, walking up through parents */
	set<int> lineage;
	for (tree<Node>::iterator up = ia; nodetree.is_valid(up); up = nodetree.parent(up)) {
		lineage.insert( (*up).getNumber() );
	}

	/* climb from ib until reaching a node whose number was recorded above */
	tree<Node>::iterator anc = ib;
	while (nodetree.is_valid(anc) && lineage.count( (*anc).getNumber() ) == 0) {
		anc = nodetree.parent(anc);
	}

	return anc;

}
back to top