https://github.com/stamatak/standard-RAxML
Revision a3d300633918f16910238f90924d1cfe09c59b13 authored by stamatak on 23 March 2016, 09:41:50 UTC, committed by stamatak on 23 March 2016, 09:41:50 UTC
1 parent 0173e9f
Raw File
Tip revision: a3d300633918f16910238f90924d1cfe09c59b13 authored by stamatak on 23 March 2016, 09:41:50 UTC
integrated Sarah's code for randomly subsampling quartets without replacement.
Tip revision: a3d3006
treeIO.c
/*  RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees 
 *  Copyright August 2006 by Alexandros Stamatakis
 *
 *  Partially derived from
 *  fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen
 *  
 *  and 
 *
 *  Programs of the PHYLIP package by Joe Felsenstein.
 *
 *  This program is free software; you may redistribute it and/or modify its
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 * 
 *
 *  For any other enquiries send an Email to Alexandros Stamatakis
 *  Alexandros.Stamatakis@epfl.ch
 *
 *  When publishing work that is based on the results from RAxML-VI-HPC please cite:
 *
 *  Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models". 
 *  Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446
 */

#ifndef WIN32
#include <sys/times.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h> 
#endif

#include <math.h>
#include <time.h> 
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>

#include "axml.h"

extern FILE *INFILE;
extern char infoFileName[1024];
extern char tree_file[1024];
extern char *likelihood_key;
extern char *ntaxa_key;
extern char *smoothed_key;
extern double masterTime;





stringHashtable *initStringHashTable(hashNumberType n)
{
  /* 
     init with primes 
  */
    
  static const hashNumberType initTable[] = {53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
					     196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843,
					     50331653, 100663319, 201326611, 402653189, 805306457, 1610612741};
 

  /* init with powers of two

  static const  hashNumberType initTable[] = {64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
					      32768, 65536, 131072, 262144, 524288, 1048576, 2097152,
					      4194304, 8388608, 16777216, 33554432, 67108864, 134217728,
					      268435456, 536870912, 1073741824, 2147483648U};
  */
  
  stringHashtable *h = (stringHashtable*)rax_malloc(sizeof(stringHashtable));
  
  hashNumberType
    tableSize,
    i,
    primeTableLength = sizeof(initTable)/sizeof(initTable[0]),
    maxSize = (hashNumberType)-1;    

  assert(n <= maxSize);

  i = 0;

  while(initTable[i] < n && i < primeTableLength)
    i++;

  assert(i < primeTableLength);

  tableSize = initTable[i];  

  h->table = (stringEntry**)rax_calloc(tableSize, sizeof(stringEntry*));
  h->tableSize = tableSize;    

  return h;
}


static hashNumberType  hashString(char *p, hashNumberType tableSize)
{
  hashNumberType h = 0;
  
  for(; *p; p++)
    h = 31 * h + *p;
  
  return (h % tableSize);
}

 

void addword(char *s, stringHashtable *h, int nodeNumber)
{
  hashNumberType position = hashString(s, h->tableSize);
  stringEntry *p = h->table[position];
  
  

  for(; p!= NULL; p = p->next)
    {
      if(strcmp(s, p->word) == 0)		 
	return;	  	
    }

  p = (stringEntry *)rax_malloc(sizeof(stringEntry));

  assert(p);
  
  p->nodeNumber = nodeNumber;
  p->word = (char *)rax_malloc((strlen(s) + 1) * sizeof(char));

  strcpy(p->word, s);
  
  p->next =  h->table[position];
  
  h->table[position] = p;
}

int lookupWord(char *s, stringHashtable *h)
{
  hashNumberType position = hashString(s, h->tableSize);
  stringEntry *p = h->table[position];

  for(; p!= NULL; p = p->next)
    {
      if(strcmp(s, p->word) == 0)		 
	return p->nodeNumber;	  	
    }

  return -1;
}


int countTips(nodeptr p, int numsp)
{
  if(isTip(p->number, numsp))  
    return 1;    
  {
    nodeptr q;
    int tips = 0;

    q = p->next;
    while(q != p)
      { 
	tips += countTips(q->back, numsp);
	q = q->next;
      } 
    
    return tips;
  }
}



static double getBranchLength(tree *tr, int perGene, nodeptr p)
{
  double 
    z = 0.0,
    x = 0.0;

  assert(perGene != NO_BRANCHES);
	      
  if(!tr->multiBranch)
    {   
      z = p->z[0];
      if (z < zmin) 
	z = zmin;      	 
      
      x = -log(z);           
    }
  else
    {
      if(perGene == SUMMARIZE_LH)
	{
	  int 
	    i;
	  
	  double 
	    avgX = 0.0;
		      
	  for(i = 0; i < tr->numBranches; i++)
	    {
	      assert(tr->partitionContributions[i] != -1.0);
	      //assert(fs[i] != -1.0);
	      z = p->z[i];
	      if(z < zmin) 
		z = zmin;      	 
	      x = -log(z);// * fs[i];
	      avgX += x * tr->partitionContributions[i];
	    }

	  x = avgX;
	}
      else
	{	
	  //assert(fs[perGene] != -1.0);
	  assert(perGene >= 0 && perGene < tr->numBranches);
	  
	  z = p->z[perGene];
	  
	  if(z < zmin) 
	    z = zmin;      	 
	  
	  x = -log(z);// * fs[perGene];	  
	}
    }

  return x;
}

  


static char *Tree2StringREC(char *treestr, tree *tr, nodeptr p, boolean printBranchLengths, boolean printNames, 
			    boolean printLikelihood, boolean rellTree, boolean finalPrint, int perGene, boolean branchLabelSupport, boolean printSHSupport, boolean printIC, boolean printSHSupports)
{
  char  *nameptr;            
      
  if(isTip(p->number, tr->rdta->numsp)) 
    {	       	  
      if(printNames)
	{
	  nameptr = tr->nameList[p->number];     
	  sprintf(treestr, "%s", nameptr);
	}
      else
	sprintf(treestr, "%d", p->number);    
	
      while (*treestr) treestr++;
    }
  else 
    {                 	 
      *treestr++ = '(';
      treestr = Tree2StringREC(treestr, tr, p->next->back, printBranchLengths, printNames, printLikelihood, rellTree, 
			       finalPrint, perGene, branchLabelSupport, printSHSupport, printIC, printSHSupports);
      *treestr++ = ',';
      treestr = Tree2StringREC(treestr, tr, p->next->next->back, printBranchLengths, printNames, printLikelihood, rellTree, 
			       finalPrint, perGene, branchLabelSupport, printSHSupport, printIC, printSHSupports);
      if(p == tr->start->back) 
	{
	  *treestr++ = ',';
	  treestr = Tree2StringREC(treestr, tr, p->back, printBranchLengths, printNames, printLikelihood, rellTree, 
				   finalPrint, perGene, branchLabelSupport, printSHSupport, printIC, printSHSupports);
	}
      *treestr++ = ')';                    
    }

  if(p == tr->start->back) 
    {	      	 
      if(printBranchLengths && !rellTree)
	sprintf(treestr, ":0.0;\n");
      else
	sprintf(treestr, ";\n");	 	  	
    }
  else 
    {                   
      if(rellTree || branchLabelSupport || printSHSupport || printIC || printSHSupports)
	{	 	 
	  if(( !isTip(p->number, tr->rdta->numsp)) && 
	     ( !isTip(p->back->number, tr->rdta->numsp)))
	    {			      
	      assert(p->bInf != (branchInfo *)NULL);	      	    
	      
	      assert(rellTree + branchLabelSupport + printSHSupport + printSHSupports == 1);

	      if(rellTree)
		{
		  if(printIC)
		    sprintf(treestr, "%1.3f:%8.20f", p->bInf->ic, p->z[0]);
		  else
		    sprintf(treestr, "%d:%8.20f", p->bInf->support, p->z[0]);
		}
	      
	      if(branchLabelSupport)
		{
		  if(printIC)
		    sprintf(treestr, ":%8.20f[%1.3f,%1.3f]", p->z[0], p->bInf->ic, p->bInf->icAll);
		  else		    
		    sprintf(treestr, ":%8.20f[%d]", p->z[0], p->bInf->support);
		}
	      
	      if(printSHSupport)
		sprintf(treestr, ":%8.20f[%d]", getBranchLength(tr, perGene, p), p->bInf->support);

	      if(printSHSupports)
		{
		  int 
		    model;
		  
		  sprintf(treestr, ":%8.20f[", getBranchLength(tr, perGene, p));
		  while(*treestr) 
		    treestr++;
		  
		  for(model = 0; model < tr->NumberOfModels - 1; model++)		    
		    {
		      sprintf(treestr, "%d,", p->bInf->supports[model]);
		       while(*treestr) 
			 treestr++;
		    }

		  sprintf(treestr, "%d]", p->bInf->supports[model]);
		}
	      
	    }
	  else		
	    {
	      if(rellTree || branchLabelSupport)
		sprintf(treestr, ":%8.20f", p->z[0]);	
	      if(printSHSupport || printSHSupports)
		sprintf(treestr, ":%8.20f", getBranchLength(tr, perGene, p));
	    }
	}
      else
	{
	  if(printBranchLengths)	    
	    sprintf(treestr, ":%8.20f", getBranchLength(tr, perGene, p));	      	   
	  else	    
	    sprintf(treestr, "%s", "\0");	    
	}      
    }
  
  while (*treestr) treestr++;
  return  treestr;
}


void collectSubtrees(tree *tr, nodeptr *subtrees, int *count, int ogn)
{
  int i;
  for(i = tr->mxtips + 1; i <= tr->mxtips + tr->mxtips - 2; i++)
    {
      nodeptr p, q;
      p = tr->nodep[i];
      if(countTips(p, tr->rdta->numsp) == ogn)
	{
	  subtrees[*count] = p;
	  *count = *count + 1;
	}
      q = p->next;
      while(q != p)
	{
	  if(countTips(q, tr->rdta->numsp) == ogn)
	    {
	      subtrees[*count] = q;
	      *count = *count + 1;
	    }
	  q = q->next;
	}
    }
}

static void checkOM(nodeptr p, int *n, int *c, tree *tr)
{
  if(isTip(p->number, tr->rdta->numsp))
    {
      n[*c] = p->number;
      *c = *c + 1;     
    }
  else
    {
      nodeptr q = p->next;

      while(q != p)
	{
	  checkOM(q->back, n, c, tr);
	  q = q->next;
	}
    }
}
    
static char *rootedTreeREC(char *treestr, tree *tr, nodeptr p, boolean printBranchLengths, boolean printNames, 
			   boolean printLikelihood, boolean rellTree, 
			   boolean finalPrint, analdef *adef, int perGene, boolean branchLabelSupport, boolean printSHSupport)
{
  char  *nameptr;

  if(isTip(p->number, tr->rdta->numsp)) 
    {	     
      if(printNames)
	{
	  nameptr = tr->nameList[p->number];     
	  sprintf(treestr, "%s", nameptr);
	}
      else
	sprintf(treestr, "%d", p->number);
      
      while (*treestr) 
	treestr++;
    }
  else 
    {
      *treestr++ = '(';
      treestr = rootedTreeREC(treestr, tr, p->next->back, printBranchLengths, printNames, printLikelihood, 
			      rellTree, finalPrint, adef, perGene, branchLabelSupport, printSHSupport);
      *treestr++ = ',';
      treestr = rootedTreeREC(treestr, tr, p->next->next->back, printBranchLengths, printNames, printLikelihood, 
			      rellTree, finalPrint, adef, perGene, branchLabelSupport, printSHSupport);      
      *treestr++ = ')';            
    }

  if(rellTree || branchLabelSupport || printSHSupport)
    {	 	 
      if(( !isTip(p->number, tr->rdta->numsp)) && 
	 ( !isTip(p->back->number, tr->rdta->numsp)))
	{			      
	  assert(p->bInf != (branchInfo *)NULL);
	      
	  if(rellTree)
	    sprintf(treestr, "%d:%8.20f", p->bInf->support, p->z[0]);     
	  if(branchLabelSupport)
	    sprintf(treestr, ":%8.20f[%d]", p->z[0], p->bInf->support);
	  if(printSHSupport)
	    sprintf(treestr, ":%8.20f[%d]", getBranchLength(tr, perGene, p), p->bInf->support);	      
	}
      else		
	{
	  if(rellTree || branchLabelSupport)
	    sprintf(treestr, ":%8.20f", p->z[0]);	
	  if(printSHSupport)
	    sprintf(treestr, ":%8.20f", getBranchLength(tr, perGene, p));
	}
    }
  else
    {
      if(printBranchLengths)	    
	sprintf(treestr, ":%8.20f", getBranchLength(tr, perGene, p));	      	   
      else	    
	sprintf(treestr, "%s", "\0");	    
    }     

  while (*treestr) treestr++;
  return  treestr;
}

static char *rootedTree(char *treestr, tree *tr, nodeptr p, boolean printBranchLengths, boolean printNames, 
			boolean printLikelihood, boolean rellTree, 
			boolean finalPrint, analdef *adef, int perGene, boolean branchLabelSupport, boolean printSHSupport)
{
  double oldz[NUM_BRANCHES];
  int i;
  
  for(i = 0; i < tr->numBranches; i++)
    oldz[i] = p->z[i];

  if(rellTree)    
    {    
      p->z[0] = p->back->z[0] = oldz[0] * 0.5;
      /*printf("%f\n",  p->z[0]);*/
    }
  else
    {
      if(printBranchLengths)
	{
	  double rz, z;
	  assert(perGene != NO_BRANCHES);

	  if(!tr->multiBranch)
	    {	    
	      z = -log(p->z[0]);
	      rz = exp(-(z * 0.5));
	      p->z[0] = p->back->z[0] = rz;
	    }
	  else
	    {
	      if(perGene == SUMMARIZE_LH)
		{				  		
		  int i;	      
		  
		  for(i = 0; i < tr->numBranches; i++)
		    {			    
		      z    = -log(p->z[i]);
		      rz   = exp(-(z * 0.5));
		      p->z[i] = p->back->z[i] = rz;		    
		    }		 
		}	     	     
	      else
		{				
		  assert(perGene >= 0 && perGene < tr->numBranches);
		  z = -log(p->z[perGene]);
		  rz = exp(-(z * 0.5));
		  p->z[perGene] = p->back->z[perGene] = rz;	       	      	      
		}
	    }
	}
    }

  *treestr = '(';
  treestr++;

  treestr = rootedTreeREC(treestr, tr, p,  printBranchLengths, printNames, printLikelihood, rellTree, finalPrint, 
			  adef, perGene, branchLabelSupport, printSHSupport);

  *treestr = ',';
  treestr++;
  
  treestr = rootedTreeREC(treestr, tr, p->back,  printBranchLengths, printNames, printLikelihood, rellTree, finalPrint, 
			  adef, perGene, branchLabelSupport, printSHSupport);  
  sprintf(treestr, ");\n");
  
  while(*treestr) 
    treestr++;


  for(i = 0; i < tr->numBranches; i++)
    p->z[i] = p->back->z[i] = oldz[i];  
    
  return  treestr;
}



void Tree2String(char *treestr, tree *tr, nodeptr p, boolean printBranchLengths, boolean printNames, boolean printLikelihood, 
		 boolean rellTree, 
		 boolean finalPrint, analdef *adef, int perGene, boolean branchLabelSupport, boolean printSHSupport, boolean printIC, boolean printSHSupports)
{ 
  //make sure the tree string is clean in the beginning 
  memset(treestr, 0, sizeof(char) * tr->treeStringLength);
  //printf("Tree string: %s\n", tr->tree_string);

  if(rellTree)
    assert(!branchLabelSupport && !printSHSupport);

  if(branchLabelSupport)
    assert(!rellTree && !printSHSupport);

  if(printSHSupport)
    assert(!branchLabelSupport && !rellTree);

  if(finalPrint && adef->outgroup)
    {
      nodeptr startNode = tr->start;

      if(tr->numberOfOutgroups > 1)
	{
	  nodeptr root;
	  nodeptr *subtrees = (nodeptr *)rax_malloc(sizeof(nodeptr) * tr->mxtips);
	  int i, k, count = 0;
	  int *nodeNumbers = (int*)rax_malloc(sizeof(int) * tr->numberOfOutgroups);
	  int *foundVector = (int*)rax_malloc(sizeof(int) * tr->numberOfOutgroups);
	  boolean monophyletic = FALSE;

	  collectSubtrees(tr, subtrees, &count, tr->numberOfOutgroups);

	  /*printf("Found %d subtrees of size  %d\n", count, tr->numberOfOutgroups);*/
	  
	  for(i = 0; (i < count) && (!monophyletic); i++)
	    {
	      int l, sum, nc = 0;
	      for(k = 0; k <  tr->numberOfOutgroups; k++)
		{
		  nodeNumbers[k] = -1;
		  foundVector[k] = 0;
		}

	      checkOM(subtrees[i], nodeNumbers, &nc, tr);	      
	      
	      for(l = 0; l < tr->numberOfOutgroups; l++)
		for(k = 0; k < tr->numberOfOutgroups; k++)
		  {
		    if(nodeNumbers[l] == tr->outgroupNums[k])
		      foundVector[l] = 1;
		  }
	      
	      sum = 0;
	      for(l = 0; l < tr->numberOfOutgroups; l++)
		sum += foundVector[l];
	      
	      if(sum == tr->numberOfOutgroups)
		{	       		  
		  root = subtrees[i];
		  tr->start = root;		
		  /*printf("outgroups are monphyletic!\n");*/
		  monophyletic = TRUE;		  
		}
	      else
		{
		  if(sum > 0)
		    {
		      /*printf("outgroups are NOT monophyletic!\n");*/
		      monophyletic = FALSE;
		    }	     
		}	
	    }
	  
	  if(!monophyletic)
	    {
	      printf("WARNING, outgroups are not monophyletic, using first outgroup \"%s\"\n", tr->nameList[tr->outgroupNums[0]]);
	      printf("from the list to root the tree!\n");
	     

	      {
		FILE *infoFile = myfopen(infoFileName, "ab");

		fprintf(infoFile, "\nWARNING, outgroups are not monophyletic, using first outgroup \"%s\"\n", tr->nameList[tr->outgroupNums[0]]);
		fprintf(infoFile, "from the list to root the tree!\n");
		
		fclose(infoFile);
	      }


	      tr->start = tr->nodep[tr->outgroupNums[0]];
	      
	      rootedTree(treestr, tr, tr->start->back, printBranchLengths, printNames, printLikelihood, rellTree, 
			 finalPrint, adef, perGene, branchLabelSupport, printSHSupport);
	    }
	  else
	    {	     
	      if(isTip(tr->start->number, tr->rdta->numsp))
		{
		  printf("Outgroup-Monophyly ERROR; tr->start is a tip \n");
		  errorExit(-1);
		}
	      if(isTip(tr->start->back->number, tr->rdta->numsp))
	      	{
		  printf("Outgroup-Monophyly ERROR; tr->start is a tip \n");
		  errorExit(-1);
		}
	      	      
	      rootedTree(treestr, tr, tr->start->back, printBranchLengths, printNames, printLikelihood, rellTree, 
			 finalPrint, adef, perGene, branchLabelSupport, printSHSupport);
	    }
	  
	  rax_free(foundVector);
	  rax_free(nodeNumbers);
	  rax_free(subtrees);
	}
      else
	{	  
	  tr->start = tr->nodep[tr->outgroupNums[0]];	 
	  rootedTree(treestr, tr, tr->start->back, printBranchLengths, printNames, printLikelihood, rellTree, 
		     finalPrint, adef, perGene, branchLabelSupport, printSHSupports);
	}      

      tr->start = startNode;
    }
  else    
    Tree2StringREC(treestr, tr, p, printBranchLengths, printNames, printLikelihood, rellTree, 
		   finalPrint, perGene, branchLabelSupport, printSHSupport, printIC, printSHSupports);  
    
  
  return;
}


void printTreePerGene(tree *tr, analdef *adef, char *fileName, char *permission)
{  
  FILE *treeFile;
  char extendedTreeFileName[1024];
  char buf[16];
  int i;

  assert(adef->perGeneBranchLengths);
     
  for(i = 0; i < tr->numBranches; i++)	
    {
      strcpy(extendedTreeFileName, fileName);
      sprintf(buf,"%d", i);
      strcat(extendedTreeFileName, ".PARTITION.");
      strcat(extendedTreeFileName, buf);
      /*printf("Partitiuon %d file %s\n", i, extendedTreeFileName);*/
      Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, i, FALSE, FALSE, FALSE, FALSE);
      treeFile = myfopen(extendedTreeFileName, permission);
      fprintf(treeFile, "%s", tr->tree_string);
      fclose(treeFile);
    }  
    
}



/*=======================================================================*/
/*                         Read a tree from a file                       */
/*=======================================================================*/


/*  1.0.A  Processing of quotation marks in comment removed
 */

static int treeFinishCom (FILE *fp, char **strp)
{
  int  ch;
  
  while ((ch = getc(fp)) != EOF && ch != ']') {
    if (strp != NULL) *(*strp)++ = ch;    /* save character  */
    if (ch == '[') {                      /* nested comment; find its end */
      if ((ch = treeFinishCom(fp, strp)) == EOF)  break;
      if (strp != NULL) *(*strp)++ = ch;  /* save closing ]  */
    }
  }
  
  if (strp != NULL) **strp = '\0';        /* terminate string  */
  return  ch;
} /* treeFinishCom */


static int treeGetCh (FILE *fp)         /* get next nonblank, noncomment character */
{ /* treeGetCh */
  int  ch;

  while ((ch = getc(fp)) != EOF) {
    if (whitechar(ch)) ;
    else if (ch == '[') {                   /* comment; find its end */
      if ((ch = treeFinishCom(fp, (char **) NULL)) == EOF)  break;
    }
    else  break;
  }
  
  return  ch;
} /* treeGetCh */


static boolean treeLabelEnd (int ch)
{
  switch (ch) 
    {
    case EOF:  
    case '\0':  
    case '\t':  
    case '\n':  
    case '\r': 
    case ' ':
    case ':':  
    case ',':   
    case '(':   
    case ')':  
    case ';':
      return TRUE;
    default:
      break;
    }
  return FALSE;
} 

static int treeEchoContext (FILE *fp1, FILE *fp2, int n);

static boolean  treeGetLabel (FILE *fp, char *lblPtr, int maxlen, boolean taxonLabel)
{
  int      ch;
  boolean  done, quoted, lblfound;

  if (--maxlen < 0) 
    lblPtr = (char *) NULL; 
  else 
    if (lblPtr == NULL) 
      maxlen = 0;

  ch = getc(fp);
  done = treeLabelEnd(ch);

  if(done && taxonLabel)
    {
      printf("RAxML expects to read a taxon label in the tree file\n");
      printf("but the taxon label is an empty string.\n\n");
      printf("RAxML will print the context of the error and then exit:\n\n");
      
      treeEchoContext(fp, stdout, 40);
      printf("\n                  ^^\n\n");
      errorExit(-1);     
    }

  lblfound = ! done;
  quoted = (ch == '\'');
  if (quoted && ! done) 
    {
      ch = getc(fp); 
      done = (ch == EOF);
    }

  while (! done) 
    {
      if (quoted) 
	{
	  if (ch == '\'') 
	    {
	      ch = getc(fp); 
	      if (ch != '\'') 
		break;
	    }
        }
      else 
	if (treeLabelEnd(ch)) break;     

      if (--maxlen >= 0) *lblPtr++ = ch;
      ch = getc(fp);
      if (ch == EOF) break;
    }

  if (ch != EOF)  (void) ungetc(ch, fp);

  if (lblPtr != NULL) *lblPtr = '\0';

  return lblfound;
}


static boolean  treeFlushLabel (FILE *fp)
{ 
  return  treeGetLabel(fp, (char *) NULL, (int) 0, FALSE);
} 




int treeFindTipByLabelString(char  *str, tree *tr, boolean check)                    
{
  int 
    lookup = lookupWord(str, tr->nameHash);

  if(lookup > 0)
    {
      if(check)
	assert(! tr->nodep[lookup]->back);
      return lookup;
    }
  else
    { 
      printf("ERROR: Cannot find tree species: %s\n", str);
      printf("The species names in the input tree and alignment file may not match, please check!\n");
      return  0;
    }
}


int treeFindTipName(FILE *fp, tree *tr, boolean check)
{
  char    str[nmlngth+2];
  int      n;

  if(treeGetLabel(fp, str, nmlngth+2, TRUE))
    n = treeFindTipByLabelString(str, tr, check);
  else
    n = 0;
   

  return  n;
} 



static int treeEchoContext (FILE *fp1, FILE *fp2, int n)
{ /* treeEchoContext */
  
  int 
    offset = 0,
    ch;

  int64_t 
    current = ftell(fp1);  

  boolean  
    waswhite = TRUE;  
  
  fpos_t 
    pos;

  fgetpos(fp1, &pos);

  fseek(fp1, MAX(current - (int64_t)n / 2, 0), SEEK_SET);
       
  if((current - (int64_t)n / 2) < 0)
    offset = (int)(current - (int64_t)n / 2);
  
  while (n > 0 && ((ch = getc(fp1)) != EOF)) 
    {
      if (whitechar(ch)) 
	{
	  ch = waswhite ? '\0' : ' ';
	  waswhite = TRUE;
	}
    else 
      {
	waswhite = FALSE;
      }
    
      if(ch > '\0') 
	{
	  putc(ch, fp2); 
	  n--;
	}
    }

  fsetpos(fp1, &pos);

  return offset;

  /*boolean  
    waswhite = TRUE;
  
  while (n > 0 && ((ch = getc(fp1)) != EOF)) 
    {
      if (whitechar(ch)) 
	{
	  ch = waswhite ? '\0' : ' ';
	  waswhite = TRUE;
	}
    else 
      {
	waswhite = FALSE;
      }
    
      if(ch > '\0') 
	{
	  putc(ch, fp2); 
	  n--;
	}
	}*/
} /* treeEchoContext */

static boolean treeNeedCh (FILE *fp, int c1, char *where);


static boolean treeProcessLength (FILE *fp, double *dptr, int *branchLabel, boolean storeBranchLabels, tree *tr)
{
  int
    ch;
  
  if((ch = treeGetCh(fp)) == EOF)  
    return FALSE;    /*  Skip comments */
  (void) ungetc(ch, fp);
  
  if(fscanf(fp, "%lf", dptr) != 1) 
    {
      printf("ERROR: treeProcessLength: Problem reading branch length\n");
      treeEchoContext(fp, stdout, 40);
      printf("\n");
      return  FALSE;
    }
    
  if((ch = getc(fp)) != EOF)
    {
      if(ch == '[')
	{
	  if(fscanf(fp, "%d", branchLabel) != 1)
	    goto handleError;	      

	  //printf("Branch label: %d\n", *branchLabel);
	  
	  if((ch = getc(fp)) != ']')
	    {
	    handleError:
	      printf("ERROR: treeProcessLength: Problem reading branch label\n");
	      treeEchoContext(fp, stdout, 40);
	      printf("\n");
	      return FALSE;
	    }	    

	  if(storeBranchLabels)
	    tr->branchLabelCounter = tr->branchLabelCounter + 1;

	}
      else
	(void)ungetc(ch, fp);
    }
  
  return  TRUE;
}


static int treeFlushLen (FILE  *fp, tree *tr)
{
  double  
    dummy;  
  int 
    dummyLabel,     
    ch;
  
  ch = treeGetCh(fp);
  
  if (ch == ':') 
    {
      ch = treeGetCh(fp);
      
      ungetc(ch, fp);
      if(!treeProcessLength(fp, &dummy, &dummyLabel, FALSE, tr)) 
	return 0;
      return 1;	  
    }
  
  
  
  if (ch != EOF) (void) ungetc(ch, fp);
  return 1;
} 





static boolean treeNeedCh (FILE *fp, int c1, char *where)
{
  int  
    c2;
  
  if((c2 = treeGetCh(fp)) == c1)  
    return TRUE;
  
  printf("ERROR: Expecting '%c' %s tree; found: character '%c'\n\n", c1, where, c2);
  
  if (c2 == EOF) 
    {
      printf("End-of-File\n");
    }
  else 
    {      	
      ungetc(c2, fp);
      treeEchoContext(fp, stdout, 40);
      printf("\n");
      printf("                    ^\n\n");
    }
  //putchar('\n');
    
  if(c1 == ')' || c1 == '(')
    printf("RAxML may be expecting to read a strictly bifurcating tree!\n\n");
  else
    printf("RAxML may be expecting to read a tree that contains branch lengths\n\n");

  return FALSE;
} 



static boolean addElementLen (FILE *fp, tree *tr, nodeptr p, boolean readBranchLengths, boolean readNodeLabels, int *lcount, analdef *adef, boolean storeBranchLabels)
{   
  nodeptr  q;
  int      n, ch, fres;
  
  if ((ch = treeGetCh(fp)) == '(') 
    { 
      n = (tr->nextnode)++;
      if (n > 2*(tr->mxtips) - 2) 
	{
	  if (tr->rooted || n > 2*(tr->mxtips) - 1) 
	    {
	      printf("ERROR: Too many internal nodes.  Is tree rooted?\n");
	      printf("       Deepest splitting should be a trifurcation.\n");
	      return FALSE;
	    }
	  else 
	    {
	      if(readNodeLabels)
		{
		  printf("The program will exit with an error in the next source code line\n");
		  printf("You are probably trying to read in rooted trees with a RAxML option \n");
		  printf("that for some reason expects unrooted binary trees\n\n");
		}

	      assert(!readNodeLabels);
	      tr->rooted = TRUE;
	    }
	}
      
      q = tr->nodep[n];

      if (! addElementLen(fp, tr, q->next, readBranchLengths, readNodeLabels, lcount, adef, storeBranchLabels))        return FALSE;
      if (! treeNeedCh(fp, ',', "in"))             return FALSE;
      if (! addElementLen(fp, tr, q->next->next, readBranchLengths, readNodeLabels, lcount, adef, storeBranchLabels))  return FALSE;
      if (! treeNeedCh(fp, ')', "in"))             return FALSE;
      
      if(readNodeLabels)
	{
	  char label[64];
	  int support;

	  if(treeGetLabel (fp, label, 10, FALSE))
	    {	
	      int val = sscanf(label, "%d", &support);
      
	      assert(val == 1);

	      /*printf("LABEL %s Number %d\n", label, support);*/
	      p->support = q->support = support;
	      /*printf("%d %d %d %d\n", p->support, q->support, p->number, q->number);*/
	      assert(p->number > tr->mxtips && q->number > tr->mxtips);
	      *lcount = *lcount + 1;
	    }
	}
      else	
	(void) treeFlushLabel(fp);
    }
  else 
    {   
      ungetc(ch, fp);
      if ((n = treeFindTipName(fp, tr, TRUE)) <= 0)          return FALSE;
      q = tr->nodep[n];
      if (tr->start->number > n)  tr->start = q;
      (tr->ntips)++;
    }
  
  if(readBranchLengths)
    {
      double 
	branch;
      
      int 
	startCounter = tr->branchLabelCounter,
	endCounter,
	branchLabel = -1;
      
      if (! treeNeedCh(fp, ':', "in"))                 
	{
	  printf("ERROR: problem reading branch length ... RAxML will abort with a failing assertion\n\n");
	  return FALSE;
	}
      if (! treeProcessLength(fp, &branch, &branchLabel, storeBranchLabels, tr)) 
	{
	  printf("ERROR: problem reading branch length ... RAxML will abort with a failing assertion\n\n");
	  return FALSE;
	}

      endCounter = tr->branchLabelCounter;
      
      /*printf("Branch %8.20f %d\n", branch, tr->numBranches);*/
      if(adef->mode == CLASSIFY_ML)
	{
	  double 
	    x[NUM_BRANCHES];
	  
	  assert(tr->NumberOfModels == 1);
	  assert(adef->useBinaryModelFile);
	  assert(tr->numBranches == 1);

	  x[0] = exp(-branch);

	  hookup(p, q, x, tr->numBranches);
	}
      else
	hookup(p, q, &branch, tr->numBranches);

      if(storeBranchLabels && (endCounter > startCounter))
	{
	  assert(!isTip(p->number, tr->mxtips) && !isTip(q->number, tr->mxtips));
	  assert(branchLabel >= 0);
	  p->support = q->support = branchLabel;
	}
    }
  else
    {
      fres = treeFlushLen(fp, tr);
      if(!fres) return FALSE;
      
      hookupDefault(p, q, tr->numBranches);
    }
  return TRUE;          
} 











static nodeptr uprootTree (tree *tr, nodeptr p, boolean readBranchLengths, boolean readConstraint)
{
  nodeptr  q, r, s, start;
  int      n, i;              

  for(i = tr->mxtips + 1; i < 2 * tr->mxtips - 1; i++)
    assert(i == tr->nodep[i]->number);

  
 

  if(isTip(p->number, tr->mxtips) || p->back) 
    {
      printf("ERROR: Unable to uproot tree.\n");
      printf("       Inappropriate node marked for removal.\n");
      assert(0);
    }
  
  assert(p->back == (nodeptr)NULL);
  
  tr->nextnode = tr->nextnode - 1;

  assert(tr->nextnode < 2 * tr->mxtips);
  
  n = tr->nextnode;               
  
  assert(tr->nodep[tr->nextnode]);

  if (n != tr->mxtips + tr->ntips - 1) 
    {
      printf("ERROR: Unable to uproot tree.  Inconsistent\n");
      printf("       number of tips and nodes for rooted tree.\n");
      assert(0);
    }

  q = p->next->back;                  /* remove p from tree */
  r = p->next->next->back;
  assert(p->back == (nodeptr)NULL);
    
  if(readBranchLengths)
    {
      double b[NUM_BRANCHES];
      int i;
      for(i = 0; i < tr->numBranches; i++)
	{
	  b[i] = (r->z[i] + q->z[i]);	  
	}
      hookup (q, r, b, tr->numBranches);
    }
  else    
    hookupDefault(q, r, tr->numBranches);    

  tr->leftRootNode = p->next->back;
  tr->rightRootNode = p->next->next->back;

  if(readConstraint && tr->grouped)
    {    
      if(tr->constraintVector[p->number] != 0)
	{
	  printf("Root node to remove should have top-level grouping of 0\n");
	  assert(0);
	}
    }  
 
  assert(!(isTip(r->number, tr->mxtips) && isTip(q->number, tr->mxtips))); 

  assert(p->number > tr->mxtips);

  if(tr->ntips > 2 && p->number != n) 
    {          	     
      q = tr->nodep[n];            /* transfer last node's conections to p */
      r = q->next;
      s = q->next->next;
           
      if(readConstraint && tr->grouped)	
	tr->constraintVector[p->number] = tr->constraintVector[q->number];       
      
      hookup(p,             q->back, q->z, tr->numBranches);   /* move connections to p */
      hookup(p->next,       r->back, r->z, tr->numBranches);
      hookup(p->next->next, s->back, s->z, tr->numBranches); 

      if(q == tr->leftRootNode || q == tr->rightRootNode)
	{
	  if(q == tr->leftRootNode)
	    {
	      if(p->back == tr->rightRootNode)
		tr->leftRootNode = p;
	      else
		{
		   if(p->next->back == tr->rightRootNode)
		     tr->leftRootNode = p->next;
		   else
		     {
		       if(p->next->next->back == tr->rightRootNode)
			 tr->leftRootNode = p->next->next;
		       else
			 assert(0);
		     }
		}
	    }
	  else
	    {
	      assert(q == tr->rightRootNode);

	      if(p->back == tr->leftRootNode)
		tr->rightRootNode = p;
	      else
		{
		   if(p->next->back == tr->leftRootNode)
		     tr->rightRootNode = p->next;
		   else
		     {
		       if(p->next->next->back == tr->leftRootNode)
			 tr->rightRootNode = p->next->next;
		       else
			 assert(0);
		     }
		}
	    }
	}
      
      q->back = q->next->back = q->next->next->back = (nodeptr) NULL;
    }
  else    
    {
      assert(tr->ntips > 2);     
      p->back = p->next->back = p->next->next->back = (nodeptr) NULL;
    }

  
  
  assert(tr->ntips > 2);
  
  start = findAnyTip(tr->nodep[tr->mxtips + 1], tr->mxtips);
  
  assert(isTip(start->number, tr->mxtips));
  tr->rooted = FALSE;
  return  start;
}


int treeReadLen (FILE *fp, tree *tr, boolean readBranches, boolean readNodeLabels, boolean topologyOnly, analdef *adef, boolean completeTree, boolean storeBranchLabels)
{
  nodeptr  
    p;
  
  int 
    i, 
    ch, 
    lcount = 0; 

  tr->branchLabelCounter = 0;

  for (i = 1; i <= tr->mxtips; i++) 
    {
      tr->nodep[i]->back = (node *) NULL; 
      if(topologyOnly)
	tr->nodep[i]->support = -1;
    }

  for(i = tr->mxtips + 1; i < 2 * tr->mxtips; i++)
    {
      tr->nodep[i]->back = (nodeptr)NULL;
      tr->nodep[i]->next->back = (nodeptr)NULL;
      tr->nodep[i]->next->next->back = (nodeptr)NULL;
      tr->nodep[i]->number = i;
      tr->nodep[i]->next->number = i;
      tr->nodep[i]->next->next->number = i;

      if(topologyOnly)
	{
	  tr->nodep[i]->support = -2;
	  tr->nodep[i]->next->support = -2;
	  tr->nodep[i]->next->next->support = -2;
	}
    }

  if(topologyOnly)
    tr->start       = tr->nodep[tr->mxtips];
  else
    tr->start       = tr->nodep[1];

  tr->ntips       = 0;
  tr->nextnode    = tr->mxtips + 1;      
 
  for(i = 0; i < tr->numBranches; i++)
    tr->partitionSmoothed[i] = FALSE;
  
  tr->rooted      = FALSE;  
  tr->wasRooted   = FALSE;

  p = tr->nodep[(tr->nextnode)++]; 
  
  while((ch = treeGetCh(fp)) != '(')
    {
      if(ch == EOF)
	{
	  printf("RAxML could not find a single \"(\" in what is supposed to be your tree file\n");
	  printf("RAxML will exit now, please check your tree file!\n");
	  printf("The last couple of characters RAxML read in your supposed tree file look like this:\n\n");
	  treeEchoContext(fp, stdout, 100);
	  printf("\n\n");
	  errorExit(-1);
	}            
    };

  

 
  if(!topologyOnly)
    {
      if(adef->mode != CLASSIFY_ML)
	{
	  if(adef->mode != OPTIMIZE_BR_LEN_SCALER)
	    assert(readBranches == FALSE && readNodeLabels == FALSE);
	  else		 
	    assert(readBranches == TRUE && readNodeLabels == FALSE);		
	}
      else
	{
	  if(adef->useBinaryModelFile)
	    assert(readBranches == TRUE && readNodeLabels == FALSE);		
	  else
	    assert(readBranches == FALSE && readNodeLabels == FALSE);
	}
    }
  
       
  if (! addElementLen(fp, tr, p, readBranches, readNodeLabels, &lcount, adef, storeBranchLabels))                 
    assert(0);
  if (! treeNeedCh(fp, ',', "in"))                
    assert(0);
  if (! addElementLen(fp, tr, p->next, readBranches, readNodeLabels, &lcount, adef, storeBranchLabels))
    assert(0);
  if (! tr->rooted) 
    {
      if ((ch = treeGetCh(fp)) == ',') 
	{ 
	  if (! addElementLen(fp, tr, p->next->next, readBranches, readNodeLabels, &lcount, adef, storeBranchLabels))
	    assert(0);	    
	}
      else 
	{  	  
	  /*  A rooted format */
	  
	  tr->rooted = TRUE;
	  tr->wasRooted     = TRUE;
	  
	  if (ch != EOF)  (void) ungetc(ch, fp);
	}	
    }
  else 
    {            
      p->next->next->back = (nodeptr) NULL;
      tr->wasRooted     = TRUE;    
    }

  if(!tr->rooted && adef->mode == ANCESTRAL_STATES)
    {
      printf("Error: The ancestral state computation mode requires a rooted tree as input, exiting ....\n");
      exit(0);
    }

  if (! treeNeedCh(fp, ')', "in"))                
    assert(0);

  if(topologyOnly)
    assert(!(tr->rooted && readNodeLabels));

  (void) treeFlushLabel(fp);
  
  if (! treeFlushLen(fp, tr))                         
    assert(0);
 
  if (! treeNeedCh(fp, ';', "at end of"))       
    assert(0);
  
  if (tr->rooted) 
    {     
      assert(!readNodeLabels);

      p->next->next->back = (nodeptr) NULL;      
      tr->start = uprootTree(tr, p->next->next, readBranches, FALSE);      

       
      /*tr->leftRootNode  = p->back;
	tr->rightRootNode = p->next->back;   
      */

      if (! tr->start)                              
	{
	  printf("FATAL ERROR UPROOTING TREE\n");
	  assert(0);
	}    
    }
  else    
    tr->start = findAnyTip(p, tr->rdta->numsp);    
  
   if(!topologyOnly || adef->mode == CLASSIFY_MP)
    {      
      assert(tr->ntips <= tr->mxtips);
      

      if(tr->ntips < tr->mxtips)
	{
	  if(completeTree)
	    {
	      printBothOpen("Hello this is your friendly RAxML tree parsing routine\n");
	      printBothOpen("The RAxML option you are using requires to read in only complete trees\n");
	      printBothOpen("with %d taxa, there is at least one tree with %d taxa though ... exiting\n", tr->mxtips, tr->ntips);
	      exit(-1);
	    }
	  else
	    {
	      if(adef->computeDistance)
		{
		  printBothOpen("Error: pairwise distance computation only allows for complete, i.e., containing all taxa\n");
		  printBothOpen("bifurcating starting trees\n");
		  exit(-1);
		}     
	      if(adef->mode == CLASSIFY_ML || adef->mode == CLASSIFY_MP)
		{	 
		  printBothOpen("RAxML placement algorithm: You provided a reference tree with %d taxa; alignmnet has %d taxa\n", tr->ntips, tr->mxtips);		  
		  printBothOpen("%d query taxa will be placed using %s\n", tr->mxtips - tr->ntips, (adef->mode == CLASSIFY_ML)?"maximum likelihood":"parsimony");
		  if(adef->mode == CLASSIFY_ML)
		    classifyML(tr, adef);	  
		  else
		    {
		      assert(adef->mode == CLASSIFY_MP);
		      classifyMP(tr, adef);
		    }
		}
	      else
		{
		  printBothOpen("You provided an incomplete starting tree %d alignmnet has %d taxa\n", tr->ntips, tr->mxtips);	  
		  makeParsimonyTreeIncomplete(tr, adef);	 		 
		}    
	    }
	}
      else
	{
	  if(adef->mode == PARSIMONY_ADDITION)
	    {
	      printBothOpen("Error you want to add sequences to a trees via MP stepwise addition, but \n");
	      printBothOpen("you have provided an input tree that already contains all taxa\n");
	      exit(-1);
	    }
	  if(adef->mode == CLASSIFY_ML || adef->mode == CLASSIFY_MP)
	    {
	      printBothOpen("Error you want to place query sequences into a tree using %s, but\n", tr->mxtips - tr->ntips, (adef->mode == CLASSIFY_ML)?"maximum likelihood":"parsimony");
	      printBothOpen("you have provided an input tree that already contains all taxa\n");
	      exit(-1);
	    }
	}
   
      onlyInitrav(tr, tr->start);
    }
 
  
  return lcount;
}



/********************************MULTIFURCATIONS************************************************/


static boolean  addElementLenMULT (FILE *fp, tree *tr, nodeptr p, int partitionCounter, analdef *adef, int *partCount)
{ 
  nodeptr  q, r, s;
  int      n, ch, fres;
  double randomResolution;
  int old;
  
  fpos_t start_pos;

  fgetpos(fp, &start_pos);
     

    
  tr->constraintVector[p->number] = partitionCounter; 

  if ((ch = treeGetCh(fp)) == '(') 
    {
      *partCount = *partCount + 1;
      old = *partCount;       
      
      n = (tr->nextnode)++;
      if (n > 2*(tr->mxtips) - 2) 
	{
	  if (tr->rooted || n > 2*(tr->mxtips) - 1) 
	    {
	      printf("ERROR: Too many internal nodes.  Is tree rooted?\n");
	      printf("       Deepest splitting should be a trifurcation.\n");
	      return FALSE;
	    }
	  else 
	    {
	      tr->rooted = TRUE;	    
	    }
	}
      q = tr->nodep[n];
      tr->constraintVector[q->number] = *partCount;
      if (! addElementLenMULT(fp, tr, q->next, old, adef, partCount))        return FALSE;
    
      if (! treeNeedCh(fp, ',', "in")) 
	{ 
	  int 
	    c2 = treeGetCh(fp);
	  
	  if(c2 == ')')
	    {
	      int 
		offset; 
	      
	      printf("Could it be that you are using excess parenthesis starting here:\n\n");
	      fsetpos(fp, &start_pos);	 
	      offset = treeEchoContext(fp, stdout, 40);
	      printf("\n");
	      if(offset == 0)
		printf("                    ^\n\n");
	      else
		{
		  int 
		    spaces = 20 + offset,
		    k;

		  assert(offset < 0);
		  
		  for(k = 0; k < spaces; k++)
		    printf(" ");
		  printf("^\n\n");		  
		}
	    }
	  
	  ungetc(c2, fp);
	  
	  errorExit(-1);	 
	}
      
      if (! addElementLenMULT(fp, tr, q->next->next, old, adef, partCount))  return FALSE;
                 
      hookupDefault(p, q, tr->numBranches);

      while((ch = treeGetCh(fp)) == ',')
	{ 
	  n = (tr->nextnode)++;
	  if (n > 2*(tr->mxtips) - 2) 
	    {
	      if (tr->rooted || n > 2*(tr->mxtips) - 1) 
		{
		  printf("ERROR: Too many internal nodes.  Is tree rooted?\n");
		  printf("       Deepest splitting should be a trifurcation.\n");
		  return FALSE;
		}
	      else 
		{
		  tr->rooted = TRUE;
		}
	    }
	  r = tr->nodep[n];
	  tr->constraintVector[r->number] = *partCount;	  

	  randomResolution = randum(&adef->constraintSeed);	 
	   	  
	  if(randomResolution < 0.5)
	    {	    
	      s = q->next->back;	      
	      r->back = q->next;
	      q->next->back = r;	      
	      r->next->back = s;
	      s->back = r->next;	      
	      addElementLenMULT(fp, tr, r->next->next, old, adef, partCount);	     
	    }
	  else
	    {	  
	      s = q->next->next->back;	      
	      r->back = q->next->next;
	      q->next->next->back = r;	      
	      r->next->back = s;
	      s->back = r->next;	      
	      addElementLenMULT(fp, tr, r->next->next, old, adef, partCount);	     
	    }	    	  	  
	}       
      
      //ungetc(ch, fp);
	  
      if(ch != ')')
	{
	  printf("Missing \")\" or \",\" in treeReadLenMULT, RAxML will print the context of the error and exit\n\n");	  
	  treeEchoContext(fp, stdout, 40);
	  printf("\n\n");
	  errorExit(-1);
	}
     
	


      (void) treeFlushLabel(fp);
    }
  else 
    {                             
      ungetc(ch, fp);
      if ((n = treeFindTipName(fp, tr, TRUE)) <= 0)          return FALSE;
      q = tr->nodep[n];      
      tr->constraintVector[q->number] = partitionCounter;

      if (tr->start->number > n)  tr->start = q;
      (tr->ntips)++;
      hookupDefault(p, q, tr->numBranches);
    }
  
  fres = treeFlushLen(fp, tr);
  if(!fres) return FALSE;
    
  return TRUE;          
} 





boolean treeReadLenMULT (FILE *fp, tree *tr, analdef *adef)
{
  nodeptr  p, r, s;
  int      i, ch, n;
  int partitionCounter = 0, partCount = 0;
  double randomResolution;
  
  assert(adef->constraintSeed > 0);

  for(i = 0; i < 2 * tr->mxtips; i++)
    tr->constraintVector[i] = -1;

  for (i = 1; i <= tr->mxtips; i++) 
    tr->nodep[i]->back = (node *) NULL;

  for(i = tr->mxtips + 1; i < 2 * tr->mxtips; i++)
    {
      tr->nodep[i]->back = (nodeptr)NULL;
      tr->nodep[i]->next->back = (nodeptr)NULL;
      tr->nodep[i]->next->next->back = (nodeptr)NULL;
      tr->nodep[i]->number = i;
      tr->nodep[i]->next->number = i;
      tr->nodep[i]->next->next->number = i;
    }


  tr->start       = tr->nodep[tr->mxtips];
  tr->ntips       = 0;
  tr->nextnode    = tr->mxtips + 1;
 
  for(i = 0; i < tr->numBranches; i++)
    tr->partitionSmoothed[i] = FALSE;

  tr->rooted      = FALSE;
 
  p = tr->nodep[(tr->nextnode)++]; 
  while((ch = treeGetCh(fp)) != '(')
    {
       if(ch == EOF)
	 {
	   printf("RAxML could not find a single \"(\" in what is supposed to be your tree file\n");
	   printf("RAxML will exit now, please check your tree file!\n");
	   printf("The last couple of characters RAxML read in your supposed tree file look like this:\n\n");
	   treeEchoContext(fp, stdout, 100);
	   printf("\n\n");
	   errorExit(-1);
	 }
    };
      
  if (! addElementLenMULT(fp, tr, p, partitionCounter, adef, &partCount))                 return FALSE;
  if (! treeNeedCh(fp, ',', "in"))                return FALSE;
  if (! addElementLenMULT(fp, tr, p->next, partitionCounter, adef, &partCount))           return FALSE;
  if (! tr->rooted) 
    {
      if ((ch = treeGetCh(fp)) == ',') 
	{       
	  if (! addElementLenMULT(fp, tr, p->next->next, partitionCounter, adef, &partCount)) return FALSE;

	  while((ch = treeGetCh(fp)) == ',')
	    { 
	      n = (tr->nextnode)++;
	      assert(n <= 2*(tr->mxtips) - 2);
	
	      r = tr->nodep[n];	
	      tr->constraintVector[r->number] = partitionCounter;	   
	      	      
	      randomResolution = randum(&(adef->constraintSeed));

	      if(randomResolution < 0.5)
		{	
		  s = p->next->next->back;		  
		  r->back = p->next->next;
		  p->next->next->back = r;		  
		  r->next->back = s;
		  s->back = r->next;		  
		  addElementLenMULT(fp, tr, r->next->next, partitionCounter, adef, &partCount);	
		}
	      else
		{
		  s = p->next->back;		  
		  r->back = p->next;
		  p->next->back = r;		  
		  r->next->back = s;
		  s->back = r->next;		  
		  addElementLenMULT(fp, tr, r->next->next, partitionCounter, adef, &partCount);
		}
	    }	  	  	      	  
	 

	  if(ch != ')')
	    {
	      printf("Missing \")\" or \",\" in treeReadLenMULT, RAxML will print the context of the error and exit\n\n");
	      treeEchoContext(fp, stdout, 40);
	      printf("\n\n");
	      errorExit(-1);	      
	    }
	  else
	    ungetc(ch, fp);
	}
      else 
	{ 
	  tr->rooted = TRUE;
	  if (ch != EOF)  (void) ungetc(ch, fp);
	}       
    }
  else 
    {
      p->next->next->back = (nodeptr) NULL;
    }
    
  if (! treeNeedCh(fp, ')', "in"))                return FALSE;
  (void) treeFlushLabel(fp);
  if (! treeFlushLen(fp, tr))                         return FALSE;
   
  if (! treeNeedCh(fp, ';', "at end of"))       return FALSE;
  

  if (tr->rooted) 
    {        
      p->next->next->back = (nodeptr) NULL;
      tr->start = uprootTree(tr, p->next->next, FALSE, TRUE);
      if (! tr->start)                              return FALSE;
    }
  else 
    {     
      tr->start = findAnyTip(p, tr->rdta->numsp);
    }

  
  

  if(tr->ntips < tr->mxtips)         
    makeParsimonyTreeIncomplete(tr, adef);          


  if(!adef->rapidBoot)
    onlyInitrav(tr, tr->start);
  return TRUE; 
}






void getStartingTree(tree *tr, analdef *adef)
{
  tr->likelihood = unlikely;
  
  if(adef->restart) 
    {	 	     	     
      INFILE = myfopen(tree_file, "rb");	
                 		
      if(!adef->grouping)	
	{
	  switch(adef->mode)
	    {
	    case ANCESTRAL_STATES:	    
	      assert(!tr->saveMemory);

	      tr->leftRootNode  = (nodeptr)NULL;
	      tr->rightRootNode = (nodeptr)NULL;

	      treeReadLen(INFILE, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE);

	      assert(tr->leftRootNode && tr->rightRootNode);
	      break;
	    case CLASSIFY_MP:
	      treeReadLen(INFILE, tr, TRUE, FALSE, TRUE, adef, FALSE, FALSE);
	      break;
	    case OPTIMIZE_BR_LEN_SCALER:
	      treeReadLen(INFILE, tr, TRUE, FALSE, FALSE, adef, TRUE, FALSE);
	      break;
	    case CLASSIFY_ML:
	      if(adef->useBinaryModelFile)
		{
		  if(tr->saveMemory)				 
		    treeReadLen(INFILE, tr, TRUE, FALSE, TRUE, adef, FALSE, FALSE);	          	       
		  else		   
		    treeReadLen(INFILE, tr, TRUE, FALSE, FALSE, adef, FALSE, FALSE);
		}
	      else
		{
		  if(tr->saveMemory)				 
		    treeReadLen(INFILE, tr, FALSE, FALSE, TRUE, adef, FALSE, FALSE);	          	       
		  else		   
		    treeReadLen(INFILE, tr, FALSE, FALSE, FALSE, adef, FALSE, FALSE);
		}
	      break;
	    default:	     
	      if(tr->saveMemory)				 
		treeReadLen(INFILE, tr, FALSE, FALSE, TRUE, adef, FALSE, FALSE);	          	       
	      else		   
		treeReadLen(INFILE, tr, FALSE, FALSE, FALSE, adef, FALSE, FALSE);
	      break;
	    }
	}
      else
	{
	  assert(adef->mode != ANCESTRAL_STATES);
	 
	  if (! treeReadLenMULT(INFILE, tr, adef))
	    exit(-1);
	}                                                                         

      if(adef->mode == PARSIMONY_ADDITION)
	return; 

      if(adef->mode != CLASSIFY_MP)
	{
	  if(adef->mode == OPTIMIZE_BR_LEN_SCALER)
	    {
	      assert(tr->numBranches == tr->NumberOfModels);
	      scaleBranches(tr, TRUE);
	      evaluateGenericInitrav(tr, tr->start); 				      
	    }
	  else
	    {
	      evaluateGenericInitrav(tr, tr->start); 
	      treeEvaluate(tr, 1);
	    }
	}
               
      fclose(INFILE);
    }
  else
    { 
      assert(adef->mode != PARSIMONY_ADDITION &&
	     adef->mode != MORPH_CALIBRATOR   &&
	     adef->mode != ANCESTRAL_STATES   &&
	     adef->mode != OPTIMIZE_BR_LEN_SCALER);

      if(adef->randomStartingTree)	  
	makeRandomTree(tr, adef);       	   	 	   	  
      else
	makeParsimonyTree(tr, adef);	   	    	      		      	
      
      if(adef->startingTreeOnly)
	{
	  printStartingTree(tr, adef, TRUE);
	  exit(0);
	}
      else   	         
	printStartingTree(tr, adef, FALSE);     	         
            
      
      evaluateGenericInitrav(tr, tr->start);   

     
      
      treeEvaluate(tr, 1);        	 

      
     
    }         

  tr->start = tr->nodep[1];
}



/****** functions for reading true multi-furcating trees *****/

/****************************************************************************/

static void nextNodeOutOfBounds(tree *tr, int nextnode)
{
  if(nextnode >= tr->maxNodes)
    {
      printf("The tree RAxML is trying to parse contains more nodes than expected.\n");
      printf("RAxML will exit now, please check your input trees!\n");
      assert(0);
    }
}

static void addMultifurcation (FILE *fp, tree *tr, nodeptr _p, analdef *adef, int *nextnode)
{   
  nodeptr  
    p, 
    initial_p;
  
  int      
    n, 
    ch, 
    fres;
  
  if ((ch = treeGetCh(fp)) == '(') 
    { 
      int 
	i = 0;     
      
      nextNodeOutOfBounds(tr, *nextnode);      
      initial_p = p = tr->nodep[*nextnode];      
      *nextnode = *nextnode + 1;

      do
	{  
	  nextNodeOutOfBounds(tr, *nextnode); 	  	 		  
	  p->next = tr->nodep[*nextnode];	 
	  *nextnode = *nextnode + 1;

	  p = p->next;
	
	  addMultifurcation(fp, tr, p, adef, nextnode);	  
	  i++;
	}  
      while((ch = treeGetCh(fp)) == ',');

      ungetc(ch, fp);

      p->next = initial_p;
           
      if (! treeNeedCh(fp, ')', "in"))                
	assert(0);                   

      treeFlushLabel(fp);
    }
  else 
    {   
      ungetc(ch, fp);
      if ((n = treeFindTipName(fp, tr, FALSE)) <= 0)          
	assert(0);
      p = tr->nodep[n];
      initial_p = p;
      tr->start = p;
      (tr->ntips)++;
    }
  
 
  fres = treeFlushLen(fp, tr);
  if(!fres) 
    assert(0);
      
  hookupDefault(initial_p, _p, tr->numBranches);
} 

static void printMultiFurc(nodeptr p, tree *tr)
{ 
  if(isTip(p->number, tr->mxtips))  
    {     
      printf("%s", tr->nameList[p->number]);   
    }
  else
    {
      nodeptr 
	q = p->next;     
      
      printf("(");

      while(q != p)
	{	 
	  printMultiFurc(q->back, 
			 tr);
	  q = q->next;
	  if(q != p)
	    printf(",");
	}

      printf(")");
    }
}

void allocateMultifurcations(tree *tr, tree *smallTree)
{ 
  int
    i,
    tips,
    inter; 

  smallTree->numBranches = tr->numBranches;

  smallTree->mxtips = tr->mxtips;

  //printf("Small tree tiups: %d\n", smallTree->mxtips);

  smallTree->nameHash = tr->nameHash;

  smallTree->nameList = tr->nameList;

  tips  = tr->mxtips;
  inter = tr->mxtips - 1;
  
  smallTree->nodep = (nodeptr *)rax_malloc((tips + 3 * inter) * sizeof(nodeptr));

  smallTree->maxNodes = tips + 3 * inter;

  smallTree->nodep[0] = (node *) NULL;

  for (i = 1; i <= tips; i++)
    {
      smallTree->nodep[i] = (nodeptr)rax_malloc(sizeof(node));      
      memcpy(smallTree->nodep[i], tr->nodep[i], sizeof(node));
      smallTree->nodep[i]->back = (node *) NULL;
      smallTree->nodep[i]->next = (node *) NULL;        
    }

  for(i = tips + 1; i < tips + 3 * inter; i++)
    {     
      smallTree->nodep[i] = (nodeptr)rax_malloc(sizeof(node));
      smallTree->nodep[i]->number = i;     
      smallTree->nodep[i]->back = (node *) NULL;
      smallTree->nodep[i]->next = (node *) NULL;
    }
  
}

void freeMultifurcations(tree *tr)
{
  int
    i,
    tips  = tr->mxtips,
    inter = tr->mxtips - 1; 

  for (i = 1; i < tips + 3 * inter; i++)
    rax_free(tr->nodep[i]);
  
  rax_free(tr->nodep);
}

static void relabelInnerNodes(nodeptr p, tree *tr, int *number, int *nodeCounter)
{
  if(isTip(p->number, tr->mxtips))
    {
      assert(0);
    }
  else
    {
      nodeptr 
	q = p->next;

      int 
	_n = *number;      
      
      tr->nodep[p->number]->number = _n;
      p->x = 1;

     *number = *number + 1;
      
      while(q != p)
	{
	  tr->nodep[q->number]->number = _n;	
	  q->x = 0;
	  
	  if(!isTip(q->back->number, tr->mxtips))
	    {	    
	      *nodeCounter = *nodeCounter + 1;
	      relabelInnerNodes(q->back, tr, number, nodeCounter);
	    }
	  q = q->next;
	}  
    }
}


int readMultifurcatingTree(FILE *fp, tree *tr, analdef *adef, boolean fastParse)
{
  nodeptr  
    p,
    initial_p;
  
  int    
    innerNodeNumber,
    innerBranches = 0,
    nextnode,
    i, 
    ch,
    tips  = tr->mxtips,
    inter = tr->mxtips - 1;  
 
  //clean up before parsing !
    
  if(!fastParse)
    {
      for (i = 1; i < tips + 3 * inter; i++)     
	{     
	  tr->nodep[i]->back = (node *) NULL;
	  tr->nodep[i]->next = (node *) NULL;    
	  tr->nodep[i]->x = 0;
	}  
      for(i = tips + 1; i < tips + 3 * inter; i++)   
	tr->nodep[i]->number = i;
    }
  
  
   

  

  tr->ntips = 0;
  nextnode  = tr->mxtips + 1;         

  while((ch = treeGetCh(fp)) != '(')
    {
      if(ch == EOF)
	{
	  printf("RAxML could not find a single \"(\" in what is supposed to be your tree file\n");
	  printf("RAxML will exit now, please check your tree file!\n");
	  printf("The last couple of characters RAxML read in your supposed tree file look like this:\n\n");
	  treeEchoContext(fp, stdout, 100);
	  printf("\n\n");
	  errorExit(-1);
	}
    };            

  i = 0;

  do
    {         
      if(i == 0)
	{
	  nextNodeOutOfBounds(tr, nextnode);
	  initial_p = p = tr->nodep[nextnode];	 
	  nextnode++;
	}
      else
	{
	  nextNodeOutOfBounds(tr, nextnode);
	  p->next = tr->nodep[nextnode];	 	  
	  p = p->next; 
	  nextnode++;
	}
      
      addMultifurcation(fp, tr, p, adef, &nextnode);       
                   
      i++;
    }  
  while((ch = treeGetCh(fp)) == ',');
   

  if(i < 2)
    assert(0);
  else
    {
      if(i == 2)
	{
	  nodeptr 
	    q = initial_p->back,
	    r = initial_p->next->back;	  

	  //printBothOpen("you provided a rooted tree, we need an unrooted one, RAxML will remove the root!\n");
	  
	  if(!fastParse)
	    assert(initial_p->next->next == (node *)NULL);

	  hookupDefault(q, r, tr->numBranches);	  
	  
	  if(tr->start == initial_p ||
	     tr->start == initial_p->next ||
	     tr->start->back == initial_p ||
	     tr->start->back == initial_p->next
	     )
	    {
	      tr->start = findAnyTip(q, tr->mxtips);
	    }
	  
	  assert(tr->start != initial_p);
	  assert(tr->start != initial_p->next);
	  assert(tr->start->back != initial_p);
	  assert(tr->start->back != initial_p->next);
	  
	}
    }
      

  /*  
      if(i < 3)
      {
      printBothOpen("You need to provide unrooted input trees!\n");      
      assert(0);
      }
  */
 
  ungetc(ch, fp);
  
  if(i > 2)
    p->next = initial_p;
  
  if (! treeNeedCh(fp, ')', "in"))                
    assert(0);  

  (void)treeFlushLabel(fp);
  
  if (! treeFlushLen(fp, tr))                         
    assert(0);
 
  if (! treeNeedCh(fp, ';', "at end of"))       
    assert(0);
    
  //printf("%d tips found, %d inner nodes used start %d maxtips %d\n", tr->ntips, nextnode - tr->mxtips, tr->start->number, tr->mxtips);
  
  if(fastParse)
    for(i = tips + 1; i < tips + 3 * tr->ntips; i++)   
      tr->nodep[i]->number = i;

  assert(isTip(tr->start->number, tr->mxtips)); 
  
  innerNodeNumber = tr->mxtips + 1;

  relabelInnerNodes(tr->start->back, tr, &innerNodeNumber, &innerBranches);
      
  //printf("Inner node number: %d\n", innerNodeNumber);

  //printf("Inner branches %d\n", innerBranches); 

  if(0)
    {      
      printf("(");      
      printMultiFurc(tr->start, tr);      
      printf(",");
      printMultiFurc(tr->start->back, tr);
      printf(");\n");
    }
 
  return innerBranches;
}

back to top