https://github.com/stamatak/standard-RAxML
Tip revision: 14e313c5191793580099cc2cde371e68bbce7e87 authored by stamatak on 17 November 2014, 10:32:56 UTC
fixed a bug in the parser that woould enable ASC bias correction for protein susbst models by default
fixed a bug in the parser that woould enable ASC bias correction for protein susbst models by default
Tip revision: 14e313c
treeIO.c
/* RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees
* Copyright August 2006 by Alexandros Stamatakis
*
* Partially derived from
* fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen
*
* and
*
* Programs of the PHYLIP package by Joe Felsenstein.
*
* This program is free software; you may redistribute it and/or modify its
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
*
* For any other enquiries send an Email to Alexandros Stamatakis
* Alexandros.Stamatakis@epfl.ch
*
* When publishing work that is based on the results from RAxML-VI-HPC please cite:
*
* Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models".
* Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446
*/
#ifndef WIN32
#include <sys/times.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#endif
#include <math.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "axml.h"
extern FILE *INFILE;
extern char infoFileName[1024];
extern char tree_file[1024];
extern char *likelihood_key;
extern char *ntaxa_key;
extern char *smoothed_key;
extern double masterTime;
stringHashtable *initStringHashTable(hashNumberType n)
{
/*
init with primes
*/
static const hashNumberType initTable[] = {53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843,
50331653, 100663319, 201326611, 402653189, 805306457, 1610612741};
/* init with powers of two
static const hashNumberType initTable[] = {64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
32768, 65536, 131072, 262144, 524288, 1048576, 2097152,
4194304, 8388608, 16777216, 33554432, 67108864, 134217728,
268435456, 536870912, 1073741824, 2147483648U};
*/
stringHashtable *h = (stringHashtable*)rax_malloc(sizeof(stringHashtable));
hashNumberType
tableSize,
i,
primeTableLength = sizeof(initTable)/sizeof(initTable[0]),
maxSize = (hashNumberType)-1;
assert(n <= maxSize);
i = 0;
while(initTable[i] < n && i < primeTableLength)
i++;
assert(i < primeTableLength);
tableSize = initTable[i];
h->table = (stringEntry**)rax_calloc(tableSize, sizeof(stringEntry*));
h->tableSize = tableSize;
return h;
}
static hashNumberType hashString(char *p, hashNumberType tableSize)
{
hashNumberType h = 0;
for(; *p; p++)
h = 31 * h + *p;
return (h % tableSize);
}
void addword(char *s, stringHashtable *h, int nodeNumber)
{
hashNumberType position = hashString(s, h->tableSize);
stringEntry *p = h->table[position];
for(; p!= NULL; p = p->next)
{
if(strcmp(s, p->word) == 0)
return;
}
p = (stringEntry *)rax_malloc(sizeof(stringEntry));
assert(p);
p->nodeNumber = nodeNumber;
p->word = (char *)rax_malloc((strlen(s) + 1) * sizeof(char));
strcpy(p->word, s);
p->next = h->table[position];
h->table[position] = p;
}
int lookupWord(char *s, stringHashtable *h)
{
hashNumberType position = hashString(s, h->tableSize);
stringEntry *p = h->table[position];
for(; p!= NULL; p = p->next)
{
if(strcmp(s, p->word) == 0)
return p->nodeNumber;
}
return -1;
}
int countTips(nodeptr p, int numsp)
{
if(isTip(p->number, numsp))
return 1;
{
nodeptr q;
int tips = 0;
q = p->next;
while(q != p)
{
tips += countTips(q->back, numsp);
q = q->next;
}
return tips;
}
}
static double getBranchLength(tree *tr, int perGene, nodeptr p)
{
double
z = 0.0,
x = 0.0,
f,
*fs;
assert(perGene != NO_BRANCHES);
#ifdef _HET
if(isTip(p->number, tr->mxtips) || isTip(p->back->number, tr->mxtips))
{
//printf("tip model \n");
f = tr->fracchange_TIP;
fs = tr->fracchanges_TIP;
}
else
{
//printf("inner model\n");
f = tr->fracchange;
fs = tr->fracchanges;
}
#else
f = tr->fracchange;
fs = tr->fracchanges;
#endif
if(!tr->multiBranch)
{
assert(f != -1.0);
z = p->z[0];
if (z < zmin)
z = zmin;
x = -log(z) * f;
}
else
{
if(perGene == SUMMARIZE_LH)
{
int
i;
double
avgX = 0.0;
for(i = 0; i < tr->numBranches; i++)
{
assert(tr->partitionContributions[i] != -1.0);
assert(fs[i] != -1.0);
z = p->z[i];
if(z < zmin)
z = zmin;
x = -log(z) * fs[i];
avgX += x * tr->partitionContributions[i];
}
x = avgX;
}
else
{
assert(fs[perGene] != -1.0);
assert(perGene >= 0 && perGene < tr->numBranches);
z = p->z[perGene];
if(z < zmin)
z = zmin;
x = -log(z) * fs[perGene];
}
}
return x;
}
static char *Tree2StringREC(char *treestr, tree *tr, nodeptr p, boolean printBranchLengths, boolean printNames,
boolean printLikelihood, boolean rellTree, boolean finalPrint, int perGene, boolean branchLabelSupport, boolean printSHSupport, boolean printIC, boolean printSHSupports)
{
char *nameptr;
if(isTip(p->number, tr->rdta->numsp))
{
if(printNames)
{
nameptr = tr->nameList[p->number];
sprintf(treestr, "%s", nameptr);
}
else
sprintf(treestr, "%d", p->number);
while (*treestr) treestr++;
}
else
{
*treestr++ = '(';
treestr = Tree2StringREC(treestr, tr, p->next->back, printBranchLengths, printNames, printLikelihood, rellTree,
finalPrint, perGene, branchLabelSupport, printSHSupport, printIC, printSHSupports);
*treestr++ = ',';
treestr = Tree2StringREC(treestr, tr, p->next->next->back, printBranchLengths, printNames, printLikelihood, rellTree,
finalPrint, perGene, branchLabelSupport, printSHSupport, printIC, printSHSupports);
if(p == tr->start->back)
{
*treestr++ = ',';
treestr = Tree2StringREC(treestr, tr, p->back, printBranchLengths, printNames, printLikelihood, rellTree,
finalPrint, perGene, branchLabelSupport, printSHSupport, printIC, printSHSupports);
}
*treestr++ = ')';
}
if(p == tr->start->back)
{
if(printBranchLengths && !rellTree)
sprintf(treestr, ":0.0;\n");
else
sprintf(treestr, ";\n");
}
else
{
if(rellTree || branchLabelSupport || printSHSupport || printIC || printSHSupports)
{
if(( !isTip(p->number, tr->rdta->numsp)) &&
( !isTip(p->back->number, tr->rdta->numsp)))
{
assert(p->bInf != (branchInfo *)NULL);
assert(rellTree + branchLabelSupport + printSHSupport + printSHSupports == 1);
if(rellTree)
{
if(printIC)
sprintf(treestr, "%1.3f:%8.20f", p->bInf->ic, p->z[0]);
else
sprintf(treestr, "%d:%8.20f", p->bInf->support, p->z[0]);
}
if(branchLabelSupport)
{
if(printIC)
sprintf(treestr, ":%8.20f[%1.3f,%1.3f]", p->z[0], p->bInf->ic, p->bInf->icAll);
else
sprintf(treestr, ":%8.20f[%d]", p->z[0], p->bInf->support);
}
if(printSHSupport)
sprintf(treestr, ":%8.20f[%d]", getBranchLength(tr, perGene, p), p->bInf->support);
if(printSHSupports)
{
int
model;
sprintf(treestr, ":%8.20f[", getBranchLength(tr, perGene, p));
while(*treestr)
treestr++;
for(model = 0; model < tr->NumberOfModels - 1; model++)
{
sprintf(treestr, "%d,", p->bInf->supports[model]);
while(*treestr)
treestr++;
}
sprintf(treestr, "%d]", p->bInf->supports[model]);
}
}
else
{
if(rellTree || branchLabelSupport)
sprintf(treestr, ":%8.20f", p->z[0]);
if(printSHSupport || printSHSupports)
sprintf(treestr, ":%8.20f", getBranchLength(tr, perGene, p));
}
}
else
{
if(printBranchLengths)
sprintf(treestr, ":%8.20f", getBranchLength(tr, perGene, p));
else
sprintf(treestr, "%s", "\0");
}
}
while (*treestr) treestr++;
return treestr;
}
void collectSubtrees(tree *tr, nodeptr *subtrees, int *count, int ogn)
{
int i;
for(i = tr->mxtips + 1; i <= tr->mxtips + tr->mxtips - 2; i++)
{
nodeptr p, q;
p = tr->nodep[i];
if(countTips(p, tr->rdta->numsp) == ogn)
{
subtrees[*count] = p;
*count = *count + 1;
}
q = p->next;
while(q != p)
{
if(countTips(q, tr->rdta->numsp) == ogn)
{
subtrees[*count] = q;
*count = *count + 1;
}
q = q->next;
}
}
}
static void checkOM(nodeptr p, int *n, int *c, tree *tr)
{
if(isTip(p->number, tr->rdta->numsp))
{
n[*c] = p->number;
*c = *c + 1;
}
else
{
nodeptr q = p->next;
while(q != p)
{
checkOM(q->back, n, c, tr);
q = q->next;
}
}
}
static char *rootedTreeREC(char *treestr, tree *tr, nodeptr p, boolean printBranchLengths, boolean printNames,
boolean printLikelihood, boolean rellTree,
boolean finalPrint, analdef *adef, int perGene, boolean branchLabelSupport, boolean printSHSupport)
{
char *nameptr;
if(isTip(p->number, tr->rdta->numsp))
{
if(printNames)
{
nameptr = tr->nameList[p->number];
sprintf(treestr, "%s", nameptr);
}
else
sprintf(treestr, "%d", p->number);
while (*treestr)
treestr++;
}
else
{
*treestr++ = '(';
treestr = rootedTreeREC(treestr, tr, p->next->back, printBranchLengths, printNames, printLikelihood,
rellTree, finalPrint, adef, perGene, branchLabelSupport, printSHSupport);
*treestr++ = ',';
treestr = rootedTreeREC(treestr, tr, p->next->next->back, printBranchLengths, printNames, printLikelihood,
rellTree, finalPrint, adef, perGene, branchLabelSupport, printSHSupport);
*treestr++ = ')';
}
if(rellTree || branchLabelSupport || printSHSupport)
{
if(( !isTip(p->number, tr->rdta->numsp)) &&
( !isTip(p->back->number, tr->rdta->numsp)))
{
assert(p->bInf != (branchInfo *)NULL);
if(rellTree)
sprintf(treestr, "%d:%8.20f", p->bInf->support, p->z[0]);
if(branchLabelSupport)
sprintf(treestr, ":%8.20f[%d]", p->z[0], p->bInf->support);
if(printSHSupport)
sprintf(treestr, ":%8.20f[%d]", getBranchLength(tr, perGene, p), p->bInf->support);
}
else
{
if(rellTree || branchLabelSupport)
sprintf(treestr, ":%8.20f", p->z[0]);
if(printSHSupport)
sprintf(treestr, ":%8.20f", getBranchLength(tr, perGene, p));
}
}
else
{
if(printBranchLengths)
sprintf(treestr, ":%8.20f", getBranchLength(tr, perGene, p));
else
sprintf(treestr, "%s", "\0");
}
while (*treestr) treestr++;
return treestr;
}
static char *rootedTree(char *treestr, tree *tr, nodeptr p, boolean printBranchLengths, boolean printNames,
boolean printLikelihood, boolean rellTree,
boolean finalPrint, analdef *adef, int perGene, boolean branchLabelSupport, boolean printSHSupport)
{
double oldz[NUM_BRANCHES];
int i;
for(i = 0; i < tr->numBranches; i++)
oldz[i] = p->z[i];
if(rellTree)
{
p->z[0] = p->back->z[0] = oldz[0] * 0.5;
/*printf("%f\n", p->z[0]);*/
}
else
{
if(printBranchLengths)
{
double rz, z;
assert(perGene != NO_BRANCHES);
if(!tr->multiBranch)
{
assert(tr->fracchange != -1.0);
z = -log(p->z[0]) * tr->fracchange;
rz = exp(-(z * 0.5)/ tr->fracchange);
p->z[0] = p->back->z[0] = rz;
}
else
{
if(perGene == SUMMARIZE_LH)
{
int i;
for(i = 0; i < tr->numBranches; i++)
{
assert(tr->fracchanges[i] != -1.0);
z = -log(p->z[i]) * tr->fracchanges[i];
rz = exp(-(z * 0.5)/ tr->fracchanges[i]);
p->z[i] = p->back->z[i] = rz;
}
}
else
{
assert(tr->fracchanges[perGene] != -1.0);
assert(perGene >= 0 && perGene < tr->numBranches);
z = -log(p->z[perGene]) * tr->fracchanges[perGene];
rz = exp(-(z * 0.5)/ tr->fracchanges[perGene]);
p->z[perGene] = p->back->z[perGene] = rz;
}
}
}
}
*treestr = '(';
treestr++;
treestr = rootedTreeREC(treestr, tr, p, printBranchLengths, printNames, printLikelihood, rellTree, finalPrint,
adef, perGene, branchLabelSupport, printSHSupport);
*treestr = ',';
treestr++;
treestr = rootedTreeREC(treestr, tr, p->back, printBranchLengths, printNames, printLikelihood, rellTree, finalPrint,
adef, perGene, branchLabelSupport, printSHSupport);
sprintf(treestr, ");\n");
while(*treestr)
treestr++;
for(i = 0; i < tr->numBranches; i++)
p->z[i] = p->back->z[i] = oldz[i];
return treestr;
}
void Tree2String(char *treestr, tree *tr, nodeptr p, boolean printBranchLengths, boolean printNames, boolean printLikelihood,
boolean rellTree,
boolean finalPrint, analdef *adef, int perGene, boolean branchLabelSupport, boolean printSHSupport, boolean printIC, boolean printSHSupports)
{
//make sure the tree string is clean in the beginning
memset(treestr, 0, sizeof(char) * tr->treeStringLength);
//printf("Tree string: %s\n", tr->tree_string);
if(rellTree)
assert(!branchLabelSupport && !printSHSupport);
if(branchLabelSupport)
assert(!rellTree && !printSHSupport);
if(printSHSupport)
assert(!branchLabelSupport && !rellTree);
if(finalPrint && adef->outgroup)
{
nodeptr startNode = tr->start;
if(tr->numberOfOutgroups > 1)
{
nodeptr root;
nodeptr *subtrees = (nodeptr *)rax_malloc(sizeof(nodeptr) * tr->mxtips);
int i, k, count = 0;
int *nodeNumbers = (int*)rax_malloc(sizeof(int) * tr->numberOfOutgroups);
int *foundVector = (int*)rax_malloc(sizeof(int) * tr->numberOfOutgroups);
boolean monophyletic = FALSE;
collectSubtrees(tr, subtrees, &count, tr->numberOfOutgroups);
/*printf("Found %d subtrees of size %d\n", count, tr->numberOfOutgroups);*/
for(i = 0; (i < count) && (!monophyletic); i++)
{
int l, sum, nc = 0;
for(k = 0; k < tr->numberOfOutgroups; k++)
{
nodeNumbers[k] = -1;
foundVector[k] = 0;
}
checkOM(subtrees[i], nodeNumbers, &nc, tr);
for(l = 0; l < tr->numberOfOutgroups; l++)
for(k = 0; k < tr->numberOfOutgroups; k++)
{
if(nodeNumbers[l] == tr->outgroupNums[k])
foundVector[l] = 1;
}
sum = 0;
for(l = 0; l < tr->numberOfOutgroups; l++)
sum += foundVector[l];
if(sum == tr->numberOfOutgroups)
{
root = subtrees[i];
tr->start = root;
/*printf("outgroups are monphyletic!\n");*/
monophyletic = TRUE;
}
else
{
if(sum > 0)
{
/*printf("outgroups are NOT monophyletic!\n");*/
monophyletic = FALSE;
}
}
}
if(!monophyletic)
{
printf("WARNING, outgroups are not monophyletic, using first outgroup \"%s\"\n", tr->nameList[tr->outgroupNums[0]]);
printf("from the list to root the tree!\n");
{
FILE *infoFile = myfopen(infoFileName, "ab");
fprintf(infoFile, "\nWARNING, outgroups are not monophyletic, using first outgroup \"%s\"\n", tr->nameList[tr->outgroupNums[0]]);
fprintf(infoFile, "from the list to root the tree!\n");
fclose(infoFile);
}
tr->start = tr->nodep[tr->outgroupNums[0]];
rootedTree(treestr, tr, tr->start->back, printBranchLengths, printNames, printLikelihood, rellTree,
finalPrint, adef, perGene, branchLabelSupport, printSHSupport);
}
else
{
if(isTip(tr->start->number, tr->rdta->numsp))
{
printf("Outgroup-Monophyly ERROR; tr->start is a tip \n");
errorExit(-1);
}
if(isTip(tr->start->back->number, tr->rdta->numsp))
{
printf("Outgroup-Monophyly ERROR; tr->start is a tip \n");
errorExit(-1);
}
rootedTree(treestr, tr, tr->start->back, printBranchLengths, printNames, printLikelihood, rellTree,
finalPrint, adef, perGene, branchLabelSupport, printSHSupport);
}
rax_free(foundVector);
rax_free(nodeNumbers);
rax_free(subtrees);
}
else
{
tr->start = tr->nodep[tr->outgroupNums[0]];
rootedTree(treestr, tr, tr->start->back, printBranchLengths, printNames, printLikelihood, rellTree,
finalPrint, adef, perGene, branchLabelSupport, printSHSupports);
}
tr->start = startNode;
}
else
Tree2StringREC(treestr, tr, p, printBranchLengths, printNames, printLikelihood, rellTree,
finalPrint, perGene, branchLabelSupport, printSHSupport, printIC, printSHSupports);
return;
}
void printTreePerGene(tree *tr, analdef *adef, char *fileName, char *permission)
{
FILE *treeFile;
char extendedTreeFileName[1024];
char buf[16];
int i;
assert(adef->perGeneBranchLengths);
for(i = 0; i < tr->numBranches; i++)
{
strcpy(extendedTreeFileName, fileName);
sprintf(buf,"%d", i);
strcat(extendedTreeFileName, ".PARTITION.");
strcat(extendedTreeFileName, buf);
/*printf("Partitiuon %d file %s\n", i, extendedTreeFileName);*/
Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, i, FALSE, FALSE, FALSE, FALSE);
treeFile = myfopen(extendedTreeFileName, permission);
fprintf(treeFile, "%s", tr->tree_string);
fclose(treeFile);
}
}
/*=======================================================================*/
/* Read a tree from a file */
/*=======================================================================*/
/* 1.0.A Processing of quotation marks in comment removed
*/
static int treeFinishCom (FILE *fp, char **strp)
{
int ch;
while ((ch = getc(fp)) != EOF && ch != ']') {
if (strp != NULL) *(*strp)++ = ch; /* save character */
if (ch == '[') { /* nested comment; find its end */
if ((ch = treeFinishCom(fp, strp)) == EOF) break;
if (strp != NULL) *(*strp)++ = ch; /* save closing ] */
}
}
if (strp != NULL) **strp = '\0'; /* terminate string */
return ch;
} /* treeFinishCom */
static int treeGetCh (FILE *fp) /* get next nonblank, noncomment character */
{ /* treeGetCh */
int ch;
while ((ch = getc(fp)) != EOF) {
if (whitechar(ch)) ;
else if (ch == '[') { /* comment; find its end */
if ((ch = treeFinishCom(fp, (char **) NULL)) == EOF) break;
}
else break;
}
return ch;
} /* treeGetCh */
static boolean treeLabelEnd (int ch)
{
switch (ch)
{
case EOF:
case '\0':
case '\t':
case '\n':
case '\r':
case ' ':
case ':':
case ',':
case '(':
case ')':
case ';':
return TRUE;
default:
break;
}
return FALSE;
}
static void treeEchoContext (FILE *fp1, FILE *fp2, int n);
static boolean treeGetLabel (FILE *fp, char *lblPtr, int maxlen, boolean taxonLabel)
{
int ch;
boolean done, quoted, lblfound;
if (--maxlen < 0)
lblPtr = (char *) NULL;
else
if (lblPtr == NULL)
maxlen = 0;
ch = getc(fp);
done = treeLabelEnd(ch);
if(done && taxonLabel)
{
printf("RAxML expects to read a taxon label in the tree file\n");
printf("but the taxon label is an empty string.\n\n");
printf("RAxML will print the context of the error and then exit:\n\n");
treeEchoContext(fp, stdout, 40);
printf("\n ^^\n\n");
errorExit(-1);
}
lblfound = ! done;
quoted = (ch == '\'');
if (quoted && ! done)
{
ch = getc(fp);
done = (ch == EOF);
}
while (! done)
{
if (quoted)
{
if (ch == '\'')
{
ch = getc(fp);
if (ch != '\'')
break;
}
}
else
if (treeLabelEnd(ch)) break;
if (--maxlen >= 0) *lblPtr++ = ch;
ch = getc(fp);
if (ch == EOF) break;
}
if (ch != EOF) (void) ungetc(ch, fp);
if (lblPtr != NULL) *lblPtr = '\0';
return lblfound;
}
static boolean treeFlushLabel (FILE *fp)
{
return treeGetLabel(fp, (char *) NULL, (int) 0, FALSE);
}
int treeFindTipByLabelString(char *str, tree *tr, boolean check)
{
int
lookup = lookupWord(str, tr->nameHash);
if(lookup > 0)
{
if(check)
assert(! tr->nodep[lookup]->back);
return lookup;
}
else
{
printf("ERROR: Cannot find tree species: %s\n", str);
return 0;
}
}
int treeFindTipName(FILE *fp, tree *tr, boolean check)
{
char str[nmlngth+2];
int n;
if(treeGetLabel(fp, str, nmlngth+2, TRUE))
n = treeFindTipByLabelString(str, tr, check);
else
n = 0;
return n;
}
static void treeEchoContext (FILE *fp1, FILE *fp2, int n)
{ /* treeEchoContext */
int
ch;
int64_t
current = ftell(fp1);
boolean
waswhite = TRUE;
fpos_t
pos;
fgetpos(fp1, &pos);
fseek(fp1, MAX(current - n / 2, 0), SEEK_SET);
while (n > 0 && ((ch = getc(fp1)) != EOF))
{
if (whitechar(ch))
{
ch = waswhite ? '\0' : ' ';
waswhite = TRUE;
}
else
{
waswhite = FALSE;
}
if(ch > '\0')
{
putc(ch, fp2);
n--;
}
}
fsetpos(fp1, &pos);
/*boolean
waswhite = TRUE;
while (n > 0 && ((ch = getc(fp1)) != EOF))
{
if (whitechar(ch))
{
ch = waswhite ? '\0' : ' ';
waswhite = TRUE;
}
else
{
waswhite = FALSE;
}
if(ch > '\0')
{
putc(ch, fp2);
n--;
}
}*/
} /* treeEchoContext */
static boolean treeNeedCh (FILE *fp, int c1, char *where);
static boolean treeProcessLength (FILE *fp, double *dptr, int *branchLabel, boolean storeBranchLabels, tree *tr)
{
int
ch;
if((ch = treeGetCh(fp)) == EOF)
return FALSE; /* Skip comments */
(void) ungetc(ch, fp);
if(fscanf(fp, "%lf", dptr) != 1)
{
printf("ERROR: treeProcessLength: Problem reading branch length\n");
treeEchoContext(fp, stdout, 40);
printf("\n");
return FALSE;
}
if((ch = getc(fp)) != EOF)
{
if(ch == '[')
{
if(fscanf(fp, "%d", branchLabel) != 1)
goto handleError;
//printf("Branch label: %d\n", *branchLabel);
if((ch = getc(fp)) != ']')
{
handleError:
printf("ERROR: treeProcessLength: Problem reading branch label\n");
treeEchoContext(fp, stdout, 40);
printf("\n");
return FALSE;
}
if(storeBranchLabels)
tr->branchLabelCounter = tr->branchLabelCounter + 1;
}
else
(void)ungetc(ch, fp);
}
return TRUE;
}
static int treeFlushLen (FILE *fp, tree *tr)
{
double
dummy;
int
dummyLabel,
ch;
ch = treeGetCh(fp);
if (ch == ':')
{
ch = treeGetCh(fp);
ungetc(ch, fp);
if(!treeProcessLength(fp, &dummy, &dummyLabel, FALSE, tr))
return 0;
return 1;
}
if (ch != EOF) (void) ungetc(ch, fp);
return 1;
}
static boolean treeNeedCh (FILE *fp, int c1, char *where)
{
int
c2;
if((c2 = treeGetCh(fp)) == c1)
return TRUE;
printf("ERROR: Expecting '%c' %s tree; found: character '%c'\n\n", c1, where, c2);
if (c2 == EOF)
{
printf("End-of-File\n");
}
else
{
ungetc(c2, fp);
treeEchoContext(fp, stdout, 40);
printf("\n");
printf(" ^\n\n");
}
//putchar('\n');
if(c1 == ')' || c1 == '(')
printf("RAxML may be expecting to read a strictly bifurcating tree!\n\n");
else
printf("RAxML may be expecting to read a tree that contains branch lengths\n\n");
return FALSE;
}
static boolean addElementLen (FILE *fp, tree *tr, nodeptr p, boolean readBranchLengths, boolean readNodeLabels, int *lcount, analdef *adef, boolean storeBranchLabels)
{
nodeptr q;
int n, ch, fres;
if ((ch = treeGetCh(fp)) == '(')
{
n = (tr->nextnode)++;
if (n > 2*(tr->mxtips) - 2)
{
if (tr->rooted || n > 2*(tr->mxtips) - 1)
{
printf("ERROR: Too many internal nodes. Is tree rooted?\n");
printf(" Deepest splitting should be a trifurcation.\n");
return FALSE;
}
else
{
if(readNodeLabels)
{
printf("The program will exit with an error in the next source code line\n");
printf("You are probably trying to read in rooted trees with a RAxML option \n");
printf("that for some reason expects unrooted binary trees\n\n");
}
assert(!readNodeLabels);
tr->rooted = TRUE;
}
}
q = tr->nodep[n];
if (! addElementLen(fp, tr, q->next, readBranchLengths, readNodeLabels, lcount, adef, storeBranchLabels)) return FALSE;
if (! treeNeedCh(fp, ',', "in")) return FALSE;
if (! addElementLen(fp, tr, q->next->next, readBranchLengths, readNodeLabels, lcount, adef, storeBranchLabels)) return FALSE;
if (! treeNeedCh(fp, ')', "in")) return FALSE;
if(readNodeLabels)
{
char label[64];
int support;
if(treeGetLabel (fp, label, 10, FALSE))
{
int val = sscanf(label, "%d", &support);
assert(val == 1);
/*printf("LABEL %s Number %d\n", label, support);*/
p->support = q->support = support;
/*printf("%d %d %d %d\n", p->support, q->support, p->number, q->number);*/
assert(p->number > tr->mxtips && q->number > tr->mxtips);
*lcount = *lcount + 1;
}
}
else
(void) treeFlushLabel(fp);
}
else
{
ungetc(ch, fp);
if ((n = treeFindTipName(fp, tr, TRUE)) <= 0) return FALSE;
q = tr->nodep[n];
if (tr->start->number > n) tr->start = q;
(tr->ntips)++;
}
if(readBranchLengths)
{
double
branch;
int
startCounter = tr->branchLabelCounter,
endCounter,
branchLabel = -1;
if (! treeNeedCh(fp, ':', "in")) return FALSE;
if (! treeProcessLength(fp, &branch, &branchLabel, storeBranchLabels, tr))
return FALSE;
endCounter = tr->branchLabelCounter;
/*printf("Branch %8.20f %d\n", branch, tr->numBranches);*/
if(adef->mode == CLASSIFY_ML)
{
double
x[NUM_BRANCHES];
assert(tr->NumberOfModels == 1);
assert(adef->useBinaryModelFile);
assert(tr->numBranches == 1);
x[0] = exp(-branch / tr->fracchange);
hookup(p, q, x, tr->numBranches);
}
else
hookup(p, q, &branch, tr->numBranches);
if(storeBranchLabels && (endCounter > startCounter))
{
assert(!isTip(p->number, tr->mxtips) && !isTip(q->number, tr->mxtips));
assert(branchLabel >= 0);
p->support = q->support = branchLabel;
}
}
else
{
fres = treeFlushLen(fp, tr);
if(!fres) return FALSE;
hookupDefault(p, q, tr->numBranches);
}
return TRUE;
}
static nodeptr uprootTree (tree *tr, nodeptr p, boolean readBranchLengths, boolean readConstraint)
{
nodeptr q, r, s, start;
int n, i;
for(i = tr->mxtips + 1; i < 2 * tr->mxtips - 1; i++)
assert(i == tr->nodep[i]->number);
if(isTip(p->number, tr->mxtips) || p->back)
{
printf("ERROR: Unable to uproot tree.\n");
printf(" Inappropriate node marked for removal.\n");
assert(0);
}
assert(p->back == (nodeptr)NULL);
tr->nextnode = tr->nextnode - 1;
assert(tr->nextnode < 2 * tr->mxtips);
n = tr->nextnode;
assert(tr->nodep[tr->nextnode]);
if (n != tr->mxtips + tr->ntips - 1)
{
printf("ERROR: Unable to uproot tree. Inconsistent\n");
printf(" number of tips and nodes for rooted tree.\n");
assert(0);
}
q = p->next->back; /* remove p from tree */
r = p->next->next->back;
assert(p->back == (nodeptr)NULL);
if(readBranchLengths)
{
double b[NUM_BRANCHES];
int i;
for(i = 0; i < tr->numBranches; i++)
{
b[i] = (r->z[i] + q->z[i]);
}
hookup (q, r, b, tr->numBranches);
}
else
hookupDefault(q, r, tr->numBranches);
tr->leftRootNode = p->next->back;
tr->rightRootNode = p->next->next->back;
if(readConstraint && tr->grouped)
{
if(tr->constraintVector[p->number] != 0)
{
printf("Root node to remove should have top-level grouping of 0\n");
assert(0);
}
}
assert(!(isTip(r->number, tr->mxtips) && isTip(q->number, tr->mxtips)));
assert(p->number > tr->mxtips);
if(tr->ntips > 2 && p->number != n)
{
q = tr->nodep[n]; /* transfer last node's conections to p */
r = q->next;
s = q->next->next;
if(readConstraint && tr->grouped)
tr->constraintVector[p->number] = tr->constraintVector[q->number];
hookup(p, q->back, q->z, tr->numBranches); /* move connections to p */
hookup(p->next, r->back, r->z, tr->numBranches);
hookup(p->next->next, s->back, s->z, tr->numBranches);
if(q == tr->leftRootNode || q == tr->rightRootNode)
{
if(q == tr->leftRootNode)
{
if(p->back == tr->rightRootNode)
tr->leftRootNode = p;
else
{
if(p->next->back == tr->rightRootNode)
tr->leftRootNode = p->next;
else
{
if(p->next->next->back == tr->rightRootNode)
tr->leftRootNode = p->next->next;
else
assert(0);
}
}
}
else
{
assert(q == tr->rightRootNode);
if(p->back == tr->leftRootNode)
tr->rightRootNode = p;
else
{
if(p->next->back == tr->leftRootNode)
tr->rightRootNode = p->next;
else
{
if(p->next->next->back == tr->leftRootNode)
tr->rightRootNode = p->next->next;
else
assert(0);
}
}
}
}
q->back = q->next->back = q->next->next->back = (nodeptr) NULL;
}
else
{
assert(tr->ntips > 2);
p->back = p->next->back = p->next->next->back = (nodeptr) NULL;
}
assert(tr->ntips > 2);
start = findAnyTip(tr->nodep[tr->mxtips + 1], tr->mxtips);
assert(isTip(start->number, tr->mxtips));
tr->rooted = FALSE;
return start;
}
int treeReadLen (FILE *fp, tree *tr, boolean readBranches, boolean readNodeLabels, boolean topologyOnly, analdef *adef, boolean completeTree, boolean storeBranchLabels)
{
nodeptr
p;
int
i,
ch,
lcount = 0;
tr->branchLabelCounter = 0;
for (i = 1; i <= tr->mxtips; i++)
{
tr->nodep[i]->back = (node *) NULL;
if(topologyOnly)
tr->nodep[i]->support = -1;
}
for(i = tr->mxtips + 1; i < 2 * tr->mxtips; i++)
{
tr->nodep[i]->back = (nodeptr)NULL;
tr->nodep[i]->next->back = (nodeptr)NULL;
tr->nodep[i]->next->next->back = (nodeptr)NULL;
tr->nodep[i]->number = i;
tr->nodep[i]->next->number = i;
tr->nodep[i]->next->next->number = i;
if(topologyOnly)
{
tr->nodep[i]->support = -2;
tr->nodep[i]->next->support = -2;
tr->nodep[i]->next->next->support = -2;
}
}
if(topologyOnly)
tr->start = tr->nodep[tr->mxtips];
else
tr->start = tr->nodep[1];
tr->ntips = 0;
tr->nextnode = tr->mxtips + 1;
for(i = 0; i < tr->numBranches; i++)
tr->partitionSmoothed[i] = FALSE;
tr->rooted = FALSE;
tr->wasRooted = FALSE;
p = tr->nodep[(tr->nextnode)++];
while((ch = treeGetCh(fp)) != '(')
{
if(ch == EOF)
{
printf("RAxML could not find a single \"(\" in what is supposed to be your tree file\n");
printf("RAxML will exit now, please check your tree file!\n");
printf("The last couple of characters RAxML read in your supposed tree file look like this:\n\n");
treeEchoContext(fp, stdout, 100);
printf("\n\n");
errorExit(-1);
}
};
if(!topologyOnly)
{
if(adef->mode != CLASSIFY_ML)
{
if(adef->mode != OPTIMIZE_BR_LEN_SCALER)
assert(readBranches == FALSE && readNodeLabels == FALSE);
else
assert(readBranches == TRUE && readNodeLabels == FALSE);
}
else
{
if(adef->useBinaryModelFile)
assert(readBranches == TRUE && readNodeLabels == FALSE);
else
assert(readBranches == FALSE && readNodeLabels == FALSE);
}
}
if (! addElementLen(fp, tr, p, readBranches, readNodeLabels, &lcount, adef, storeBranchLabels))
assert(0);
if (! treeNeedCh(fp, ',', "in"))
assert(0);
if (! addElementLen(fp, tr, p->next, readBranches, readNodeLabels, &lcount, adef, storeBranchLabels))
assert(0);
if (! tr->rooted)
{
if ((ch = treeGetCh(fp)) == ',')
{
if (! addElementLen(fp, tr, p->next->next, readBranches, readNodeLabels, &lcount, adef, storeBranchLabels))
assert(0);
}
else
{
/* A rooted format */
tr->rooted = TRUE;
tr->wasRooted = TRUE;
if (ch != EOF) (void) ungetc(ch, fp);
}
}
else
{
p->next->next->back = (nodeptr) NULL;
tr->wasRooted = TRUE;
}
if(!tr->rooted && adef->mode == ANCESTRAL_STATES)
{
printf("Error: The ancestral state computation mode requires a rooted tree as input, exiting ....\n");
exit(0);
}
if (! treeNeedCh(fp, ')', "in"))
assert(0);
if(topologyOnly)
assert(!(tr->rooted && readNodeLabels));
(void) treeFlushLabel(fp);
if (! treeFlushLen(fp, tr))
assert(0);
if (! treeNeedCh(fp, ';', "at end of"))
assert(0);
if (tr->rooted)
{
assert(!readNodeLabels);
p->next->next->back = (nodeptr) NULL;
tr->start = uprootTree(tr, p->next->next, readBranches, FALSE);
/*tr->leftRootNode = p->back;
tr->rightRootNode = p->next->back;
*/
if (! tr->start)
{
printf("FATAL ERROR UPROOTING TREE\n");
assert(0);
}
}
else
tr->start = findAnyTip(p, tr->rdta->numsp);
if(!topologyOnly || adef->mode == CLASSIFY_MP)
{
assert(tr->ntips <= tr->mxtips);
if(tr->ntips < tr->mxtips)
{
if(completeTree)
{
printBothOpen("Hello this is your friendly RAxML tree parsing routine\n");
printBothOpen("The RAxML option you are uisng requires to read in only complete trees\n");
printBothOpen("with %d taxa, there is at least one tree with %d taxa though ... exiting\n", tr->mxtips, tr->ntips);
exit(-1);
}
else
{
if(adef->computeDistance)
{
printBothOpen("Error: pairwise distance computation only allows for complete, i.e., containing all taxa\n");
printBothOpen("bifurcating starting trees\n");
exit(-1);
}
if(adef->mode == CLASSIFY_ML || adef->mode == CLASSIFY_MP)
{
printBothOpen("RAxML placement algorithm: You provided a reference tree with %d taxa; alignmnet has %d taxa\n", tr->ntips, tr->mxtips);
printBothOpen("%d query taxa will be placed using %s\n", tr->mxtips - tr->ntips, (adef->mode == CLASSIFY_ML)?"maximum likelihood":"parsimony");
if(adef->mode == CLASSIFY_ML)
classifyML(tr, adef);
else
{
assert(adef->mode == CLASSIFY_MP);
classifyMP(tr, adef);
}
}
else
{
printBothOpen("You provided an incomplete starting tree %d alignmnet has %d taxa\n", tr->ntips, tr->mxtips);
makeParsimonyTreeIncomplete(tr, adef);
}
}
}
else
{
if(adef->mode == PARSIMONY_ADDITION)
{
printBothOpen("Error you want to add sequences to a trees via MP stepwise addition, but \n");
printBothOpen("you have provided an input tree that already contains all taxa\n");
exit(-1);
}
if(adef->mode == CLASSIFY_ML || adef->mode == CLASSIFY_MP)
{
printBothOpen("Error you want to place query sequences into a tree using %s, but\n", tr->mxtips - tr->ntips, (adef->mode == CLASSIFY_ML)?"maximum likelihood":"parsimony");
printBothOpen("you have provided an input tree that already contains all taxa\n");
exit(-1);
}
}
onlyInitrav(tr, tr->start);
}
return lcount;
}
/********************************MULTIFURCATIONS************************************************/
static boolean addElementLenMULT (FILE *fp, tree *tr, nodeptr p, int partitionCounter, analdef *adef, int *partCount)
{
nodeptr q, r, s;
int n, ch, fres;
double randomResolution;
int old;
tr->constraintVector[p->number] = partitionCounter;
if ((ch = treeGetCh(fp)) == '(')
{
*partCount = *partCount + 1;
old = *partCount;
n = (tr->nextnode)++;
if (n > 2*(tr->mxtips) - 2)
{
if (tr->rooted || n > 2*(tr->mxtips) - 1)
{
printf("ERROR: Too many internal nodes. Is tree rooted?\n");
printf(" Deepest splitting should be a trifurcation.\n");
return FALSE;
}
else
{
tr->rooted = TRUE;
}
}
q = tr->nodep[n];
tr->constraintVector[q->number] = *partCount;
if (! addElementLenMULT(fp, tr, q->next, old, adef, partCount)) return FALSE;
if (! treeNeedCh(fp, ',', "in")) return FALSE;
if (! addElementLenMULT(fp, tr, q->next->next, old, adef, partCount)) return FALSE;
hookupDefault(p, q, tr->numBranches);
while((ch = treeGetCh(fp)) == ',')
{
n = (tr->nextnode)++;
if (n > 2*(tr->mxtips) - 2)
{
if (tr->rooted || n > 2*(tr->mxtips) - 1)
{
printf("ERROR: Too many internal nodes. Is tree rooted?\n");
printf(" Deepest splitting should be a trifurcation.\n");
return FALSE;
}
else
{
tr->rooted = TRUE;
}
}
r = tr->nodep[n];
tr->constraintVector[r->number] = *partCount;
randomResolution = randum(&adef->constraintSeed);
if(randomResolution < 0.5)
{
s = q->next->back;
r->back = q->next;
q->next->back = r;
r->next->back = s;
s->back = r->next;
addElementLenMULT(fp, tr, r->next->next, old, adef, partCount);
}
else
{
s = q->next->next->back;
r->back = q->next->next;
q->next->next->back = r;
r->next->back = s;
s->back = r->next;
addElementLenMULT(fp, tr, r->next->next, old, adef, partCount);
}
}
//ungetc(ch, fp);
if(ch != ')')
{
printf("Missing \")\" or \",\" in treeReadLenMULT, RAxML will print the context of the error and exit\n\n");
treeEchoContext(fp, stdout, 40);
printf("\n\n");
errorExit(-1);
}
(void) treeFlushLabel(fp);
}
else
{
ungetc(ch, fp);
if ((n = treeFindTipName(fp, tr, TRUE)) <= 0) return FALSE;
q = tr->nodep[n];
tr->constraintVector[q->number] = partitionCounter;
if (tr->start->number > n) tr->start = q;
(tr->ntips)++;
hookupDefault(p, q, tr->numBranches);
}
fres = treeFlushLen(fp, tr);
if(!fres) return FALSE;
return TRUE;
}
boolean treeReadLenMULT (FILE *fp, tree *tr, analdef *adef)
{
nodeptr p, r, s;
int i, ch, n;
int partitionCounter = 0, partCount = 0;
double randomResolution;
assert(adef->constraintSeed > 0);
for(i = 0; i < 2 * tr->mxtips; i++)
tr->constraintVector[i] = -1;
for (i = 1; i <= tr->mxtips; i++)
tr->nodep[i]->back = (node *) NULL;
for(i = tr->mxtips + 1; i < 2 * tr->mxtips; i++)
{
tr->nodep[i]->back = (nodeptr)NULL;
tr->nodep[i]->next->back = (nodeptr)NULL;
tr->nodep[i]->next->next->back = (nodeptr)NULL;
tr->nodep[i]->number = i;
tr->nodep[i]->next->number = i;
tr->nodep[i]->next->next->number = i;
}
tr->start = tr->nodep[tr->mxtips];
tr->ntips = 0;
tr->nextnode = tr->mxtips + 1;
for(i = 0; i < tr->numBranches; i++)
tr->partitionSmoothed[i] = FALSE;
tr->rooted = FALSE;
p = tr->nodep[(tr->nextnode)++];
while((ch = treeGetCh(fp)) != '(')
{
if(ch == EOF)
{
printf("RAxML could not find a single \"(\" in what is supposed to be your tree file\n");
printf("RAxML will exit now, please check your tree file!\n");
printf("The last couple of characters RAxML read in your supposed tree file look like this:\n\n");
treeEchoContext(fp, stdout, 100);
printf("\n\n");
errorExit(-1);
}
};
if (! addElementLenMULT(fp, tr, p, partitionCounter, adef, &partCount)) return FALSE;
if (! treeNeedCh(fp, ',', "in")) return FALSE;
if (! addElementLenMULT(fp, tr, p->next, partitionCounter, adef, &partCount)) return FALSE;
if (! tr->rooted)
{
if ((ch = treeGetCh(fp)) == ',')
{
if (! addElementLenMULT(fp, tr, p->next->next, partitionCounter, adef, &partCount)) return FALSE;
while((ch = treeGetCh(fp)) == ',')
{
n = (tr->nextnode)++;
assert(n <= 2*(tr->mxtips) - 2);
r = tr->nodep[n];
tr->constraintVector[r->number] = partitionCounter;
randomResolution = randum(&(adef->constraintSeed));
if(randomResolution < 0.5)
{
s = p->next->next->back;
r->back = p->next->next;
p->next->next->back = r;
r->next->back = s;
s->back = r->next;
addElementLenMULT(fp, tr, r->next->next, partitionCounter, adef, &partCount);
}
else
{
s = p->next->back;
r->back = p->next;
p->next->back = r;
r->next->back = s;
s->back = r->next;
addElementLenMULT(fp, tr, r->next->next, partitionCounter, adef, &partCount);
}
}
if(ch != ')')
{
printf("Missing \")\" or \",\" in treeReadLenMULT, RAxML will print the context of the error and exit\n\n");
treeEchoContext(fp, stdout, 40);
printf("\n\n");
errorExit(-1);
}
else
ungetc(ch, fp);
}
else
{
tr->rooted = TRUE;
if (ch != EOF) (void) ungetc(ch, fp);
}
}
else
{
p->next->next->back = (nodeptr) NULL;
}
if (! treeNeedCh(fp, ')', "in")) return FALSE;
(void) treeFlushLabel(fp);
if (! treeFlushLen(fp, tr)) return FALSE;
if (! treeNeedCh(fp, ';', "at end of")) return FALSE;
if (tr->rooted)
{
p->next->next->back = (nodeptr) NULL;
tr->start = uprootTree(tr, p->next->next, FALSE, TRUE);
if (! tr->start) return FALSE;
}
else
{
tr->start = findAnyTip(p, tr->rdta->numsp);
}
if(tr->ntips < tr->mxtips)
makeParsimonyTreeIncomplete(tr, adef);
if(!adef->rapidBoot)
onlyInitrav(tr, tr->start);
return TRUE;
}
void getStartingTree(tree *tr, analdef *adef)
{
tr->likelihood = unlikely;
if(adef->restart)
{
INFILE = myfopen(tree_file, "rb");
if(!adef->grouping)
{
switch(adef->mode)
{
case ANCESTRAL_STATES:
assert(!tr->saveMemory);
tr->leftRootNode = (nodeptr)NULL;
tr->rightRootNode = (nodeptr)NULL;
treeReadLen(INFILE, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE);
assert(tr->leftRootNode && tr->rightRootNode);
break;
case CLASSIFY_MP:
treeReadLen(INFILE, tr, TRUE, FALSE, TRUE, adef, FALSE, FALSE);
break;
case OPTIMIZE_BR_LEN_SCALER:
treeReadLen(INFILE, tr, TRUE, FALSE, FALSE, adef, TRUE, FALSE);
break;
case CLASSIFY_ML:
if(adef->useBinaryModelFile)
{
if(tr->saveMemory)
treeReadLen(INFILE, tr, TRUE, FALSE, TRUE, adef, FALSE, FALSE);
else
treeReadLen(INFILE, tr, TRUE, FALSE, FALSE, adef, FALSE, FALSE);
}
else
{
if(tr->saveMemory)
treeReadLen(INFILE, tr, FALSE, FALSE, TRUE, adef, FALSE, FALSE);
else
treeReadLen(INFILE, tr, FALSE, FALSE, FALSE, adef, FALSE, FALSE);
}
break;
default:
if(tr->saveMemory)
treeReadLen(INFILE, tr, FALSE, FALSE, TRUE, adef, FALSE, FALSE);
else
treeReadLen(INFILE, tr, FALSE, FALSE, FALSE, adef, FALSE, FALSE);
break;
}
}
else
{
assert(adef->mode != ANCESTRAL_STATES);
if (! treeReadLenMULT(INFILE, tr, adef))
exit(-1);
}
if(adef->mode == PARSIMONY_ADDITION)
return;
if(adef->mode != CLASSIFY_MP)
{
if(adef->mode == OPTIMIZE_BR_LEN_SCALER)
{
assert(tr->numBranches == tr->NumberOfModels);
scaleBranches(tr, TRUE);
evaluateGenericInitrav(tr, tr->start);
}
else
{
evaluateGenericInitrav(tr, tr->start);
treeEvaluate(tr, 1);
}
}
fclose(INFILE);
}
else
{
assert(adef->mode != PARSIMONY_ADDITION &&
adef->mode != MORPH_CALIBRATOR &&
adef->mode != ANCESTRAL_STATES &&
adef->mode != OPTIMIZE_BR_LEN_SCALER);
if(adef->randomStartingTree)
makeRandomTree(tr, adef);
else
makeParsimonyTree(tr, adef);
if(adef->startingTreeOnly)
{
printStartingTree(tr, adef, TRUE);
exit(0);
}
else
printStartingTree(tr, adef, FALSE);
evaluateGenericInitrav(tr, tr->start);
treeEvaluate(tr, 1);
}
tr->start = tr->nodep[1];
}
/****** functions for reading true multi-furcating trees *****/
/****************************************************************************/
static void nextNodeOutOfBounds(tree *tr, int nextnode)
{
if(nextnode >= tr->maxNodes)
{
printf("The tree RAxML is trying to parse contains more nodes than expected.\n");
printf("RAxML will exit now, please check your input trees!\n");
assert(0);
}
}
static void addMultifurcation (FILE *fp, tree *tr, nodeptr _p, analdef *adef, int *nextnode)
{
nodeptr
p,
initial_p;
int
n,
ch,
fres;
if ((ch = treeGetCh(fp)) == '(')
{
int
i = 0;
nextNodeOutOfBounds(tr, *nextnode);
initial_p = p = tr->nodep[*nextnode];
*nextnode = *nextnode + 1;
do
{
nextNodeOutOfBounds(tr, *nextnode);
p->next = tr->nodep[*nextnode];
*nextnode = *nextnode + 1;
p = p->next;
addMultifurcation(fp, tr, p, adef, nextnode);
i++;
}
while((ch = treeGetCh(fp)) == ',');
ungetc(ch, fp);
p->next = initial_p;
if (! treeNeedCh(fp, ')', "in"))
assert(0);
treeFlushLabel(fp);
}
else
{
ungetc(ch, fp);
if ((n = treeFindTipName(fp, tr, FALSE)) <= 0)
assert(0);
p = tr->nodep[n];
initial_p = p;
tr->start = p;
(tr->ntips)++;
}
fres = treeFlushLen(fp, tr);
if(!fres)
assert(0);
hookupDefault(initial_p, _p, tr->numBranches);
}
static void printMultiFurc(nodeptr p, tree *tr)
{
if(isTip(p->number, tr->mxtips))
{
printf("%s", tr->nameList[p->number]);
}
else
{
nodeptr
q = p->next;
printf("(");
while(q != p)
{
printMultiFurc(q->back,
tr);
q = q->next;
if(q != p)
printf(",");
}
printf(")");
}
}
void allocateMultifurcations(tree *tr, tree *smallTree)
{
int
i,
tips,
inter;
smallTree->numBranches = tr->numBranches;
smallTree->mxtips = tr->mxtips;
//printf("Small tree tiups: %d\n", smallTree->mxtips);
smallTree->nameHash = tr->nameHash;
smallTree->nameList = tr->nameList;
tips = tr->mxtips;
inter = tr->mxtips - 1;
smallTree->nodep = (nodeptr *)rax_malloc((tips + 3 * inter) * sizeof(nodeptr));
smallTree->maxNodes = tips + 3 * inter;
smallTree->nodep[0] = (node *) NULL;
for (i = 1; i <= tips; i++)
{
smallTree->nodep[i] = (nodeptr)rax_malloc(sizeof(node));
memcpy(smallTree->nodep[i], tr->nodep[i], sizeof(node));
smallTree->nodep[i]->back = (node *) NULL;
smallTree->nodep[i]->next = (node *) NULL;
}
for(i = tips + 1; i < tips + 3 * inter; i++)
{
smallTree->nodep[i] = (nodeptr)rax_malloc(sizeof(node));
smallTree->nodep[i]->number = i;
smallTree->nodep[i]->back = (node *) NULL;
smallTree->nodep[i]->next = (node *) NULL;
}
}
void freeMultifurcations(tree *tr)
{
int
i,
tips = tr->mxtips,
inter = tr->mxtips - 1;
for (i = 1; i < tips + 3 * inter; i++)
rax_free(tr->nodep[i]);
rax_free(tr->nodep);
}
static void relabelInnerNodes(nodeptr p, tree *tr, int *number, int *nodeCounter)
{
if(isTip(p->number, tr->mxtips))
{
assert(0);
}
else
{
nodeptr
q = p->next;
int
_n = *number;
tr->nodep[p->number]->number = _n;
p->x = 1;
*number = *number + 1;
while(q != p)
{
tr->nodep[q->number]->number = _n;
q->x = 0;
if(!isTip(q->back->number, tr->mxtips))
{
*nodeCounter = *nodeCounter + 1;
relabelInnerNodes(q->back, tr, number, nodeCounter);
}
q = q->next;
}
}
}
int readMultifurcatingTree(FILE *fp, tree *tr, analdef *adef, boolean fastParse)
{
nodeptr
p,
initial_p;
int
innerNodeNumber,
innerBranches = 0,
nextnode,
i,
ch,
tips = tr->mxtips,
inter = tr->mxtips - 1;
//clean up before parsing !
if(!fastParse)
{
for (i = 1; i < tips + 3 * inter; i++)
{
tr->nodep[i]->back = (node *) NULL;
tr->nodep[i]->next = (node *) NULL;
tr->nodep[i]->x = 0;
}
for(i = tips + 1; i < tips + 3 * inter; i++)
tr->nodep[i]->number = i;
}
tr->ntips = 0;
nextnode = tr->mxtips + 1;
while((ch = treeGetCh(fp)) != '(')
{
if(ch == EOF)
{
printf("RAxML could not find a single \"(\" in what is supposed to be your tree file\n");
printf("RAxML will exit now, please check your tree file!\n");
printf("The last couple of characters RAxML read in your supposed tree file look like this:\n\n");
treeEchoContext(fp, stdout, 100);
printf("\n\n");
errorExit(-1);
}
};
i = 0;
do
{
if(i == 0)
{
nextNodeOutOfBounds(tr, nextnode);
initial_p = p = tr->nodep[nextnode];
nextnode++;
}
else
{
nextNodeOutOfBounds(tr, nextnode);
p->next = tr->nodep[nextnode];
p = p->next;
nextnode++;
}
addMultifurcation(fp, tr, p, adef, &nextnode);
i++;
}
while((ch = treeGetCh(fp)) == ',');
if(i < 2)
assert(0);
else
{
if(i == 2)
{
nodeptr
q = initial_p->back,
r = initial_p->next->back;
//printBothOpen("you provided a rooted tree, we need an unrooted one, RAxML will remove the root!\n");
if(!fastParse)
assert(initial_p->next->next == (node *)NULL);
hookupDefault(q, r, tr->numBranches);
if(tr->start == initial_p ||
tr->start == initial_p->next ||
tr->start->back == initial_p ||
tr->start->back == initial_p->next
)
{
tr->start = findAnyTip(q, tr->mxtips);
}
assert(tr->start != initial_p);
assert(tr->start != initial_p->next);
assert(tr->start->back != initial_p);
assert(tr->start->back != initial_p->next);
}
}
/*
if(i < 3)
{
printBothOpen("You need to provide unrooted input trees!\n");
assert(0);
}
*/
ungetc(ch, fp);
if(i > 2)
p->next = initial_p;
if (! treeNeedCh(fp, ')', "in"))
assert(0);
(void)treeFlushLabel(fp);
if (! treeFlushLen(fp, tr))
assert(0);
if (! treeNeedCh(fp, ';', "at end of"))
assert(0);
//printf("%d tips found, %d inner nodes used start %d maxtips %d\n", tr->ntips, nextnode - tr->mxtips, tr->start->number, tr->mxtips);
if(fastParse)
for(i = tips + 1; i < tips + 3 * tr->ntips; i++)
tr->nodep[i]->number = i;
assert(isTip(tr->start->number, tr->mxtips));
innerNodeNumber = tr->mxtips + 1;
relabelInnerNodes(tr->start->back, tr, &innerNodeNumber, &innerBranches);
//printf("Inner node number: %d\n", innerNodeNumber);
//printf("Inner branches %d\n", innerBranches);
if(0)
{
printf("(");
printMultiFurc(tr->start, tr);
printf(",");
printMultiFurc(tr->start->back, tr);
printf(");\n");
}
return innerBranches;
}