/*  RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees
 *  Copyright August 2006 by Alexandros Stamatakis
 *
 *  Partially derived from
 *  fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen
 *
 *  and
 *
 *  Programs of the PHYLIP package by Joe Felsenstein.
 *
 *  This program is free software; you may redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  For any other enquiries send an Email to Alexandros Stamatakis
 *  Alexandros.Stamatakis@epfl.ch
 *
 *  When publishing work that is based on the results from RAxML-VI-HPC please cite:
 *
 *  Alexandros Stamatakis: "RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models".
 *  Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446
 */

/* required system headers */

#ifndef WIN32
#include <sys/times.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#endif

#include <math.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>

#include "axml.h"

extern char **globalArgv;
extern int globalArgc;
extern char workdir[1024];
extern char run_id[128];
extern double masterTime;

#ifdef _USE_PTHREADS
extern volatile int NumberOfThreads;
extern volatile int NumberOfJobs;
#endif

/* converts the internal branch-length representation z stored at a branch into an actual
   branch length t = -log(z) * fracchange; for partitioned models with per-partition branch
   lengths the result is the average over partitions, weighted by each partition's contribution */

static double getBranch(tree *tr, double *b, double *bb)
{
  double z = 0.0;

  if(!tr->multiBranch)
    {
      assert(tr->fracchange != -1.0);
      assert(b[0] == bb[0]);

      z = b[0];
      if(z < zmin)
	z = zmin;
      if(z > zmax)
	z = zmax;

      z = -log(z) * tr->fracchange;

      return z;
    }
  else
    {
      int i;
      double x = 0;

      for(i = 0; i < tr->numBranches; i++)
	{
	  assert(b[i] == bb[i]);
	  assert(tr->partitionContributions[i] != -1.0);
	  assert(tr->fracchanges[i] != -1.0);

	  x = b[i];
	  if(x < zmin)
	    x = zmin;
	  if(x > zmax)
	    x = zmax;

	  x = -log(x) * tr->fracchanges[i];

	  z += x * tr->partitionContributions[i];
	}

      return z;
    }
}

/* as getBranch(), but for per-partition EPA: returns the branch length of the
   partition to which query sequence j has been assigned */

static double getBranchPerPartition(tree *tr, double *b, double *bb, int j)
{
  double z = 0.0;

  if(!tr->multiBranch)
    {
      assert(tr->fracchange != -1.0);
      assert(b[0] == bb[0]);

      z = b[0];
      if(z < zmin)
	z = zmin;
      if(z > zmax)
	z = zmax;

      z = -log(z) * tr->fracchange;

      return z;
    }
  else
    {
      int i = tr->readPartition[j];

      assert(b[i] == bb[i]);
      assert(tr->fracchanges[i] != -1.0);

      z = b[i];
      if(z < zmin)
	z = zmin;
      if(z > zmax)
	z = zmax;

      z = -log(z) * tr->fracchanges[i];

      return z;
    }
}

/* recursively writes the reference tree in Newick format; when originalTree is FALSE, the query
   sequences placed into the current branch are attached as additional tips named QUERY___<name> */

static char *Tree2StringClassifyRec(char *treestr, tree *tr, nodeptr p, int *countBranches,
				    int *inserts, boolean originalTree, boolean jointLabels, boolean likelihood)
{
  branchInfo *bInf = p->bInf;
  int i, countQuery = 0;

  *countBranches = *countBranches + 1;

  if(!originalTree)
    {
      for(i = 0; i < tr->numberOfTipsForInsertion; i++)
	if(bInf->epa->countThem[i] > 0)
	  countQuery++;

      if(countQuery > 0)
	{
	  int localCounter = 0;

	  *treestr++ = '(';

	  if(countQuery > 1)
	    *treestr++ = '(';

	  for(i = 0; i < tr->numberOfTipsForInsertion; i++)
	    {
	      if(bInf->epa->countThem[i] > 0)
		{
		  if(likelihood)
		    {
		      char branchLength[128];

		      sprintf(branchLength, "%f", bInf->epa->branches[i]);
		      sprintf(treestr, "QUERY___%s:%s", tr->nameList[inserts[i]], branchLength);
		    }
		  else
		    sprintf(treestr, "QUERY___%s", tr->nameList[inserts[i]]);

		  while (*treestr) treestr++;

		  if(localCounter < countQuery - 1)
		    *treestr++ = ',';
		  localCounter++;
		}
	    }

	  if(countQuery > 1)
	    {
	      sprintf(treestr, "):0.0,");
while (*treestr) treestr++; } else *treestr++ = ','; } } if(isTip(p->number, tr->rdta->numsp)) { char *nameptr = tr->nameList[p->number]; sprintf(treestr, "%s", nameptr); while (*treestr) treestr++; } else { *treestr++ = '('; treestr = Tree2StringClassifyRec(treestr, tr, p->next->back, countBranches, inserts, originalTree, jointLabels, likelihood); *treestr++ = ','; treestr = Tree2StringClassifyRec(treestr, tr, p->next->next->back, countBranches, inserts, originalTree, jointLabels, likelihood); *treestr++ = ')'; } if(countQuery > 0) { sprintf(treestr, ":%8.20f[%s]", 0.5 * p->bInf->epa->originalBranchLength, p->bInf->epa->branchLabel); while (*treestr) treestr++; *treestr++ = ')'; } if(originalTree) { if(jointLabels) { if(tr->wasRooted) { if(p == tr->leftRootNode) { sprintf(treestr, ":%8.20f{%d", p->bInf->epa->originalBranchLength * 0.5, p->bInf->epa->jointLabel); assert(tr->rootLabel == p->bInf->epa->jointLabel); } else { if(p == tr->rightRootNode) { sprintf(treestr, ":%8.20f{%d", p->bInf->epa->originalBranchLength * 0.5, tr->numberOfBranches); assert(tr->rootLabel == p->bInf->epa->jointLabel); } else sprintf(treestr, ":%8.20f{%d", p->bInf->epa->originalBranchLength, p->bInf->epa->jointLabel); } } else sprintf(treestr, ":%8.20f{%d", p->bInf->epa->originalBranchLength, p->bInf->epa->jointLabel); } else sprintf(treestr, ":%8.20f[%s", p->bInf->epa->originalBranchLength, p->bInf->epa->branchLabel); } else { if(countQuery > 0) sprintf(treestr, ":%8.20f[%s", 0.5 * p->bInf->epa->originalBranchLength, p->bInf->epa->branchLabel); else sprintf(treestr, ":%8.20f[%s", p->bInf->epa->originalBranchLength, p->bInf->epa->branchLabel); } while (*treestr) treestr++; assert(!(countQuery > 0 && originalTree == TRUE)); if(jointLabels) sprintf(treestr, "}"); else sprintf(treestr, "]"); while (*treestr) treestr++; return treestr; } char *Tree2StringClassify(char *treestr, tree *tr, int *inserts, boolean originalTree, boolean jointLabels, boolean likelihood) { nodeptr p; int countBranches = 0; if(jointLabels && tr->wasRooted) { assert(originalTree); *treestr++ = '('; treestr = Tree2StringClassifyRec(treestr, tr, tr->leftRootNode, &countBranches, inserts, originalTree, jointLabels, likelihood); *treestr++ = ','; treestr = Tree2StringClassifyRec(treestr, tr, tr->rightRootNode, &countBranches, inserts, originalTree, jointLabels, likelihood); *treestr++ = ')'; *treestr++ = ';'; assert(countBranches == 2 * tr->ntips - 2); *treestr++ = '\0'; while (*treestr) treestr++; return treestr; } else { if(jointLabels) p = tr->nodep[tr->mxtips + 1]; else p = tr->start->back; assert(!isTip(p->number, tr->mxtips)); *treestr++ = '('; treestr = Tree2StringClassifyRec(treestr, tr, p->back, &countBranches, inserts, originalTree, jointLabels, likelihood); *treestr++ = ','; treestr = Tree2StringClassifyRec(treestr, tr, p->next->back, &countBranches, inserts, originalTree, jointLabels, likelihood); *treestr++ = ','; treestr = Tree2StringClassifyRec(treestr, tr, p->next->next->back, &countBranches, inserts, originalTree, jointLabels, likelihood); *treestr++ = ')'; *treestr++ = ';'; assert(countBranches == 2 * tr->ntips - 3); *treestr++ = '\0'; while (*treestr) treestr++; return treestr; } } void markTips(nodeptr p, int *perm, int maxTips) { if(isTip(p->number, maxTips)) { perm[p->number] = 1; return; } else { nodeptr q = p->next; while(q != p) { markTips(q->back, perm, maxTips); q = q->next; } } } static boolean containsRoot(nodeptr p, tree *tr, int rootNumber) { if(isTip(p->number, tr->mxtips)) { if(p->number == rootNumber) return TRUE; 
else return FALSE; } else { if(p->number == rootNumber) return TRUE; else { if(containsRoot(p->next->back, tr, rootNumber)) return TRUE; else { if(containsRoot(p->next->next->back, tr, rootNumber)) return TRUE; else return FALSE; } } } } static nodeptr findRootDirection(nodeptr p, tree *tr, int rootNumber) { if(containsRoot(p, tr, rootNumber)) return p; if(containsRoot(p->back, tr, rootNumber)) return (p->back); /* one of the two subtrees must contain the root */ assert(0); } void setPartitionMask(tree *tr, int i, boolean *executeModel) { int model = 0; if(tr->perPartitionEPA) { for(model = 0; model < tr->NumberOfModels; model++) executeModel[model] = FALSE; executeModel[tr->readPartition[i]] = TRUE; } else { for(model = 0; model < tr->NumberOfModels; model++) executeModel[model] = TRUE; } } void resetPartitionMask(tree *tr, boolean *executeModel) { int model = 0; for(model = 0; model < tr->NumberOfModels; model++) executeModel[model] = TRUE; } static boolean allSmoothedEPA(tree *tr) { int i; boolean result = TRUE; for(i = 0; i < tr->numBranches; i++) { if(tr->partitionSmoothed[i] == FALSE) result = FALSE; else tr->partitionConverged[i] = TRUE; } return result; } static boolean updateEPA(tree *tr, nodeptr p, int j) { nodeptr q; boolean smoothedPartitions[NUM_BRANCHES]; int i; double z[NUM_BRANCHES], z0[NUM_BRANCHES]; double _deltaz; q = p->back; for(i = 0; i < tr->numBranches; i++) z0[i] = q->z[i]; setPartitionMask(tr, j, tr->executeModel); makenewzGeneric(tr, p, q, z0, newzpercycle, z, FALSE); for(i = 0; i < tr->numBranches; i++) smoothedPartitions[i] = tr->partitionSmoothed[i]; for(i = 0; i < tr->numBranches; i++) { if(!tr->partitionConverged[i]) { _deltaz = deltaz; if(ABS(z[i] - z0[i]) > _deltaz) { smoothedPartitions[i] = FALSE; } p->z[i] = q->z[i] = z[i]; } } for(i = 0; i < tr->numBranches; i++) tr->partitionSmoothed[i] = smoothedPartitions[i]; return TRUE; } static boolean localSmoothEPA(tree *tr, nodeptr p, int maxtimes, int j) { nodeptr q; int i; if (isTip(p->number, tr->rdta->numsp)) return FALSE; for(i = 0; i < tr->numBranches; i++) tr->partitionConverged[i] = FALSE; while (--maxtimes >= 0) { for(i = 0; i < tr->numBranches; i++) tr->partitionSmoothed[i] = TRUE; q = p; do { if (! 
updateEPA(tr, q, j)) return FALSE; q = q->next; } while (q != p); if (allSmoothedEPA(tr)) break; } for(i = 0; i < tr->numBranches; i++) { tr->partitionSmoothed[i] = FALSE; tr->partitionConverged[i] = FALSE; } return TRUE; } static void testInsertThorough(tree *tr, nodeptr r, nodeptr q) { double originalBranchLength = getBranch(tr, q->z, q->back->z), result, qz[NUM_BRANCHES], z[NUM_BRANCHES]; nodeptr root = (nodeptr)NULL, x = q->back; int *inserts = tr->inserts, j; boolean atRoot = FALSE; if(!tr->wasRooted) root = findRootDirection(q, tr, tr->mxtips + 1); else { if((q == tr->leftRootNode && x == tr->rightRootNode) || (x == tr->leftRootNode && q == tr->rightRootNode)) atRoot = TRUE; else { nodeptr r1 = findRootDirection(q, tr, tr->leftRootNode->number), r2 = findRootDirection(q, tr, tr->rightRootNode->number); assert(r1 == r2); root = r1; } } for(j = 0; j < tr->numBranches; j++) { qz[j] = q->z[j]; z[j] = sqrt(qz[j]); if(z[j] < zmin) z[j] = zmin; if(z[j] > zmax) z[j] = zmax; } for(j = 0; j < tr->numberOfTipsForInsertion; j++) { if(q->bInf->epa->executeThem[j]) { nodeptr s = tr->nodep[inserts[j]]; double ratio, modifiedBranchLength, distalLength; hookup(r->next, q, z, tr->numBranches); hookup(r->next->next, x, z, tr->numBranches); hookupDefault(r, s, tr->numBranches); if(tr->perPartitionEPA) { setPartitionMask(tr, j, tr->executeModel); newviewGenericMasked(tr, r); setPartitionMask(tr, j, tr->executeModel); localSmoothEPA(tr, r, smoothings, j); setPartitionMask(tr, j, tr->executeModel); evaluateGeneric(tr, r); result = tr->perPartitionLH[tr->readPartition[j]]; resetPartitionMask(tr, tr->executeModel); } else { newviewGeneric(tr, r); localSmooth(tr, r, smoothings); result = evaluateGeneric(tr, r); } if(tr->perPartitionEPA) tr->bInf[q->bInf->epa->branchNumber].epa->branches[j] = getBranchPerPartition(tr, r->z, r->back->z, j); else tr->bInf[q->bInf->epa->branchNumber].epa->branches[j] = getBranch(tr, r->z, r->back->z); tr->bInf[q->bInf->epa->branchNumber].epa->likelihoods[j] = result; if(tr->perPartitionEPA) modifiedBranchLength = getBranchPerPartition(tr, q->z, q->back->z, j) + getBranchPerPartition(tr, x->z, x->back->z, j); else modifiedBranchLength = getBranch(tr, q->z, q->back->z) + getBranch(tr, x->z, x->back->z); ratio = originalBranchLength / modifiedBranchLength; if(tr->wasRooted && atRoot) { /* always take distal length from left root node and then fix this later */ if(x == tr->leftRootNode) { if(tr->perPartitionEPA) distalLength = getBranchPerPartition(tr, x->z, x->back->z, j); else distalLength = getBranch(tr, x->z, x->back->z); } else { assert(x == tr->rightRootNode); if(tr->perPartitionEPA) distalLength = getBranchPerPartition(tr, q->z, q->back->z, j); else distalLength = getBranch(tr, q->z, q->back->z); } } else { if(root == x) { if(tr->perPartitionEPA) distalLength = getBranchPerPartition(tr, x->z, x->back->z, j); else distalLength = getBranch(tr, x->z, x->back->z); } else { assert(root == q); if(tr->perPartitionEPA) distalLength = getBranchPerPartition(tr, q->z, q->back->z, j); else distalLength = getBranch(tr, q->z, q->back->z); } } distalLength *= ratio; assert(distalLength <= originalBranchLength); tr->bInf[q->bInf->epa->branchNumber].epa->distalBranches[j] = distalLength; } } hookup(q, x, qz, tr->numBranches); r->next->next->back = r->next->back = (nodeptr) NULL; } static void testInsertFast(tree *tr, nodeptr r, nodeptr q) { double result, qz[NUM_BRANCHES], z[NUM_BRANCHES]; nodeptr x = q->back; int i, *inserts = tr->inserts; assert(!tr->grouped); for(i = 0; i < tr->numBranches; 
i++) { qz[i] = q->z[i]; z[i] = sqrt(q->z[i]); if(z[i] < zmin) z[i] = zmin; if(z[i] > zmax) z[i] = zmax; } hookup(r->next, q, z, tr->numBranches); hookup(r->next->next, x, z, tr->numBranches); newviewGeneric(tr, r); for(i = 0; i < tr->numberOfTipsForInsertion; i++) { if(q->bInf->epa->executeThem[i]) { hookupDefault(r, tr->nodep[inserts[i]], tr->numBranches); if(!tr->perPartitionEPA) { result = evaluateGeneric (tr, r); } else { setPartitionMask(tr, i, tr->executeModel); evaluateGeneric(tr, r); result = tr->perPartitionLH[tr->readPartition[i]]; resetPartitionMask(tr, tr->executeModel); } r->back = (nodeptr) NULL; tr->nodep[inserts[i]]->back = (nodeptr) NULL; tr->bInf[q->bInf->epa->branchNumber].epa->likelihoods[i] = result; } } hookup(q, x, qz, tr->numBranches); r->next->next->back = r->next->back = (nodeptr) NULL; } static void addTraverseRob(tree *tr, nodeptr r, nodeptr q, boolean thorough) { if(thorough) testInsertThorough(tr, r, q); else testInsertFast(tr, r, q); if(!isTip(q->number, tr->rdta->numsp)) { nodeptr a = q->next; while(a != q) { addTraverseRob(tr, r, a->back, thorough); a = a->next; } } } #ifdef _USE_PTHREADS size_t getContiguousVectorLength(tree *tr) { size_t length = 0; int model; for(model = 0; model < tr->NumberOfModels; model++) { size_t realWidth = tr->partitionData[model].upper - tr->partitionData[model].lower; int states = tr->partitionData[model].states; length += (realWidth * (size_t)states * (size_t)(tr->discreteRateCategories)); } return length; } static size_t getContiguousScalingLength(tree *tr) { size_t length = 0; int model; for(model = 0; model < tr->NumberOfModels; model++) length += tr->partitionData[model].upper - tr->partitionData[model].lower; return length; } static void allocBranchX(tree *tr) { int i = 0; for(i = 0; i < tr->numberOfBranches; i++) { branchInfo *b = &(tr->bInf[i]); b->epa->left = (double*)rax_malloc(sizeof(double) * tr->contiguousVectorLength); b->epa->leftScaling = (int*)rax_malloc(sizeof(int) * tr->contiguousScalingLength); b->epa->right = (double*)rax_malloc(sizeof(double) * tr->contiguousVectorLength); b->epa->rightScaling = (int*)rax_malloc(sizeof(int) * tr->contiguousScalingLength); } } static void updateClassify(tree *tr, double *z, boolean *partitionSmoothed, boolean *partitionConverged, double *x1, double *x2, unsigned char *tipX1, unsigned char *tipX2, int tipCase, int insertions) { int i; boolean smoothedPartitions[NUM_BRANCHES]; double newz[NUM_BRANCHES], z0[NUM_BRANCHES]; for(i = 0; i < tr->numBranches; i++) z0[i] = z[i]; makenewzClassify(tr, newzpercycle, newz, z0, x1, x2, tipX1, tipX2, tipCase, partitionConverged, insertions); for(i = 0; i < tr->numBranches; i++) smoothedPartitions[i] = partitionSmoothed[i]; for(i = 0; i < tr->numBranches; i++) { if(!partitionConverged[i]) { if(ABS(newz[i] - z0[i]) > deltaz) smoothedPartitions[i] = FALSE; z[i] = newz[i]; } } for(i = 0; i < tr->numBranches; i++) partitionSmoothed[i] = smoothedPartitions[i]; } static double localSmoothClassify (tree *tr, int maxtimes, int leftNodeNumber, int rightNodeNumber, int insertionNodeNumber, double *e1, double *e2, double *e3, branchInfo *b, int insertions) { int tipCase; boolean allSmoothed, partitionSmoothed[NUM_BRANCHES], partitionConverged[NUM_BRANCHES]; double result, *x1 = (double*)NULL, *x2 = (double*)NULL, *x3 = (double*)NULL; int i, *ex1 = (int*)NULL, *ex2 = (int*)NULL, *ex3 = (int*)NULL; unsigned char *tipX1 = (unsigned char *)NULL, *tipX2 = (unsigned char *)NULL; x3 = tr->temporaryVector; ex3 = tr->temporaryScaling; for(i = 0; i < 
tr->numBranches; i++) partitionConverged[i] = FALSE; while (--maxtimes >= 0) { for(i = 0; i < tr->numBranches; i++) partitionSmoothed[i] = TRUE; /* e3 */ if(isTip(leftNodeNumber, tr->mxtips) && isTip(rightNodeNumber, tr->mxtips)) { tipCase = TIP_TIP; tipX1 = tr->contiguousTips[leftNodeNumber]; tipX2 = tr->contiguousTips[rightNodeNumber]; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); } else { if (isTip(leftNodeNumber, tr->mxtips)) { tipCase = TIP_INNER; tipX1 = tr->contiguousTips[leftNodeNumber]; x2 = b->epa->right; ex2 = b->epa->rightScaling; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); } else { if(isTip(rightNodeNumber, tr->mxtips)) { tipCase = TIP_INNER; tipX1 = tr->contiguousTips[rightNodeNumber]; x2 = b->epa->left; ex2 = b->epa->leftScaling; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e2, e1, insertions); } else { tipCase = INNER_INNER; x1 = b->epa->left; ex1 = b->epa->leftScaling; x2 = b->epa->right; ex2 = b->epa->rightScaling; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); } } } tipCase = TIP_INNER; x2 = tr->temporaryVector; tipX1 = tr->contiguousTips[insertionNodeNumber]; updateClassify(tr, e3, partitionSmoothed, partitionConverged, x1, x2, tipX1, tipX2, tipCase, insertions); /* e1 **********************************************************/ tipX1 = tr->contiguousTips[insertionNodeNumber]; if(isTip(rightNodeNumber, tr->mxtips)) { tipCase = TIP_TIP; tipX2 = tr->contiguousTips[rightNodeNumber]; } else { tipCase = TIP_INNER; x2 = b->epa->right; ex2 = b->epa->rightScaling; } newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e3, e2, insertions); if(isTip(leftNodeNumber, tr->mxtips)) { tipCase = TIP_INNER; tipX1 = tr->contiguousTips[leftNodeNumber]; } else { tipCase = INNER_INNER; x1 = b->epa->left; } updateClassify(tr, e1, partitionSmoothed, partitionConverged, x1, x3, tipX1, (unsigned char*)NULL, tipCase, insertions); /* e2 *********************************************************/ tipX1 = tr->contiguousTips[insertionNodeNumber]; if(isTip(leftNodeNumber, tr->mxtips)) { tipCase = TIP_TIP; tipX2 = tr->contiguousTips[leftNodeNumber]; } else { tipCase = TIP_INNER; x2 = b->epa->left; ex2 = b->epa->leftScaling; } newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e3, e1, insertions); if(isTip(rightNodeNumber, tr->mxtips)) { tipCase = TIP_INNER; tipX1 = tr->contiguousTips[rightNodeNumber]; } else { tipCase = INNER_INNER; x1 = b->epa->right; } updateClassify(tr, e2, partitionSmoothed, partitionConverged, x1, x3, tipX1, (unsigned char*)NULL, tipCase, insertions); allSmoothed = TRUE; for(i = 0; i < tr->numBranches; i++) { if(partitionSmoothed[i] == FALSE) allSmoothed = FALSE; else partitionConverged[i] = TRUE; } if(allSmoothed) break; } if(isTip(leftNodeNumber, tr->mxtips) && isTip(rightNodeNumber, tr->mxtips)) { tipCase = TIP_TIP; tipX1 = tr->contiguousTips[leftNodeNumber]; tipX2 = tr->contiguousTips[rightNodeNumber]; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); } else { if (isTip(leftNodeNumber, tr->mxtips)) { tipCase = TIP_INNER; tipX1 = tr->contiguousTips[leftNodeNumber]; x2 = b->epa->right; ex2 = b->epa->rightScaling; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); } else { if(isTip(rightNodeNumber, tr->mxtips)) { tipCase = TIP_INNER; tipX1 = tr->contiguousTips[rightNodeNumber]; x2 = 
b->epa->left; ex2 = b->epa->leftScaling; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e2, e1, insertions); } else { tipCase = INNER_INNER; x1 = b->epa->left; ex1 = b->epa->leftScaling; x2 = b->epa->right; ex2 = b->epa->rightScaling; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); } } } tipCase = TIP_INNER; x2 = tr->temporaryVector; tipX1 = tr->contiguousTips[insertionNodeNumber]; result = evalCL(tr, x2, ex3, tipX1, e3, insertions); return result; } void testInsertThoroughIterative(tree *tr, int branchNumber) { double result, z, e1[NUM_BRANCHES], e2[NUM_BRANCHES], e3[NUM_BRANCHES], *x3 = tr->temporaryVector; branchInfo *b = &(tr->bInf[branchNumber]); nodeptr root = (nodeptr)NULL, x = b->oP, q = b->oQ; boolean atRoot = FALSE; int tipCase, model, insertions, leftNodeNumber = b->epa->leftNodeNumber, rightNodeNumber = b->epa->rightNodeNumber, *ex3 = tr->temporaryScaling; assert(x->number == leftNodeNumber); assert(q->number == rightNodeNumber); if(!tr->wasRooted) root = findRootDirection(q, tr, tr->mxtips + 1); else { if((q == tr->leftRootNode && x == tr->rightRootNode) || (x == tr->leftRootNode && q == tr->rightRootNode)) atRoot = TRUE; else { nodeptr r1 = findRootDirection(q, tr, tr->leftRootNode->number), r2 = findRootDirection(q, tr, tr->rightRootNode->number); assert(r1 == r2); root = r1; } } for(insertions = 0; insertions < tr->numberOfTipsForInsertion; insertions++) { if(b->epa->executeThem[insertions]) { double *x1 = (double*)NULL, *x2 = (double*)NULL; int *ex1 = (int*)NULL, *ex2 = (int*)NULL; unsigned char *tipX1 = (unsigned char *)NULL, *tipX2 = (unsigned char *)NULL; double ratio, modifiedBranchLength, distalLength; for(model = 0; model < tr->numBranches; model++) { z = sqrt(b->epa->branchLengths[model]); if(z < zmin) z = zmin; if(z > zmax) z = zmax; e1[model] = z; e2[model] = z; e3[model] = defaultz; } if(isTip(leftNodeNumber, tr->mxtips) && isTip(rightNodeNumber, tr->mxtips)) { tipCase = TIP_TIP; tipX1 = tr->contiguousTips[leftNodeNumber]; tipX2 = tr->contiguousTips[rightNodeNumber]; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); } else { if (isTip(leftNodeNumber, tr->mxtips)) { tipCase = TIP_INNER; tipX1 = tr->contiguousTips[leftNodeNumber]; x2 = b->epa->right; ex2 = b->epa->rightScaling; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); } else { if(isTip(rightNodeNumber, tr->mxtips)) { tipCase = TIP_INNER; tipX1 = tr->contiguousTips[rightNodeNumber]; x2 = b->epa->left; ex2 = b->epa->leftScaling; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e2, e1, insertions); } else { tipCase = INNER_INNER; x1 = b->epa->left; ex1 = b->epa->leftScaling; x2 = b->epa->right; ex2 = b->epa->rightScaling; newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); } } } result = localSmoothClassify(tr, smoothings, leftNodeNumber, rightNodeNumber, tr->inserts[insertions], e1, e2, e3, b, insertions); b->epa->likelihoods[insertions] = result; if(tr->perPartitionEPA) b->epa->branches[insertions] = getBranchPerPartition(tr, e3, e3, insertions); else b->epa->branches[insertions] = getBranch(tr, e3, e3); if(tr->perPartitionEPA) modifiedBranchLength = getBranchPerPartition(tr, e1, e1, insertions) + getBranchPerPartition(tr, e2, e2, insertions); else modifiedBranchLength = getBranch(tr, e1, e1) + getBranch(tr, e2, e2); ratio = b->epa->originalBranchLength / modifiedBranchLength; 
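	  /* the re-optimized half-branches adjacent to the insertion point no longer sum to the
	     original branch length, so they are mapped back onto the original scale via ratio;
	     the rescaled half that lies in the direction of the root (or of the root branch of a
	     rooted reference tree, handled below) is then reported as the distal length */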
if(tr->wasRooted && atRoot) { /* always take distal length from left root node and then fix this later */ if(x == tr->leftRootNode) { if(tr->perPartitionEPA) distalLength = getBranchPerPartition(tr, e1, e1, insertions); else distalLength = getBranch(tr, e1, e1); } else { assert(x == tr->rightRootNode); if(tr->perPartitionEPA) distalLength = getBranchPerPartition(tr, e2, e2, insertions); else distalLength = getBranch(tr, e2, e2); } } else { if(root == x) { if(tr->perPartitionEPA) distalLength = getBranchPerPartition(tr, e1, e1, insertions); else distalLength = getBranch(tr, e1, e1); } else { assert(root == q); if(tr->perPartitionEPA) distalLength = getBranchPerPartition(tr, e2, e2, insertions); else distalLength = getBranch(tr, e2, e2); } } distalLength *= ratio; assert(distalLength <= b->epa->originalBranchLength); b->epa->distalBranches[insertions] = distalLength; } } } void addTraverseRobIterative(tree *tr, int branchNumber) { int inserts; branchInfo *b = &(tr->bInf[branchNumber]); double result, z[NUM_BRANCHES], defaultArray[NUM_BRANCHES]; assert(!tr->useFastScaling); for(inserts = 0; inserts < tr->numBranches; inserts++) { z[inserts] = sqrt(b->epa->branchLengths[inserts]); defaultArray[inserts] = defaultz; if(z[inserts] < zmin) z[inserts] = zmin; if(z[inserts] > zmax) z[inserts] = zmax; } newviewClassify(tr, b, z, inserts); for(inserts = 0; inserts < tr->numberOfTipsForInsertion; inserts++) { result = evalCL(tr, tr->temporaryVector, tr->temporaryScaling, tr->contiguousTips[tr->inserts[inserts]], defaultArray, inserts); b->epa->likelihoods[inserts] = result; } } #endif static void setupBranchMetaInfo(tree *tr, nodeptr p, int nTips, branchInfo *bInf) { int i, countBranches = tr->branchCounter; if(isTip(p->number, tr->mxtips)) { p->bInf = &bInf[countBranches]; p->back->bInf = &bInf[countBranches]; bInf[countBranches].oP = p; bInf[countBranches].oQ = p->back; bInf[countBranches].epa->leftNodeNumber = p->number; bInf[countBranches].epa->rightNodeNumber = p->back->number; bInf[countBranches].epa->originalBranchLength = getBranch(tr, p->z, p->back->z); bInf[countBranches].epa->branchNumber = countBranches; for(i = 0; i < tr->numBranches; i++) bInf[countBranches].epa->branchLengths[i] = p->z[i]; #ifdef _USE_PTHREADS if(!p->back->x) newviewGeneric(tr, p->back); masterBarrier(THREAD_GATHER_LIKELIHOOD, tr); #endif tr->branchCounter = tr->branchCounter + 1; return; } else { nodeptr q; assert(p == p->next->next->next); p->bInf = &bInf[countBranches]; p->back->bInf = &bInf[countBranches]; bInf[countBranches].oP = p; bInf[countBranches].oQ = p->back; bInf[countBranches].epa->leftNodeNumber = p->number; bInf[countBranches].epa->rightNodeNumber = p->back->number; bInf[countBranches].epa->originalBranchLength = getBranch(tr, p->z, p->back->z); bInf[countBranches].epa->branchNumber = countBranches; for(i = 0; i < tr->numBranches; i++) bInf[countBranches].epa->branchLengths[i] = p->z[i]; #ifdef _USE_PTHREADS if(!p->x) newviewGeneric(tr, p); if(!isTip(p->back->number, tr->mxtips)) { if(!p->back->x) newviewGeneric(tr, p->back); } masterBarrier(THREAD_GATHER_LIKELIHOOD, tr); #endif tr->branchCounter = tr->branchCounter + 1; q = p->next; while(q != p) { setupBranchMetaInfo(tr, q->back, nTips, bInf); q = q->next; } return; } } static void setupJointFormat(tree *tr, nodeptr p, int ntips, branchInfo *bInf, int *count) { if(isTip(p->number, tr->mxtips)) { p->bInf->epa->jointLabel = *count; *count = *count + 1; return; } else { setupJointFormat(tr, p->next->back, ntips, bInf, count); setupJointFormat(tr, 
p->next->next->back, ntips, bInf, count); p->bInf->epa->jointLabel = *count; *count = *count + 1; return; } } static void setupBranchInfo(tree *tr, nodeptr q) { nodeptr originalNode = tr->nodep[tr->mxtips + 1]; int count = 0; tr->branchCounter = 0; setupBranchMetaInfo(tr, q, tr->ntips, tr->bInf); assert(tr->branchCounter == tr->numberOfBranches); if(tr->wasRooted) { assert(tr->leftRootNode->back == tr->rightRootNode); assert(tr->leftRootNode == tr->rightRootNode->back); if(!isTip(tr->leftRootNode->number, tr->mxtips)) { setupJointFormat(tr, tr->leftRootNode->next->back, tr->ntips, tr->bInf, &count); setupJointFormat(tr, tr->leftRootNode->next->next->back, tr->ntips, tr->bInf, &count); } tr->leftRootNode->bInf->epa->jointLabel = count; tr->rootLabel = count; count = count + 1; if(!isTip(tr->rightRootNode->number, tr->mxtips)) { setupJointFormat(tr, tr->rightRootNode->next->back, tr->ntips, tr->bInf, &count); setupJointFormat(tr, tr->rightRootNode->next->next->back, tr->ntips, tr->bInf, &count); } } else { setupJointFormat(tr, originalNode->back, tr->ntips, tr->bInf, &count); setupJointFormat(tr, originalNode->next->back, tr->ntips, tr->bInf, &count); setupJointFormat(tr, originalNode->next->next->back, tr->ntips, tr->bInf, &count); } assert(count == tr->numberOfBranches); } static int infoCompare(const void *p1, const void *p2) { info *rc1 = (info *)p1; info *rc2 = (info *)p2; double i = rc1->lh; double j = rc2->lh; if (i > j) return (-1); if (i < j) return (1); return (0); } static void consolidateInfoMLHeuristics(tree *tr, int throwAwayStart) { int i, j; info *inf = (info*)rax_malloc(sizeof(info) * tr->numberOfBranches); assert(tr->useEpaHeuristics); for(j = 0; j < tr->numberOfTipsForInsertion; j++) { for(i = 0; i < tr->numberOfBranches; i++) { inf[i].lh = tr->bInf[i].epa->likelihoods[j]; inf[i].number = i; } qsort(inf, tr->numberOfBranches, sizeof(info), infoCompare); for(i = throwAwayStart; i < tr->numberOfBranches; i++) tr->bInf[inf[i].number].epa->executeThem[j] = 0; } for(i = 0; i < tr->numberOfBranches; i++) for(j = 0; j < tr->numberOfTipsForInsertion; j++) tr->bInf[i].epa->likelihoods[j] = unlikely; rax_free(inf); } static void consolidateInfo(tree *tr) { int i, j; for(j = 0; j < tr->numberOfTipsForInsertion; j++) { double max = unlikely; int max_index = -1; for(i = 0; i < tr->numberOfBranches; i++) { if(tr->bInf[i].epa->likelihoods[j] > max) { max = tr->bInf[i].epa->likelihoods[j]; max_index = i; } } assert(max_index >= 0); tr->bInf[max_index].epa->countThem[j] = tr->bInf[max_index].epa->countThem[j] + 1; } } #ifdef _PAVLOS static void printPerBranchReadAlignments(tree *tr) { int i, j; for(j = 0; j < tr->numberOfBranches; j++) { int readCounter = 0; for(i = 0; i < tr->numberOfTipsForInsertion; i++) if(tr->bInf[j].epa->countThem[i] > 0) readCounter++; if(readCounter > 0) { int l, w; char alignmentFileName[2048] = "", buf[64] = ""; FILE *af; strcpy(alignmentFileName, workdir); strcat(alignmentFileName, "RAxML_BranchAlignment.I"); sprintf(buf, "%d", j); strcat(alignmentFileName, buf); af = myfopen(alignmentFileName, "wb"); fprintf(af, "%d %d\n", readCounter, tr->rdta->sites); for(i = 0; i < tr->numberOfTipsForInsertion; i++) { if(tr->bInf[j].epa->countThem[i] > 0) { unsigned char *tip = tr->yVector[tr->inserts[i]]; fprintf(af, "%s ", tr->nameList[tr->inserts[i]]); for(l = 0; l < tr->cdta->endsite; l++) { for(w = 0; w < tr->cdta->aliaswgt[l]; w++) fprintf(af, "%c", getInverseMeaning(tr->dataVector[l], tip[l])); } fprintf(af, "\n"); } } fclose(af); printBothOpen("Branch read alignment 
for branch %d written to file %s\n", j, alignmentFileName); } } } #endif static void analyzeReads(tree *tr) { int i, *inserts = tr->inserts; tr->readPartition = (int *)rax_malloc(sizeof(int) * (size_t)tr->numberOfTipsForInsertion); for(i = 0; i < tr->numberOfTipsForInsertion; i++) { size_t j; int whichPartition = -1, partitionCount = 0, model, nodeNumber = tr->nodep[inserts[i]]->number; unsigned char *tipX1 = tr->yVector[nodeNumber]; for(model = 0; model < tr->NumberOfModels; model++) { int nonGap = 0; unsigned char undetermined = getUndetermined(tr->partitionData[model].dataType); for(j = tr->partitionData[model].lower; j < tr->partitionData[model].upper; j++) { if(tipX1[j] != undetermined) nonGap++; } if(nonGap > 0) { partitionCount++; whichPartition = model; } } assert(partitionCount == 1); assert(whichPartition >= 0 && whichPartition < tr->NumberOfModels); tr->readPartition[i] = whichPartition; } } void classifyML(tree *tr, analdef *adef) { int i, j, *perm; nodeptr r, q; char entropyFileName[1024], jointFormatTreeFileName[1024], labelledTreeFileName[1024], originalLabelledTreeFileName[1024], classificationFileName[1024]; FILE *entropyFile, *treeFile, *classificationFile; adef->outgroup = FALSE; tr->doCutoff = FALSE; assert(adef->restart); tr->numberOfBranches = 2 * tr->ntips - 3; if(tr->perPartitionEPA) printBothOpen("\nRAxML Evolutionary Placement Algorithm for partitioned multi-gene datasets (experimental version)\n"); else printBothOpen("\nRAxML Evolutionary Placement Algorithm\n"); if(adef->useBinaryModelFile) { evaluateGenericInitrav(tr, tr->start); //treeEvaluate(tr, 2); } else { evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, TRUE, 1.0); } printBothOpen("\nLikelihood of reference tree: %f\n\n", tr->likelihood); perm = (int *)rax_calloc(tr->mxtips + 1, sizeof(int)); tr->inserts = (int *)rax_calloc(tr->mxtips, sizeof(int)); markTips(tr->start, perm, tr->mxtips); markTips(tr->start->back, perm ,tr->mxtips); tr->numberOfTipsForInsertion = 0; for(i = 1; i <= tr->mxtips; i++) { if(perm[i] == 0) { tr->inserts[tr->numberOfTipsForInsertion] = i; tr->numberOfTipsForInsertion = tr->numberOfTipsForInsertion + 1; } } rax_free(perm); printBothOpen("RAxML will place %d Query Sequences into the %d branches of the reference tree with %d taxa\n\n", tr->numberOfTipsForInsertion, (2 * tr->ntips - 3), tr->ntips); assert(tr->numberOfTipsForInsertion == (tr->mxtips - tr->ntips)); tr->bInf = (branchInfo*)rax_malloc(tr->numberOfBranches * sizeof(branchInfo)); for(i = 0; i < tr->numberOfBranches; i++) { tr->bInf[i].epa = (epaBranchData*)rax_malloc(sizeof(epaBranchData)); tr->bInf[i].epa->countThem = (int*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(int)); tr->bInf[i].epa->executeThem = (int*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(int)); for(j = 0; j < tr->numberOfTipsForInsertion; j++) tr->bInf[i].epa->executeThem[j] = 1; tr->bInf[i].epa->branches = (double*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(double)); tr->bInf[i].epa->distalBranches = (double*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(double)); tr->bInf[i].epa->likelihoods = (double*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(double)); tr->bInf[i].epa->branchNumber = i; sprintf(tr->bInf[i].epa->branchLabel, "I%d", i); } r = tr->nodep[(tr->nextnode)++]; q = findAnyTip(tr->start, tr->rdta->numsp); assert(isTip(q->number, tr->rdta->numsp)); assert(!isTip(q->back->number, tr->rdta->numsp)); q = q->back; if(tr->perPartitionEPA) analyzeReads(tr); #ifdef _USE_PTHREADS tr->contiguousVectorLength = 
getContiguousVectorLength(tr); tr->contiguousScalingLength = getContiguousScalingLength(tr); allocBranchX(tr); masterBarrier(THREAD_INIT_EPA, tr); #endif setupBranchInfo(tr, q); #ifdef _USE_PTHREADS masterBarrier(THREAD_FREE_VECTORS, tr); #endif if(tr->useEpaHeuristics) { int heuristicInsertions = MAX(5, (int)(0.5 + (double)(tr->numberOfBranches) * tr->fastEPAthreshold)); printBothOpen("EPA heuristics: determining %d out of %d most promising insertion branches\n", heuristicInsertions, tr->numberOfBranches); #ifdef _USE_PTHREADS NumberOfJobs = tr->numberOfBranches; masterBarrier(THREAD_INSERT_CLASSIFY, tr); #else addTraverseRob(tr, r, q, FALSE); #endif consolidateInfoMLHeuristics(tr, heuristicInsertions); } #ifdef _USE_PTHREADS NumberOfJobs = tr->numberOfBranches; masterBarrier(THREAD_INSERT_CLASSIFY_THOROUGH, tr); #else addTraverseRob(tr, r, q, TRUE); #endif consolidateInfo(tr); printBothOpen("Overall Classification time: %f\n\n", gettime() - masterTime); #ifdef _PAVLOS assert(adef->compressPatterns == FALSE); printPerBranchReadAlignments(tr); #endif strcpy(entropyFileName, workdir); strcpy(jointFormatTreeFileName, workdir); strcpy(labelledTreeFileName, workdir); strcpy(originalLabelledTreeFileName, workdir); strcpy(classificationFileName, workdir); strcat(entropyFileName, "RAxML_entropy."); strcat(jointFormatTreeFileName, "RAxML_portableTree."); strcat(labelledTreeFileName, "RAxML_labelledTree."); strcat(originalLabelledTreeFileName, "RAxML_originalLabelledTree."); strcat(classificationFileName, "RAxML_classification."); strcat(entropyFileName, run_id); strcat(jointFormatTreeFileName, run_id); strcat(labelledTreeFileName, run_id); strcat(originalLabelledTreeFileName, run_id); strcat(classificationFileName, run_id); strcat(jointFormatTreeFileName, ".jplace"); rax_free(tr->tree_string); tr->treeStringLength *= 16; tr->tree_string = (char*)rax_calloc(tr->treeStringLength, sizeof(char)); treeFile = myfopen(labelledTreeFileName, "wb"); Tree2StringClassify(tr->tree_string, tr, tr->inserts, FALSE, FALSE, TRUE); fprintf(treeFile, "%s\n", tr->tree_string); fclose(treeFile); treeFile = myfopen(originalLabelledTreeFileName, "wb"); Tree2StringClassify(tr->tree_string, tr, tr->inserts, TRUE, FALSE, TRUE); fprintf(treeFile, "%s\n", tr->tree_string); fclose(treeFile); /* JSON format only works for sequential version porting this to Pthreads will be a pain in the ass */ treeFile = myfopen(jointFormatTreeFileName, "wb"); Tree2StringClassify(tr->tree_string, tr, tr->inserts, TRUE, TRUE, TRUE); fprintf(treeFile, "{\n"); fprintf(treeFile, "\t\"tree\": \"%s\", \n", tr->tree_string); fprintf(treeFile, "\t\"placements\": [\n"); { info *inf = (info*)rax_malloc(sizeof(info) * tr->numberOfBranches); for(i = 0; i < tr->numberOfTipsForInsertion; i++) { double all, maxprob = 0.0, lmax = 0.0, acc = 0.0; int k, validEntries = 0; for(j = 0; j < tr->numberOfBranches; j++) { inf[j].lh = tr->bInf[j].epa->likelihoods[i]; inf[j].pendantBranch = tr->bInf[j].epa->branches[i]; inf[j].distalBranch = tr->bInf[j].epa->distalBranches[i]; inf[j].number = tr->bInf[j].epa->jointLabel; } qsort(inf, tr->numberOfBranches, sizeof(info), infoCompare); for(j = 0; j < tr->numberOfBranches; j++) if(inf[j].lh == unlikely) break; else validEntries++; assert(validEntries > 0); j = 0; lmax = inf[0].lh; for(k = 0, all = 0.0; k < validEntries; k++) all += exp(inf[k].lh - lmax); fprintf(treeFile, "\t{\"p\":["); /* Erick's cutoff: I keep at most 7 placements and throw away anything that has less than 0.01*best_ml_ratio. 
my old cutoff was at 0.95 accumulated likelihood weight: while(acc <= 0.95) */ /*#define _ALL_ENTRIES*/ #ifdef _ALL_ENTRIES assert(validEntries == tr->numberOfBranches); while(j < validEntries) #else while(j < validEntries && j < 7) #endif { double prob = 0.0; acc += (prob = (exp(inf[j].lh - lmax) / all)); if(j == 0) maxprob = prob; #ifndef _ALL_ENTRIES if(prob >= maxprob * 0.01) #endif { if(j > 0) { if(tr->wasRooted && inf[j].number == tr->rootLabel) { double b = getBranch(tr, tr->leftRootNode->z, tr->rightRootNode->z); if(inf[j].distalBranch > 0.5 * b) fprintf(treeFile, ",[%d, %f, %f, %f, %f]", tr->numberOfBranches, inf[j].lh, prob, inf[j].distalBranch - 0.5 * b, inf[j].pendantBranch); else fprintf(treeFile, ",[%d, %f, %f, %f, %f]", inf[j].number, inf[j].lh, prob, 0.5 * b - inf[j].distalBranch, inf[j].pendantBranch); } else fprintf(treeFile, ",[%d, %f, %f, %f, %f]", inf[j].number, inf[j].lh, prob, inf[j].distalBranch, inf[j].pendantBranch); } else { if(tr->wasRooted && inf[j].number == tr->rootLabel) { double b = getBranch(tr, tr->leftRootNode->z, tr->rightRootNode->z); if(inf[j].distalBranch > 0.5 * b) fprintf(treeFile, "[%d, %f, %f, %f, %f]", tr->numberOfBranches, inf[j].lh, prob, inf[j].distalBranch - 0.5 * b, inf[j].pendantBranch); else fprintf(treeFile, "[%d, %f, %f, %f, %f]", inf[j].number, inf[j].lh, prob, 0.5 * b - inf[j].distalBranch, inf[j].pendantBranch); } else fprintf(treeFile, "[%d, %f, %f, %f, %f]", inf[j].number, inf[j].lh, prob, inf[j].distalBranch, inf[j].pendantBranch); } } j++; } if(i == tr->numberOfTipsForInsertion - 1) fprintf(treeFile, "], \"n\":[\"%s\"]}\n", tr->nameList[tr->inserts[i]]); else fprintf(treeFile, "], \"n\":[\"%s\"]},\n", tr->nameList[tr->inserts[i]]); } rax_free(inf); } #ifdef _ALL_ENTRIES assert(j == tr->numberOfBranches); #endif fprintf(treeFile, "\t ],\n"); fprintf(treeFile, "\t\"metadata\": {\"invocation\": "); fprintf(treeFile, "\""); { int i; for(i = 0; i < globalArgc; i++) fprintf(treeFile,"%s ", globalArgv[i]); } fprintf(treeFile, "\", \"raxml_version\": \"%s\"", programVersion); fprintf(treeFile,"},\n"); fprintf(treeFile, "\t\"version\": 2,\n"); fprintf(treeFile, "\t\"fields\": [\n"); fprintf(treeFile, "\t\"edge_num\", \"likelihood\", \"like_weight_ratio\", \"distal_length\", \n"); fprintf(treeFile, "\t\"pendant_length\"\n"); fprintf(treeFile, "\t]\n"); fprintf(treeFile, "}\n"); fclose(treeFile); /* JSON format end */ classificationFile = myfopen(classificationFileName, "wb"); for(i = 0; i < tr->numberOfTipsForInsertion; i++) for(j = 0; j < tr->numberOfBranches; j++) { if(tr->bInf[j].epa->countThem[i] > 0) fprintf(classificationFile, "%s I%d %d %8.20f\n", tr->nameList[tr->inserts[i]], j, tr->bInf[j].epa->countThem[i], tr->bInf[j].epa->branches[i] / (double)(tr->bInf[j].epa->countThem[i])); } fclose(classificationFile); printBothOpen("\n\nLabelled reference tree including branch labels and query sequences written to file: %s\n\n", labelledTreeFileName); printBothOpen("Labelled reference tree with branch labels (without query sequences) written to file: %s\n\n", originalLabelledTreeFileName); printBothOpen("Labelled reference tree with branch labels in portable pplacer/EPA format (without query sequences) written to file: %s\n\n", jointFormatTreeFileName); printBothOpen("Classification result file written to file: %s\n\n", classificationFileName); { info *inf = (info*)rax_malloc(sizeof(info) * tr->numberOfBranches); FILE *likelihoodWeightsFile; char likelihoodWeightsFileName[1024]; strcpy(likelihoodWeightsFileName, workdir); 
strcat(likelihoodWeightsFileName, "RAxML_classificationLikelihoodWeights."); strcat(likelihoodWeightsFileName, run_id); likelihoodWeightsFile = myfopen(likelihoodWeightsFileName, "wb"); entropyFile = myfopen(entropyFileName, "wb"); for(i = 0; i < tr->numberOfTipsForInsertion; i++) { double all, entropy = 0.0, lmax = 0.0, acc = 0.0; int k, validEntries = 0; for(j = 0; j < tr->numberOfBranches; j++) { inf[j].lh = tr->bInf[j].epa->likelihoods[i]; inf[j].number = j; } qsort(inf, tr->numberOfBranches, sizeof(info), infoCompare); for(j = 0; j < tr->numberOfBranches; j++) if(inf[j].lh == unlikely) break; else validEntries++; assert(validEntries > 0); j = 0; lmax = inf[0].lh; for(k = 0, all = 0.0; k < validEntries; k++) all += exp(inf[k].lh - lmax); while(acc <= 0.95 && j < validEntries) { double prob = 0.0; acc += (prob = (exp(inf[j].lh - lmax) / all)); fprintf(likelihoodWeightsFile, "%s I%d %f %f\n", tr->nameList[tr->inserts[i]], inf[j].number, prob, acc); j++; } j = 0; while(j < validEntries) { double prob = 0.0; prob = exp(inf[j].lh - lmax) / all; if(prob > 0) entropy -= ( prob * log(prob) ); /*pp 20110531 */ j++; } /* normalize entropy by dividing with the log(validEntries) which is the maximum Entropy possible */ fprintf(entropyFile, "%s\t%f\n", tr->nameList[tr->inserts[i]], entropy / log((double)validEntries)); } rax_free(inf); fclose(entropyFile); fclose(likelihoodWeightsFile); printBothOpen("Classification result file using likelihood weights written to file: %s\n\n", likelihoodWeightsFileName); printBothOpen("Classification entropy result file using likelihood weights written to file: %s\n\n", entropyFileName); } exit(0); }