Content - 5d711ee83916e6cf935369af53021111f82ef204 - 610f929/parsePartitions.c

visit type:
Tip revision: 69a7edcba961f95f732e07ce64e7e5b1c7ae548b authored by Alexis Stamatakis on 04 October 2023, 07:33:55 UTC
Merge pull request #71 from martin-g/linux-arm64-support-2
Tip revision: 69a7edc
parsePartitions.c
/*  RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees 
 *  Copyright August 2006 by Alexandros Stamatakis
 *
 *  Partially derived from
 *  fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen
 *  
 *  and 
 *
 *  Programs of the PHYLIP package by Joe Felsenstein.
 *
 *  This program is free software; you may redistribute it and/or modify its
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 * 
 *
 *  For any other enquiries send an Email to Alexandros Stamatakis
 *  Alexandros.Stamatakis@epfl.ch
 *
 *  When publishing work that is based on the results from RAxML-VI-HPC please cite:
 *
 *  Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models". 
 *  Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446
 */


#ifndef WIN32
#include <sys/times.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h> 
#endif

#include <math.h>
#include <time.h> 
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <strings.h>

#include "axml.h"

/*****************************FUNCTIONS FOR READING MULTIPLE MODEL SPECIFICATIONS************************************************/


extern char modelFileName[1024];
extern char excludeFileName[1024];
extern char secondaryStructureFileName[1024];


extern char seq_file[1024];

extern char *protModels[NUM_PROT_MODELS];

static boolean lineContainsOnlyWhiteChars(char *line)
{
  int i, n = strlen(line);

  if(n == 0)
    return TRUE;

  for(i = 0; i < n; i++)
    {
      if(!whitechar(line[i]))
	return FALSE;
    }
  return TRUE;
}


static int isNum(char c)
{
  
  return (c == '0' || c == '1' || c == '2' || c == '3' || c == '4' ||
	  c == '5' || c == '6' || c == '7' || c == '8' || c == '9');
}


static void skipWhites(char **ch)
{
  while(**ch == ' ' || **ch == '\t')
    *ch = *ch + 1;
}

static void analyzeIdentifier(char **ch, int modelNumber, tree *tr)
{
  char 
    *start = *ch,
    ident[2048] = "",
    model[2048] = "",
    thisModel[2048] = "";
  
  int   
    i = 0, 
    n, 
    j,
    containsComma = 0;

  while(**ch != '=')
    {
      if(**ch == '\n' || **ch == '\r')
	{
	  printf("\nPartition file parsing error!\n");
	  printf("Each line must contain a \"=\" character\n");
	  printf("Offending line: %s\n", start);
	  printf("RAxML will exit now.\n\n");
	  errorExit(-1);
	}

      if(**ch != ' ' && **ch != '\t')
	{
	  ident[i] = **ch;      
	  i++;
	}      
      *ch = *ch + 1;
    }
  
  ident[i] = '\0';

  n = i;
  i = 0;
  
  

  for(i = 0; i < n; i++)
    if(ident[i] == ',') 
      containsComma = 1;

  if(!containsComma)
    {
      printf("Error, model file must have format: Substitution model, then a comma, and then the partition name\n");
      exit(-1);
    }
  else
    {
      boolean 
	analyzeRest = TRUE,
	useExternalFile = FALSE,
	found = FALSE;
      
      int 
	openBracket = 0,
	closeBracket = 0,
	openPos = 0,
	closePos = 0;
            
      i = 0;
      
      while(ident[i] != ',')
	{	 
	  if(ident[i] == '[')
	    {
	      openPos = i;
	      openBracket++;
	    }
	  if(ident[i] == ']')
	    {
	      closePos = i;
	      closeBracket++;
	    }
	  model[i] = ident[i];
	  i++;
	}     

      if(closeBracket > 0 || openBracket > 0)
	{
	  if((closeBracket == 1) && (openBracket == 1) && (openPos < closePos))
	    useExternalFile = TRUE;
	  else
	    {
	      printf("\nError: Apparently you want to specify a user-defined protein substitution model\n");
	      printf("or ascertainment bias correction model that shall be read from file\n");
	      printf("It must be enclosed in opening and closing bracktes like this: [prot=fileName] or [asc=fileName]\n\n");
	      printf("you specified: %s\n\n", model);
	      exit(-1);
	    }
	}
      
      if(useExternalFile)
	{
	  char
	    designator[2048] = "",
	    fileName[2048] = "";

	  int 
	    pos,	   
	    index,
	    lower = 0,
	    upper = i - 1;	       
	  
	  boolean
	    isProteinFile   = TRUE;

	  while(model[lower] == '[')
	    lower++;

	  while(model[upper] == ']')
	    upper--;
	  
	  assert(lower < upper);

	  index = lower;
	  pos = 0;	  
	  
	  while(model[index] != '~')
	    {
	      assert(index <= upper);
	      designator[pos] = model[index];
	      pos++;
	      index++;
	    }

	  designator[pos] = '\0';
	  
	  if(strcmp(designator, "asc") == 0)	    
	    isProteinFile = FALSE;	    
	  else
	    {
	      if(strcmp(designator, "prot") == 0)
		isProteinFile = TRUE;
	      else
		{
		  printf("Error external partition file type %s does not exist\n", designator);
		  printf("Available file types: asc and prot\n");
		  exit(-1);
		}	       
	    }	    	 
	  
	  while(model[index] == '~')	    	     
	    index++;	   

	  pos = 0;
	  
	  while(model[index] != ']')
	    {
	      fileName[pos] = model[index];
	      index++;
	      pos++;
	    }	    	  
	  
	  fileName[pos] = '\0';	 

	  if(!filexists(fileName))
	    {
	      printf("\n\ncustom protein substitution or ascertainment bias file [%s] you want to use does not exist!\n", fileName);
	      printf("you need to specify the full path\n");
	      printf("the file name shall not contain blanks!\n\n");
	      exit(-1);
	    }
	  
	  
	  if(isProteinFile)
	    {
	      strcpy(tr->initialPartitionData[modelNumber].proteinSubstitutionFileName, fileName);
	      /*printf("%s \n", tr->initialPartitionData[modelNumber].proteinSubstitutionFileName);*/
	      
	      tr->initialPartitionData[modelNumber].protModels = PROT_FILE;		  
	      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = TRUE;
	      tr->initialPartitionData[modelNumber].dataType   = AA_DATA;
	      analyzeRest = FALSE;
	    }
	  else
	    {
	      int 
		newIndex = 0;
	      	      
	      strcpy(tr->initialPartitionData[modelNumber].ascFileName, fileName); 
	      
	      i = 0;
	      
	      while(ident[i] != ',')
		{
		  if(ident[i] == '\0')
		    {
		      printf("Expecting two commas in string %s\n", ident);
		      exit(-1);
		    }
		  i++;
		}
	      
	      i++;	     

	      while(ident[i] != ',')
		{
		  if(ident[i] == '\0')
		    {
		      printf("Expecting two commas in string %s\n", ident);
		      exit(-1);
		    }
		  model[newIndex] = ident[i];
		  i++;
		  newIndex++;
		}
		    	     
	      model[newIndex] = '\0';
	    }	 
	}
      
      if(analyzeRest)
	{
	  /* AA */
	  tr->initialPartitionData[modelNumber].ascBias = FALSE;
	   
	  for(i = 0; i < NUM_PROT_MODELS && !found; i++)
	    {	
	      strcpy(thisModel, protModels[i]);
	      
	      if(strcasecmp(model, thisModel) == 0)
		{	      	      
		  tr->initialPartitionData[modelNumber].protModels = i;		  
		  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs = TRUE;
		  tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		  
		  found = TRUE;
		}
	      
	      if(!found)
		{
		  if(i != GTR && i != GTR_UNLINKED)
		    {
		      strcpy(thisModel, protModels[i]);
		      strcat(thisModel, "F");
		      
		      if(strcasecmp(model, thisModel) == 0)
			{	      
			  tr->initialPartitionData[modelNumber].protModels = i;		  
			  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			  tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		     
			  found = TRUE;

			  if(tr->initialPartitionData[modelNumber].protModels == AUTO)
			    {
			      printf("\nError: Option AUTOF has been deprecated, exiting\n\n");
			      errorExit(-1);
			    }
			}

		     
		    }
		}
	      
	      
	      if(!found)
		{		  
		  strcpy(thisModel, protModels[i]);
		  strcat(thisModel, "X");
		      
		  if(strcasecmp(model, thisModel) == 0)
		    {	      
		      tr->initialPartitionData[modelNumber].protModels = i;		  
		      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
		      tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		     
		      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
		      found = TRUE;
		      
		      if(tr->initialPartitionData[modelNumber].protModels == AUTO)
			{
			  printf("\nError: Option AUTOX has been deprecated, exiting\n\n");
			  errorExit(-1);
			}
		    }			  

		 
		}
	      

	      if(found && (tr->initialPartitionData[modelNumber].protModels == GTR || tr->initialPartitionData[modelNumber].protModels == GTR_UNLINKED))
		tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;		    		
	    }

	   /* AA with Asc bias*/
	  
	  if(!found)
	    {
	      for(i = 0; i < NUM_PROT_MODELS && !found; i++)
		{	
		  strcpy(thisModel, "ASC_");
		  strcat(thisModel, protModels[i]);
		  
		  if(strcasecmp(model, thisModel) == 0)
		    {	      	      
		      tr->initialPartitionData[modelNumber].protModels = i;		  
		      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs = TRUE;
		      tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		  
		      found = TRUE;
		    }
		  
		  if(!found)
		    {
		      if(i != GTR && i != GTR_UNLINKED)
			{
			  strcpy(thisModel, protModels[i]);
			  strcat(thisModel, "F");
			  
			  if(strcasecmp(model, thisModel) == 0)
			    {	      
			      tr->initialPartitionData[modelNumber].protModels = i;		  
			      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			      tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		     
			      found = TRUE;
			    }
			}
		    }
		  
	      
		  if(!found)
		    {		  
		      strcpy(thisModel, protModels[i]);
		      strcat(thisModel, "X");
		      
		      if(strcasecmp(model, thisModel) == 0)
			{	      
			  tr->initialPartitionData[modelNumber].protModels = i;		  
			  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			  tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		     
			  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
			  found = TRUE;
			}		
		    }
		  
		  if(found)
		    tr->initialPartitionData[modelNumber].ascBias = TRUE;

		  if(found && (tr->initialPartitionData[modelNumber].protModels == GTR || tr->initialPartitionData[modelNumber].protModels == GTR_UNLINKED))
		    tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;		    		
		}
	    }
	  

	  
	  if(!found)
	    {		  	  
	      if(strcasecmp(model, "DNA") == 0 || strcasecmp(model, "DNAX") == 0 || strcasecmp(model, "ASC_DNA") == 0 || strcasecmp(model, "ASC_DNAX") == 0)
		{ 
		  tr->initialPartitionData[modelNumber].protModels = -1;		  
		  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
		  tr->initialPartitionData[modelNumber].dataType   = DNA_DATA;

		  if(strcasecmp(model, "DNAX") == 0 || strcasecmp(model, "ASC_DNAX") == 0)
		    {
		      if(strcasecmp(model, "ASC_DNAX") == 0)
			tr->initialPartitionData[modelNumber].ascBias = TRUE;
		      else
			tr->initialPartitionData[modelNumber].ascBias = FALSE;
		      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
		    }
		  else
		    {
		     
		      if(strcasecmp(model, "ASC_DNA") == 0)
			tr->initialPartitionData[modelNumber].ascBias = TRUE;
		      else
			tr->initialPartitionData[modelNumber].ascBias = FALSE;
		      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = FALSE;
		    }
		      
		  
		  found = TRUE;
		}	  
	      else
		{	    	  
		  if(strcasecmp(model, "BIN") == 0 || strcasecmp(model, "BINX") == 0 || strcasecmp(model, "ASC_BIN") == 0 || strcasecmp(model, "ASC_BINX") == 0)
		    {	     	      
		      tr->initialPartitionData[modelNumber].protModels = -1;		  
		      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
		      tr->initialPartitionData[modelNumber].dataType   = BINARY_DATA;
		      
		      if(strcasecmp(model, "BINX") == 0 || strcasecmp(model, "ASC_BINX") == 0)
			{
			  if(strcasecmp(model, "ASC_BINX") == 0)
			    tr->initialPartitionData[modelNumber].ascBias = TRUE;
			  else
			    tr->initialPartitionData[modelNumber].ascBias = FALSE;
			  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
			}
		      else
			{
			  if(strcasecmp(model, "ASC_BIN") == 0)
			    tr->initialPartitionData[modelNumber].ascBias = TRUE;
			  else
			    tr->initialPartitionData[modelNumber].ascBias = FALSE;
			  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = FALSE;
			}
		      
		      found = TRUE;
		    }
		  else
		    {
		      if(strcasecmp(model, "MULTI") == 0 || strcasecmp(model, "MULTIX") == 0 || strcasecmp(model, "ASC_MULTI") == 0 || strcasecmp(model, "ASC_MULTIX") == 0)
			{	     	      
			  tr->initialPartitionData[modelNumber].protModels = -1;		  
			  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			  tr->initialPartitionData[modelNumber].dataType   = GENERIC_32;

			  if(strcasecmp(model, "MULTIX") == 0 || strcasecmp(model, "ASC_MULTIX") == 0)
			    {
			      if(strcasecmp(model, "ASC_MULTIX") == 0)
				tr->initialPartitionData[modelNumber].ascBias = TRUE;
			      else
				tr->initialPartitionData[modelNumber].ascBias = FALSE;
			      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
			    }
			  else
			    {
			      if(strcasecmp(model, "ASC_MULTI") == 0)
				tr->initialPartitionData[modelNumber].ascBias = TRUE;
			      else
				tr->initialPartitionData[modelNumber].ascBias = FALSE;
			      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = FALSE;
			    }
			  
			  found = TRUE;
			}
		      else
			{
			  if(strcasecmp(model, "CODON") == 0 || strcasecmp(model, "CODONX") == 0 || strcasecmp(model, "ASC_CODON") == 0 || strcasecmp(model, "ASC_CODONX") == 0)
			    {	     	      
			      tr->initialPartitionData[modelNumber].protModels = -1;		  
			      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			      tr->initialPartitionData[modelNumber].dataType   = GENERIC_64;
			      
			      if(strcasecmp(model, "CODONX") == 0 || strcasecmp(model, "ASC_CODONX") == 0)
				{
				  if(strcasecmp(model, "ASC_CODONX") == 0)
				    tr->initialPartitionData[modelNumber].ascBias = TRUE;
				  else
				    tr->initialPartitionData[modelNumber].ascBias = FALSE;
				  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
				}
			      else
				{
				  if(strcasecmp(model, "ASC_CODON") == 0)
				    tr->initialPartitionData[modelNumber].ascBias = TRUE;
				  else
				    tr->initialPartitionData[modelNumber].ascBias = FALSE;
				  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = FALSE;
				}
			     
			      
			      found = TRUE;
			    }
			}
		    }
		  
		}
	    }

	  if(!found)
	    {
	      printf("ERROR: you specified the unknown model %s for partition %d\n", model, modelNumber);
	      exit(-1);
	    }
	}
	  
      i = 0;
      while(ident[i++] != ',');      
	  
      tr->initialPartitionData[modelNumber].partitionName = (char*)rax_malloc((n - i + 1) * sizeof(char));          
	  
      j = 0;
      while(i < n)	
	tr->initialPartitionData[modelNumber].partitionName[j++] =  ident[i++];
      
      tr->initialPartitionData[modelNumber].partitionName[j] = '\0';                      
    }
}



static void setModel(int model, int position, int *a)
{
  if(a[position] == -1)
    a[position] = model;
  else
    {
      printf("ERROR trying to assign model %d to position %d \n", model, position);
      printf("while already model %d has been assigned to this position\n", a[position]);
      exit(-1);
    }      
}


static int myGetline(char **lineptr, int *n, FILE *stream)
{
  char *line, *p;
  int size, copy, len;
  int chunkSize = 256 * sizeof(char);

   if (*lineptr == NULL || *n < 2) 
    {
      line = (char *)rax_realloc(*lineptr, chunkSize, FALSE);
      if (line == NULL)
	return -1;
      *lineptr = line;
      *n = chunkSize;
    }

   line = *lineptr;
   size = *n;
  
   copy = size;
   p = line;
   
   while(1)
     {
       while (--copy > 0)
	 {
	   register int c = getc(stream);
	   if (c == EOF)
	     goto lose;
	   else
	     {
	       *p++ = c;
	       if(c == '\n' || c == '\r')	
		 goto win;
	     }
	 }

       /* Need to enlarge the line buffer.  */
       len = p - line;
       size *= 2;
       line = rax_realloc (line, size, FALSE);
       if (line == NULL)
	 goto lose;
       *lineptr = line;
       *n = size;
       p = line + len;
       copy = size - len;
     }
   
 lose:
  if (p == *lineptr)
    return -1;
  /* Return a partial line since we got an error in the middle.  */
 win:
  *p = '\0';
  return p - *lineptr;
}

static void nonContiguousError(analdef *adef)
{
  if(adef->compressPatterns == FALSE)
    {
      printf("\nError: You are not allowed to use interleaved partitions, that is, assign non-contiguous sites\n");
      printf("to the same partition model, when pattern compression is disabled via the -H flag,\n");
      printf("or when pattern compression is disabled implicitely by some other option that requires it!\n\n");
      exit(-1);
    }
}

void parsePartitions(analdef *adef, rawdata *rdta, tree *tr)
{
  FILE *f; 
  int numberOfModels = 0; 
  int nbytes = 0;
  char *ch;
  char *cc = (char *)NULL;
  char **p_names;
  int n, i, l;
  int lower, upper, modulo;
  char buf[256];
  int **partitions;
  int pairsCount;
  int as, j;
  int k; 

  f = myfopen(modelFileName, "rb");   

 
  while(myGetline(&cc, &nbytes, f) > -1)
    {     
      if(!lineContainsOnlyWhiteChars(cc))
	{
	  numberOfModels++;
	}
      if(cc)
	rax_free(cc);
      cc = (char *)NULL;
    }     
  
  rewind(f);
      
  p_names = (char **)rax_malloc(sizeof(char *) * numberOfModels);
  partitions = (int **)rax_malloc(sizeof(int *) * numberOfModels);
      
 
  
  tr->initialPartitionData = (pInfo*)rax_malloc(sizeof(pInfo) * numberOfModels);

      
  for(i = 0; i < numberOfModels; i++) 
    {     
      tr->initialPartitionData[i].protModels = adef->proteinMatrix;
      tr->initialPartitionData[i].usePredefinedProtFreqs  = adef->protEmpiricalFreqs;
      tr->initialPartitionData[i].optimizeBaseFrequencies = FALSE;
      tr->initialPartitionData[i].dataType   = -1;
    }

  for(i = 0; i < numberOfModels; i++)    
    partitions[i] = (int *)NULL;
    
  i = 0;
  while(myGetline(&cc, &nbytes, f) > -1)
    {          
      if(!lineContainsOnlyWhiteChars(cc))
	{
	  n = strlen(cc);	 
	  p_names[i] = (char *)rax_malloc(sizeof(char) * (n + 1));
	  strcpy(&(p_names[i][0]), cc);	 
	  i++;
	}
      if(cc)
	rax_free(cc);
      cc = (char *)NULL;
    }         

  

  for(i = 0; i < numberOfModels; i++)
    {         
     
      ch = p_names[i];     
      pairsCount = 0;
      skipWhites(&ch);
      
      if(*ch == '=')
	{
	  printf("Identifier missing prior to '=' in %s\n", p_names[i]);
	  exit(-1);
	}
      
      analyzeIdentifier(&ch, i, tr);
      ch++;
            
    numberPairs:
      pairsCount++;
      partitions[i] = (int *)rax_realloc((void *)partitions[i], (1 + 3 * pairsCount) * sizeof(int), FALSE);
      partitions[i][0] = pairsCount;
      partitions[i][3 + 3 * (pairsCount - 1)] = -1; 	
      
      skipWhites(&ch);
      
      if(!isNum(*ch))
	{
	  printf("%c Number expected in %s\n", *ch, p_names[i]);
	  exit(-1);
	}   
      
      l = 0;
      while(isNum(*ch))		 
	{
	  /*printf("%c", *ch);*/
	  buf[l] = *ch;
	  ch++;	
	  l++;
	}
      buf[l] = '\0';
      lower = atoi(buf);
      partitions[i][1 + 3 * (pairsCount - 1)] = lower;   
      
      skipWhites(&ch);
      
      /* NEW */
      
      if((*ch != '-') && (*ch != ','))
	{
	  if(*ch == '\0' || *ch == '\n' || *ch == '\r')
	    {
	      upper = lower;
	      goto SINGLE_NUMBER;
	    }
	  else
	    {
	      printf("'-' or ',' expected in %s\n", p_names[i]);
	      exit(-1);
	    }
	}	 
      
      if(*ch == ',')
	{	     
	  upper = lower;
	  nonContiguousError(adef);
	  goto SINGLE_NUMBER;
	}
      
      /* END NEW */
      
      ch++;   
      
      skipWhites(&ch);
      
      if(!isNum(*ch))
	{
	  printf("%c Number expected in %s\n", *ch, p_names[i]);
	  exit(-1);
	}    
      
      l = 0;
      while(isNum(*ch))
	{    
	  buf[l] = *ch;
	  ch++;	
	  l++;
	}
      buf[l] = '\0';
      upper = atoi(buf);     
    SINGLE_NUMBER:
      partitions[i][2 + 3 * (pairsCount - 1)] = upper;        	  
      
      if(upper < lower)
	{
	  printf("Upper bound %d smaller than lower bound %d for this partition: %s\n", upper, lower,  p_names[i]);
	  exit(-1);
	}
      
      skipWhites(&ch);
      
      if(*ch == '\0' || *ch == '\n' || *ch == '\r') /* PC-LINEBREAK*/
	{    
	  goto parsed;
	}
      
      if(*ch == ',')
	{	 
	  ch++;
	  nonContiguousError(adef);
	  goto numberPairs;
	}
      
      if(*ch == '\\')
	{
	  ch++;
	  skipWhites(&ch);
	  
	  if(!isNum(*ch))
	    {
	      printf("%c Number expected in %s\n", *ch, p_names[i]);
	      exit(-1);
	    }     

	  nonContiguousError(adef);
	  
	  l = 0;
	  while(isNum(*ch))
	    {
	      buf[l] = *ch;
	      ch++;	
	      l++;
	    }
	  buf[l] = '\0';
	  modulo = atoi(buf);      
	  partitions[i][3 + 3 * (pairsCount - 1)] = modulo; 	
	  
	  skipWhites(&ch);
	  if(*ch == '\0' || *ch == '\n' || *ch == '\r')
	    {	     
	      goto parsed;
	    }

	  if(*ch == ',')
	    {	       
	      ch++;
	      nonContiguousError(adef);
	      goto numberPairs;
	    }
	}  
      
      if(*ch == '/')
	{
	  printf("\nRAxML detected the character \"/\" in your partition file.\n");
	  printf("Did you mean to write something similar to this: \"DNA, p1=1-100\\3\" ?\n");
	  printf("It's actually a backslash, not a slash, the program will exit now with an error!\n\n");
	}    
      else
	{ 	 
	  printf("\nRAxML detected the character \"%c\" in your partition file,\n", *ch);
	  printf("while it does not belong there!\n");
	  printf("\nAre you sure that your partition file complies with the RAxML partition file format?\n");
	  printf("\nActually reading the manual, does indeed do help a lot\n\n");
	  printf("The program will exit now with an error!\n\n");
	}

      printf("The problematic line in your partition file is this one here:\n\n");
	
      printf("%s\n\n", p_names[i]);

      assert(0);
       
    parsed:
      ;
    }
  
  fclose(f);
 
  /*********************************************************************************************************************/ 

  for(i = 0; i <= rdta->sites; i++)
    tr->model[i] = -1;
  
  for(i = 0; i < numberOfModels; i++)
    {   
      as = partitions[i][0];     
      
      for(j = 0; j < as; j++)
	{
	  lower = partitions[i][1 + j * 3];
	  upper = partitions[i][2 + j * 3]; 
	  modulo = partitions[i][3 + j * 3];	
	 
	  if(modulo == -1)
	    {
	      for(k = lower; k <= upper; k++)
		setModel(i, k, tr->model);
	    }
	  else
	    {
	      for(k = lower; k <= upper; k += modulo)
		{
		  if(k <= rdta->sites)
		    setModel(i, k, tr->model);	      
		}
	    }
	}        
    }


  for(i = 1; i < rdta->sites + 1; i++)
    {
      
      if(tr->model[i] == -1)
	{
	  printf("ERROR: Alignment Position %d has not been assigned any model\n", i);
	  exit(-1);
	}      
    }  

  for(i = 0; i < numberOfModels; i++)
    {
      rax_free(partitions[i]);
      rax_free(p_names[i]);
    }
  
  rax_free(partitions);
  rax_free(p_names);    
    
  tr->NumberOfModels = numberOfModels;     
  
  if(adef->perGeneBranchLengths)
    {
      if(tr->NumberOfModels > NUM_BRANCHES)
	{
	  printf("You are trying to use %d partitioned models for an individual per-gene branch length estimate.\n", tr->NumberOfModels);
	  printf("Currently only %d are allowed to improve efficiency.\n", NUM_BRANCHES);
	  printf("\n");
	  printf("In order to change this please replace the line \"#define NUM_BRANCHES   %d\" in file \"axml.h\" \n", NUM_BRANCHES);
	  printf("by \"#define NUM_BRANCHES   %d\" and then re-compile RAxML.\n", tr->NumberOfModels);
	  exit(-1);
	}
      else
	{
	  tr->multiBranch = 1;
	  tr->numBranches = tr->NumberOfModels;
	}
    }
}

/*******************************************************************************************************************************/

void handleExcludeFile(tree *tr, analdef *adef, rawdata *rdta)
{
  FILE *f;  
  char buf[256];
  int
    ch,
    j, value, i,
    state = 0,
    numberOfModels = 0,
    l = -1,
    excludeRegion   = 0,
    excludedColumns = 0,
    modelCounter    = 1;
  int
    *excludeArray, *countArray, *modelList;
  int
    **partitions;

  printf("\n\n");

  f = myfopen(excludeFileName, "rb");    

  while((ch = getc(f)) != EOF)
    {
      if(ch == '-')
	numberOfModels++;
    } 

  excludeArray = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1));
  countArray   = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1));
  modelList    = (int *)rax_malloc((rdta->sites + 1)* sizeof(int));

  partitions = (int **)rax_malloc(sizeof(int *) * numberOfModels);  
  for(i = 0; i < numberOfModels; i++)
    partitions[i] = (int *)rax_malloc(sizeof(int) * 2);

  rewind(f);
  
  while((ch = getc(f)) != EOF)
    {     
      switch(state)
	{
	case 0: /* get first number */
	  if(!whitechar(ch))
	    {
	      if(!isNum(ch))
		{
		  printf("exclude file must have format: number-number [number-number]*\n");
		  exit(-1);
		}
	      l = 0;
	      buf[l++] = ch;
	      state = 1;
	    }
	  break;
	case 1: /*get the number or detect - */
	  if(!isNum(ch) && ch != '-')
	    {
	      printf("exclude file must have format: number-number [number-number]*\n");
	      exit(-1);
	    }
	  if(isNum(ch))
	    {
	      buf[l++] = ch;
	    }
	  else
	    {
	      buf[l++] = '\0';	     
	      value = atoi(buf);
	      partitions[excludeRegion][0] = value;
	      state = 2;
	    }
	  break;
	case 2: /*get second number */
	  if(!isNum(ch))
	    {
	      printf("exclude file must have format: number-number [number-number]*\n");
	      exit(-1);
	    }
	  l = 0;
	  buf[l++] = ch;
	  state = 3;
	  break;
	case 3: /* continue second number or find end */	 
	  if(!isNum(ch) && !whitechar(ch))
	    {
	      printf("exclude file must have format: number-number [number-number]*\n");
	      exit(-1);
	    }
	  if(isNum(ch))
	    {
	      buf[l++] = ch;
	    }
	  else
	    {	      
	      buf[l++] = '\0';	     
	      value = atoi(buf);
	      partitions[excludeRegion][1] = value;
	      excludeRegion++;
	      state = 0;
	    }
	  break;
	default:
	  assert(0);
	}
    }
     
  if(state == 3)
    {
      buf[l++] = '\0';     
      value = atoi(buf);
      partitions[excludeRegion][1] = value;
      excludeRegion++;
    }
  
  assert(excludeRegion == numberOfModels);

  for(i = 0; i <= rdta->sites; i++)
    {
      excludeArray[i] = -1;
      countArray[i] = 0;      
      modelList[i] = -1;
    }  

  for(i = 0; i < numberOfModels; i++)
    {
      int lower = partitions[i][0];
      int upper = partitions[i][1];

      if(lower > upper)
	{
	  printf("Misspecified exclude region %d\n", i);
	  printf("lower bound %d is greater than upper bound %d\n", lower, upper);
	  exit(-1);
	}

      if(lower == 0)
	{
	  printf("Misspecified exclude region %d\n", i);
	  printf("lower bound must be greater than 0\n");
	  exit(-1);
	}

      if(upper > rdta->sites)
	{
	  printf("Misspecified exclude region %d\n", i);
	  printf("upper bound %d must be smaller than %d\n", upper, (rdta->sites + 1));
	  exit(-1);
	}	
      for(j = lower; j <= upper; j++)
	{
	  if(excludeArray[j] != -1)
	    {
	      printf("WARNING: Exclude regions %d and %d overlap at position %d (already excluded %d times)\n", 
		     excludeArray[j], i, j, countArray[j]);
	    }
	  excludeArray[j] = i;
	  countArray[j]   =  countArray[j] + 1;	 
	}
    }

  for(i = 1; i <= rdta->sites; i++)
    {
      if(excludeArray[i] != -1)
	excludedColumns++;
      else
	{
	  modelList[modelCounter] = tr->model[i];
	  modelCounter++;
	}
    }

  printf("You have excluded %d out of %d columns\n", excludedColumns, rdta->sites);

  if(excludedColumns == rdta->sites)
    {
      printf("Error: You have excluded all sites\n");
      exit(-1);
    }

  if(adef->useSecondaryStructure && (excludedColumns > 0))
    {
      char mfn[2048];
      int countColumns;
      FILE *newFile;

      assert(adef->useMultipleModel);

      strcpy(mfn, secondaryStructureFileName);
      strcat(mfn, ".");
      strcat(mfn, excludeFileName);

      newFile = myfopen(mfn, "wb");

      printBothOpen("\nA secondary structure file with analogous structure assignments for non-excluded columns is printed to file %s\n", mfn);        	     	    
		  
      for(i = 1, countColumns = 0; i <= rdta->sites; i++)
	{		  
	  if(excludeArray[i] == -1)
	    fprintf(newFile, "%c", tr->secondaryStructureInput[i - 1]);
	  else
	    countColumns++;
	}
		  
      assert(countColumns == excludedColumns);
		  
      fprintf(newFile,"\n");
		  
      fclose(newFile);
    }


  if(adef->useMultipleModel && (excludedColumns > 0))
    {      
      char mfn[2048];
      FILE *newFile;

      strcpy(mfn, modelFileName);
      strcat(mfn, ".");
      strcat(mfn, excludeFileName);

      newFile = myfopen(mfn, "wb");

      printf("\nA partition file with analogous model assignments for non-excluded columns is printed to file %s\n", mfn);     
	      
      for(i = 0; i < tr->NumberOfModels; i++)
	{
	  boolean modelStillExists = FALSE;
		  
	  for(j = 1; (j <= rdta->sites) && (!modelStillExists); j++)
	    {
	      if(modelList[j] == i)
		modelStillExists = TRUE;
	    }

	  if(modelStillExists)
	    {	  	      
	      int k = 1;
	      int lower, upper;
	      int parts = 0;

	      switch(tr->partitionData[i].dataType)
		{
		case AA_DATA:		      		     
		  {
		    char AAmodel[1024];
		    
		    if(tr->partitionData[i].ascBias)
		      {
			strcpy(AAmodel, "ASC_");
			strcat(AAmodel, protModels[tr->partitionData[i].protModels]);
		      }
		    else
		      strcpy(AAmodel, protModels[tr->partitionData[i].protModels]);
		    if(tr->partitionData[i].usePredefinedProtFreqs == FALSE)
		      strcat(AAmodel, "F");
		    if(tr->partitionData[i].optimizeBaseFrequencies)
		      strcat(AAmodel, "X");

		    assert(!(tr->partitionData[i].optimizeBaseFrequencies && tr->partitionData[i].usePredefinedProtFreqs));
		    
		    fprintf(newFile, "%s, ", AAmodel);
		  }
		  break;
		case DNA_DATA:
		  if(tr->partitionData[i].optimizeBaseFrequencies)
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_DNAX, ");
		      else
			fprintf(newFile, "DNAX, ");
		    }
		  else
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_DNA, ");
		      else
			fprintf(newFile, "DNA, ");
		    }
		  break;
		case BINARY_DATA:
		  if(tr->partitionData[i].optimizeBaseFrequencies)
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_BINX, ");
		      else
			fprintf(newFile, "BINX, ");
		    }
		  else
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_BIN, ");
		      else
			fprintf(newFile, "BIN, ");
		    }
		  break;
		case GENERIC_32:
		  if(tr->partitionData[i].optimizeBaseFrequencies)
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_MULTIX, ");
		      else
			fprintf(newFile, "MULTIX, ");
		    }
		  else
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_MULTI, ");
		      else
			fprintf(newFile, "MULTI, ");
		    }
		  break;
		case GENERIC_64:
		  if(tr->partitionData[i].optimizeBaseFrequencies)
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_CODONX, ");
		      else
			fprintf(newFile, "CODONX, ");
		    }
		  else
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_CODON, ");
		      else
			fprintf(newFile, "CODON, ");
		    }
		  break;
		default:
		  assert(0);
		}

	      fprintf(newFile, "%s = ", tr->partitionData[i].partitionName);
	      
	      while(k <= rdta->sites)
		{
		  if(modelList[k] == i)
		    {
		      lower = k;
		      while((modelList[k + 1] == i) && (k <= rdta->sites))		      			
			k++;
		      upper = k;
		      
		      if(lower == upper)		  
			{
			  if(parts == 0)
			    fprintf(newFile, "%d", lower);
			  else
			    fprintf(newFile, ",%d", lower);
			}
		      else
			{
			  if(parts == 0)
			    fprintf(newFile, "%d-%d", lower, upper);
			  else
			    fprintf(newFile, ",%d-%d", lower, upper);
			}		  
		      parts++;
		    }
		  k++;
		}
	      fprintf(newFile, "\n");
	    }		  
	}	
      fclose(newFile);
    }

  
  {
    FILE *newFile;
    char mfn[2048];
   

    strcpy(mfn, seq_file);
    strcat(mfn, ".");
    strcat(mfn, excludeFileName);
    
    newFile = myfopen(mfn, "wb");
    
    printf("\nAn alignment file with excluded columns is printed to file %s\n\n\n", mfn);
    
    fprintf(newFile, "%d %d\n", tr->mxtips, rdta->sites - excludedColumns);
    
    for(i = 1; i <= tr->mxtips; i++)
      {   
	unsigned char *tipI =  &(rdta->y[i][1]);
	fprintf(newFile, "%s ", tr->nameList[i]);
	
	for(j = 0; j < rdta->sites; j++)
	  {
	    if(excludeArray[j + 1] == -1)	      
	      fprintf(newFile, "%c", getInverseMeaning(tr->dataVector[j + 1], tipI[j]));	       	  
	  }
	
	fprintf(newFile, "\n");
      }
    
    fclose(newFile);
  }

  
  fclose(f);
  for(i = 0; i < numberOfModels; i++)
    rax_free(partitions[i]);
  rax_free(partitions);  
  rax_free(excludeArray);
  rax_free(countArray);
  rax_free(modelList);
}


void parseProteinModel(double *externalAAMatrix, char *fileName)
{
  FILE 
    *f = myfopen(fileName, "rb"); 
  
  int 
    doublesRead = 0,
    result = 0,
    i, 
    j;
  
  double 
    acc = 0.0;

  printf("\nParsing user-defined protein model from file %s\n", fileName);  

  while(doublesRead < 420)
    {     
      result = fscanf(f, "%lf", &(externalAAMatrix[doublesRead++]));           

      if(result == EOF)
	{
	  printf("Error protein model file must consist of exactly 420 entries \n");
	  printf("The first 400 entries are for the rates of the AA matrix, while the\n");
	  printf("last 20 should contain the empirical base frequencies\n");
	  printf("Reached End of File after %d entries\n", (doublesRead - 1));
	  exit(-1);
	}    
    }
       
  fclose(f);

  /* CHECKS */
  for(i = 0; i < 20; i++)
    for(j = 0; j < 20; j++)
      {
	if(i != j)
	  {
	    if(externalAAMatrix[i * 20 + j] != externalAAMatrix[j * 20 + i])
	      {
		printf("Error user-defined Protein model matrix must be symmetric\n");
		printf("Entry P[%d][%d]=%f at position %d is not equal to P[%d][%d]=%f at position %d\n", 
		       i, j,  externalAAMatrix[i * 20 + j], (i * 20 + j),
		       j, i,  externalAAMatrix[j * 20 + i], (j * 20 + i));
		exit(-1);
	      }
	  }
      }

  acc = 0.0;

  for(i = 400; i < 420; i++)    
    acc += externalAAMatrix[i];         

  if((acc > 1.0 + 1.0E-6) || (acc <  1.0 - 1.0E-6))
    {
      printf("Base frequencies in user-defined AA substitution matrix do not sum to 1.0\n");
      printf("the sum is %1.80f\n", acc);
      exit(-1);
    }
}




void parseSecondaryStructure(tree *tr, analdef *adef, int sites)
{
  if(adef->useSecondaryStructure)
    {
      FILE *f = myfopen(secondaryStructureFileName, "rb");

      int
	i,
	k,
	countCharacters = 0,
	ch,
	*characters,
	**brackets,
	opening,
	closing,
	depth,
	numberOfSymbols,
	numSecondaryColumns;      

      unsigned char bracketTypes[4][2] = {{'(', ')'}, {'<', '>'},  {'[', ']'},  {'{', '}'}};          

      numberOfSymbols = 4;     

      tr->secondaryStructureInput = (char*)rax_malloc(sizeof(char) * sites);

      while((ch = fgetc(f)) != EOF)
	{
	  if(ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '{' || ch == '}' || ch == '.')
	    countCharacters++;
	  else
	    {
	      if(!whitechar(ch))
		{
		  printf("Secondary Structure file %s contains character %c at position %d\n", secondaryStructureFileName, ch, countCharacters + 1);
		  printf("Allowed Characters are \"( ) < > [ ] { } \" and \".\" \n");
		  errorExit(-1);
		}
	    }
	}
      
      if(countCharacters != sites)
	{
	  printf("Error: Alignment length is: %d, secondary structure file has length %d\n", sites, countCharacters);
	  errorExit(-1);
	}
    
      characters = (int*)rax_malloc(sizeof(int) * countCharacters); 

      brackets = (int **)rax_malloc(sizeof(int*) * numberOfSymbols);
      
      for(k = 0; k < numberOfSymbols; k++)	  
	brackets[k]   = (int*)rax_calloc(countCharacters, sizeof(int));

      rewind(f);

      countCharacters = 0;
      while((ch = fgetc(f)) != EOF)
	{
	  if(!whitechar(ch)) 
	    {
	      tr->secondaryStructureInput[countCharacters] = ch;
	      characters[countCharacters++] = ch;
	    }
	}
      
      assert(countCharacters == sites);

      for(k = 0; k < numberOfSymbols; k++)
	{
	  for(i = 0, opening = 0, closing = 0, depth = 0; i < countCharacters; i++)
	    {
	      if((characters[i] == bracketTypes[k][0] || characters[i] == bracketTypes[k][1]) && 
		 (tr->extendedDataVector[i+1] == AA_DATA || tr->extendedDataVector[i+1] == BINARY_DATA ||
		  tr->extendedDataVector[i+1] == GENERIC_32 || tr->extendedDataVector[i+1] == GENERIC_64))
		{
		  printf("Secondary Structure only for DNA character positions \n");
		  printf("I am at position %d of the secondary structure file and this is not part of a DNA partition\n", i+1);
		  errorExit(-1);
		}
	      
	      if(characters[i] == bracketTypes[k][0])
		{	      
		  depth++;
		  /*printf("%d %d\n", depth, i);*/
		  brackets[k][i] = depth;
		  opening++;
		}
	      if(characters[i] == bracketTypes[k][1])
		{	  
		  brackets[k][i] = depth; 
		  /*printf("%d %d\n", depth, i);  */
		  depth--;	
		  
		  closing++;
		}	  	  	  
	      
	      if(closing > opening)
		{
		  printf("at position %d there is a closing bracket too much\n", i+1);
		  errorExit(-1);
		}
	    }	

	  if(depth != 0)
	    {
	      printf("Problem: Depth: %d\n", depth);
	      printf("Your secondary structure file may be missing a closing or opening paraenthesis!\n");
	    }
	  assert(depth == 0);
	  
	  if(countCharacters != sites)
	    {
	      printf("Problem: sec chars: %d sites: %d\n",countCharacters, sites);
	      printf("The number of sites in the alignment does not match the length of the secondary structure file\n");
	    }
	  assert(countCharacters == sites);
	
      
	  if(closing != opening)
	    {
	      printf("Number of opening brackets %d should be equal to number of closing brackets %d\n", opening, closing);
	      errorExit(-1);
	    }
	}
      
      for(i = 0, numSecondaryColumns = 0; i < countCharacters; i++)
	{
	  int checkSum = 0;

	  for(k = 0; k < numberOfSymbols; k++)
	    {
	      if(brackets[k][i] > 0)
		{
		  checkSum++;
		  
		  switch(tr->secondaryStructureModel)
		    {
		    case SEC_16:
		    case SEC_16_A:
		    case SEC_16_B:
		    case SEC_16_C:
		    case SEC_16_D:
		    case SEC_16_E:
		    case SEC_16_F:
		    case SEC_16_I:
		    case SEC_16_J:
		    case SEC_16_K:
		      tr->extendedDataVector[i+1] = SECONDARY_DATA;
		      break;
		    case SEC_6_A:
		    case SEC_6_B:
		    case SEC_6_C:
		    case SEC_6_D:
		    case SEC_6_E:
		      tr->extendedDataVector[i+1] = SECONDARY_DATA_6;
		      break;
		    case SEC_7_A:
		    case SEC_7_B:
		    case SEC_7_C:
		    case SEC_7_D:
		    case SEC_7_E:
		    case SEC_7_F:
		      tr->extendedDataVector[i+1] = SECONDARY_DATA_7;
		      break;
		    default:
		      assert(0);
		    }
		 
		  numSecondaryColumns++;
		}
	    }
	  assert(checkSum <= 1);
	}
      
      assert(numSecondaryColumns % 2 == 0);
      
      /*printf("Number of secondary columns: %d merged columns: %d\n", numSecondaryColumns, numSecondaryColumns / 2);*/

      tr->numberOfSecondaryColumns = numSecondaryColumns;
      if(numSecondaryColumns > 0)
	{
	  int model = tr->NumberOfModels;
	  int countPairs;
	  pInfo *partBuffer = (pInfo*)rax_malloc(sizeof(pInfo) * tr->NumberOfModels);

	  for(i = 1; i <= sites; i++)
	    {
	      for(k = 0; k < numberOfSymbols; k++)
		{
		  if(brackets[k][i-1] > 0)
		    tr->model[i] = model;
		}

	    }

	  /* now make a copy of partition data */

	 
	  for(i = 0; i < tr->NumberOfModels; i++)
	    {
	      partBuffer[i].partitionName = (char*)rax_malloc((strlen(tr->extendedPartitionData[i].partitionName) + 1) * sizeof(char));
	      strcpy(partBuffer[i].partitionName, tr->extendedPartitionData[i].partitionName);
	      strcpy(partBuffer[i].proteinSubstitutionFileName, tr->extendedPartitionData[i].proteinSubstitutionFileName);
	      strcpy(partBuffer[i].ascFileName, tr->extendedPartitionData[i].ascFileName);
	      partBuffer[i].dataType =  tr->extendedPartitionData[i].dataType;
	      partBuffer[i].protModels =  tr->extendedPartitionData[i].protModels;
	      partBuffer[i].usePredefinedProtFreqs =  tr->extendedPartitionData[i].usePredefinedProtFreqs;	      
	      partBuffer[i].optimizeBaseFrequencies =  tr->extendedPartitionData[i].optimizeBaseFrequencies;
	    }

	  for(i = 0; i < tr->NumberOfModels; i++)
	    rax_free(tr->extendedPartitionData[i].partitionName);
	  rax_free(tr->extendedPartitionData);
	 
	  tr->extendedPartitionData = (pInfo*)rax_malloc(sizeof(pInfo) * (tr->NumberOfModels + 1));
	  
	  for(i = 0; i < tr->NumberOfModels; i++)
	    {
	      tr->extendedPartitionData[i].partitionName = (char*)rax_malloc((strlen(partBuffer[i].partitionName) + 1) * sizeof(char));
	      strcpy(tr->extendedPartitionData[i].partitionName, partBuffer[i].partitionName);
	      strcpy(tr->extendedPartitionData[i].proteinSubstitutionFileName, partBuffer[i].proteinSubstitutionFileName);
	      strcpy(tr->extendedPartitionData[i].ascFileName, partBuffer[i].ascFileName);
	      tr->extendedPartitionData[i].dataType =  partBuffer[i].dataType;
	      tr->extendedPartitionData[i].protModels= partBuffer[i].protModels;
	      tr->extendedPartitionData[i].usePredefinedProtFreqs=  partBuffer[i].usePredefinedProtFreqs;
	      tr->extendedPartitionData[i].optimizeBaseFrequencies =  partBuffer[i].optimizeBaseFrequencies;
	      rax_free(partBuffer[i].partitionName);
	    }
	  rax_free(partBuffer);

	  tr->extendedPartitionData[i].partitionName = (char*)rax_malloc(64 * sizeof(char));

	  switch(tr->secondaryStructureModel)
	    {
	    case SEC_16:
	    case SEC_16_A:
	    case SEC_16_B:
	    case SEC_16_C:
	    case SEC_16_D:
	    case SEC_16_E:
	    case SEC_16_F:
	    case SEC_16_I:
	    case SEC_16_J:
	    case SEC_16_K:
	      strcpy(tr->extendedPartitionData[i].partitionName, "SECONDARY STRUCTURE 16 STATE MODEL");
	      tr->extendedPartitionData[i].dataType = SECONDARY_DATA;
	      break;
	    case SEC_6_A:
	    case SEC_6_B:
	    case SEC_6_C:
	    case SEC_6_D:
	    case SEC_6_E:
	      strcpy(tr->extendedPartitionData[i].partitionName, "SECONDARY STRUCTURE 6 STATE MODEL");
	      tr->extendedPartitionData[i].dataType = SECONDARY_DATA_6;
	      break;
	    case SEC_7_A:
	    case SEC_7_B:
	    case SEC_7_C:
	    case SEC_7_D:
	    case SEC_7_E:
	    case SEC_7_F:
	      strcpy(tr->extendedPartitionData[i].partitionName, "SECONDARY STRUCTURE 7 STATE MODEL");
	      tr->extendedPartitionData[i].dataType = SECONDARY_DATA_7;
	      break;
	    default:
	      assert(0);
	    }

	  tr->extendedPartitionData[i].protModels= -1;
	  tr->extendedPartitionData[i].usePredefinedProtFreqs =  FALSE;	 

	  tr->NumberOfModels++;	 
	  
	  if(adef->perGeneBranchLengths)
	    {
	      if(tr->NumberOfModels > NUM_BRANCHES)
		{
		  printf("You are trying to use %d partitioned models for an individual per-gene branch length estimate.\n", tr->NumberOfModels);
		  printf("Currently only %d are allowed to improve efficiency.\n", NUM_BRANCHES);
		  printf("Note that the number of partitions has automatically been incremented by one to accommodate secondary structure models\n");
		  printf("\n");
		  printf("In order to change this please replace the line \"#define NUM_BRANCHES   %d\" in file \"axml.h\" \n", NUM_BRANCHES);
		  printf("by \"#define NUM_BRANCHES   %d\" and then re-compile RAxML.\n", tr->NumberOfModels);
		  exit(-1);
		}
	      else
		{
		  tr->multiBranch = 1;
		  tr->numBranches = tr->NumberOfModels;
		}
	    }
	  
	  assert(countCharacters == sites);

	  tr->secondaryStructurePairs = (int*)rax_malloc(sizeof(int) * countCharacters);
	  for(i = 0; i < countCharacters; i++)
	    tr->secondaryStructurePairs[i] = -1;
	  /*
	    for(i = 0; i < countCharacters; i++)
	    printf("%d", brackets[i]);
	    printf("\n");
	  */
	  countPairs = 0;

	  for(k = 0; k < numberOfSymbols; k++)
	    {
	      i = 0;
	      
	  
	      while(i < countCharacters)
		{
		  int 
		    j = i,
		    bracket = -1,
		    openBracket,
		    closeBracket;
		  
		  while(j < countCharacters && ((bracket = brackets[k][j]) == 0))
		    {
		      i++;
		      j++;
		    }

		  assert(bracket >= 0);

		  if(j == countCharacters)
		    {
		      assert(bracket == 0);
		      break;
		    }
	    
		  openBracket = j;
		  j++;
		  
		  while(bracket != brackets[k][j] && j < countCharacters)
		    j++;
		  assert(j < countCharacters);
		  closeBracket = j;
		  
		  assert(closeBracket < countCharacters && openBracket < countCharacters);

		  assert(brackets[k][closeBracket] > 0 && brackets[k][openBracket] > 0);
		  
		  /*printf("%d %d %d\n", openBracket, closeBracket, bracket);*/
		  brackets[k][closeBracket] = 0;
		  brackets[k][openBracket]  = 0;	      	      
		  countPairs++;
		  
		  tr->secondaryStructurePairs[closeBracket] = openBracket;
		  tr->secondaryStructurePairs[openBracket] = closeBracket;
		}

	      assert(i == countCharacters);
	    }
	     
	  assert(countPairs == numSecondaryColumns / 2);

	  
	  /*for(i = 0; i < countCharacters; i++)
	    printf("%d ", tr->secondaryStructurePairs[i]);
	    printf("\n");*/
	  

	  adef->useMultipleModel = TRUE;

	}

      
      for(k = 0; k < numberOfSymbols; k++)	  
	rax_free(brackets[k]);
      rax_free(brackets);
      rax_free(characters);

      fclose(f);
    }
}
Browse the archive

https://github.com/stamatak/standard-RAxML