/* @author: joao lopes @workplace: Reading University @date: 1st May 2009 NBB - based on Mark's genparmf.c and genparmfS.c */ #include "abc.h" /* This function calculates the number of topologies according to the npop @param npop - numer of populations @result number of topologies */ int notop(int npop); /* This function frees the memory used for the DNA sequence data simulations @param nloc - number of loci @param na - number of diferente sequencies @param val - all the diferente sequencies by loci @param ltype - DNA type per loci */ void freeMem(int nloc, int *na,char ***val,char * ltype); /* Start of the ABC program. Simulates coordenated points (summary_statistics, parameters) to be used in an Aproximate Bayesian Computation model to estimate the true values of a genealogical tree. @arg .prs input filename @arg .ssz input filename @arg .sst input filename @arg .dat output filename @arg print or not the .len and .frq files of every genetic tree (0-don't print; 1-print) @arg print or not the .mut (0-don't print; 1-print) @arg print or not the .rec (0-don't print; 1-print) */ int main(int argc, char* argv[]){ int csamp,cpop,cpop2,cloc,cevt,i,cSTR, //iterators foundSTR, //check if STR's are present foundSNP, //check if SNP's are present nrSTR, //number of linked STR's npop, //nr. populations nloc, //nr. loci nevt, //nr. time events nstats_aux, //auxiliar to get the nr. of summary statistics used nstats, //nr. of summary statistics used nparams, //nr. of parameters used printIt, //print or not the information about frequency of haplotyes, sizes of haplotypes, number of diferent sizes, number of loci printMut, //print or not to a file the mutation rate of each locus printRec, //print or not to a file the recombination rate of each locus cut, //auxiliar to work around strings homesize, //size of the path from where the program is being run outsize, //size of output filename started, //auxiliar to help print lsstats2 into the file lsstats[MAXSSTATS], //list of the sstats (0 - absent; 1 - present) *sampleSTR; //sample sizes of the linked STR's double ploidySTR; //ploidy of the linked STR's long citer, //iterator niter; //nr. iterations char type, //type of analysis *lsstats2[MAXSSTATS] = {"H","varL","k_M","curL","sH_M","NmH","pi","S","k_S","sH_S","avMFS","sdMFS","NmS","privS","S(1)"}, *home, //path where the program is being run *path, //path to which the output files are going to be store in *outline1, //stores parameters informations *outline2, //stores summstats informations *name_dat, //output filename *name_inf, //output filename *name_mut, //output filename *name_rec; //output filename FILE *inp_prs, //pntr to .prs *inp_ssz, //pntr to .ssz *inp_sst, //pntr to .ssz *out_mut, //pntr to .mut *out_rec, //pntr to .rec *out_dat, //pntr to .dat *out_inf; //pntr to .txt time_t startClock, //time_t when the program starts endClock; //time_t when the program ends struct params pm; //struct that stores the parameter set values struct data data; //struct that stores the data of a genetic tree const struct tm *startTime, //struct time when the program starts *endTime; //struct time when the program ends if(!(argc == 8)) printerr("needs .prs file, .ssz file, .sst file, output filename (no extension), printIt, printMut, printRec"); time( &startClock ); // Get time in seconds startTime = localtime( &startClock ); // Convert time to struct tm form inp_prs = fopen(argv[1],"r"); //input .prs if(inp_prs == NULL) printerr("cannot open .prs file"); inp_ssz = fopen(argv[2],"r"); //input .ssz if(inp_ssz == NULL) printerr("cannot open .ssz file"); inp_sst = fopen(argv[3],"r"); //input .sst if(inp_sst == NULL) printerr("cannot open .sst file"); /*get the path from where the progam is being run*/ homesize = strlen(argv[0])+ 5; home = (char *)malloc(homesize*sizeof(char)); strcpy(home,argv[0]); for(cut=-1,i=homesize-1 ; home[i]!='/' && home[i]!='\\' && i >= 0 ; cut++,i--){ home[i]='\0'; } home = realloc(home,(homesize-cut)*sizeof(char)); /*get the path to output files*/ outsize = strlen(argv[4]) + 5; path = (char *)malloc(outsize*sizeof(char)); strcpy(path,argv[4]); for(cut=-1,i=outsize-1 ; path[i]!='/' && path[i]!='\\' && i >= 0 ; cut++,i--){ path[i]='\0'; } path = realloc(path,(outsize-cut)*sizeof(char)); name_dat = (char *)malloc(outsize*sizeof(char)); strcpy(name_dat,argv[4]); out_dat = fopen(strcat(name_dat,".dat"),"w"); //output .dat free(name_dat); if(out_dat == NULL) printerr("cannot create .dat file"); name_inf = (char *)malloc(outsize*sizeof(char)); strcpy(name_inf,argv[4]); out_inf = fopen(strcat(name_inf,".txt"),"w"); //output info.txt if(out_inf == NULL) printerr("cannot create .txt file"); free(name_inf); printIt = atoi(argv[5]); //value of printIt printMut = atoi(argv[6]); //value of printMut printRec = atoi(argv[7]); //value of printRec fscanf(inp_prs,"%d %lf %d %d",&pm.niter,&pm.gent,&pm.npop,&pm.nloc); pm.nevt = pm.npop - 1; pm.ntop = notop(pm.npop); npop = pm.npop; nloc = pm.nloc; niter = pm.niter; nevt = pm.nevt; if(npop>1){ pm.mig = (double **)malloc(nevt*sizeof(double*)); pm.tev = (double *)malloc(nevt*sizeof(double)); } pm.mu = (double *)malloc(nloc*sizeof(double)); pm.rec = (double *)malloc(nloc*sizeof(double)); pm.ploidy = (double *)malloc(nloc*sizeof(double)); pm.type = (char *)malloc((nloc+1)*sizeof(char)); pm.nsamp = (int **)malloc(nloc*sizeof(int *)); for(cloc=0;cloc1){ //applied to each population and to the populations pooled together nstats_aux = 0; if(foundSTR) for(i=0 ; i 2){ fprintf(out_inf,"Populational tree topology:\n"); //uniform distribution along the number of different topologies if(P_top.type == 0) fprintf(out_inf,"- Prior distribution (Topol): uniform(0,%d)\n",pm.ntop); //read values from file else if(P_top.type == 1){ P_top.p = (double*)malloc(sizeof(double)); fscanf(inp_prs,"%lf ",&P_top.p[0]); fprintf(out_inf,"- Fixed Topology: %.0lf\n",P_top.p[0]); } //discriminate the join events else if(P_top.type == 2){ P_top.p = (double*)malloc(2*nevt*sizeof(double)); for(i=0 ; i<2*nevt ; i++){ fscanf(inp_prs,"%lf ",&P_top.p[i]); } fprintf(out_inf,"- Fixed Topology: ("); for(i=0 ; i<2*nevt ; i++){ fprintf(out_inf,"%.0lf",P_top.p[i]); if(i%2==0) fprintf(out_inf,","); else if(i!=2*nevt-1) fprintf(out_inf,")("); } fprintf(out_inf,")\n"); } //uniform distribution along the number of different topologies (with specific marker) else if(P_top.type == 3){ P_top.p = (double*)malloc(sizeof(double)); fscanf(inp_prs,"%lf ",&P_top.p[0]); fprintf(out_inf,"- Prior distribution (Topol): uniform(0,%d) [with Model marker:%.0lf]\n",pm.ntop,P_top.p[0]); } //read values from file (with specific marker) else if(P_top.type == 4){ P_top.p = (double*)malloc(2*sizeof(double)); fscanf(inp_prs,"%lf ",&P_top.p[0]); fscanf(inp_prs,"%lf ",&P_top.p[1]); fprintf(out_inf,"- Fixed Topology: %.0lf [with Model marker:%.0lf]\n",P_top.p[0],P_top.p[1]); } //discriminate the join events (with specific marker) else if(P_top.type == 5){ P_top.p = (double*)malloc((1+(2*nevt))*sizeof(double)); for(i=0 ; i<2*nevt ; i++){ fscanf(inp_prs,"%lf ",&P_top.p[i]); } fscanf(inp_prs,"%lf ",&P_top.p[2*nevt]); fprintf(out_inf,"- Fixed Topology: ("); for(i=0 ; i<2*nevt ; i++){ fprintf(out_inf,"%.0lf",P_top.p[i]); if(i%2==0) fprintf(out_inf,","); else if(i!=2*nevt-1) fprintf(out_inf,")("); } fprintf(out_inf,") [with Model marker:%.0lf]\n",P_top.p[2*nevt]); } else printerr("wrong topology prior type"); } if(npop<=2){ fprintf(out_inf,"Populational tree topology:\n"); if(P_top.type == 0) fprintf(out_inf,"- Single topology\n"); else if(P_top.type == 3){ P_top.p = (double*)malloc(sizeof(double)); fscanf(inp_prs,"%lf ",&P_top.p[0]); fprintf(out_inf,"- Single topology [with Model marker:%.0lf]\n",P_top.p[0]); } else if(P_top.type == 1 || P_top.type == 2 || P_top.type == 4 || P_top.type == 5) printerr("with 1 or 2 pops only topology prior type 0 or 3 can be used"); else printerr("wrong topology prior type"); } /*Reads the Populations'Ne priors*/ P_psize = (struct prior **)malloc((nevt+1)*sizeof(struct prior *)); fprintf(out_inf,"Efective population size:\n"); for(cevt=0;cevt<=nevt;++cevt){ P_psize[cevt] = (struct prior *)malloc(npop*sizeof(struct prior)); for(cpop=0;cpop 0) break; } } if(npop>1){ /*Reads the events'time priors*/ P_t = (struct prior *)malloc(nevt*sizeof(struct prior)); fprintf(out_inf,"Time events:\n"); for(cevt=0;cevt0){ fprintf(out_inf,"- Prior distribution (tev%d): tev%d + uniform(",cevt+1,cevt); for(i=0;i<2;++i){ fscanf(inp_prs,"%lf",&P_t[cevt].p[i]); if(i==1) fprintf(out_inf,"%.0lf)\n",P_t[cevt].p[i]); else fprintf(out_inf,"%.0lf,",P_t[cevt].p[i]); } } else{ fprintf(out_inf,"- Prior distribution (tev%d): uniform(",cevt+1); for(i=0;i<2;++i){ fscanf(inp_prs,"%lf",&P_t[cevt].p[i]); if(i==1) fprintf(out_inf,"%.0lf)\n",P_t[cevt].p[i]); else fprintf(out_inf,"%.0lf,",P_t[cevt].p[i]); } } } //use generalized gamma else if(P_t[cevt].type == 2){ P_t[cevt].p = (double*)malloc(4*sizeof(double)); if(cevt>0){ fprintf(out_inf,"- Prior distribution (tev%d): tev%d + generalized gamma(",cevt+1,cevt); for(i=0;i<4;++i){ fscanf(inp_prs,"%lf",&P_t[cevt].p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_t[cevt].p[i]); else fprintf(out_inf,"%g,",P_t[cevt].p[i]); } } else{ fprintf(out_inf,"- Prior distribution (tev%d): generalized gamma(",cevt+1); for(i=0;i<4;++i){ fscanf(inp_prs,"%lf",&P_t[cevt].p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_t[cevt].p[i]); else fprintf(out_inf,"%g,",P_t[cevt].p[i]); } } } //use uniform distribution (all tev) else if(P_t[cevt].type == 3){ P_t[cevt].p = (double*)malloc(2*sizeof(double)); fprintf(out_inf,"- Prior distribution (all tev): uniform("); for(i=0;i<2;++i){ fscanf(inp_prs,"%lf",&P_t[cevt].p[i]); if(i==1) fprintf(out_inf,"%.0lf)\n",P_t[cevt].p[i]); else fprintf(out_inf,"%.0lf,",P_t[cevt].p[i]); } if(cevt!=0) printerr("if using tev type 3 prior, define prior only once"); break; } //use generalized gamma (all tev) else if(P_t[cevt].type == 4){ P_t[cevt].p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- Prior distribution (all tev): generalized gamma("); for(i=0;i<4;++i){ fscanf(inp_prs,"%lf",&P_t[cevt].p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_t[cevt].p[i]); else fprintf(out_inf,"%g,",P_t[cevt].p[i]); } if(cevt!=0) printerr("if using tev type 4 prior, define prior only once"); break; } else printerr("wrong tev priors"); } /*Reads the migration priors*/ P_mig = (struct prior **)malloc(nevt*sizeof(struct prior *)); fprintf(out_inf,"Migration rates:\n"); for(cevt=0;cevt 0) break; } } } else{ fprintf(out_inf,"Time events:\n"); fprintf(out_inf,"- No time events -\n"); fprintf(out_inf,"Migration rates:\n"); fprintf(out_inf,"- No migration -\n"); } /*Reads the mutation rates (Microsatelites)*/ fprintf(out_inf,"Mutation rates (Microsatelites):\n"); fscanf(inp_prs,"%d",&P_mutM.type); //no mutation present if(P_mutM.type == 0){ P_mutM.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- No mutation -\n"); } //lognormal distribution else if(P_mutM.type == 1){ P_mutM.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- HiperPrior distribution: lognormal("); for(i=0;i<4;i++){ fscanf(inp_prs,"%lf ",&P_mutM.p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_mutM.p[i]); else fprintf(out_inf,"%g,",P_mutM.p[i]); } } //normal distribution else if(P_mutM.type == 2){ P_mutM.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- HiperPrior distribution: normal("); for(i=0;i<4;i++){ fscanf(inp_prs,"%lf ",&P_mutM.p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_mutM.p[i]); else fprintf(out_inf,"%g,",P_mutM.p[i]); } } else{ printerr("wrong mutation prior type (Microsatelite)"); } /*Reads the mutation rates (Sequence data)*/ fprintf(out_inf,"Mutation rates (Sequence data):\n"); fscanf(inp_prs,"%d",&P_mutS.type); //no mutation present if(P_mutS.type == 0){ P_mutS.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- No mutation -\n"); } //lognormal distribution else if(P_mutS.type == 1){ P_mutS.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- HiperPrior distribution: lognormal("); for(i=0;i<4;i++){ fscanf(inp_prs,"%lf ",&P_mutS.p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_mutS.p[i]); else fprintf(out_inf,"%g,",P_mutS.p[i]); } } //normal distribution else if(P_mutS.type == 2){ P_mutS.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- HiperPrior distribution: normal("); for(i=0;i<4;i++){ fscanf(inp_prs,"%lf ",&P_mutS.p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_mutS.p[i]); else fprintf(out_inf,"%g,",P_mutS.p[i]); } } else{ printerr("wrong mutation prior type (Sequence data)"); } /*Reads the recombination rates (Microsatelites)*/ fprintf(out_inf,"Recombination rates (Microsatelites):\n"); fscanf(inp_prs,"%d",&P_recM.type); //no recombination present if(P_recM.type == 0){ P_recM.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- No recombination -\n"); } //lognormal distribution else if(P_recM.type == 1){ P_recM.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- HiperPrior distribution: lognormal("); for(i=0;i<4;i++){ fscanf(inp_prs,"%lf ",&P_recM.p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_recM.p[i]); else fprintf(out_inf,"%g,",P_recM.p[i]); } } //normal distribution else if(P_recM.type == 2){ P_recM.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- HiperPrior distribution: normal("); for(i=0;i<4;i++){ fscanf(inp_prs,"%lf ",&P_recM.p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_recM.p[i]); else fprintf(out_inf,"%g,",P_recM.p[i]); } } else{ printerr("wrong recombination prior type (Microsatelite"); } /*Reads the recombination rates (Sequence data)*/ fprintf(out_inf,"Recombination rates (Sequence data):\n"); fscanf(inp_prs,"%d",&P_recS.type); //no recombination present if(P_recS.type == 0){ P_recS.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- No recombination -\n"); } //lognormal distribution else if(P_recS.type == 1){ P_recS.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- HiperPrior distribution: lognormal("); for(i=0;i<4;i++){ fscanf(inp_prs,"%lf ",&P_recS.p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_recS.p[i]); else fprintf(out_inf,"%g,",P_recS.p[i]); } } //normal distribution else if(P_recS.type == 2){ P_recS.p = (double*)malloc(4*sizeof(double)); fprintf(out_inf,"- HiperPrior distribution: normal("); for(i=0;i<4;i++){ fscanf(inp_prs,"%lf ",&P_recS.p[i]); if(i==3) fprintf(out_inf,"%g)\n",P_recS.p[i]); else fprintf(out_inf,"%g,",P_recS.p[i]); } } else{ printerr("wrong recombination prior type (Sequence data)"); } /*Reads the migration weights*/ if(npop>2){ fprintf(out_inf,"Migration weights matrix:\n"); fscanf(inp_prs,"%d",&M_migw.type); //no use of migration weights if(M_migw.type == 0){ fprintf(out_inf,"- No migration weights in use -\n"); } //use of migration weights else if(M_migw.type == 1){ if(P_top.type==0 || P_top.type==3) printerr("to use migration weights matrix, topology must be defined (option 1,2,4 or 5)"); M_migw.m = (double***)malloc(npop*sizeof(double**)); for(cpop=0; cpop1){ if(P_top.type==3||P_top.type==4||P_top.type==5) nparams++; if(foundSTR) nparams+=2; if(foundSNP) nparams+=2; if(foundSTR) nparams+=2; if(foundSNP) nparams+=2; nparams+= 1+(npop-1)+(2*npop-1)+(2*npop-2); } else{ if(P_top.type==3) nparams++; if(foundSTR) nparams+=2; if(foundSNP) nparams+=2; if(foundSTR) nparams+=2; if(foundSNP) nparams+=2; nparams+= 1+(2*npop-1); } //check if there are too many nparams+nstats if((nparams+nstats)*niter>MAXDATA) printerr(".dat file would be too big to be analyzed"); /*check if the STR's loci are linked*/ if(foundSTR && P_recM.type != 0){ //join all the STR loci nrSTR = 0; sampleSTR = (int*)malloc(nloc*sizeof(int)); for(cloc=0,i=0;cloc1){ for(cevt=0;cevt 0) break; } } for(cevt=0;cevt 0) break; } } } else fprintf(out_inf,"|Ne1"); fprintf(out_inf,"]\n%d summstats [",nstats); started=0; if(foundSTR) for(i=0;i1){ if(P_t[0].type==1||P_t[0].type==2){ for(cevt=0;cevt0) break; } free(P_mig[cevt]); } free(pm.mig); free(pm.tev); free(P_t); free(P_mig); } if(npop>2){ if(M_migw.type==1){ for(cpop=0; cpop0) break; } free(P_psize[cevt]); free(pm.psize[cevt]); } if(P_top.type!=0) free(P_top.p); free(P_psize); free(pm.psize); free(outline2); free(outline1); free(data.Nmax); free(data.ldna); free(data.tsamp); for(cloc = 0;cloc