/* @author: joao lopes @workplace: Reading University @date: 12th May 2009 */ #ifndef INTERFACE_H_ #define INTERFACE_H_ #include #include #include #include #include #include "mylib.h" #define MAXSSTATS 15 //number of possible used summary statistics #define MAXSSTATS_M 6 //number of possible used summary statistics for Microssatelites #define MAXSSTATS_S 9 //number of possible used summary statistics for Sequence Data #define COALEVNT 3 //different types of events in the Genetic Trees #define MAXDATA 250000000 //maximum number of data (nparam+nstats)*nsim to be analysed #define NPOP 2 #define LOCMAXCHAR 10 #define MAXMODEL 5 #define NPLOIDY 2 #define MAXCHAR 256 //max number of char of the pops names #define MAXNPRIOR 10000 //maximum number of parameter values to be stored // struct used to save the parameters of a prior distribution struct prior{ int type; //Distribution of the prior double *p; //parameters of the distribution }; // struct used to save the migration weights matrix struct migweights{ int type; //Distribution of the prior double ***m; //migweights per npop per ntev per npop }; // struct used to save the information about a gene tree struct data{ int npop; //number of populations int nloc; //number of loci int **nsamp; //pointer to a 2d-array(population, number of samples) int *tsamp; //total number of samples per loci int ***freq; //frequency of the diferent haplotypes by loci by pop int *ldna; //array that gives the number of all the diferent haplotypes by loci int *Nmax; //maximum number of different alleles/haplotypes //only used in microssatelites analysis int **valM; //all the diferent haplotypes by loci //only used in dna sequence analysis int *lsites; //array(number of sites of the dna sequency (number of mutations), loci) char ***valS; //2d-array(all the diferent haplotypes, loci) }; // struct used to save the parameters to build a gene tree struct params{ int npop; //number of populations int niter; //number of iterations int nevt; //number of events int nrSTR; //number of linked STR's loci double gent; //number of generation time int ntop; //number of possible topologies double *ploidy; //heriditage scalar per locus char *type; //DNA type int nloc; //number of loci int **nsamp; //pointer to 2d array: population vs number of samples double *mu; //pointer to array with mutation rate in diferente loci double *rec; //pointer to array with recombination rate in different loci int topol; //topology int *seq; //pointer to array with information about split population events double *tev; //pointer to array with events' times double **psize; //2d array: populations' Ne size vs number of events + 1 double **mig; //2d array: migration rates vs number of events }; /* Start of the ABC program. Simulates coordenated points (summary_statistics, parameters) to be used in an Aproximate Bayesian Computation model to estimate the true values of a genealogical tree (sequence data). @param input_prs - .prs input filename @param input_ssz - .ssz input filename @param input_sst - .sst input filename @param output - .dat output filename @param printIt - print or not the .len and .frq files of every genetic tree (0-don't print; 1-print) @param printMut - print or not the .mut (0-don't print; 1-print) @param printRec - print or not the .rec (0-don't print; 1-print) */ int abc(char *input_prs,char *input_ssz,char *input_sst, char* output,int printIt,int printMut,int printRec); /* This function samples values from the priors and stores them Defined in samplePriors.c @param pm - pointer to the parameters to be used in the simulation @param outline1 - place to store all the first part of the data @param out_mut - pointer to output file .mut @param printMut - indicate if the mutation rates are going to be printed or not @param foundSTR - check if STR's are present in the study @param foundSNP - check if SNP's are present in the study @param ltype - DNA type per loci */ int sampPriors(struct params *pm,char *outline1,FILE *out_mut,FILE *out_rec,int printMut,int printRec,int foundSTR,int foundSNP,char *ltype); /* This function calulates the summary statistics from simulated data and stores them Defined in summStats.c @param data - struture with the informations about a genealogical tree @param lsstats - list of the used sstats (0-absent;1-present) @param outp - string that will store the summary statistics @param foundSTR - check if STR's are present in the study @param foundSNP - check if SNP's are present in the study @param ltype - DNA type per loci */ void summStats(struct data *data,int *lsstats,char *outp,int foundSTR,int foundSNP, char*ltype); /* This function simulats the genetic data Defined in geneticTree.c @param pm - parameters used to build a geneology tree @param data - data from a geneology tree @param printit - use to print or not the informations to a separate file @param citer - current iteration @param path - path where the output files are going to be store in */ int sampLikelihood(struct params *pm, struct data *data,int printit,long citer, char *path); /* Frees the memory allocated in the first iteration Defined in abc.c @param nloc number of loci @param npop number of populations @param nevt number of events */ void freetree(int nloc,int npop,int nevt); /* This function gets an sample file (.pop) and creates a table file (.len) @param input - input filename @param output - output filename */ int createFreqTab(char *intput,char *output); /* This function gets an IMa input file and creates a table file (.len) @param input - input filename @param output - output filename */ int createFreqTab2(char *intput,char *output); /* This function gets a GenePop input file and creates a table file (.len) @param input - input filename @param output - output filename */ int createFreqTab3(char *intput,char *output); /* This function gets a Nexus file (.nex) and creates a table file (.len) @param input - input filename @param output - output filename */ int createFreqTab4(char *intput,char *output); /* This funtion uses a freq_tab_length file and creates a output file which contains the summary statistics of the given file and its sample sizes @param input filename (*.len) @param input filename (*.sst) @param output filename (*.trg AND *.szz) @return an integer to check for errors */ int maketarget(char *input_len,char *input_sst,char *output); /* This program prints out a proportion, specified by tolerance, of the simulated data with an ABC approach which are closer in Euclidian distance to the target summary statistics. It will also create a file which will contain the simulated populational tree parameters (only) in the first 10000 lines of the data. These can then be used to build a posterior distributions with which we can compare prior distributions @arg filename with simulated data @arg filename with target summary statistics (summary statistics from our "real" data) @arg filename of the output of the program @arg number of parameters @arg number of summstats @arg tolerance of analysis (between 0 and 1) */ int firstpass(char *input,char *target,char *output,int nparam,int nsstas,double tol); /* This file creates a .prs file. @param outname - name of the output file @param niter - number of simulations to run @param genet - generation time of the populations species @param npop - number of population @param nloc - number of loci @param lplo - herederiter scalar per locus @param ltype - type of DNA data per locus @param pr_top - prior struct - topology @param pr_Ne - list of prior struct (size=npop*2-1) - pop size @param pr_tev - list of prior struct (size=npop-1) - tev @param pr_mig - list of prior struct (size=2*npop-2) - mig @param pr_mut - prior struct - mutSTR @param pr_mut - prior struct - mutSNP @param pr_rec - prior struct - recSTR @param pr_rec - prior struct - recSNP */ int makeprior(char *output, int niter,int genet,int npop,int nloc, //1sr line double *lplo, //2nd line char *ltype, //3th line struct prior pr_top, //4th line struct prior *pr_Ne, //5th line struct prior *pr_tev, //6th line struct prior *pr_mig, //7th line struct prior pr_mutSTR, //8th line struct prior pr_mutSNP, //9th line struct prior pr_recSTR, //10th line struct prior pr_recSNP, //11th line struct migweights migw); //12th line /* This file creates a .sst file. @param outname - name of the output file @param lsstats - list of the used sstats (0 - don't use; 1 - use) */ int makestats(char *output,int *lsstats); /* This funtion takes a .len file and creates a sample populations for it @param filename of .len file @param filename of the output of the function @return an integer to check for errors */ int makepop(char *input, char *output); /* It creates an ABC .len file given an IM input file. @param input filename @param output filename @return a integer to check for errors */ int convertToABC(char *input,char *output); /* This function joins files together @param ninp number of files to join @param linp list of the input filenames to join @param out output filename @return an integer to check for errors */ int joindata(int ninp, char *linp[],char *outp); static const int RecIter = 50000, //stepsize of iteration when the program records information RecLine = 500000, //record step per lines analysed DnaSizeM = 100, //size of microssatellite dna DnaSizeS = 10; //size of dna sequence int Exepathsize; //size of the path to the executable char *Exepath; //path to the executable struct prior P_top, //prior of topology P_mutM, //hiperprior of mutation rate Microsatelites P_mutS, //hiperprior of mutation rate Sequence data P_recM, //hiperprior of recombination rate Microsatelites P_recS, //hiperprior of recombination rate Sequence data *P_t, //priors of time events **P_mig, //prior of migration **P_psize; //priors of Ne by events struct migweights M_migw; //migration weights matrix #endif /*INTERFACE_H_*/