/*
	@author:	joao lopes
	@workplace: Reading University
	@date: 		12th May 2009

	NBB - based on Mark's translate.c and translateS.c
*/

#include "abc.h"

/*
	This function calculates the allele number in one population

	@param nhap - total number of alleles
	@param freq - frequency of each allele in the population
	@return the allele number in one population
*/
int calc_nhap(int nhap, int freq[]);

/*
	This function calculates the Shannon's index of one population

	@param nhap  - number of all the haplotypes
	@param nsamp - number of samples of a population
	@param freq  - frequency of haplotypes of a population in a locus
	@param cpop  - current population
	@result Shannon's index of one population
	        (the earlier comment said "number of haplotypes", which
	        contradicts the description above)
*/
double calc_shanon(int nhap, int nsamp, int *freq, int cpop);

/* only used in Microsatellite analysis ********************************************************************/

/*
	This function calculates the heterozygosity in a population

	@param sum  - size of the sample
	@param freq - frequency of a microsatellite size (in number of occurrences)
	@param nhap - number of different microsatellite sizes
	@return the heterozygosity in one population
*/
double calc_het(int sum,int freq[], int nhap);

/*
	This function calculates the variance of allele length in a population

	@param sum  - size of the sample
	@param freq - frequency of a microsatellite size (in number of occurrences)
	@param val  - different sizes of a microsatellite
	@param nhap - number of different microsatellite sizes
	@return variance of allele length in one population
*/
double calc_var(int sum,int freq[],int val[],int nhap);

/*
	This function calculates the kurtosis of allele length in a population

	@param nhap - total number of microsatellite sizes
	@param freq - frequency of a microsatellite size (in number of occurrences)
	@return the kurtosis of allele length in one population
	        (the earlier comment said "allele number", apparently copied
	        from calc_nhap - TODO confirm against the definition)
*/
double calc_curt(int nhap, int *freq);

/*
	This function calculates the Nm estimator using the heterozygosity

	@param Hw - heterozygosity within pop
	@param Ha - heterozygosity among pop
	@result Nm_H
*/
double calc_Nm_H(double Hw,double Ha);

/* only used in Sequence Data analysis *********************************************************************/

/*
	This function is used to calculate the pairwise difference for one population

	@param sum    - number of samples
	@param freq   - frequency of haplotypes of a population in a locus
	@param lsites - size of a haplotype in a locus
	@param val    - all the haplotypes in a locus
	@param nhap   - number of all the haplotypes
	@result pairwise difference of one population
*/
double calc_pi(int sum,int freq[], int lsites, char **val,int nhap);

/*
	Number of different sites in two different haplotypes

	@param n  - number of sites in the haplotype
	@param v1 - one haplotype
	@param v2 - other haplotype
	@result number of different sites in the two haplotypes
*/
int numdiff(int n, char *v1, char *v2);

/*
	This function is used to calculate the number of segregating sites of one population

	@param sum    - number of samples
	@param freq   - frequency of haplotypes of a population in a locus
	@param lsites - size of a haplotype in a locus
	@param val    - all the haplotypes in a locus
	@param nhap   - number of all the haplotypes
	@param lsnp   - SNP in a locus in a population
	@result number of segregating sites of one population
*/
double calc_segsites(int sum,int freq[],int lsites, char **val,int nhap, int *lsnp);

/*
	This function is used to calculate the number of segregating sites of populations pooled together

	@param sum    - number of samples
	@param freq   - frequency of haplotypes in a locus
	@param lsites - size of a haplotype in a locus
	@param val    - all the haplotypes in a locus
	@param nhap   - number of all the haplotypes
	@result number of segregating sites of the pooled populations
*/
double calc_segsites2(int sum,int freq[],int lsites, char **val,int nhap);

/*
	This function calculates the Mutation Frequency Spectrum (MFS)

	@param val    - array with all the haplotypes
	@param freq   - array with the frequency of all the haplotypes
	@param nhap   - number of different haplotypes
	@param nsites - number of sites of a locus
	@param nsamp  - NOTE(review): not documented originally; the definition
	                compares a per-site count against nsamp/2 and replaces it
	                by nsamp minus the count, so this is presumably the
	                sample size - TODO confirm
	@param cloc   - considered locus
	@param cpop   - considered population
	@param mfs    - Mutation Frequency Spectrum (output; written as
	                mfs[cpop][cloc][site] by the definition)
*/
void fillMFS(char **val,int *freq,int nhap,int nsites,int nsamp,int cloc,int cpop,int***mfs);

/*
	This function changes a char '1' or '0' to an int 1 or 0

	@param c1 - char '1' or '0'
	@result 0 when c1 is '0', 1 for any other character
*/
int segtoint(char c1);

/*
	This function calculates the mean of the Mutation Frequency Spectrum (MFS)

	@param nsites - number of sites of a locus
	@param mfs    - MFS
	@param snp    - number of SNP
	@result mean of MFS
*/
double calc_meanMFS(int nsites,int *mfs, int snp);

/*
	This function calculates the stdev of the Mutation Frequency Spectrum (MFS)

	@param nsites - number of sites of a locus
	@param mfs    - MFS
	@param snp    - number of SNP
	@param lsnp   - list of the SNPs
	@param mMFS   - mean of MFS
	@result stdev of MFS
*/
double calc_stdevMFS(int nsites,int *mfs, int snp, int *lsnp, double mMFS);

/*
	This function calculates the Nm estimator using the segregating sites

	@param Sw - no. of segregating sites within pop
	@param Sa - no. of segregating sites among pop
	@result Nm_S
*/
double calc_Nm_S(double Sw,double Sa);

/*
	This function calculates the number of private segregating sites

	@param nsites - number of sites of the current locus
	@param npop   - total number of populations
	@param pop    - current population
	@param loc    - current locus
	@param lsnp   - SNP per site per loci per pop
	@result privateS
*/
double calc_privS(int nsites, int npop, int pop, int loc,int*** lsnp);

/*
	This function calculates the average frequency of private segregating sites

	@param nsites - number of sites of the current locus
	@param npop   - total number of populations
	@param nhap   - NOTE(review): left blank in the original comment;
	                presumably the number of different haplotypes - TODO confirm
	@param pop    - current population
	@param loc    - current locus
	@param lsnp   - SNP per locus per pop per site
	@param freq   - array with the frequency of all the haplotypes
	@param val    - array with all the haplotypes
	@result S_1
*/
double calc_S_1(int
nsites, int npop, int nhap, int pop,int loc,int ***lsnp,int **freq,char **val);
/*
	summStats - judging by its local declarations, computes per-population
	and per-population-pair summary statistics (heterozygosity, variance and
	kurtosis of allele length, allele/haplotype numbers, Shannon's index,
	segregating sites, pairwise differences, MFS mean and stdev, Nm
	estimators, private S, S(1)) from the sample held in data.

	@param data     - sample description; the fields npop, nloc, ldna, nsamp
	                  and freq are always read, valM only when foundSTR is
	                  non-zero, valS and lsites only when foundSNP is non-zero
	@param lsstats  - NOTE(review): presumably the list of requested summary
	                  statistics; its use is not visible here - TODO confirm
	@param outp     - NOTE(review): presumably the output file name; its use
	                  is not visible here - TODO confirm
	@param foundSTR - non-zero when microsatellite data is present
	@param foundSNP - non-zero when sequence data is present
	@param ltype    - NOTE(review): presumably the data type of each locus;
	                  its use is not visible here - TODO confirm

	The number of population pairs is taken from combinations(npop,2), and
	maxDna is seeded from ldna[0] before the loop over the remaining loci.

	NOTE(review): the text of this function and of every helper definition
	that follows it is damaged. Loop headers and comparisons have lost the
	characters between a less-than sign and the next greater-than sign
	(for example "for(cloc=1 ; cloc1){" and "csite0) return"), and the
	original line breaks are gone, so each line comment now swallows the
	rest of its physical line. Restore this file from version control before
	building; do not try to repair the statements by hand.
*/
void summStats(struct data *data,int *lsstats,char *outp,int foundSTR,int foundSNP,char *ltype){ int cloc,cdna,cpop,cpop2,csite,ic, //iterators maxDna, //maximum number of different haplotypes of all loci npop, //number of population nloc, //number of loci tpop, //iterator for two populations tsamp, //size of sample of two populations skip, //number of loci with 1 or less samples npair, //number of population pairs *tfreq, //sum of the frequencies of diferent microsatellites size of two populations *ldna, //number of diferent alleles sizes by loci **nsamp, //size of sample per locus per population ***freq; //all the frequencies of the diferent microsatellite sizes by pop and loci /*only used in microssatellites data*/ int **valM; //all the diferent microsatellite sizes by loci double sstats1_t, //heterozygosity for all populations pooled together *sstats1, //heterozygosity difference in each population *sstats2, //variance of allele length in each population *sstats3, //allele no. in each population *sstats4, //curtosis of allele lengths in each population *sstats5, //Shanon's index in each population *sstats6, //Nm_H in each population *sstats1_2, //heterozygosity for each pair of population pooled together *sstats2_2, //variance of allele length for each pair of population pooled together *sstats3_2, //allele no.for each pair of population pooled togther *sstats4_2, //curtosis of allele lengths for each pair of population pooled together *sstats5_2; //Shanon's index for each pair of population pooled togther /*only used in sequence data*/ int *lsites, //size of the haplotypes by loci ***mfs, //mutation frequency spectrum (MFS) by loci by pop **snp, //number of SNPs by loci by pop ***lsnp; //SNP per sites per loci per pop double sstats8_t, //segregate sites no. for all populations pooled together *sstats7, //pairwise difference in each population *sstats8, //segregate sites no. 
in each population *sstats9, //haplotypes no. in each population *sstats10, //Shanon's index in each population *sstats11, //mean of mutation frequency spectrum (MFS) in each population *sstats12, //stdev of mutation frequency spectrum (MFS) in each population *sstats13, //Nm_S in each population *sstats14, //private S in each population *sstats15, //S(1) in each population *sstats7_2, //pairwise difference for each pair of population pooled together *sstats8_2, //segregate sites no. for each pair of population pooled together *sstats9_2, //haplotypes no. for each pair of population pooled togther *sstats10_2, //Shanon's index for each pair of population pooled togther **mMFS; //mean of MFS by loci by pop char ***valS; //all the diferent haplotypes by loci npop = data->npop; nloc = data->nloc; ldna = data->ldna; nsamp = data->nsamp; freq = data->freq; npair = combinations(npop,2); if(foundSTR) valM = data->valM; if(foundSNP){ valS = data->valS; lsites = data->lsites; } /*allocate memory*/ maxDna = ldna[0]; for(cloc=1 ; cloc1){ tsamp=0; for(cdna=0;cdna1){ for(cpop = 0;cpop 1){ tsamp=0; for(cdna=0;cdna1){ for(cpop = 0;cpop 1){ for(cpop = 0;cpop 1){ for(cpop = 0;cpop 0) ++ndna; return ndna; } //end of calc_nhap double calc_shanon(int nhap, int nsamp, int *freq, int cpop){ int chap; //iterators double shan, p; shan = 0.0; for(chap=0;chap 0 in this pop */ for(chap=0;chap 0) break; ip = chap; for(csite=0;csite 0 in this pop */ for(chap=0;chap 0) break; ip = chap; for(csite=0;csitensamp/2) aux=nsamp-aux; mfs[cpop][cloc][csite]=aux; } } //end of fillMFS int segtoint(char c1){ if(c1=='0') return 0; else return 1; } //end of segtoint double calc_meanMFS(int nsites,int *mfs,int snp){ int csite; //iterators double aver; //average of MFS aver=0; for(csite=0; csite0) return aver/(double)snp; else return 0; } //end of calc_meanMFS double calc_stdevMFS(int nsites,int *mfs,int snp,int *lsnp,double mMFS){ int csite; //iterators double stdev; //stdev of MFS if(snp>1){ stdev = 0; 
for(csite=0; csite