/*
 * NOTE(review): the line structure of this file has been lost. The whole
 * program sits on three physical lines, so each `//` comment now swallows the
 * code that follows it on the same line. In addition, several spans that began
 * with a less-than sign appear to have been stripped, taking loop conditions
 * and whole statements with them (see `for(i=0; inpop = npop;`,
 * `for(cloc=0;clocfreq[cloc] = ...` and `for(i=0 ; ivalM[cloc]);`).
 * The text is kept exactly as found; it has to be restored from an intact copy
 * before it can be compiled.
 *
 * What the surviving text of main() shows:
 *   - exactly three arguments are required (argc != 4 calls printerr): the
 *     .len file, the .sst file and an output base name without extension;
 *   - argv[1] and argv[2] are opened for reading; BASE.trg, BASE.ssz and
 *     BASE_trg.txt (BASE = argv[3]) are opened for writing; each failed fopen
 *     calls printerr with a specific message;
 *   - name_trg and name_ssz get strlen(argv[3]) + 5 bytes (4-character suffix
 *     plus terminator), name_inf gets 4 more for the 8-character "_trg.txt";
 *   - for every locus the haplotype count ndna is read from the .len file and
 *     stored in data->ldna; for every population the frequencies are summed
 *     into `sum`, which is stored in data->nsamp, added to data->tsamp and
 *     written to the .ssz file (one text line per locus);
 *   - loci whose ltype is 'm' or 'M' fill data->valM; all other loci fill
 *     data->valS and data->lsites, and a locus with lsites == 0 calls printerr;
 *   - at the end the per-locus arrays, ltype and the data fields are freed and
 *     inp_len, out_inf, out_trg and out_ssz are closed.
 *
 * NOTE(review): in the visible text no malloc or fscanf result is checked.
 * NOTE(review): the visible cleanup neither frees name_trg / name_ssz /
 * name_inf nor closes inp_sst; this may have happened in a stripped span --
 * confirm against the intact source.
 * NOTE(review): printerr is declared elsewhere (presumably maketarget.h) and
 * presumably terminates the program; the code after each call relies on that.
 */
/* @author: joao lopes @workplace: Reading University @date: 1th May 2009 NBB - based on Mark's make_target.c and make_targetS.c */ #include "maketarget.h" /* This function uses a freq_tab_length file and creates an output file which contains the summary statistics of the given file and its sample sizes @arg input filename (*.len) @arg input filename (*.sst) @arg output filename (*.trg AND *.ssz) */ int main(int argc, char* argv[]){ int cloc, cpop, cpop2, cdna, i, //iterators dump, //variable just to put the information from data that isn't necessary, dump started, //auxiliar to help print sstats to the .txt file foundSTR, //check if STR's are present foundSNP, //check if SNP's are present nstats_aux, //auxiliar to calculate the number of summ stats nstats, //number of summ stats outsize, //size of output file name sum, //sample size in one population nloc, //number of loci npop, //number of populations ndna, //number of all the diferent haplotypes lsstats[MAXSSTATS]; //list of used summary statistics char c1, //auxiliar *ltype, //DNA type per loci *outline, //store the target data summary statistic *name_inf, //filename of the report file *name_trg, //name of the output.tfq file *lsstats2[MAXSSTATS] = {"H","varL","k_M","curL","sH_M","NmH","pi","S","k_S","sH_S","avMFS","sdMFS","NmS","privS","S(1)"}, *name_ssz; //name of the output.out file FILE *inp_len, //pntr to data file *inp_sst, //pntr to data file *out_inf, //pntr to the report file *out_trg, //pntr to target output file *out_ssz; //pntr to samp_size output file time_t startClock, //time_t when the program starts endClock; //time_t when the program ends struct data block, //target data *data; //pntr to target data const struct tm *startTime, //struct time when the program starts *endTime; //struct time when the program ends if(argc != 4) printerr("needs .len file, .sst file, output filename (no extension)"); inp_len = fopen(argv[1],"r"); //input filename if(inp_len == NULL) printerr("cannot open .len file"); 
/* main, continued: open the .sst input, build the three output file names and open them, then allocate the data arrays and read the loci. */
inp_sst = fopen(argv[2],"r"); //input .sst if(inp_sst == NULL) printerr("cannot open .sst file"); outsize = strlen(argv[3]) + 5; name_trg = (char *)malloc(outsize*sizeof(char)); name_ssz = (char *)malloc(outsize*sizeof(char)); name_inf = (char *)malloc((outsize+4)*sizeof(char)); strcpy(name_trg,argv[3]); strcpy(name_ssz,argv[3]); strcpy(name_inf,argv[3]); out_trg = fopen(strcat(name_trg,".trg"),"w"); //out_trg filename if(out_trg == NULL) printerr("cannot create .trg file"); out_ssz = fopen(strcat(name_ssz,".ssz"),"w"); //out_sps filename if(out_ssz == NULL) printerr("cannot create .ssz file"); out_inf = fopen(strcat(name_inf,"_trg.txt"),"w"); //open out_inf if(out_inf == NULL) printerr("cannot create .txt file"); for(i=0; inpop = npop; data->nloc = nloc; data->ldna = (int *)malloc(nloc*sizeof(int)); data->tsamp = (int *)malloc(nloc*sizeof(int)); data->nsamp = (int **)malloc(nloc*sizeof(int *)); data->freq = (int ***)malloc(nloc*sizeof(int **)); if(foundSTR) data->valM = (int **)malloc(nloc*sizeof(int *)); if(foundSNP){ data->lsites = (int *)malloc(nloc*sizeof(int)); data->valS = (char ***)malloc(nloc*sizeof(char **)); } /*run through every locus*/ for(cloc=0;clocfreq[cloc] = (int **)malloc(npop*sizeof(int *)); data->nsamp[cloc] = (int *)malloc(npop*sizeof(int)); data->tsamp[cloc]=0; if(foundSNP) data->lsites[cloc]=0; if(ltype[cloc]=='m'||ltype[cloc]=='M'){ fscanf(inp_len,"%d",&ndna); data->ldna[cloc] = ndna; data->valM[cloc] = (int *)malloc(ndna*sizeof(int)); //run through every population for(cpop=0;cpopfreq[cloc][cpop] = (int *)malloc(ndna*sizeof(int)); for(cdna=0;cdnafreq[cloc][cpop][cdna])); sum += data->freq[cloc][cpop][cdna]; } data->nsamp[cloc][cpop] = sum; data->tsamp[cloc]+=sum; fprintf(out_ssz,"%d ",sum); //out_sps: sum } fprintf(out_ssz,"\n"); for(cdna=0;cdnavalM[cloc][cdna])); } else{ fscanf(inp_len,"%d",&ndna); data->ldna[cloc] = ndna; data->valS[cloc] = (char **)malloc(ndna*sizeof(char *)); //run through every population 
/* main, continued: population loop of the non-'m' locus branch, counting of the summary statistics, release of the allocated arrays and closing of the files. */
for(cpop=0;cpopfreq[cloc][cpop] = (int *)malloc(ndna*sizeof(int)); for(cdna=0;cdnafreq[cloc][cpop][cdna])); sum += data->freq[cloc][cpop][cdna]; } data->nsamp[cloc][cpop] = sum; data->tsamp[cloc]+=sum; fprintf(out_ssz,"%d ",sum); //NT- out_sps: sum } fprintf(out_ssz,"\n"); for(cdna=0;cdnalsites[cloc])); if(data->lsites[cloc]!=0) for(cdna=0;cdnavalS[cloc][cdna] = (char *)malloc((data->lsites[cloc]+1)*sizeof(char)); fscanf(inp_len,"%d %s ",&dump,data->valS[cloc][cdna]); } else printerr("use of Sequence data locus without information, this should be discarded"); } } /*counting the number of summary statistics*/ if(npop>1){ //applied to each population and to the populations pooled together nstats_aux = 0; if(foundSTR) for(i=0 ; ivalM[cloc]); else{ for(cdna=0; cdnaldna[cloc]; cdna++) free(data->valS[cloc][cdna]); free(data->valS[cloc]); } } if(foundSTR) free(data->valM); if(foundSNP){ free(data->valS); free(data->lsites); } for(cloc=0 ; clocfreq[cloc][cpop]); free(data->freq[cloc]); free(data->nsamp[cloc]); } free(data->freq); free(data->nsamp); free(data->tsamp); free(data->ldna); free(ltype); /*close files*/ fclose(inp_len); fclose(out_inf); fclose(out_trg); fclose(out_ssz); }//end of main