Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision e6a0334445b5755bb52a0d2209120ee4e251e7b4 authored by Joao Sollari Lopes on 13 November 2017, 18:32:56 UTC, committed by Joao Sollari Lopes on 13 November 2017, 18:32:56 UTC
First commit
0 parent
  • Files
  • Changes
  • af1b4d8
  • /
  • src
  • /
  • firstpass.c
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:e6a0334445b5755bb52a0d2209120ee4e251e7b4
directory badge
swh:1:dir:a574a6b710136758a352d12c14eaa4a6f2cefb24
content badge
swh:1:cnt:171d70efa98e9d2d3e00033cf940d7dc71c38185

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
firstpass.c
/*
	@author:	joao lopes
	@workplace: Reading University
	@date: 		1th May 2009
	 
	NBB - based on Mark's firstpass.c
*/

#include "firstpass.h"

/*
	This program prints out a proportion, specified by tolerance, of the simulated data with an ABC approach
	which are closer in Euclidian distance to the target summary statistics. It will also create a file
	which will contain the simulated populational tree parameters (only) in the first 10000 lines of the data.
	These can then be used to build a posterior distributions with which we can compare prior distributions

	@arg filename with simulated data
	@arg filename with target summary statistics (summary statistics from our "real" data)
	@arg filename of the output of the program
	@arg number of parameters
	@arg number of summstats
	@arg tolerance of analysis (between 0 and 1)
*/
int main(int argc, char *argv[]){
	int cstat,cparam,i,			//iterators
		outsize,				//size of the output filename
		nclosest,				//tolerance*number of simulations
		npriors,				//no. sets to choose from the random prior set values (0-10000)
		nstats,					//no. of total summary statistics calculated
		nparam,					//no. of parameters of the populacional model
		*index,					//stores the original position of sorted elements of the summary statistics
		gap,					//auxiliar to build .pri
		isim;					//index of a simulation
	long cline,csim,			//iterators
		 nsim;					//no. of simulations of genetic trees from the same populational tree
	double  d1,					//auxiliar to read text
			tol,				//tolerance value
			*trgStat,			//list of the target summary statistics
			**iparams,			//stores the value of each parameter for the first points (0-10000)
			**stats,			//stores the value of each summary statistics for each tree
			**params,			//stores the value of each parameter for each tree
			**fstats,			//stores the value of each summary statistics for the closest points
			**fparams,			//stores the value of each parameter for the closest points
			*ave,				//stores the average of each list of different summary statistic
			*sdev,				//stores the standard deviation of each list of different summary statistic
			d3,d4,d5,d6,		//auxiliar values to get the average and the variance of each list of each summary statistic
			*dist;				//stores the distance between each simulated data and the target data
	char *name_dat,				//filename with the simulated data
		 *name_trg,				//filename with the target summary statistics
		 *name_inf,             //filename of the report file
         *name_rej,				//filename of the first output (nearest simulations in a given tolerance)
		 *name_pri;				//filename of the second output (first 10000 simulations)
	FILE *out_dat,				//pntr to the simulated data
		 *out_trg,				//pntr to the target summary statitstics
         *out_inf,              //pntr to the report file
		 *out_rej,				//pntr to the file with first output (nearest simulations in a given tolerance)
		 *out_pri;				//pntr to the file with second output (first 10000 simulations)
    time_t startClock,          //time_t when the program starts
           endClock;            //time_t when the program ends
    const struct tm *startTime, //struct time when the program starts
                    *endTime;   //struct time when the program ends

 	if(!(argc==7))
		printerr("needs .dat file, .trg file, output filename (no extension), nparams, nsstats, tolerance.");

	name_dat = argv[1];
	name_trg = argv[2];
	sscanf(argv[4],"%d",&nparam);
	sscanf(argv[5],"%d",&nstats);
	sscanf(argv[6],"%lf",&tol);

	out_dat = fopen(name_dat,"r");						//open out_dat
	if(out_dat == NULL)
		printerr("cannot open .dat file");

	out_trg = fopen(name_trg,"r");						//open out_trg
	if(out_trg == NULL)
		printerr("cannot open .trg file");

	name_rej = malloc((strlen(argv[3])+5)*sizeof(char));
	strcpy(name_rej,argv[3]);
	out_rej = fopen(strcat(name_rej,".rej"),"w");		//open out_rej
	if(out_rej == NULL)
		printerr("cannot create .rej file");
	free(name_rej);

	name_pri = malloc((strlen(argv[3])+5)*sizeof(char));
	strcpy(name_pri,argv[3]);
	out_pri = fopen(strcat(name_pri,".pri"),"w");		//open out_pri
	if(out_trg == NULL)
		printerr("cannot create .pri file");
	free(name_pri);

    name_inf = malloc((strlen(argv[3])+9)*sizeof(char));
    strcpy(name_inf,argv[3]);
    out_inf = fopen(strcat(name_inf,"_rej.txt"),"w");       //open out_inf
    if(out_inf == NULL)
        printerr("cannot create .txt file");
    free(name_inf); 
    
    /*fill trgStat[]*/
	trgStat = (double *)malloc(nstats*sizeof(double));
	for(cstat=0;cstat<nstats;++cstat)
		fscanf(out_trg,"%lf",&trgStat[cstat]);

    /*get the starting time*/
    time( &startClock );                    // Get time in seconds
    startTime = localtime( &startClock );   // Convert time to struct tm form 

	printf("\n\n--\nCheck integrity of .dat:\n");
	printf("\nStart checking...\n");

	/*Verify if the data file is well build and get the number of geneological trees simulated*/
	cline=0;
	while(1){
		for(cparam=0 ; cparam<nparam ; ++cparam){
			i = fscanf(out_dat,"%lf",&d3);
			if(i == EOF){
				if(cparam==0)
					break;
				else
					printerr(".dat file ended prematurely");
			}
		}
		if(cparam==0)
			break;
		for(cstat=0;cstat<nstats;++cstat){
			i = fscanf(out_dat,"%lf",&d3);
			if(i == EOF)
				printerr(".dat file ended prematurely");
		}
		++cline;
		if(cline!=0 && cline%RecLine==0){
			printf("\nLines analysed so far: %d\n",cline);
		}
	}
	printf("\n...total lines analysed: %d\n\n--\n",cline);
	nsim = cline;

	if(nsim*(nparam+nstats)>MAXDATA)
		printerr(".dat file is too big to be analysed");

    nclosest = (int) nsim*tol;

    /*writting first part of output to .txt file*/
    fprintf(out_inf,"Rejection - Mark Beaumont & Joao Lopes\n\n");
    fprintf(out_inf," last updated 01/05/09\n\n");
    fprintf(out_inf,"INPUT AND STARTING INFORMATION\n");
    fprintf(out_inf,"-------------------------------\n");
    fprintf(out_inf,"Data file:                %s\n",argv[1]);
    fprintf(out_inf,"Target file:              %s\n",argv[2]);
    fprintf(out_inf,"\nNumber of used parameters:  %d\n",nparam);
    fprintf(out_inf,"Number of used summstats: %d\n",nstats);
    fprintf(out_inf,"\nOUTPUT FILES:\n");
    fprintf(out_inf,"-------------------------------\n");
    fprintf(out_inf,"Output file:            %s.rej\n",argv[3]);
    fprintf(out_inf,"Priors file:            %s.pri\n",argv[3]);
    fprintf(out_inf,"\nRUN INFORMATION\n");
    fprintf(out_inf,"---------------------------\n");
    fprintf(out_inf,"Number of points analysed: %d\n",nsim);
    fprintf(out_inf,"relative tolerance: %g\n",tol);
    fprintf(out_inf,"absolute tolerance: %d\n\n",nclosest);
    fprintf(out_inf,"             average      stdev        target \n\n");

	printf("Getting the average and the stdev of the sumStats\n");
    printf("           average    stdev      target \n\n");

	/*fill stats[][]*/
	stats = (double **)malloc(nstats*sizeof(double *));
	for(cstat=0;cstat<nstats;++cstat)
		stats[cstat] = (double *)malloc(nsim*sizeof(double));
	fseek(out_dat,0,SEEK_SET);
	for(csim=0;csim<nsim;++csim){
		for(cparam=0 ; cparam<nparam ; ++cparam){
			fscanf(out_dat,"%lf",&d1);
		}
		for(cstat=0 ; cstat<nstats ; ++cstat){
			fscanf(out_dat,"%lf",&stats[cstat][csim]);
		}
	}

	/*get the average and the variance of each summStat and normalize it ((X-average)/sdev)*/
	/*normalize target summary statistics*/
	ave = (double *)malloc(nstats*sizeof(double));
	sdev = (double *)malloc(nstats*sizeof(double));
	for(cstat=0;cstat<nstats;++cstat){
		mom(stats[cstat],nsim,&ave[cstat],&sdev[cstat],&d3,&d4,&d5,&d6);
		printf("\nsumstat%d  %lf %lf    %lf\n",cstat+1,ave[cstat],sdev[cstat],trgStat[cstat]);
		fprintf(out_inf,"\nsumstat%d  %10.4g %10.4g    %10.4g\n",cstat+1,ave[cstat],sdev[cstat],trgStat[cstat]);
        for(csim=0;csim<nsim;++csim){
			if(sdev[cstat]==0)
				stats[cstat][csim]=0;
			else
				stats[cstat][csim] = (stats[cstat][csim] - ave[cstat])/sdev[cstat];
		}
		if(sdev[cstat]==0)
			trgStat[cstat]=0;
		else
			trgStat[cstat] = (trgStat[cstat] - ave[cstat])/sdev[cstat];
	}

	printf("\n--\n");
	printf("Ordering and choosing the closest simulated sumStats (%d) to the \"real\" sumStats:\n",nclosest);
	printf("\nObtaining the points distances...");
	fflush(NULL);

	/*get the Euclidian distance between each simulated summary statistics and the target(empirical) summary statistics */
	dist = (double *)malloc(nsim*sizeof(double));
	for(csim=0;csim<nsim;++csim){
		dist[csim] = 0;
		for(cstat=0;cstat<nstats;++cstat){
			dist[csim] += (stats[cstat][csim] - trgStat[cstat])*(stats[cstat][csim] - trgStat[cstat]);
		}
		dist[csim] = sqrt(dist[csim]);
	}

	/*sort the elements position of dist to index*/
	index = (int *)malloc(nsim*sizeof(int));
	dsorti('a',nsim,dist,index);

    /*free memory 1*/
    free(trgStat);
    free(dist);

	printf(" done");
	printf("\nAllocate the points information in the memory...");
	fflush(NULL);

	/*get the sumstats of the closest points*/
	fstats = (double **)malloc(nstats*sizeof(double *));
	for(cstat=0;cstat<nstats;++cstat)
		fstats[cstat] = (double *)malloc(nclosest*sizeof(double));
	for(csim=0 ; csim<nclosest ; ++csim){
		isim = index[csim];
		for(cstat=0;cstat<nstats;++cstat)
			fstats[cstat][csim] = stats[cstat][isim]*sdev[cstat] + ave[cstat];
	}

	/*free memory 2*/
	for(cstat=0;cstat<nstats;++cstat)
		free(stats[cstat]);
	free(stats);

	/*fill params[][]*/
	params = (double **)malloc(nparam*sizeof(double *));
	for(cparam=0;cparam<nparam;++cparam)
		params[cparam] = (double *)malloc(nsim*sizeof(double));
	fseek(out_dat,0,SEEK_SET);
	for(csim=0;csim<nsim;++csim){
		for(cparam=0 ; cparam<nparam ; ++cparam){
			fscanf(out_dat,"%lf",&params[cparam][csim]);
		}
		for(cstat=0 ; cstat<nstats ; ++cstat){
			fscanf(out_dat,"%lf",&d1);
		}
	}

	/*fill iparam[][]*/
	if(MAXNPRIOR > nsim)
		npriors = nsim;
	else
		npriors = MAXNPRIOR;
	iparams = (double **)malloc(nparam*sizeof(double *));
	for(cparam=0;cparam<nparam;++cparam)
		iparams[cparam] = (double *)malloc(npriors*sizeof(double));
	fseek(out_dat,0,SEEK_SET);

	gap=nsim/npriors;
	for(i=0,csim=0; csim<npriors; ++csim){
		i = csim*gap;
		for(cparam=0 ; cparam<nparam ; ++cparam){
			iparams[cparam][csim] = params[cparam][i];
		}
	}

	/*prints a random sample of the parameters to a file*/
	for(csim=0;csim<npriors;++csim){
		for(cparam=0;cparam<nparam;++cparam)
			fprintf(out_pri,"%g ",iparams[cparam][csim]);
		fprintf(out_pri,"\n");
	}

	/*free memory 3*/
	for(cparam=0;cparam<nparam;++cparam)
		free(iparams[cparam]);
	free(iparams);

    printf(" done");
    printf("\nGetting the closest points to the target file...");
    fflush(NULL);

	/*get the params of the closest points*/
	fparams = (double **)malloc(nparam*sizeof(double *));
	for(cparam=0;cparam<nparam;++cparam)
		fparams[cparam] = (double *)malloc(nclosest*sizeof(double));
	for(csim=0 ; csim<nclosest ; ++csim){
		isim = index[csim];
		for(cparam=0;cparam<nparam;++cparam)
			fparams[cparam][csim] = params[cparam][isim];
	}

	/*free memory 4*/
	free(ave);
	free(sdev);
	free(index);
	for(cparam=0;cparam<nparam;++cparam)
		free(params[cparam]);
	free(params);

	printf(" done");
	printf("\nPrinting the points to a file...");
	fflush(NULL);

	/*prints the nearest series of simulated parameters to the empirical data to a file*/
	for(csim=0 ; csim<nclosest ; ++csim){
		for(cparam=0;cparam<nparam;++cparam){
			fprintf(out_rej,"%g ", fparams[cparam][csim]);
		}
		for(cstat=0;cstat<nstats;++cstat){
			fprintf(out_rej,"%g ",fstats[cstat][csim]);
		}
		fprintf(out_rej,"\n");
	}

	printf(" done\n\n--\n\n");

	/*free memory 5*/
	for(cstat=0;cstat<nstats;++cstat)
		free(fstats[cstat]);
	free(fstats);
	for(cparam=0;cparam<nparam;++cparam)
		free(fparams[cparam]);
	free(fparams);

    /*get ending time and save it to .txt file*/
    fprintf(out_inf,"\nStarting date: %s",asctime(startTime));
    time( &endClock );                  // Get time in seconds
    endTime = localtime( &endClock );   // Convert time to struct tm form 
    fprintf(out_inf,"Ending date:   %s\n",asctime(endTime));
    fprintf(out_inf,"\nEND OF OUTPUT\n");

	/*close files*/
	fclose(out_dat);
	fclose(out_trg);
    fclose(out_inf);
	fclose(out_rej);
	fclose(out_pri);

}	//end of main
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2026, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API