Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision e6a0334445b5755bb52a0d2209120ee4e251e7b4 authored by Joao Sollari Lopes on 13 November 2017, 18:32:56 UTC, committed by Joao Sollari Lopes on 13 November 2017, 18:32:56 UTC
First commit
0 parent
  • Files
  • Changes
  • af1b4d8
  • /
  • src
  • /
  • maketarget.c
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:e6a0334445b5755bb52a0d2209120ee4e251e7b4
directory badge
swh:1:dir:a574a6b710136758a352d12c14eaa4a6f2cefb24
content badge
swh:1:cnt:9d5bce9ceccfc4009595a2b9255181e1f9edc589

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
maketarget.c
/*
	@author:	joao lopes
	@workplace: Reading University
	@date: 		1th May 2009
	
	NBB - based on Mark's make_target.c and make_targetS.c
*/

#include "maketarget.h"

/*
	This funtion uses a freq_tab_length file and creates a output file which contains
	the summary statistics of the given file and its sample sizes
	
	@arg input filename (*.len)
	@arg input filename (*.sst)
	@arg output filename (*.trg AND *.szz)
*/
int main(int argc, char* argv[]){
	int cloc, cpop, cpop2, cdna, i,	//iterators
		dump,					//variable just to put the information from data that isn't necessary, dump
		started,				//auxiliar to help print sstats to the .txt file
		foundSTR,				//check if STR's are present
		foundSNP,				//check if SNP's are present
		nstats_aux,				//auxiliar to calculate the number of summ stats
		nstats,					//number of summ stats
		outsize,				//size of output file name
		sum,					//sample size in one population
		nloc,					//number of loci
		npop,					//number of populations
		ndna,					//number of all the diferent haplotypes
		lsstats[MAXSSTATS];	//list of used summary statistics
	char c1,					//auxiliar
		 *ltype,				//DNA type per loci
		 *outline,				//store the target data summary statistic
		 *name_inf,             //filename of the report file
 		 *name_trg,				//name of the output.tfq file
		 *lsstats2[MAXSSTATS] = {"H","varL","k_M","curL","sH_M","NmH","pi","S","k_S","sH_S","avMFS","sdMFS","NmS","privS","S(1)"},
		 *name_ssz;				//name of the output.out file
	FILE *inp_len,				//pntr to data file
		 *inp_sst,				//pntr to data file
         *out_inf,              //pntr to the report file
		 *out_trg,				//pntr to target output file
		 *out_ssz;				//pntr to samp_size output file
	time_t startClock,			//time_t when the program starts
		   endClock;	 		//time_t when the program ends
	struct data block,			//target data
				*data;			//pntr to target data	
	const struct tm *startTime,	//struct time when the program starts
					*endTime;	//struct time when the program ends

	if(argc != 4)
		printerr("needs .len file, .sst file, output filename (no extension)");
		
	inp_len = fopen(argv[1],"r");						//input filename
	if(inp_len == NULL)
		printerr("cannot open .len file");
	inp_sst = fopen(argv[2],"r");			//input .sst
	if(inp_sst == NULL)
		printerr("cannot open .sst file");
	outsize = strlen(argv[3]) + 5;
	name_trg = (char *)malloc(outsize*sizeof(char));
	name_ssz = (char *)malloc(outsize*sizeof(char));
    name_inf = (char *)malloc((outsize+4)*sizeof(char));
	strcpy(name_trg,argv[3]);
	strcpy(name_ssz,argv[3]);
    strcpy(name_inf,argv[3]);
	out_trg = fopen(strcat(name_trg,".trg"),"w");	//out_trg filename
	if(out_trg == NULL)
		printerr("cannot create .trg file");
	out_ssz = fopen(strcat(name_ssz,".ssz"),"w");	//out_sps filename
	if(out_ssz == NULL)
		printerr("cannot create .ssz file");
    out_inf = fopen(strcat(name_inf,"_trg.txt"),"w");       //open out_inf
    if(out_inf == NULL)
        printerr("cannot create .txt file");

	for(i=0; i<MAXSSTATS; i++){
		while(getc(inp_sst)!='#');	
		if(EOF == fscanf(inp_sst,"%d",&lsstats[i]))
			printerr("error reading .sst file");
	}
	fclose(inp_sst);
	/*check for errors when choosing summary statistics*/
	if(npop==1 && (lsstats[5] || lsstats[12] || lsstats[13] || lsstats[14]))
		printerr("Following sstats can only be chosen with 2 or more pops: Nm_H; Nm_S; privS; S(1)");
	//if we want to use the Nm_H we have to calculate H
	if(lsstats[5]== 1)
		lsstats[0] = 1;
	//if we want to use the sdMFS we have to calculate mMFS
	if(lsstats[11]== 1)
		lsstats[10] = 1;
	//if we want to use the mMFS, Nm_S, privateS or S(1) we have to calculate S
	if(lsstats[10]== 1 || lsstats[12]== 1 || lsstats[13]== 1 || lsstats[14]== 1 )
		lsstats[7] = 1;

	time( &startClock );   					// Get time in seconds
	startTime = localtime( &startClock );  	// Convert time to struct tm form 
	
	/*if # (commentary) run to end of line*/
	c1=getc(inp_len);
	while(c1 == '#'){
		while(!isendline(c1 = getc(inp_len)));
		while(isspace(c1=getc(inp_len))||isendline(c1)||c1=='\t');
	}
	ungetc(c1,inp_len);
	
	fscanf(inp_len,"%d %d",&npop,&nloc);

	ltype = (char*)malloc(1+nloc*sizeof(char));
	foundSTR = 0;
	foundSNP = 0;	
	for(cloc=0 ; cloc<nloc ; cloc++){
		fscanf(inp_len,"%s",&ltype[cloc]);
		if(ltype[cloc]=='m'||ltype[cloc]=='M')
			foundSTR = 1;
		if(ltype[cloc]=='s'||ltype[cloc]=='S')
			foundSNP = 1;
	}

	data = &block;			
	data->npop = npop;
	data->nloc = nloc;
	data->ldna = (int *)malloc(nloc*sizeof(int));
	data->tsamp = (int *)malloc(nloc*sizeof(int));		
	data->nsamp = (int **)malloc(nloc*sizeof(int *));
	data->freq = (int ***)malloc(nloc*sizeof(int **));

	if(foundSTR)
		data->valM = (int **)malloc(nloc*sizeof(int *));
	if(foundSNP){
		data->lsites = (int *)malloc(nloc*sizeof(int));
		data->valS = (char ***)malloc(nloc*sizeof(char **));
	}
		
	/*run through every locus*/
	for(cloc=0;cloc<nloc;++cloc){
		data->freq[cloc] = (int **)malloc(npop*sizeof(int *));
		data->nsamp[cloc] = (int *)malloc(npop*sizeof(int));
		data->tsamp[cloc]=0;
		if(foundSNP)
			data->lsites[cloc]=0;
		if(ltype[cloc]=='m'||ltype[cloc]=='M'){
			fscanf(inp_len,"%d",&ndna);
			data->ldna[cloc] = ndna;
			data->valM[cloc] = (int *)malloc(ndna*sizeof(int));
			
			//run through every population
			for(cpop=0;cpop<npop;++cpop){
				sum = 0;
				data->freq[cloc][cpop] = (int *)malloc(ndna*sizeof(int));
				for(cdna=0;cdna<ndna;++cdna){
					fscanf(inp_len,"%d",&(data->freq[cloc][cpop][cdna]));
					sum += data->freq[cloc][cpop][cdna];
				}
				data->nsamp[cloc][cpop] = sum;
				data->tsamp[cloc]+=sum;	
				fprintf(out_ssz,"%d ",sum);		//out_sps: sum						
			}
			fprintf(out_ssz,"\n");

			for(cdna=0;cdna<ndna;++cdna)
				fscanf(inp_len,"%d",&(data->valM[cloc][cdna]));
		}
		else{
			fscanf(inp_len,"%d",&ndna);
			data->ldna[cloc] = ndna;
			data->valS[cloc] = (char **)malloc(ndna*sizeof(char *));
		
			//run through every population
			for(cpop=0;cpop<npop;++cpop){
				sum = 0;
				data->freq[cloc][cpop] = (int *)malloc(ndna*sizeof(int));
				for(cdna=0;cdna<ndna;++cdna){
					fscanf(inp_len,"%d",&(data->freq[cloc][cpop][cdna]));
					sum += data->freq[cloc][cpop][cdna];
				}
				data->nsamp[cloc][cpop] = sum;
				data->tsamp[cloc]+=sum;	
				fprintf(out_ssz,"%d ",sum);		//NT- out_sps: sum						
			}
			fprintf(out_ssz,"\n");
						
			for(cdna=0;cdna<ndna;++cdna)
				fscanf(inp_len,"%d",&dump); 
	
			fscanf(inp_len,"%d",&(data->lsites[cloc]));
			if(data->lsites[cloc]!=0)
				for(cdna=0;cdna<ndna;++cdna){
					data->valS[cloc][cdna] = (char *)malloc((data->lsites[cloc]+1)*sizeof(char));
					fscanf(inp_len,"%d %s ",&dump,data->valS[cloc][cdna]);
				}
			else
				printerr("use of Sequence data locus without information, this should be discarded");
		}
	}

	/*counting the number of summary statistics*/
	if(npop>1){
		//applied to each population and to the populations pooled together
		nstats_aux = 0;
		if(foundSTR)
			for(i=0 ; i<MAXSSTATS_M ; i++){
				if((i==0||i==1||i==2||i==3||i==4) && lsstats[i])
					nstats_aux++;
			}
		if(foundSNP)
			for(i=MAXSSTATS_M ; i<MAXSSTATS ; i++){
				if((i==6||i==7||i==8||i==9) && lsstats[i])
					nstats_aux++;
			}
		nstats = npop*nstats_aux+(combinations(npop,2))*nstats_aux;	
		//applied only to each population
		nstats_aux = 0;
		if(foundSTR)
			for(i=0 ; i<MAXSSTATS_M ; i++){
				if((i==5) && lsstats[i])
					nstats_aux++;
			}
		if(foundSNP)
			for(i=MAXSSTATS_M ; i<MAXSSTATS ; i++){
				if((i==10||i==11||i==12||i==13||i==14) && lsstats[i])
					nstats_aux++;
			}
		nstats += npop*nstats_aux;
	}
	else{
		nstats = 0;
		if(foundSTR){
			for(i=0 ; i<MAXSSTATS_M ; i++){
				if(lsstats[i])
					nstats++;
			}
		}
		if(foundSNP){
			for(i=MAXSSTATS_M ; i<MAXSSTATS ; i++){
				if(lsstats[i])
					nstats++;
			}
		}
	}

	/*writting first part of output to .txt file*/
	fprintf(out_inf,"SummData - Mark Beaumont & Joao Lopes\n\n");
	fprintf(out_inf," last updated 01/05/09\n\n");
	fprintf(out_inf,"INPUT AND STARTING INFORMATION\n");
	fprintf(out_inf,"-------------------------------\n");
	fprintf(out_inf,"Table file :            %s\n",argv[1]);
	fprintf(out_inf,"SummaryStatistics file: %s\n\n",argv[2]);
	if(foundSTR && !foundSNP){
		fprintf(out_inf,"Type of genetic data analysed: microssatelites\n\n");
		fprintf(out_inf,"Number of summary statistics:  %d\n",nstats);
		if(lsstats[0])
			fprintf(out_inf," -heterozygosity\n"); 
		if(lsstats[1])
			fprintf(out_inf," -variance of alleles length\n"); 
		if(lsstats[2])
			fprintf(out_inf," -number of alleles\n"); 
		if(lsstats[3])
			fprintf(out_inf," -curtosis of alleles length\n"); 
		if(lsstats[4])
			fprintf(out_inf," -Shanon's index\n"); 
		if(lsstats[5])
			fprintf(out_inf," -Nm estimator based on H\n"); 
		fprintf(out_inf,"\nMutation model used: single-step model\n\n");
	}
	else if(foundSNP && !foundSTR){
		fprintf(out_inf,"Type of genetic data analysed: dna sequency\n\n");
		fprintf(out_inf,"Number of summary statistics:  %d\n",nstats);
		if(lsstats[6])
			fprintf(out_inf," -mean of pairwise differences\n"); 
		if(lsstats[7])
			fprintf(out_inf," -number of segregating sites\n"); 
		if(lsstats[8])
			fprintf(out_inf," -number of haplotypes\n"); 
		if(lsstats[9])
			fprintf(out_inf," -Shanon's index\n"); 
		if(lsstats[10])
			fprintf(out_inf," -mean of MFS\n"); 
		if(lsstats[11])
			fprintf(out_inf," -stdev of MFS\n"); 
		if(lsstats[12])
			fprintf(out_inf," -Nm estimator based on S\n"); 
		if(lsstats[13])
			fprintf(out_inf," -private S\n"); 
		if(lsstats[14])
			fprintf(out_inf," -S(1)\n"); 
		fprintf(out_inf,"\nMutation model used: infinite-segregated-sites model\n\n");
	}
	else{
		fprintf(out_inf,"Type of genetic data analysed: microssatelites\n");
		fprintf(out_inf,"                               dna sequency\n\n");
		fprintf(out_inf,"Number of summary statistics:  %d\n",nstats);
		fprintf(out_inf," Microssatelites:\n");
		if(lsstats[0])
			fprintf(out_inf," -heterozygosity\n"); 
		if(lsstats[1])
			fprintf(out_inf," -variance of alleles length\n"); 
		if(lsstats[2])
			fprintf(out_inf," -number of alleles\n"); 
		if(lsstats[3])
			fprintf(out_inf," -curtosis of alleles length\n"); 
		if(lsstats[4])
			fprintf(out_inf," -Shanon's index\n"); 
		if(lsstats[5])
			fprintf(out_inf," -Nm estimator based on H\n"); 
		fprintf(out_inf," Sequence data:\n");
		if(lsstats[6])
			fprintf(out_inf," -mean of pairwise differences\n"); 
		if(lsstats[7])
			fprintf(out_inf," -number of segregating sites\n"); 
		if(lsstats[8])
			fprintf(out_inf," -number of haplotypes\n"); 
		if(lsstats[9])
			fprintf(out_inf," -Shanon's index\n"); 
		if(lsstats[10])
			fprintf(out_inf," -mean of MFS\n"); 
		if(lsstats[11])
			fprintf(out_inf," -stdev of MFS\n"); 
		if(lsstats[12])
			fprintf(out_inf," -Nm estimator based on S\n"); 
		if(lsstats[13])
			fprintf(out_inf," -private S\n"); 
		if(lsstats[14])
			fprintf(out_inf," -S(1)\n"); 
		fprintf(out_inf,"\nMutation model used: \n");
		fprintf(out_inf,"  Microssatelites: single-step model\n");
		fprintf(out_inf,"  Sequence data: infinite-segregated-sites model\n\n");	
	}
	fprintf(out_inf,"Number of Modern Populations:  %d\n",npop);
	fprintf(out_inf,"Number of Loci analysed:       %d\n",nloc);
	fprintf(out_inf,"Type of the loci analysed:     ");
	for(cloc=0 ; cloc<nloc ; cloc++)
		fprintf(out_inf,"%c ",ltype[cloc]);
	fprintf(out_inf,"\n\nOUTPUT FILES:\n");
	fprintf(out_inf,"-------------------------------\n");
	fprintf(out_inf,"Sample size file:       %s.ssz\n",argv[3]);
	fprintf(out_inf,"Target file:            %s.trg\n",argv[3]);
	fprintf(out_inf,"%d summstats [",nstats);
	started=0;
	if(foundSTR)
		for(i=0;i<MAXSSTATS_M;i++){
			if(lsstats[i])
				for(cpop=0; cpop<npop; cpop++)
					if(started)
						fprintf(out_inf,"|%s%d",lsstats2[i],cpop+1);
					else{
						fprintf(out_inf,"%s%d",lsstats2[i],cpop+1);
						started=1;
					}
					
		}
	if(foundSTR)
		for(i=0;i<MAXSSTATS_M;i++){
			if((i==0||i==1||i==2||i==3||i==4||i==6||i==7||i==8||i==9) && lsstats[i])
				for(cpop = 0;cpop <npop;++cpop){
					for(cpop2 = cpop+1;cpop2 < npop;++cpop2){
						if(started)
							fprintf(out_inf,"|%s%d-%d",lsstats2[i],cpop+1,cpop2+1);
						else{
							fprintf(out_inf,"%s%d-%d",lsstats2[i],cpop+1,cpop2+1);
							started=1;
						}
					
					}
				}
		}
	if(foundSNP)
		for(i=MAXSSTATS_M;i<MAXSSTATS;i++){
			if(lsstats[i])
				for(cpop=0; cpop<npop; cpop++)
					if(started)
						fprintf(out_inf,"|%s%d",lsstats2[i],cpop+1);
					else{
						fprintf(out_inf,"%s%d",lsstats2[i],cpop+1);
						started=1;
					}
					
		}
	if(foundSNP)
		for(i=MAXSSTATS_M;i<MAXSSTATS;i++){
			if((i==0||i==1||i==2||i==3||i==4||i==6||i==7||i==8||i==9) && lsstats[i])
				for(cpop = 0;cpop <npop;++cpop){
					for(cpop2 = cpop+1;cpop2 < npop;++cpop2){
						if(started)
							fprintf(out_inf,"|%s%d-%d",lsstats2[i],cpop+1,cpop2+1);
						else{
							fprintf(out_inf,"%s%d-%d",lsstats2[i],cpop+1,cpop2+1);
							started=1;
						}
					
					}
				}
		}
		
	fprintf(out_inf,"]\n\nRUNTIME:\n");
	fprintf(out_inf,"-------------------------------\n");
	fprintf(out_inf,"Starting date: %s",asctime(startTime));

	
	outline = malloc(nstats*15*sizeof(char));
	
	summStats(data,lsstats,outline,foundSTR,foundSNP,ltype);
	
	fprintf(out_trg,"%s",outline);			//out_trg: outline

	/*get ending time and save it to .txt file*/
	time( &endClock );   				// Get time in seconds
	endTime = localtime( &endClock );  	// Convert time to struct tm form 
	fprintf(out_inf,"Ending date:   %s\n",asctime(endTime));
	fprintf(out_inf,"\nEND OF OUTPUT\n");

	/*free stuff*/
	free(outline);
	free(name_trg);
	free(name_ssz);
    free(name_inf); 
	
	for(cloc=0 ; cloc<nloc ; cloc++){
		if(ltype[cloc]=='M'||ltype[cloc]=='m')
			free(data->valM[cloc]);
		else{
			for(cdna=0; cdna<data->ldna[cloc]; cdna++)
					free(data->valS[cloc][cdna]);
				free(data->valS[cloc]);
		}
	}
	
	if(foundSTR)
		free(data->valM);
	if(foundSNP){
		free(data->valS);
		free(data->lsites);
	}
	
	for(cloc=0 ; cloc<nloc ; cloc++){
		for(cpop=0 ; cpop<npop ; cpop++)
			free(data->freq[cloc][cpop]);
		free(data->freq[cloc]);
		free(data->nsamp[cloc]);
	}
	free(data->freq);
	free(data->nsamp);
	free(data->tsamp);
	free(data->ldna);
	free(ltype);
	
	/*close files*/
	fclose(inp_len);
    fclose(out_inf);
	fclose(out_trg);
	fclose(out_ssz);
	
}//end of main
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2026, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API