Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision e6a0334445b5755bb52a0d2209120ee4e251e7b4 authored by Joao Sollari Lopes on 13 November 2017, 18:32:56 UTC, committed by Joao Sollari Lopes on 13 November 2017, 18:32:56 UTC
First commit
0 parent
  • Files
  • Changes
  • af1b4d8
  • /
  • src
  • /
  • pop_convertabc4.c
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:e6a0334445b5755bb52a0d2209120ee4e251e7b4
directory badge
swh:1:dir:a574a6b710136758a352d12c14eaa4a6f2cefb24
content badge
swh:1:cnt:5ab31538473ce08dc4ce00d97e31bf800262a7e3

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
pop_convertabc4.c
/*
	@author:	joao lopes
	@workplace: Reading University
	@date: 		8th May 2009

*/

#include "interface.h"

/*
	It creates an ABC .len file given a Nexus file (.nex)
	
	@arg length format filename
	@arg output filename
*/
int createFreqTab4(char *input,char *output){
	int cloc, cpop, cdna, csamp, csite, i, j,	//iterators
		nr1,					//retrieve info from input file
		nr2,					//retrieve info from input file
		reachEOF,				//check if EOF has been reached
		endSets,				//check if end of Sets has been reached
		endPop,					//check if end of pop has been reached
		firstInd,				//check if using '-' to define populations
		equalDna,				//auxiliar to build the .len file
		outsize,				//size of output file name 
  		npop,					//number of populations
  		nsites,					//number of bases
  		ndna,					//number of different dna data
		nsamp,					//total number of samples
		*lsamp,					//size of sample per pop	
		*id,					//identify the belonging population of each used individual
		 **freq;				//freq of dna data by no_pop by no. of diff dna data
	char c1,					//gets the value of a char temporarly
		 info1[MAXCHAR],		//retrieve info from the input file
		 info2[MAXCHAR],		//retrieve info from the input file
		 aux[MAXCHAR],			//auxiliar
		 *outname,				//name of the output file 
		 *mainseq,				//main sequence of a loci
		 **lpop=NULL,			//list of the populations names
		 **indiv,				//list of samples
		 **valS,				//list of all the diff dna sequencies
		 ***genotS;				//list of all the individual's haplotype per pop per loci 
	FILE *inp,					//pntr to the input file
		 *outp;					//pntr to the output file
	time_t startClock;			//time when the program starts
	const struct tm *startTime;	//struct time when the program starts

	inp = fopen(input,"r");
	if(inp == NULL)
		return 1; //cannot open .nex file
	
	outsize = strlen(output) + 5;
	outname = (char *)malloc(outsize*sizeof(char));
	strcpy(outname,output);
	outp = fopen(strcat(outname,".len"),"w");
	if(outp == NULL)
		return 2; //cannot create output file

	time( &startClock );   					// Get time in seconds
	startTime = localtime( &startClock );  	// Convert time to struct tm form 

	fgets(aux,MAXCHAR,inp);					//#NEXUS
	
	reachEOF=0;
	while(!reachEOF){
		//reads input file
		while(isspace(c1=getc(inp))||isendline(c1)||c1=='\t');
		if(c1 == '[')
			while((c1 = getc(inp))!=']');
		else if(c1==EOF)
			reachEOF=1;
		else
			ungetc(c1,inp);
		if(!reachEOF){
			while(isspace(c1=getc(inp))||isendline(c1)||c1=='\t');
			ungetc(c1,inp);
			fscanf(inp,"%s",aux);
			if(strcmp(aux,"begin")==0||strcmp(aux,"BEGIN")==0||strcmp(aux,"Begin")==0){
			   	fscanf(inp,"%s",aux);
				if(strcmp(aux,"data;")==0||strcmp(aux,"DATA;")==0||strcmp(aux,"Data;")==0){
				   	fscanf(inp,"%s",aux);		//DIMENSIONS
				   	fscanf(inp,"%s",aux);
					i=0; j=0;
				   	while(!isdigit(aux[i]))
				   		i++;
				   	while(isdigit(aux[i+j])){
				   		info1[j]=aux[i+j];
				   		j++;
				   	}
				   	nsamp=atoi(info1);
				   	fscanf(inp,"%s",aux);
					i=0; j=0;
				   	while(!isdigit(aux[i]))
				   		i++;
				   	while(isdigit(aux[i+j])){
				   		info1[j]=aux[i+j];
				   		j++;
				   	}
				   	nsites=atoi(info1);
				   	fscanf(inp,"%s",aux);		//FORMAT
				   	fscanf(inp,"%s",aux);
					i=0; j=0;
				   	while(aux[i]!='=')
				   		i++;
				   	while(isalpha(aux[1+i+j])){
				   		info1[j]=aux[1+i+j];
				   		j++;
				   	}
				   	info1[j]='\0';
				   	if(!(strcmp(info1,"DNA")==0||strcmp(info1,"dna")==0||strcmp(info1,"Dna")==0))
				   		return 3; //program can't deal with specified data
					
					//MATRIX: go through all the individuals
				   	fscanf(inp,"%s",aux);		//MATRIX
					indiv=(char**)malloc(nsamp*sizeof(char*));
				   	for(csamp=0; csamp<nsamp; csamp++)
						indiv[csamp]=(char*)malloc((nsites+1)*sizeof(char));
					csamp=0;
					while(csamp<nsamp){
						while(isspace(c1=getc(inp))||isendline(c1)||c1=='\t');
						if(c1 == '[')
							while((c1 = getc(inp))!=']');
						else	
							ungetc(c1,inp);
						while(isspace(c1=getc(inp))||isendline(c1)||c1=='\t');
						if(c1 == '[')
							ungetc(c1,inp);
						else{
							ungetc(c1,inp);
							fscanf(inp,"%s",aux);
							while(isspace(c1=getc(inp))||c1=='\t');
							ungetc(c1,inp);
							csite=0;
							while(csite<nsites){
								if((c1=getc(inp))=='T'||c1=='A'||c1=='C'||c1=='G'||
								               c1=='t'||c1=='a'||c1=='c'||c1=='g'||c1=='N'||c1=='-'){
									indiv[csamp][csite]=c1;
									csite++;
								}
								else if(isspace(c1)||isendline(c1)||c1=='\t'){
									while(isspace(c1=getc(inp))||c1=='\t');
									ungetc(c1,inp);
								}
								else
									return 4; //problem reading MATRIX
							}
							csamp++;
						}
					}
					while(isspace(c1=getc(inp))||isendline(c1)||c1=='\t');
					ungetc(c1,inp);
					fscanf(inp,"%s",aux);		//;
					fscanf(inp,"%s",aux);		//END;	
				}
				else if(strcmp(aux,"sets;")==0||strcmp(aux,"SETS;")==0||strcmp(aux,"Sets;")==0){
				   	id=(int*)malloc(nsamp*sizeof(int));
				   	for(csamp=0; csamp<nsamp; csamp++)
						id[csamp]=-1;
					//SETS: go through all the populations
				    cpop=0;
				   	endSets=0;
				   	while(!endSets){
				   		fscanf(inp,"%s",aux);
				   		if(strcmp(aux,"end;")==0||strcmp(aux,"END;")==0||strcmp(aux,"End;")==0){
				   			endSets=1;
				   		}
				   		else if(strcmp(aux,"TAXSET")==0||strcmp(aux,"taxset")==0||strcmp(aux,"Taxset")==0){
							lpop=(char**)myAlloc(lpop,(cpop+1)*sizeof(char*));
					   		lpop[cpop]=(char*)malloc(MAXCHAR*sizeof(char));
					   		fscanf(inp,"%s",aux);
							strcpy(lpop[cpop],aux);
							fscanf(inp,"%s",aux);	//=
							//get individuals belonging to pop
							endPop=0;
							while(!endPop){
								fscanf(inp,"%s",aux);
								i=0;
								firstInd=1;
								while(i<strlen(aux)){
									if(aux[i]==';'){
										endPop=1;
										i++;
									}							
									else if(aux[i]=='-'){
										firstInd=0;
										i++;
									}							
									else if(isdigit(aux[i])&&firstInd){
										j=0;
										while(isdigit(aux[i])){
											info1[j]=aux[i];
											i++;
											j++;
										}
										info1[j]='\0';
										nr1=atoi(info1);
										
									}
									else if(isdigit(aux[i])&&(!firstInd)){
										j=0;
										while(isdigit(aux[i])){
											info2[j]=aux[i];
											i++;
											j++;
										}
										info2[j]='\0';
										nr2=atoi(info2);
									}
									else
										return 5; //problem reading TAXSET
								}
								if(firstInd)
									id[nr1-1]=cpop;
								else{
									if(nr1>=nr2)
										return 6; //problem defining TAXSET: n1>=n2
									for(i=nr1;i<=nr2;i++)
										id[i-1]=cpop;
								}
							}
					   		cpop++;
				   		}
				   		else
				   			return 7; //unrecognized specifier after BEGIN SETS (use TAXSET)
				   	}
				   	npop=cpop;
				}
				else
					return 8; //unrecognized specifier after BEGIN (use DATA or SETS)
			}
			else if(aux[0]=='['){
				for(i=strlen(aux)-1; i>=0; i--)
					ungetc(aux[i],inp);
			}
			else
				return 9; //unrecognized command (only BEGIN accepted)
		}
	}
	
	
	//fill lsamp and genotS
	genotS = (char***)malloc(npop*sizeof(char**));
	lsamp = (int *)malloc(npop*sizeof(int));
	for(cpop=0; cpop<npop; cpop++){
		lsamp[cpop]=0;
		genotS[cpop] = (char**)malloc(nsamp*sizeof(char*));
		for(csamp=0; csamp<nsamp; csamp++){
			genotS[cpop][csamp] = (char *)malloc(nsites*sizeof(char));
		}
	}
	for(csamp=0; csamp<nsamp; csamp++){
		if(id[csamp]!=-1){
			for(csite=0; csite<nsites; csite++){
				genotS[id[csamp]][lsamp[id[csamp]]][csite] = indiv[csamp][csite];
			}
			lsamp[id[csamp]]++;
		}
	}
		
	//allocate memory to freq, IDsort, valS and ldna
	freq = (int **)malloc(npop*sizeof(int *));
	valS = (char **)malloc(nsamp*sizeof(char *));
	for(cpop=0;cpop<npop;++cpop){
		freq[cpop] = (int *)malloc(nsamp*sizeof(int));
	}	

	//fill freq[cpop][cdna] and valS[cdna][csite]
	ndna=0;
	for(cpop=0;cpop<npop;cpop++){
		//run through every individual		
		for(csamp=0;csamp<lsamp[cpop];++csamp){
			//check if the current haplotype is already listed
			for(cdna=0;cdna<ndna;++cdna){
				equalDna = 1;						
				for(csite=0 ; csite<nsites ; csite++){
					if(genotS[cpop][csamp][csite] != valS[cdna][csite]){
						if(valS[cdna][csite]=='N'||valS[cdna][csite]=='-'){
							valS[cdna][csite] = genotS[cpop][csamp][csite];
						}
						else if(genotS[cpop][csamp][csite]=='A'||genotS[cpop][csamp][csite]=='a'||
								genotS[cpop][csamp][csite]=='T'||genotS[cpop][csamp][csite]=='t'||
						  		genotS[cpop][csamp][csite]=='G'||genotS[cpop][csamp][csite]=='g'||
						  		genotS[cpop][csamp][csite]=='C'||genotS[cpop][csamp][csite]=='c'){
						  	equalDna = 0;
							break;
						}
					}
				}
				if(equalDna){
					++freq[cpop][cdna];	//increase freq of current haplotype
					break;
				}
			}
			//current haplotype is not listed yet
			if(cdna>=ndna){
				for(j=0;j<npop;++j){
					freq[j][ndna] = 0;//initiate as 0 the freq of current haplotype in other populations
				}
				freq[cpop][ndna] = 1;	//initiate as 1 the freq of current haplotype in its population
				
				valS[cdna] = (char *)malloc((nsites+1)*sizeof(char));
				for(csite=0; csite<nsites ; csite++){
					valS[cdna][csite] = genotS[cpop][csamp][csite];		//adds current haplotype to valS[][][]	
				}
				valS[cdna][csite] = '\0';
				++ndna;	//increase by one the number of different haplotypes of a particular locus
			}
		}
	}
	//transforms dna sequences into an array of segregating sites	
	mainseq = (char *)malloc(nsites*sizeof(char));
    for(csite=0; csite<nsites; csite++)
		mainseq[csite] = valS[0][csite];
	for(cdna=0;cdna<ndna;++cdna){
		for(csite=0;csite<nsites;++csite){
			if(valS[cdna][csite]!=mainseq[csite]&&(valS[cdna][csite]=='A'||
												   valS[cdna][csite]=='a'||
												   valS[cdna][csite]=='G'||
												   valS[cdna][csite]=='g'||
												   valS[cdna][csite]=='T'||
												   valS[cdna][csite]=='t'||
												   valS[cdna][csite]=='C'||
												   valS[cdna][csite]=='c')){
				valS[cdna][csite] = '1';
			}
			else{
				valS[cdna][csite] = '0';
			}
		}
	}

	//writes the output file
	fprintf(outp,"# Nexus file converted to .len file with nexus2table1.0\n");
	fprintf(outp,"# input file:  %s\n",input);
	fprintf(outp,"# output file: %s.len\n",output);
	fprintf(outp,"# date: %s#\n",asctime(startTime));
	for(cpop=0;cpop<npop;++cpop){
		fprintf(outp,"#Population %d - %s\n",cpop+1,lpop[cpop]);
	}
	fprintf(outp,"#Locus 1 - locus1\n");
	fprintf(outp,"\n%d\n",npop);												//outp: npop
	fprintf(outp,"1\ns\n\n");	 												//outp: nloc, ltype		
	fprintf(outp,"%d\n",ndna);													//outp: ndna
	for(cpop=0;cpop<npop;++cpop){
		for(cdna=0;cdna<ndna;++cdna){
			fprintf(outp,"%4d ",freq[cpop][cdna]);								//(S)outp: freq[][]
		}
		fprintf(outp,"\n");
	}
	fprintf(outp,"\n");
	for(cdna=0;cdna<ndna;++cdna)
		fprintf(outp,"%4d ",cdna);												//(S)outp: cdna
	fprintf(outp,"\n\n");
	fprintf(outp,"%d \n",nsites);												//(S)outp: nsites
	
	for(cdna=0;cdna<ndna;++cdna)
		fprintf(outp,"%d %s\n",cdna,valS[cdna]);								//(S)outp: cdna, valS[]
	fprintf(outp,"\n");
	
	//free stuff
	for(csamp=0 ; csamp<nsamp ; csamp++){
		free(indiv[csamp]);	
	}
	free(indiv);
	for(cpop=0 ; cpop<npop ; cpop++){
		for(csamp=0 ; csamp<nsamp ; csamp++){
			free(genotS[cpop][csamp]);	
		}
		free(genotS[cpop]);
	}	
	free(genotS);
	for(cpop=0; cpop<npop ; cpop++){
		free(freq[cpop]);
		free(lpop[cpop]);
	}
	free(freq);
	free(lpop);
	for(cdna=0 ; cdna<ndna; cdna++)
		free(valS[cdna]);
	free(valS);
	free(mainseq);
	free(lsamp);
	free(outname);
	free(id);
		
	//close files
	fclose(inp);
	fclose(outp);

	return 0;

} //end of main
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2026, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API