Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision e6a0334445b5755bb52a0d2209120ee4e251e7b4 authored by Joao Sollari Lopes on 13 November 2017, 18:32:56 UTC, committed by Joao Sollari Lopes on 13 November 2017, 18:32:56 UTC
First commit
0 parent
  • Files
  • Changes
  • af1b4d8
  • /
  • src
  • /
  • pop_convertabc2.c
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:e6a0334445b5755bb52a0d2209120ee4e251e7b4
directory badge
swh:1:dir:a574a6b710136758a352d12c14eaa4a6f2cefb24
content badge
swh:1:cnt:202b02985df08428891ead9f84a1e12b44d49a82

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
pop_convertabc2.c
/*
	@author:	joao lopes
	@workplace: Reading University
	@date: 		6th May 2009

*/

#include "interface.h"

/*
	It creates an ABC .len file given an IM input file.
	
	@arg length format filename
	@arg output filename
*/
int createFreqTab2(char *input,char *output){
	int cloc, cpop, cdna, cindiv, csite, i, j, cmark,	//iterators
		nmark,					//nr. of total markers used
		nloc,					//number of loci
		equalDna,				//auxiliar to build the .len file
		outsize,				//size of output file name 
  		*ldna,					//number of different dna data by loci
		*lmark,					//nr of markers per loci
		*nsamp,					//total number of samples per loci
		**lsamp,				//size of sample per pop per loci	
		***freq;				//freq of dna data by no_pop by no. of diff dna data
	char type,					//type of analysis choosen
		 c1,					//gets the value of a char temporarly
		 aux[MAXCHAR],			//auxiliar
		 pop1[MAXCHAR],			//name of population 1
		 pop2[MAXCHAR],			//name of population 2
		 DNAtype[MAXCHAR],		//current locus mutation model used in IM
		 *ltype,				//DNA type per loci
		 *outname,				//name of the output file 
		 **locname;				//name of the loci
	FILE *inp,					//pntr to the input file
		 *outp;					//pntr to the output file
	time_t startClock;			//time when the program starts
	const struct tm *startTime;	//struct time when the program starts
// only used in microssatelite dna analysis	
	int **valM,					//list of all the diff microsatellite sizes
		**IDsort,				//list of the order of the diff microsatellite sizes
		***genotM2,				//list of all the individual's haplotype per pop per loci
		****genotM;				//list of all the individual's haplotype per pop per loci
// only used in sequence dna analysis
	char **mainseq,				//main sequence of a loci
		 ***valS,				//list of all the diff dna sequencies
		 ****genotS2,			//list of all the individual's haplotype per pop per loci 
		 ****genotS;			//list of all the individual's haplotype per pop per loci 
	int *lsites;				//number of bases per loci

	inp = fopen(input,"r");
	if(inp == NULL)
		return 1; //cannot open sample file
	
	outsize = strlen(output) + 5;
	outname = (char *)malloc(outsize*sizeof(char));
	strcpy(outname,output);
	outp = fopen(strcat(outname,".len"),"w");
	if(outp == NULL)
		return 2; //cannot create output file

	time( &startClock );   					// Get time in seconds
	startTime = localtime( &startClock );  	// Convert time to struct tm form 

	//print first line to output file
	fprintf(outp,"# IMa file converted to .len file with im2table1.0\n");
	fprintf(outp,"# input file:  %s\n",input);
	fprintf(outp,"# output file: %s.len\n",output);
	fprintf(outp,"# date: %s#\n#",asctime(startTime));
	while(!isendline(c1=getc(inp))){
		fprintf(outp,"%c",c1);
	}
	fprintf(outp,"\n");

	//if # (commentary) run to end of line
	c1 = getc(inp);
	while(c1 == '#'){
		while(!isendline(c1 = getc(inp)));
		while(isspace(c1=getc(inp))||isendline(c1)||c1=='\t');
	}
	ungetc(c1,inp);

	fscanf(inp,"%s",&pop1);
	fscanf(inp,"%s",&pop2);
	fscanf(inp,"%d",&nloc);

	//allocate memory to DNAtype, lsamp, locname, genotM or genotS and lsites
	ltype = (char *)malloc(nloc*sizeof(char));
	lmark = (int *)malloc(nloc*sizeof(int));
	lsamp = (int **)malloc(nloc*sizeof(int*));
	nsamp = (int *)malloc(nloc*sizeof(int));
	locname = (char **)malloc(nloc*sizeof(char*));
	lsites = (int *)malloc(nloc*sizeof(int));
	genotM = (int****)malloc(nloc*sizeof(int***));
	genotS = (char****)malloc(nloc*sizeof(char***));
	for(cloc=0;cloc<nloc;cloc++){
		lsamp[cloc] = (int*)malloc(NPOP*sizeof(int));
		locname[cloc] = (char *)malloc(MAXCHAR*sizeof(char));
		genotM[cloc] = (int***)malloc(NPOP*sizeof(int**));
		genotS[cloc] = (char***)malloc(NPOP*sizeof(char**));
	}

	//fill genotM[cloc][cpop][cindiv] or genotS[cloc][cpop][cindiv][csite]
	for(cloc=0;cloc<nloc;cloc++){
		while(isendline(c1=getc(inp)));
		ungetc(c1,inp);
		for(i=0 ; !isspace(c1=getc(inp)) ; i++)
			locname[cloc][i]=c1;
		locname[cloc][i]='\0';
		fscanf(inp,"%d",&lsamp[cloc][0]);
		fscanf(inp,"%d",&lsamp[cloc][1]);
		fscanf(inp,"%d",&lsites[cloc]);
		fscanf(inp,"%s",&DNAtype);
		if(DNAtype[0]=='I'){
			ltype[cloc]='s';
			lmark[cloc]=1;
		}
		else if(DNAtype[0]=='S'){
			ltype[cloc]='m';
			for(i=1; isdigit(DNAtype[i]); i++)
				aux[i-1]=DNAtype[i];
			lmark[cloc]=atoi(aux);
		}
		else
			return 3; //program can't deal with specified data
		while(!isendline(c1=getc(inp)));
		ungetc(c1,inp);
		while(isendline(c1=getc(inp)));
		ungetc(c1,inp);
		
		for(cpop=0 ; cpop<NPOP ; cpop++){
			//choose between diferent analysis		
			if(ltype[cloc] == 'm'){
				genotM[cloc][cpop] = (int **)malloc((lsamp[cloc][cpop])*sizeof(int*));
				for(cindiv=0 ; cindiv<lsamp[cloc][cpop] ; cindiv++){
					genotM[cloc][cpop][cindiv] = (int *)malloc(lmark[cloc]*sizeof(int));
					for(i=0 ; i<=LOCMAXCHAR ; i++)
						c1=getc(inp);
					if(isspace(c1)){
						while(isspace(c1=getc(inp)));
					}
					ungetc(c1,inp);
					for(cmark=0; cmark<lmark[cloc]; cmark++)
						fscanf(inp,"%d",&genotM[cloc][cpop][cindiv][cmark]);
					while(!isendline(c1=getc(inp)));
					ungetc(c1,inp);
					while(isendline(c1=getc(inp)));
					ungetc(c1,inp);
				}
			}
			else{
				genotS[cloc][cpop] = (char **)malloc((lsamp[cloc][cpop])*sizeof(char *));
				for(cindiv=0 ; cindiv<lsamp[cloc][cpop] ; cindiv++){
					genotS[cloc][cpop][cindiv] = (char *)malloc(lsites[cloc]*sizeof(char));
					for(i=0 ; i<=LOCMAXCHAR ; i++)
						c1=getc(inp);
					if(isspace(c1)){
						while(isspace(c1=getc(inp)));
					}
					ungetc(c1,inp);
					for(csite=0 ; csite < lsites[cloc] ; csite++)
						fscanf(inp,"%c",&genotS[cloc][cpop][cindiv][csite]);
					while(!isendline(c1=getc(inp)));
					ungetc(c1,inp);
					while(isendline(c1=getc(inp)));
					ungetc(c1,inp);
				}	
			}
		}
	}
	for(cloc=0; cloc<nloc; cloc++){
		nsamp[cloc]=0;
		for(cpop=0; cpop<NPOP; cpop++){
			nsamp[cloc]+=lsamp[cloc][cpop];
		}
	}
	
	nmark=0;
	for(cloc=0; cloc<nloc; cloc++)
		nmark+=lmark[cloc];

	genotM2=(int***)malloc(nmark*sizeof(int**));
	for(i=0,cloc=0; cloc<nloc; cloc++){
		for(cmark=0; cmark<lmark[cloc]; cmark++){
			if(ltype[cloc]=='m'){
				genotM2[i] = (int**)malloc(NPOP*sizeof(int*));
				for(cpop=0; cpop<NPOP; cpop++){
					genotM2[i][cpop] = (int*)malloc((lsamp[cloc][cpop])*sizeof(int));
					for(cindiv=0; cindiv<lsamp[cloc][cpop]; cindiv++)
						genotM2[i][cpop][cindiv]=genotM[cloc][cpop][cindiv][cmark];
				}
			}
			i++;
		}
	}

	genotS2=(char****)malloc(nmark*sizeof(char***));
	for(i=0,cloc=0; cloc<nloc; cloc++){
		for(cmark=0; cmark<lmark[cloc]; cmark++){
			if(ltype[cloc]=='s'){
				genotS2[i] = (char***)malloc(NPOP*sizeof(char**));
				for(cpop=0; cpop<NPOP; cpop++){
					genotS2[i][cpop] = (char**)malloc((lsamp[cloc][cpop])*sizeof(char*));
					for(cindiv=0; cindiv<lsamp[cloc][cpop]; cindiv++){
						genotS2[i][cpop][cindiv] = (char *)malloc(lsites[cloc]*sizeof(char));
						for(csite=0 ; csite < lsites[cloc] ; csite++)
							genotS2[i][cpop][cindiv][csite]=genotS[cloc][cpop][cindiv][csite];
					}
				}
			}
			i++;
		}
	}
		
	//allocate memory to freq, valM, IDsort, valS and ldna
	valM = (int **)malloc(nmark*sizeof(int *));
	IDsort = (int **)malloc(nmark*sizeof(int *));	
	valS = (char ***)malloc(nmark*sizeof(char **));
	ldna = (int *)malloc((nmark+1)*sizeof(int));
	for(i=0,cloc=0; cloc<nloc ;++cloc){
		for(cmark=0; cmark<lmark[cloc];cmark++){
			ldna[i]=0;
			if(ltype[cloc]=='m'){
				valM[i] = malloc(nsamp[cloc]*sizeof(int));
				IDsort[i] = malloc(nsamp[cloc]*sizeof(int));
			}
			else{
				valS[i] = (char **)malloc((nsamp[cloc]+1)*sizeof(char *));
			}
			i++;	
		}
	}
	freq = (int ***)malloc(NPOP*sizeof(int **));
	for(cpop=0;cpop<NPOP;++cpop){
		freq[cpop] = (int **)malloc(nmark*sizeof(int *));	
		for(i=0,cloc=0; cloc<nloc ;++cloc){
			for(cmark=0; cmark<lmark[cloc];cmark++){
				freq[cpop][i] = (int *)malloc(nsamp[cloc]*sizeof(int));
				i++;
			}
		}
	}	

	//fill freq[cpop][cloc][cdna], valM[cloc][cdna] and IDsort[cloc][cdna]
	for(i=0,cloc=0;cloc<nloc;++cloc){
		for(cmark=0; cmark<lmark[cloc];cmark++){
			if(ltype[cloc]=='m'){
				for(cpop=0;cpop<NPOP;cpop++){
					//run through every individual		
					for(cindiv=0;cindiv<lsamp[cloc][cpop];++cindiv){
						//check if there is information in the current individual			
						if(genotM2[i][cpop][cindiv] == 0){
							continue;
						}
				
						//check if the current haplotype is already listed
						for(cdna=0;cdna<ldna[i];++cdna){
							equalDna = 1;						
						
							if(genotM2[i][cpop][cindiv] != valM[i][cdna])
								equalDna = 0;
							if(equalDna){
								++freq[cpop][i][cdna];	//increase freq of current haplotype
								break;
							}
						}
						//current haplotype is not listed yet
						if(cdna>=ldna[i]){
							for(j=0;j<NPOP;++j){
								freq[j][i][ldna[i]] = 0;	//initiate as 0 the freq of current haplotype in other populations
							}
							freq[cpop][i][ldna[i]] = 1;	//initiate as 1 the freq of current haplotype in its population
							
							valM[i][cdna] = genotM2[i][cpop][cindiv];		//adds current haplotype to valM[][]
							++ldna[i];	//increase by one the number of different haplotypes of a particular locus
						}
					}
				}
			}
			i++;
		}
	}

	//fill freq[cpop][cloc][cdna] and valS[cloc][cdna][csite]
	for(i=0,cloc=0;cloc<nloc;++cloc){
		for(cmark=0; cmark<lmark[cloc];cmark++){
			if(ltype[cloc]=='s'){
				for(cpop=0;cpop<NPOP;cpop++){
					//run through every individual		
					for(cindiv=0;cindiv<lsamp[cloc][cpop];++cindiv){
						//check if the current haplotype is already listed
						for(cdna=0;cdna<ldna[i];++cdna){
							equalDna = 1;						
							for(csite=0 ; csite<lsites[cloc] ; csite++){
								if(genotS2[i][cpop][cindiv][csite] != valS[i][cdna][csite]){
									if(valS[i][cdna][csite]=='N'){
										valS[i][cdna][csite] = genotS2[i][cpop][cindiv][csite];
									}
									else if(genotS2[i][cpop][cindiv][csite]=='A'||genotS2[i][cpop][cindiv][csite]=='a'||
											genotS2[i][cpop][cindiv][csite]=='T'||genotS2[i][cpop][cindiv][csite]=='t'||
									  		genotS2[i][cpop][cindiv][csite]=='G'||genotS2[i][cpop][cindiv][csite]=='g'||
									  		genotS2[i][cpop][cindiv][csite]=='C'||genotS2[i][cpop][cindiv][csite]=='c'){
										equalDna = 0;
										break;
									}
								}
							}
							if(equalDna){
								++freq[cpop][i][cdna];	//increase freq of current haplotype
								break;
							}
						}
						//current haplotype is not listed yet
						if(cdna>=ldna[i]){
							for(j=0;j<NPOP;++j){
								freq[j][i][ldna[i]] = 0;//initiate as 0 the freq of current haplotype in other populations
							}
							freq[cpop][i][ldna[i]] = 1;	//initiate as 1 the freq of current haplotype in its population
							
							valS[i][cdna] = (char *)malloc((lsites[cloc]+1)*sizeof(char));
							for(csite=0; csite<lsites[cloc] ; csite++){
								valS[i][cdna][csite] = genotS2[i][cpop][cindiv][csite];		//adds current haplotype to valS[][][]	
							}
							valS[i][cdna][csite] = '\0';
							++ldna[i];	//increase by one the number of different haplotypes of a particular locus
						}
					}
				}
			}
			i++;
		}
	}
	//transforms dna sequences into an array of segregating sites	
	mainseq = (char **)malloc(nmark*sizeof(char *));
    for(i=0,cloc=0 ; cloc<nloc ; cloc++){
    	for(cmark=0; cmark<lmark[cloc]; cmark++){
			if(ltype[cloc]=='s'){					
		        mainseq[i] = (char *)malloc(lsites[cloc]*sizeof(char));
	            for(csite=0; csite<lsites[cloc]; csite++){
					mainseq[i][csite] = valS[i][0][csite];
		        }
				for(cdna=0;cdna<ldna[i];++cdna){
					for(csite=0;csite<lsites[cloc];++csite){
						if(valS[i][cdna][csite]!=mainseq[i][csite]&&(valS[i][cdna][csite]=='A'||
																	 valS[i][cdna][csite]=='a'||
																	 valS[i][cdna][csite]=='G'||
																	 valS[i][cdna][csite]=='g'||
																	 valS[i][cdna][csite]=='T'||
																	 valS[i][cdna][csite]=='t'||
																	 valS[i][cdna][csite]=='C'||
																	 valS[i][cdna][csite]=='c')){
							valS[i][cdna][csite] = '1';
						}
						else{
							valS[i][cdna][csite] = '0';
						}
					}
				} 
			}
			i++;
    	}
    }

	//writes the output file
	fprintf(outp,"#Population 1 - %s\n",pop1);
	fprintf(outp,"#Population 2 - %s\n",pop2);	

	for(cloc=0,i=0;cloc<nloc;++cloc){
	   	for(cmark=0; cmark<lmark[cloc]; cmark++){
			if(lmark[cloc]>1)
				fprintf(outp,"#Locus %d - %s_%d\n",i+1,locname[cloc],cmark+1);
			else
				fprintf(outp,"#Locus %d - %s\n",i+1,locname[cloc]);
			i++;
	   	}
	}
	fprintf(outp,"\n2\n"); 														//outp: npop
	fprintf(outp,"%d\n",nmark);	 												//outp: nloc		
	
	for(cloc=0 ; cloc<nloc ; cloc++){
	   	for(cmark=0; cmark<lmark[cloc]; cmark++)
			fprintf(outp,"%c ",ltype[cloc]);									//outp: ltype
	}
	fprintf(outp,"\n\n"); 														
	
	for(cloc=0,i=0 ; cloc<nloc ; cloc++){
	   	for(cmark=0; cmark<lmark[cloc]; cmark++){
			fprintf(outp,"%d\n",ldna[i]);										//outp: ndna
		
			if(ltype[cloc]=='m')
				isorti('a',ldna[i],valM[i],IDsort[i]);
			
			for(cpop=0;cpop<NPOP;++cpop){
				for(cdna=0;cdna<ldna[i];++cdna){
					if(ltype[cloc]=='m')
						fprintf(outp,"%4d ",freq[cpop][i][IDsort[i][cdna]]);	//(M)outp: freq[][]
					else
						fprintf(outp,"%4d ",freq[cpop][i][cdna]);				//(S)outp: freq[][]
				}
				fprintf(outp,"\n");
			}
			fprintf(outp,"\n");
			
			if(ltype[cloc]=='s'){
				for(cdna=0;cdna<ldna[i];++cdna)
					fprintf(outp,"%4d ",cdna);										//(S)outp: cdna
				fprintf(outp,"\n\n");
				fprintf(outp,"%d \n",lsites[cloc]);									//(S)outp: lsites[]
			}	
			
			for(cdna=0;cdna<ldna[i];++cdna){
				if(ltype[cloc]=='m')
					fprintf(outp,"%4d ",valM[i][IDsort[i][cdna]]);			//(M)outp: valM[]
				else
					fprintf(outp,"%d %s\n",cdna,valS[i][cdna]);					//(S)outp: cdna, valS[]
			}
			if(ltype[cloc]=='m')
				fprintf(outp,"\n\n");
			else
				fprintf(outp,"\n");
			i++;
		}
	}
	
	//free stuff
	for(cloc=0 ; cloc<nloc ; cloc++){
		if(ltype[cloc]=='s'){
			for(cpop=0 ; cpop<NPOP ; cpop++){
				for(cindiv=0 ; cindiv<lsamp[cloc][cpop] ; cindiv++){
						free(genotS[cloc][cpop][cindiv]);	
				}
					free(genotS[cloc][cpop]);
			}	
		}
		free(genotS[cloc]);
	}
	free(genotS);
	for(cloc=0 ; cloc<nloc ; cloc++){
		if(ltype[cloc]=='m'){
			for(cpop=0 ; cpop<NPOP ; cpop++){
				for(cindiv=0 ; cindiv<lsamp[cloc][cpop] ; cindiv++){
					free(genotM[cloc][cpop][cindiv]);
				}
					free(genotM[cloc][cpop]);
			}	
		}
		free(genotM[cloc]);
	}
	free(genotM);
	for(cloc=0, i=0; cloc<nloc ; cloc++){
		for(cmark=0;cmark<lmark[cloc];cmark++){
			if(ltype[cloc]=='s'){
				for(cpop=0 ; cpop<NPOP ; cpop++){
					for(cindiv=0 ; cindiv<lsamp[cloc][cpop] ; cindiv++){
							free(genotS2[i][cpop][cindiv]);	
					}
						free(genotS2[i][cpop]);
				}	
				free(genotS2[i]);
			}
			i++;
		}
	}
	free(genotS2);
	for(cloc=0, i=0; cloc<nloc ; cloc++){
		for(cmark=0;cmark<lmark[cloc];cmark++){
			if(ltype[cloc]=='m'){
				for(cpop=0 ; cpop<NPOP ; cpop++){
					free(genotM2[i][cpop]);
				}	
				free(genotM2[i]);
			}
			i++;
		}
	}
	free(genotM2);
	for(cpop=0; cpop<NPOP ; cpop++){
		for(cloc=0,i=0; cloc<nloc ; cloc++){
			for(cmark=0;cmark<lmark[cloc];cmark++){
				free(freq[cpop][i]);
				i++;
			}
		}
	    free(freq[cpop]);
	}
	free(freq);		
	for(cloc=0, i=0; cloc<nloc ; cloc++){
		for(cmark=0; cmark<lmark[cloc];cmark++){
			if(ltype[cloc]=='s'){
				for(cdna=0 ; cdna<ldna[i] ; cdna++)
					free(valS[i][cdna]);
				free(valS[i]);
				free(mainseq[i]);
			}
			else{
				free(valM[i]);
				free(IDsort[i]);
			}
			i++;
		}
	}
	free(mainseq);
	free(valS);
	free(valM);
	free(IDsort);
	for(cloc=0 ; cloc<nloc ; cloc++){
		free(lsamp[cloc]);
		free(locname[cloc]);
	}
	free(lsamp);
	free(locname);			
	free(lmark);
	free(ldna);		
	free(nsamp);
	free(lsites);
	free(ltype);
	free(outname);
		
	//close files
	fclose(inp);
	fclose(outp);

	return 0;

} //end of main
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2026, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API