Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision 033e4b3f79057958cd18c17a5701f8fd4f766ec7 authored by jferna10 on 15 July 2021, 06:14:41 UTC, committed by GitHub on 15 July 2021, 06:14:41 UTC
formatting
1 parent 95bd54a
  • Files
  • Changes
  • c6c518b
  • /
  • process_fastqs
  • /
  • countDMScodons.java
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:033e4b3f79057958cd18c17a5701f8fd4f766ec7
directory badge
swh:1:dir:8f88d86809da017893bf5112f721178853f151e0
content badge
swh:1:cnt:64327057063fea0e2b95a95daefebaab137d8c52

This interface makes it possible to generate software citations, provided that the root directory of the browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
countDMScodons.java
import java.io.*;
import java.lang.*;
import java.util.*;


/**
 * Counts codons and amino acids per codon position from paired-end reads in a SAM file.
 *
 * The constructor reads the SAM file, tallies every read pair whose fragment spans the
 * requested region, and writes two tab-separated tables next to the input file:
 * {@code <name>_<start>_<stop>_codons.tab} and {@code <name>_<start>_<stop>_aa.tab}.
 * Mates are expected on consecutive lines (same read name).
 */
public class countDMScodons
{	
	/** Codon counts, indexed as codoncount[codon position in region][index into CODONS]. */
	public int[][] codoncount;
	/** Amino-acid counts, indexed as aacount[codon position in region][index into AACIDS]. */
	public int[][] aacount;
	/** Column order of codoncount and of the codon output table. */
	public String[] CODONS = new String[] {"AAA", "AAG", "AAC", "AAT",
											"AGA", "AGG", "AGC", "AGT", 
											"ACA", "ACG", "ACC","ACT", 
											"ATA", "ATG", "ATC","ATT", 
											"GAA", "GAG", "GAC","GAT", 
											"GGA", "GGG", "GGC","GGT", 
											"GCA", "GCG", "GCC","GCT",
											"GTA", "GTG", "GTC","GTT",
											"CAA", "CAG", "CAC","CAT",
											"CGA", "CGG", "CGC","CGT",
											"CCA", "CCG", "CCC","CCT",
											"CTA", "CTG", "CTC","CTT",
											"TAA", "TAG", "TAC","TAT",
											"TGA", "TGG", "TGC","TGT",
											"TCA", "TCG", "TCC","TCT",
											"TTA", "TTG", "TTC","TTT"};
											
	/** Column order of aacount and of the amino-acid output table ("*" is a stop codon). */
	public String[] AACIDS = new String[] {"K", "N", "R", "S", "T", "I", "M", "E","D","G","A","V","Q","H","P","L","*","Y","W","C","F"};

	/**
	 * One-letter amino acid for each entry of CODONS, in the same order
	 * (four codons per chunk, one chunk per row of the CODONS initializer).
	 */
	private static final String AA_BY_CODON =
			"KKNN" + "RRSS" + "TTTT" + "IMII" +
			"EEDD" + "GGGG" + "AAAA" + "VVVV" +
			"QQHH" + "RRRR" + "PPPP" + "LLLL" +
			"**YY" + "*WCC" + "SSSS" + "LLFF";

	/** Codon string -> column index in codoncount. */
	private final Map<String, Integer> codonIndex = new HashMap<String, Integer>();
	/** Column index in codoncount -> column index in aacount. */
	private final int[] aaIndexByCodon = new int[CODONS.length];

	
	/**
	 * Command-line entry point.
	 *
	 * @param args SAM file name, region start, region stop
	 */
	public static void main(String args[])
	{
		if (args.length < 3)
		{
			System.err.println("Usage: java countDMScodons <file.sam> <start> <stop>");
			System.exit(1);
		}
		new countDMScodons(args[0], Integer.parseInt(args[1]), Integer.parseInt(args[2]));
	}
	
	/**
	 * Allocates the count tables for the region, then reads the SAM file and writes
	 * both output tables.
	 *
	 * @param filename path of the SAM file
	 * @param start    first reference position of the region
	 * @param stop     end reference position of the region
	 */
	public countDMScodons(String filename, int start, int stop)
	{
		// Java zero-initializes int arrays, so no explicit clearing is needed.
		int positions = (stop - start) / 3 + 1;
		codoncount = new int[positions][CODONS.length];
		aacount = new int[positions][AACIDS.length];

		buildLookupTables();
		readSAM(filename, start, stop);
	}

	/** Fills codonIndex and aaIndexByCodon from CODONS, AACIDS and AA_BY_CODON. */
	private void buildLookupTables()
	{
		List<String> aminoAcids = Arrays.asList(AACIDS);
		for (int c = 0; c < CODONS.length; c++)
		{
			codonIndex.put(CODONS[c], Integer.valueOf(c));
			aaIndexByCodon[c] = aminoAcids.indexOf(String.valueOf(AA_BY_CODON.charAt(c)));
		}
	}
	
	/**
	 * Reads the SAM file pair by pair, counts codons for each pair, then writes both
	 * output tables (they are written even if reading failed part-way).
	 *
	 * Header lines (starting with '@') and blank lines are skipped. A record whose
	 * next line does not carry the same read name is reported and the next line is
	 * then tried as the first mate of a new pair. A pair that cannot be parsed is
	 * reported and skipped instead of aborting the whole file.
	 *
	 * @param filename path of the SAM file
	 * @param start    first reference position of the region
	 * @param stop     end reference position of the region
	 */
	public void readSAM(String filename, int start, int stop)
	{
		// try-with-resources closes the reader on every exit path
		try (BufferedReader br = new BufferedReader(new FileReader(new File(filename))))
		{
			String line = br.readLine();

			while (line != null)
			{
				if (line.isEmpty() || line.charAt(0) == '@')
				{
					line = br.readLine(); //move through header and blank lines
					continue;
				}

				StringTokenizer tk1 = new StringTokenizer(line, "\t");
				if (!tk1.hasMoreTokens())
				{
					line = br.readLine(); //line held only tabs
					continue;
				}
				String name1 = tk1.nextToken(); //read name

				String mateLine = br.readLine(); //next record (should be mate)
				if (mateLine == null)
				{
					// last record of the file has no partner
					System.out.println(name1 + " has no mate or SAM needs to be sorted");
					break;
				}

				StringTokenizer tk2 = new StringTokenizer(mateLine, "\t");
				String name2 = tk2.hasMoreTokens() ? tk2.nextToken() : "";

				if (!name2.equals(name1))
				{
					System.out.println(name1 + " has no mate or SAM needs to be sorted");
					line = mateLine; //retry this line as the first mate of the next pair
					continue;
				}

				try
				{
					SAMString one = parseRecord(name1, tk1);
					SAMString two = parseRecord(name2, tk2);
					countCodons(start, stop, new SAMJoin(one, two));
				}
				catch (NumberFormatException | NoSuchElementException | IndexOutOfBoundsException e)
				{
					// missing field, non-numeric field, or a CIGAR that parseCIGAR cannot apply
					System.out.println("Skipping malformed read pair " + name1 + ": " + e);
				}

				line = br.readLine(); //read next record
			}
		}
		catch (IOException e)
		{
			System.out.println("Error reading SAM file " + filename);
			e.printStackTrace();
		}
		writeCodons(filename, start, stop);
		writeAA(filename, start, stop);
	}

	/**
	 * Builds a SAMString from the fields that follow the read name, consumed in order:
	 * flag, reference name, position, mapping quality, CIGAR, mate reference,
	 * mate position, template length, sequence, quality.
	 */
	private static SAMString parseRecord(String name, StringTokenizer fields)
	{
		String flag = fields.nextToken();
		String refName = fields.nextToken();
		int pos = Integer.parseInt(fields.nextToken());
		int mapq = Integer.parseInt(fields.nextToken());
		String cigar = fields.nextToken();
		String mateRef = fields.nextToken();
		int matePos = Integer.parseInt(fields.nextToken());
		int tlen = Integer.parseInt(fields.nextToken());
		String seq = fields.nextToken();
		String qual = fields.nextToken();
		return new SAMString(name, flag, refName, pos, mapq, cigar, mateRef, matePos, tlen, seq, qual);
	}
	
	/**
	 * Assembles the codons of the region from a read pair and adds them to the tables.
	 *
	 * Nothing is counted unless the fragment strictly spans the region
	 * (start &gt; fragment start and stop &lt; fragment end). If any assembled codon
	 * contains '~' or '-' (the markers produced by parseCIGAR), the whole pair is
	 * discarded. Each codon is counted at its own position in the region, so a
	 * position that could not be assembled leaves a hole rather than shifting the
	 * codons that follow it.
	 *
	 * @param start   first reference position of the region
	 * @param stop    end reference position of the region
	 * @param cluster the read pair
	 */
	public void countCodons(int start, int stop, SAMJoin cluster)
	{
		if (!(start > cluster.getStart() && stop < cluster.getEnd()))
			return;

		SAMString mate1 = cluster.getMate1();
		SAMString mate2 = cluster.getMate2();
		int regionOffset = start - cluster.getStart(); //index of 'start' in mate 1's aligned read

		// codons.get(p) is the codon at region position p, or null if it could not be assembled
		List<String> codons = new ArrayList<String>();
		for (int i = 0; (regionOffset+i+3) < (stop-cluster.getStart()); i = i+3)
		{
			String codon = null;
			try
			{
				codon = assembleCodon(start, regionOffset, i, mate1, mate2);
			}
			catch (IndexOutOfBoundsException e)
			{
				System.out.println("Error parsing cigar");
				e.printStackTrace();
				System.out.println(mate1.getStart() + " " + mate2.getStart()+" " +i+" " +mate1.getName()+" " +mate1.getaRead()); 
			}
			codons.add(codon);
		}

		for (String codon : codons)
			if (codon != null && (codon.contains("~") || codon.contains("-")))
				return; //indel inside the region: drop the pair

		for (int p = 0; p < codons.size(); p++)
			if (codons.get(p) != null)
				countCodon(codons.get(p), p);
	}

	/**
	 * Returns the three bases at offset i of the region, or null when neither mate
	 * covers them under the tests below.
	 *
	 * Taken from mate 1 alone, from mate 2 alone, or base by base from whichever
	 * mate has the strictly higher quality character (mate 2 wins ties).
	 */
	private static String assembleCodon(int start, int regionOffset, int i, SAMString mate1, SAMString mate2)
	{
		String read1 = mate1.getaRead();
		String read2 = mate2.getaRead();
		int inMate1 = regionOffset + i;
		int inMate2 = start + i - mate2.getStart();

		boolean withinMate1 = (inMate1 + 3) < read1.length();
		boolean withinMate2 = (i + start) > mate2.getStart() && (inMate2 + 3) < read2.length();

		// NOTE(review): this "mate 2 has not started" test compares i against the distance
		// between the two mate starts, while withinMate2 compares start+i against mate 2's
		// start; the two are not complementary — confirm which is intended.
		if (withinMate1 && i < (mate2.getStart() - mate1.getStart())) //mate 1 only
			return read1.substring(inMate1, inMate1 + 3);

		if (withinMate1 && withinMate2) //overlap: pick the better-quality base at each position
		{
			// NOTE(review): qualities come from the raw QUAL string but are indexed with
			// aligned-read offsets; these agree only when parseCIGAR leaves the read unchanged.
			StringBuilder best = new StringBuilder(3);
			for (int j = 0; j < 3; j++)
			{
				char q1 = mate1.getQual().charAt(inMate1 + j);
				char q2 = mate2.getQual().charAt(inMate2 + j);
				best.append(q1 > q2 ? read1.charAt(inMate1 + j) : read2.charAt(inMate2 + j));
			}
			return best.toString();
		}

		if (withinMate2) //mate 2 only
			return read2.substring(inMate2, inMate2 + 3);

		return null;
	}
	
	/** Writes the codon table to {@code <name>_<start>_<stop>_codons.tab}. */
	private void writeCodons(String filename, int start, int stop)
	{
		writeTable(outputPrefix(filename) + "_"+start+"_"+stop+"_codons.tab", CODONS, codoncount);
	}
	
	/** Writes the amino-acid table to {@code <name>_<start>_<stop>_aa.tab}. */
	private void writeAA(String filename,int start, int stop)
	{
		writeTable(outputPrefix(filename) + "_"+start+"_"+stop+"_aa.tab", AACIDS, aacount);
	}

	/**
	 * Returns the input path with the file name cut at its first '.', keeping the
	 * directory part untouched so that dots in directory names (including a leading
	 * "./" or "../") do not truncate the path.
	 */
	private static String outputPrefix(String filename)
	{
		int sep = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf(File.separatorChar));
		String dir = filename.substring(0, sep + 1);
		String base = filename.substring(sep + 1);
		StringTokenizer parts = new StringTokenizer(base, ".");
		String stem = parts.hasMoreTokens() ? parts.nextToken() : base;
		return dir + stem;
	}

	/**
	 * Writes one count table: a header row of column names followed by SUM, then one
	 * row per codon position holding the position index, the counts and their sum.
	 * A write failure is reported and does not propagate.
	 */
	private static void writeTable(String outPath, String[] columns, int[][] counts)
	{
		try (FileWriter fw = new FileWriter(new File(outPath)))
		{
			for (String column : columns)
				fw.write("\t"+column);
			fw.write("\tSUM\n");

			for (int row = 0; row < counts.length; row++)
			{
				int sum = 0;
				fw.write(row+"\t");
				for (int count : counts[row])
				{
					fw.write(count+"\t");
					sum = sum + count;
				}
				fw.write(sum+"\n");
			}
		}
		catch (IOException e)
		{
			System.out.println("Could not write " + outPath + ": " + e.getMessage());
			e.printStackTrace();
		}
	}
	
	/**
	 * Adds one observation of codon at codon position i to both tables.
	 * Strings that are not one of the 64 entries of CODONS are ignored.
	 */
	private void countCodon(String codon, int i)
	{
		Integer column = codonIndex.get(codon);
		if (column == null)
			return;

		int c = column.intValue();
		codoncount[i][c]++;
		aacount[i][aaIndexByCodon[c]]++;
	}

	/** One SAM alignment record plus the read as rewritten by parseCIGAR. */
	private static class SAMString
	{
		private String name; //read name (cluster ID)
		private String flag; //flag
		private String gname;
		private int start; //mapping start
		private int mq; //mapping quality
		private String cigar; //cigar mutations from ref
		private String mate; //name of mate
		private int mpos; //mpos mapping
		private int tlen; //frag length
		private String read; //actual read
		private String qual; //actual quality 
		private String aligned_read;
		
		/**
		 * Stores the fields and computes the aligned read from the CIGAR string.
		 *
		 * @throws RuntimeException if parseCIGAR cannot apply the CIGAR string to the read
		 */
		public SAMString(String n, String f, String g, int s, int mpq, String cig, String mname, int mp, int t, String r, String q)
		{
			name = n;
			flag = f;
			gname = g;
			start = s;
			mq = mpq;
			cigar = cig;
			mate = mname;
			mpos = mp;
			tlen = t;
			read = r;
			qual = q;
			aligned_read = parseCIGAR();
		}
		
		/** Returns the read as rewritten by parseCIGAR. */
		public String getaRead()
		{
			return aligned_read;
		}
		
		/**
		 * Rewrites the read according to the CIGAR string: a D operation writes a run
		 * of '-', an S operation writes a run of 'N' that is removed at the end, and an
		 * I operation writes a single '~'. A CIGAR of "*" leaves the read as is. Every
		 * 'N' in the result is removed before returning.
		 *
		 * NOTE(review): the index arithmetic is kept exactly as it was and looks off
		 * relative to the SAM CIGAR definition — e.g. for "10M2D5M" the D branch
		 * replaces the base at index 10 instead of inserting before it, and H/P/N
		 * operations advance the read index. The final replace also deletes 'N' base
		 * calls that were part of the read. Confirm before trusting reads with
		 * clipping or indels.
		 */
		public String parseCIGAR()
		{
			String m1 = read;
			
			StringTokenizer ct = new StringTokenizer(cigar, "DHIMNPSX=", true);
			
			int j = 0;
			
			while (ct.hasMoreTokens())
			{
				String tmp = ct.nextToken();
				
				if (!tmp.equals("*"))
				{
					int i = Integer.parseInt(tmp); //cigar int
					int k=j+i-1; //position in string, cigar is +1 instead of zero
					
					String delim = ct.nextToken();
		
					if (delim.equals("D"))
					{
						m1 = m1.substring(0,k) + repeat('-', i) + m1.substring(k+1,m1.length());
					}
					else if (delim.equals("S"))
					{
						m1 = m1.substring(0,j) + repeat('N', i) + m1.substring(k+1,m1.length());
					}
					else if (delim.equals("I"))
					{
						m1 = m1.substring(0,j-1) + "~" + m1.substring(k, m1.length());
					}
					
					j = k;
				}
			}			
			m1 = m1.replace("N","");
			return m1;
		}

		/** Returns c repeated n times (empty when n is not positive). */
		private static String repeat(char c, int n)
		{
			StringBuilder sb = new StringBuilder();
			for (int a = 0; a < n; a++)
				sb.append(c);
			return sb.toString();
		}
		
		
		public String getName()
		{
			return name;	
		}
		
		public String getRead()
		{
			return read;
		}
		
		public String getQual()
		{
			return qual;
		}
		
		public int getTLength()
		{
			return tlen;
		}
		
		public int getStart()
		{
			return start;
		}
		
		/** Returns the mate position field of this record. */
		public int getEnd()
		{
			return mpos;
		}
		
		/** Returns the raw read reversed with A/T and G/C swapped. */
		private String reverseComplementRead()
		{
			String b = new StringBuilder(read).reverse().toString();
			// two-step swap through placeholder letters so swapped bases are not swapped back
			b = b.replace("A", "W");
			b = b.replace("T", "X");
			b = b.replace("G", "Y");
			b = b.replace("C", "Z");
			b = b.replace("W", "T");
			b = b.replace("X", "A");
			b = b.replace("Y", "C");
			b = b.replace("Z", "G");
			
			return b;
		}
		
		public String getCig()
		{
			return cigar;
		}
	}
	
	/** A read pair; start, end and name are taken from the first mate. */
	private static class SAMJoin
	{
		private SAMString mate1;
		private SAMString mate2;
		private int mstart;
		private int mend;
		private String name;
		
		public SAMJoin(SAMString m1, SAMString m2)
		{
			mate1 = m1;
			mate2 = m2;
			mstart = m1.getStart();
			mend = m1.getEnd();
			name = m1.getName();
		}
		
		public SAMString getMate1()
		{
			return mate1;
		}
		
		public SAMString getMate2()
		{
			return mate2;
		}
		
		public String getName()
		{
			return name;
		}
		
		/** Returns mate 1's start plus mate 1's template length. */
		public int getEnd()
		{
			return (mstart + mate1.getTLength());
		}
		
		public int getStart()
		{
			return mstart;
		}
	}
}
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API