https://github.com/jhbadger/scripts
Raw File
Tip revision: 6ad694835e70be1c38b4cc13806c5b4361f669fc authored by Jonathan Badger on 12 January 2024, 20:33:42 UTC
report misses in virtualPCR
Tip revision: 6ad6948
buildTaxItSeq_Info
#!/usr/bin/env ruby

require 'rubygems'
require 'optimist'
require 'fcsv'
require 'bio'

# With no arguments at all, behave as if --help had been requested.
ARGV << "--help" if ARGV.empty?

# Command-line options: both file paths are mandatory string arguments.
opts = Optimist.options do
  banner File.basename($PROGRAM_NAME)
  opt :input, "Input fasta file", required: true, type: :string
  opt :taxids, "taxids.txt", required: true, type: :string
end

# Build a lookup table from taxon name to numeric taxid.
# Each usable line of the taxids file has the form "<taxid> # <name>".
taxids = {}
# File.foreach closes the file once iteration finishes, unlike a bare
# File.new whose handle would stay open for the rest of the run.
File.foreach(opts.taxids) do |line|
  num, name = line.chomp.split(" # ")
  # Skip blank or malformed lines: without the " # " separator there is no
  # name, and storing a nil key would only add an entry nothing looks up.
  next unless name

  taxids[name] = num.to_i
end

# Emit the CSV header row (to_csv supplies the line terminator, so print
# is used rather than puts).
print ["seqname", "accession", "tax_id", "species_name", "is_type"].to_csv

# Write one CSV row per FASTA record whose name can be mapped to a taxid;
# records that cannot be mapped are reported on standard error instead.
# The block form of File.open closes the input when the loop finishes,
# including when an exception is raised part-way through.
File.open(opts.input) do |fasta|
  Bio::FlatFile.new(Bio::FastaFormat, fasta).each do |seq|
    id = seq.entry_id
    # The candidate name is whatever follows the last "__" in the
    # identifier, with underscores turned into spaces. `last` is nil when
    # split yields no fields (e.g. an empty identifier), hence the to_s.
    sp = id.split("__").last.to_s.tr("_", " ")

    # Drop trailing words one at a time until the remaining name is a known
    # taxon or nothing is left.
    until taxids[sp] || sp.empty?
      sp = sp.split(" ")[0...-1].join(" ")
    end

    tax_id = taxids[sp]
    if tax_id
      # The sequence identifier doubles as both seqname and accession.
      print [id, id, tax_id, sp, "FALSE"].to_csv
    else
      STDERR.puts "I can't find taxid for #{id}"
    end
  end
end
back to top