https://github.com/jhbadger/scripts
Raw File
Tip revision: 6ad694835e70be1c38b4cc13806c5b4361f669fc authored by Jonathan Badger on 12 January 2024, 20:33:42 UTC
report misses in virtualPCR
Tip revision: 6ad6948
readsNotInPosmap
#!/usr/bin/env ruby

require 'optparse'
require 'ostruct'
require 'ZFile'
require 'rubygems'
require 'bio'

opt = OpenStruct.new
o = OptionParser.new
o.banner << " posmap alias fna [...fna...]"
begin
  o.parse!
rescue
  STDERR << $!.message << "\n"
  STDERR << o
  exit(1)
end
if (ARGV.size < 3)
  STDERR << o
  exit(1)
end

posmap, ali  = ARGV.shift, ARGV.shift

seen = Hash.new

STDERR << "Loading posmap...\n"

count = Hash.new
ZFile.new(posmap).each do |line|
  contig, read = line.chomp.split("\t")
  seen[read] = true
  count[contig] = 0 if (!count[contig])
  count[contig] += 1
end

singletons = count.keys.select {|x| count[x] == 0}
STDERR.printf("There are %d singletons in the assembly\n", singletons.size)

alin = Hash.new
STDERR << "Loading alias...\n"
ZFile.new(ali).each do |line|
  read, newname = line.chomp.split("\t")
  alin[newname] = read
end

ARGV.each do |file|
  Bio::FlatFile.new(Bio::FastaFormat, ZFile.new(file)).each do |seq|
    if (!seen[alin[seq.entry_id]])
      seq.definition = seq.entry_id + " " + alin[seq.entry_id]
      print seq 
    end
  end
end
back to top