https://github.com/csw/bioruby-maf
Revision f443fdc142c5992c47b6639f3a613d2496350f7e authored by Clayton Wheeler on 04 August 2012, 22:34:14 UTC, committed by Clayton Wheeler on 04 August 2012, 22:34:14 UTC
1 parent 4fc31e2
Raw File
Tip revision: f443fdc142c5992c47b6639f3a613d2496350f7e authored by Clayton Wheeler on 04 August 2012, 22:34:14 UTC
Use GZipReader instead of a gzip pipe. Performs better.
Tip revision: f443fdc
maf_bgzip
#!/usr/bin/env ruby

require 'optparse'
require 'ostruct'

require 'bio-maf'
require 'bio-bgzf'

# Command-line settings, read later by the conversion loop.
# Defaults: write output into the current directory and, if indexing is
# requested, index only the reference sequence.
$options = OpenStruct.new
$options.dir = '.'
$options.ref_only = true

op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_bgzip [options] [<maf> ...]"
  opts.separator ""
  opts.separator "Options:"
  opts.on("-d", "--dir DIR",
          "Directory to write compressed MAF to",
          "(default is current directory)") do |dir|
    $options.dir = dir
  end
  # $options.index is left unset (nil, i.e. falsy) unless this switch is given.
  opts.on("-i", "--index", "Index MAF files after writing") do
    $options.index = true
  end
  opts.on("-a", "--all",
          "Index all sequences, not just reference seq",
          "(has no effect without --index)") do
    $options.ref_only = false
  end
  # Presumably registers bio-maf's logging switches on this parser --
  # confirm against Bio::MAF.handle_logging_options.
  Bio::MAF::handle_logging_options(opts)
end

# parse! removes the switches it recognises from ARGV, so only the MAF
# paths remain for the loop below.
begin
  op.parse!(ARGV)
rescue OptionParser::ParseError => e
  # Report an unknown switch or missing argument as a usage error
  # instead of letting the exception surface as a backtrace.
  abort "maf_bgzip: #{e.message}\n\n#{op}"
end
Bio::Log::CLI.configure('bio-maf')

# Reads the MAF file at +maf_path+ and writes its header and every block
# to +bgz_path+ through a BGZF writer.
#
# @param maf_path [String] path of the MAF file to read
# @param bgz_path [String] path of the compressed file to create
def compress_maf(maf_path, bgz_path)
  parser = Bio::MAF::Parser.new(maf_path,
                                :parse_extended => true,
                                :parse_empty => true)
  begin
    # 'wb': the output is compressed binary data, so it must not go
    # through any text-mode newline or encoding translation.
    File.open(bgz_path, 'wb') do |out_f|
      Bio::BGZF::Writer.new(out_f) do |bgz_w|
        maf_w = Bio::MAF::Writer.new(bgz_w)
        maf_w.write_header(parser.header)
        parser.each_block { |block| maf_w.write_block(block) }
      end
    end
  ensure
    # Release the input even when writing fails part-way through.
    parser.close
  end
end

# Builds a Kyoto index at +idx_path+ for the compressed MAF at +bgz_path+.
#
# @param bgz_path [String] path of the compressed MAF file to index
# @param idx_path [String] path of the index file to create
# @param ref_only [Boolean] passed through to KyotoIndex.build unchanged
def index_maf(bgz_path, idx_path, ref_only)
  parser = Bio::MAF::Parser.new(bgz_path)
  begin
    # NOTE(review): the value returned by build is not used by this script;
    # if it holds an open database handle it may need an explicit close --
    # confirm against Bio::MAF::KyotoIndex.
    Bio::MAF::KyotoIndex.build(parser, idx_path, ref_only)
  ensure
    parser.close
  end
end

# Convert each MAF path left in ARGV after option parsing, consuming
# ARGV as we go.
until ARGV.empty?
  maf_path = ARGV.shift
  # Drop the ".maf" suffix and anything following it (e.g. ".maf.gz").
  base = File.basename(maf_path).gsub(/\.maf.*/, '')
  bgz_path = File.join($options.dir, "#{base}.maf.bgz")
  # Opening the output truncates it; if it is the same file as the input
  # the data would be destroyed before it could be read.
  if File.identical?(maf_path, bgz_path)
    abort "maf_bgzip: refusing to overwrite input file #{maf_path}"
  end
  compress_maf(maf_path, bgz_path)
  if $options.index
    index_maf(bgz_path, File.join($options.dir, "#{base}.kct"), $options.ref_only)
  end
end
back to top