https://github.com/dellch/forester
Raw File
Tip revision: 03eea349b23722447a20edcb903248a5a8b764b4 authored by Chris on 18 September 2017, 03:43:22 UTC
Update trees.rb
Tip revision: 03eea34
seedlings.rb
##########################################
##                                      ##
##               USAGE                  ##
##  type: "ruby seedlings.rb NAME_OF_FILE" ##
##  from command line in the directory  ##
##  of the file you're using            ##
##                                      ##
##########################################

# Pattern for a taxon label; the first match on a matrix row is used as that
# row's name.
TAXON_REGEX = /[A-Za-z_\(\)0-9]+/

# Builds the path of the output file for a given input file.
#
# The output keeps the historical "altered_<name>.txt" naming, but the prefix
# is applied to the base name only so that an input given with a directory
# (e.g. "data/matrix.txt") still produces a valid path next to the input.
#
# @param filename [String] path of the input matrix file
# @return [String] path of the file the trees are appended to
def altered_filename(filename)
  directory, basename = File.split(filename)
  altered = "altered_#{basename}.txt"
  directory == '.' ? altered : File.join(directory, altered)
end

# Splits the matrix text into its character count, taxon rows and the range
# of string columns that hold the character states.
#
# The second whitespace-separated field of the first line is read as the
# number of characters. The first column of character data is located on the
# first taxon row as the first digit or '?' that follows whitespace; every
# other row is assumed to be aligned with it.
#
# @param text [String] full contents of the matrix file
# @return [Array(Integer, Array<String>, Range)] number of characters, taxon
#   rows (count line removed), and the exclusive range of character columns
# @raise [ArgumentError] if the count line or the character data is missing
def parse_matrix(text)
  # Accept both LF and CRLF files; a stray "\r" would otherwise be treated as
  # part of the character data.
  rows = text.split(/\r?\n/)
  count_line = rows.shift
  num_chars = count_line.to_s.split(' ')[1].to_i
  unless num_chars > 0
    raise ArgumentError, 'the first line must contain the number of characters as its second field'
  end

  first_row = rows.first
  first_column = first_row && first_row.index(/(?<=\s)[0-9?]/)
  raise ArgumentError, 'could not locate the character data on the first taxon row' if first_column.nil?

  # Exclusive end: exactly num_chars columns, not num_chars + 1.
  [num_chars, rows, first_column...(first_column + num_chars)]
end

# Finds the outgroup row: the first row whose character states are all '0'.
#
# @param rows [Array<String>] taxon rows of the matrix
# @param columns [Range] string columns holding the character states
# @return [Integer, nil] index of the outgroup row, or nil if there is none
def find_outgroup_index(rows, columns)
  rows.find_index do |row|
    states = row[columns]
    # Rows too short to reach the character data yield nil (or an empty
    # slice) and can never be the outgroup.
    states && states =~ /\A0+\z/
  end
end

# Collects the taxa scored '1' and '0' in one character column.
#
# Any other state ('?', blank, or a column past the end of a short row) is
# ignored, and the outgroup row is skipped.
#
# @param rows [Array<String>] taxon rows of the matrix
# @param column [Integer] string index of the character column
# @param outgroup_index [Integer] index of the row to skip
# @return [Array(Array<String>, Array<String>)] names scored '1', names scored '0'
def partition_taxa(rows, column, outgroup_index)
  ones = []
  zeros = []
  rows.each_with_index do |row, row_number|
    next if row_number == outgroup_index

    case row[column]
    when '1' then ones << row[TAXON_REGEX]
    when '0' then zeros << row[TAXON_REGEX]
    end
  end
  [ones, zeros]
end

# Formats one output line: a parenthesised four-taxon tree built from the
# outgroup, the first '0' taxon and the first two '1' taxa. Missing taxa are
# rendered as empty strings, as before.
#
# @param outgroup [String] name of the outgroup taxon
# @param ones [Array<String>] taxa scored '1' for the character
# @param zeros [Array<String>] taxa scored '0' for the character
# @return [String] the tree line, terminated with "\r\n"
def quartet_tree(outgroup, ones, zeros)
  "(#{outgroup},(#{zeros[0]},(#{ones[0]},#{ones[1]})));\r\n"
end

# Reads the matrix in +filename+, and for every character column that has at
# least one scored taxon appends a tree line to the "altered_" output file.
# Progress and warnings are printed to standard output.
#
# @param filename [String] path of the input matrix file
# @return [void]
# @raise [ArgumentError] if the matrix is malformed or has no all-zero row
# @raise [SystemCallError] if the input or output file cannot be opened
def process_seedlings(filename)
  # File.read closes the handle for us (the old File.open was never closed).
  num_chars, rows, columns = parse_matrix(File.read(filename))

  outgroup_index = find_outgroup_index(rows, columns)
  raise ArgumentError, 'no outgroup found: no taxon has all characters scored 0' if outgroup_index.nil?

  outgroup = rows[outgroup_index][TAXON_REGEX]
  new_filename = altered_filename(filename)

  puts 'running...'
  puts 'Getting outgroup taxon...'
  puts "#{outgroup} is the outgroup taxon"
  puts "processing file with #{num_chars} characters (#{rows.length} taxa)..."

  # Append mode is kept so repeated runs accumulate, but the file is opened
  # once instead of once per character.
  File.open(new_filename, 'a') do |out|
    columns.each_with_index do |column, offset|
      ones, zeros = partition_taxa(rows, column, outgroup_index)

      unless (ones + zeros).empty?
        # Only the first '0' taxon and the first two '1' taxa are written;
        # warn when a character has more than that.
        puts "MORE THAN ONE 0s for #{outgroup}" if zeros.length > 1
        puts "MORE THAN TWO 1s for #{outgroup}" if ones.length > 2
        out.puts(quartet_tree(outgroup, ones, zeros))
      end

      # Progress counts characters processed, not the absolute string column,
      # so it runs from 1..num_chars and ends at 100%.
      processed = offset + 1
      percent_complete = (100.0 * processed / num_chars).floor
      print("#{percent_complete}% complete: #{processed} of #{num_chars} characters processed#{' ' * 25}\r")
    end
  end

  puts "\nProcessing complete\r\nNEW FILE CREATED:  #{new_filename}"
end

if ARGV[0].nil?
  puts "ALERT: You must declare a filename:  usage should be \"ruby seedlings.rb NAME_OF_FILE\""
else
  begin
    process_seedlings(ARGV[0])
  rescue ArgumentError, SystemCallError => e
    # Report bad input as a readable message and a non-zero exit status
    # instead of an unexplained backtrace.
    abort "ALERT: #{e.message}"
  end
end
back to top