https://github.com/dellch/forester
Raw File
Tip revision: 03eea349b23722447a20edcb903248a5a8b764b4 authored by Chris on 18 September 2017, 03:43:22 UTC
Update trees.rb
Tip revision: 03eea34
trees.rb
################################################
##                                            ##
##  Represents a conventional binary matrix   ##
##  as an array of Newick trees.              ##
##                                            ##
##               USAGE                        ##
##  type: "ruby trees.rb NAME_OF_FILE"        ##
##  from command line in the directory        ##
##  of the file you're using                  ##
##                                            ##
################################################

filename = ARGV[0]

unless filename.nil?
  file = nil
  File.open(filename){|f| file = f.read}

  #create blank files
  file_no_poly = "NO_POLY_"+filename+".txt"
  file_with_poly = "WITH_POLY_"+filename+".txt"
  file_additional = "ADDITIONAL_FILE_"+filename+".txt"

  array = file.split(/\n/)
  #get number of characters from first line of array
  num_chars = array.delete_at(0).split(' ')[1].to_i
  begin_char_index = array[0].index(/(?<=\s)[0-9?]/)
  last_char_index = begin_char_index + num_chars
  taxon_regex = /[A-Za-z_\(\)0-9]+/
  out = array.pop.match(taxon_regex)[0]
  puts 'running...'
  puts "#{num_chars.to_s} characters to process"
  #from first character, to last character
  (begin_char_index..last_char_index).each do |index|
    ones = []
    zeros = []

    #use same spot in each line of array
    array.each do |line|
      spot = line[index]
      next if spot == '?' || spot == ' '  #ignore ? spots
      case spot
        when '1'
          ones.push(line.match(taxon_regex)[0])
        when '0'
          zeros.push(line.match(taxon_regex)[0])
      end
    end

    #append to file after you've done each line
    #this is how things should look:
    #################################################################################################
    # Matrix
    #
    # A      010011
    # B      010011
    # C      110010
    # D      100110
    # Out    000000
    #
    # I. Expected tree outputs:
    #
    # a. No POLY (exclude polytomies in output):
    #
    #     (Out,(A,B,(C,D))); - first character,
    #     (Out,(D,(A,B,C))); - second character,
    #     (Out,(A,B,C,D)); - fifth character,
    #     (Out,(C,D,(A,B))); - character six,
    #
    #     Characters three and four are polytomies = must be skipped!
    #
    # b. With POLY (include polytomies in output):
    #
    #     (Out,(A,B,(C,D))); - first character,
    #     (Out,(D,(A,B,C))); - second character,
    #     (Out,A,B,C,D); - third character,
    #     (Out,A,B,C,D); - fourth character,
    #     (Out,(A,B,C,D)); - fifth character,
    #     (Out,(C,D,(A,B))); - character six,​
    #
    #     So, all characters are represented in Newick form - neither are
    #     skipped = polytomies are included.
    #
    #     c. ADDITIONAL:
    #
    #     (Out,A,B,(C,D)); - first character,
    #     (Out,D,(A,B,C)); - second character,
    #     (Out,(A,B,C,D)); - fifth character,
    #     (Out,C,D,(A,B)); - character six,
    #
    #     Characters three and four are polytomies = must be skipped!
    ################################################################################
    unless ones.empty? and zeros.empty?
      if ones.length <= 1
        out_with_poly = "(#{([out]+ones+zeros).join(',')});\r\n"
        File.open(file_with_poly, 'a'){|f| f.puts(out_with_poly)}
      else
        if zeros.length == 0
          out_with_poly = out_no_poly = out_additional = "(#{out},(#{ones.join(',')}));\r\n"
          File.open(file_with_poly, 'a'){|f| f.puts(out_with_poly)}
          File.open(file_no_poly,   'a'){|f| f.puts(out_no_poly)}
          File.open(file_additional, 'a'){|f| f.puts(out_additional) }
        else
          out_no_poly = out_with_poly = "(#{out},(#{ zeros.join(',') },(#{ ones.join(',') })));\r\n"
          File.open(file_no_poly,   'a'){|f| f.puts(out_no_poly)}
          File.open(file_with_poly, 'a'){|f| f.puts(out_with_poly)}
          out_additional = "(#{([out]+zeros).join(',')},(#{ones.join(',')}));\r\n"
          File.open(file_additional, 'a'){|f| f.puts(out_additional) }
        end
      end
    end

    percent_complete = (100.0*index/num_chars).floor
    print "#{percent_complete}% complete - lines processed: #{index.to_s} out of #{num_chars.to_s}\r"
  end
  puts "\nProcessing complete."
  puts "NEW FILE CREATED:  #{file_no_poly}"
  puts "NEW FILE CREATED:  #{file_with_poly}"
  puts "NEW FILE CREATED:  #{file_additional}"
else
  puts "ALERT: You must declare a filename:  usage should be \"ruby trees.rb NAME_OF_FILE\""
end
back to top