https://github.com/dellch/forester
Tip revision: 03eea349b23722447a20edcb903248a5a8b764b4 authored by Chris on 18 September 2017, 03:43:22 UTC
Update trees.rb
Update trees.rb
Tip revision: 03eea34
trees.rb
################################################
## ##
##. Represents conventional binary matrix as ##
## an array of the Newick trees. ##
##. ##
## USAGE ##
## type: "ruby trees.rb NAME_OF_FILE" ##
## from command line in the directory ##
## of the file you're using ##
## ##
################################################
filename = ARGV[0]
unless filename.nil?
file = nil
File.open(filename){|f| file = f.read}
#create blank files
file_no_poly = "NO_POLY_"+filename+".txt"
file_with_poly = "WITH_POLY_"+filename+".txt"
file_additional = "ADDITIONAL_FILE_"+filename+".txt"
array = file.split(/\n/)
#get number of characters from first line of array
num_chars = array.delete_at(0).split(' ')[1].to_i
begin_char_index = array[0].index(/(?<=\s)[0-9?]/)
last_char_index = begin_char_index + num_chars
taxon_regex = /[A-Za-z_\(\)0-9]+/
out = array.pop.match(taxon_regex)[0]
puts 'running...'
puts "#{num_chars.to_s} characters to process"
#from first character, to last character
(begin_char_index..last_char_index).each do |index|
ones = []
zeros = []
#use same spot in each line of array
array.each do |line|
spot = line[index]
next if spot == '?' || spot == ' ' #ignore ? spots
case spot
when '1'
ones.push(line.match(taxon_regex)[0])
when '0'
zeros.push(line.match(taxon_regex)[0])
end
end
#append to file after you've done each line
#this is how things should look:
#################################################################################################
# Matrix
#
# A 010011
# B 010011
# C 110010
# D 100110
# Out 000000
#
# I. Expected tree outputs:
#
# a. No POLY (exclude polytomies in output):
#
# (Out,(A,B,(C,D))); - first character,
# (Out,(D,(A,B,C))); - second character,
# (Out,(A,B,C,D)); - fifth character,
# (Out,(C,D,(A,B))); - character six,
#
# Characters three and four are polytomies = must be skipped!
#
# b. With POLY (include polytomies in output):
#
# (Out,(A,B,(C,D))); - first character,
# (Out,(D,(A,B,C))); - second character,
# (Out,A,B,C,D); - third character,
# (Out,A,B,C,D); - fourth character,
# (Out,(A,B,C,D)); - fifth character,
# (Out,(C,D,(A,B))); - character six,
#
# So, all characters are represented in Newick form - neither are
# skipped = polytomies are included.
#
# c. ADDITIONAL:
#
# (Out,A,B,(C,D)); - first character,
# (Out,D,(A,B,C)); - second character,
# (Out,(A,B,C,D)); - fifth character,
# (Out,C,D,(A,B)); - character six,
#
# Characters three and four are polytomies = must be skipped!
################################################################################
unless ones.empty? and zeros.empty?
if ones.length <= 1
out_with_poly = "(#{([out]+ones+zeros).join(',')});\r\n"
File.open(file_with_poly, 'a'){|f| f.puts(out_with_poly)}
else
if zeros.length == 0
out_with_poly = out_no_poly = out_additional = "(#{out},(#{ones.join(',')}));\r\n"
File.open(file_with_poly, 'a'){|f| f.puts(out_with_poly)}
File.open(file_no_poly, 'a'){|f| f.puts(out_no_poly)}
File.open(file_additional, 'a'){|f| f.puts(out_additional) }
else
out_no_poly = out_with_poly = "(#{out},(#{ zeros.join(',') },(#{ ones.join(',') })));\r\n"
File.open(file_no_poly, 'a'){|f| f.puts(out_no_poly)}
File.open(file_with_poly, 'a'){|f| f.puts(out_with_poly)}
out_additional = "(#{([out]+zeros).join(',')},(#{ones.join(',')}));\r\n"
File.open(file_additional, 'a'){|f| f.puts(out_additional) }
end
end
end
percent_complete = (100.0*index/num_chars).floor
print "#{percent_complete}% complete - lines processed: #{index.to_s} out of #{num_chars.to_s}\r"
end
puts "\nProcessing complete."
puts "NEW FILE CREATED: #{file_no_poly}"
puts "NEW FILE CREATED: #{file_with_poly}"
puts "NEW FILE CREATED: #{file_additional}"
else
puts "ALERT: You must declare a filename: usage should be \"ruby trees.rb NAME_OF_FILE\""
end