https://github.com/flora-phenotype-ontology/flopoontology
Raw File
Tip revision: cfb699302816405d6a7775ff377aaf5f05b510aa authored by Robert Hoehndorf on 06 September 2019, 01:20:44 UTC
FLOPO BioHackathon 2019 release
Tip revision: cfb6993
MakeNexusFile.groovy
// Creates a NEXUS file (TAXA and CHARACTERS blocks, printed to stdout) from the tab-separated file named by the first argument.

// First pass over the TSV named by args[0]: collect the distinct taxon names
// (first column of every row) in sorted order.
def taxa = new TreeSet()
new File(args[0]).splitEachLine("\t") { line ->
  // Add the column directly; the former assignment to an undeclared `t`
  // created a variable in the script binding as a side effect.
  taxa.add(line[0])
}

// Second pass over the same TSV: build the sorted set of characters and the
// taxon -> character -> value table.
// Columns read (0-based): 0 = taxon, 2 = e1, 3 = e2, 4 = q, 5 = trait, 6 = label.
def characters = new TreeSet()
def values = [:].withDefault { [:] } // taxon -> character -> value or taxon -> character -> null if no value
new File(args[0]).splitEachLine("\t") { line ->
  def taxon = line[0]
  def e1 = line[2]
  def e2 = line[3]
  def q = line[4]
  def trait = line[5]
  def label = line[6]
  // When column 4 merely repeats the trait, the label column is used as the value.
  if (q == trait) {
    q = label
  }
  // Convert to a plain String: a GString hashes differently from a String
  // with the same text, which makes it a fragile map key / set element.
  def character = "$e1/$e2/$trait".toString()
  characters.add(character)
  values[taxon][character] = q
}


// For every character, gather the sorted set of distinct non-null values
// observed across all taxa.
def character2value = [:].withDefault { new TreeSet() }
values.each { taxon, c2v ->
  c2v.each { c, v ->
    if (v != null) {
      character2value[c].add(v)
    }
  }
}

// One-letter state symbols in assignment order: A-Z first, then a-z.
def stateSymbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

// character -> value -> symbol printed in the matrix. Values are numbered in
// their sorted order, independently for each character.
def character2value2symbol = [:].withDefault { [:] }
character2value.each { c, v ->
  // The previous counter kept incrementing past 'z' into '{', '|', '}', ...,
  // which are not usable state symbols; fail loudly instead of emitting them.
  if (v.size() > stateSymbols.length()) {
    throw new IllegalStateException(
      "Character '" + c + "' has " + v.size() + " distinct values, but only " +
      stateSymbols.length() + " state symbols (A-Z, a-z) are available")
  }
  v.eachWithIndex { val, i ->
    character2value2symbol[c][val] = stateSymbols[i]
  }
}

// Drop sparsely described taxa: a taxon must have at least 10 characters,
// otherwise it is removed from both the taxon set and the value table.
// The names are copied into their own set first so that `values` is not
// modified while it is being scanned.
def sparseTaxa = new TreeSet(values.findAll { taxon, c2v -> c2v.size() < 10 }.keySet())
sparseTaxa.each { name ->
  taxa.remove(name)
  values.remove(name)
}

// Quotes a taxon label for NEXUS output: the label is wrapped in single
// quotes and any embedded single quote is doubled, so a name containing an
// apostrophe cannot terminate the token early.
def quoteLabel = { label -> "'" + label.toString().replace("'", "''") + "'" }

// ---- TAXA block: one quoted label per remaining taxon, in sorted order ----
println "#NEXUS"
println "BEGIN TAXA;\n\tTITLE taxa_block;\n\tDIMENSIONS  NTAX=" + taxa.size() + ";"
println "\tTAXLABELS"
print "\t\t"
taxa.each { name ->
  print(quoteLabel(name) + " ")
}
println "\t;\nEND;\n\n"

// ---- CHARACTERS block: one matrix row per taxon, one symbol per character ----
println "BEGIN CHARACTERS;"
println "\tTITLE  Untitled_Character_Matrix;"
println "\tDIMENSIONS  NCHAR=" + characters.size() + ";"
// NOTE(review): no SYMBOLS list is declared although the matrix uses letters
// as state symbols - confirm the consuming tool accepts this.
println "\tFORMAT DATATYPE=STANDARD MISSING=? RESPECTCASE;"
println "\tMATRIX"
values.each { taxon, c2v ->
  print("\t" + quoteLabel(taxon) + "    ")
  // Columns follow the sorted order of `characters`; a character without a
  // value for this taxon is written as the missing-data symbol.
  characters.each { c ->
    if (c2v[c] != null) {
      print character2value2symbol[c][c2v[c]]
    } else {
      print "?"
    }
  }
  println ""
}
println ";"
println "END;"
back to top