Revision 6740d3ab73923164703a0ba2b11fc0294e8ea12e authored by John Chilton on 04 July 2020, 14:16:28 UTC, committed by John Chilton on 04 July 2020, 14:16:28 UTC
1 parent 78273d2
parse_builds.py
#!/usr/bin/env python
"""
Connects to the URL specified and outputs the builds available at that
DSN in tabular format. The UCSC Main gateway is used as the default.
Each output line has the form: build<TAB>description (build)
"""
from __future__ import print_function
import sys
import xml.etree.ElementTree as ElementTree
import requests
def getbuilds(url):
    """
    Yield ``[build, description]`` pairs harvested from a DAS DSN listing.

    The document at ``url`` is fetched and parsed as XML. For every child of
    the root element, the ``id`` attribute of its ``SOURCE`` element is used
    as the build, and the text of its ``DESCRIPTION`` element as the
    description. The strings " - Genome at UCSC" and " Genome at UCSC" are
    removed from the description, and runs of consecutive identical words are
    collapsed into a single word.

    Side effects: two header lines ("#Harvested from <url>" and the
    "unspecified" placeholder row) are printed to stdout before the first
    pair is yielded. If the URL cannot be fetched, or the response is not
    valid XML, a "#..." comment line and the placeholder row are printed and
    the process exits with status 1 via ``sys.exit``.

    This is a generator: nothing is fetched or printed until it is iterated.
    """
    # Top-level script boundary: any failure to fetch is reported on stdout
    # in the same tabular format and turned into a non-zero exit status.
    try:
        text = requests.get(url).text
    except Exception:
        print("#Unable to open " + url)
        print("?\tunspecified (?)")
        sys.exit(1)

    try:
        tree = ElementTree.fromstring(text)
    except Exception:
        print("#Invalid xml passed back from " + url)
        print("?\tunspecified (?)")
        sys.exit(1)

    print("#Harvested from " + url)
    print("?\tunspecified (?)")
    for dsn in tree:
        build = dsn.find("SOURCE").attrib['id']
        description = dsn.find("DESCRIPTION").text.replace(" - Genome at UCSC", "").replace(" Genome at UCSC", "")

        # Collapse consecutive repeated words ("Human Human hg19" ->
        # "Human hg19"). A new list is built instead of popping from the list
        # being indexed: popping shrank the list underneath a range() sized
        # for the original length, which raised IndexError whenever the
        # repeat was not the final pair of words.
        words = []
        for word in description.split(" "):
            if not words or word != words[-1]:
                words.append(word)

        yield [build, " ".join(words)]
if __name__ == "__main__":
    # The DSN URL may be given as the first command-line argument; otherwise
    # the UCSC DAS listing is queried.
    URL = sys.argv[1] if len(sys.argv) > 1 else "http://genome.cse.ucsc.edu/cgi-bin/das/dsn"

    # One tab-separated row per build: "<build>\t<description> (<build>)".
    for build_id, description in getbuilds(URL):
        print("%s\t%s (%s)" % (build_id, description, build_id))
Computing file changes ...