https://github.com/schemaorg/schemaorg
Tip revision: 3a6f7bf7fba215323eaacc965bd4338d87d07c68 authored by Dataliberate on 11 February 2019, 10:34:06 UTC
Tweak
Tweak
Tip revision: 3a6f7bf
sdordf2csv.py
import csv
import rdflib
from rdflib.namespace import RDFS, RDF, OWL
from rdflib.term import URIRef
import threading
import api
from apimarkdown import Markdown
from apirdflib import RDFLIBLOCK
import logging
# Configure the root logger at INFO level when this module is imported.
logging.basicConfig(level=logging.INFO) # dev_appserver.py --log_level debug .
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class sdordf2csv():
    """Write the types and properties found in rdflib graphs out as CSV.

    Two graphs are used: ``queryGraph`` selects which terms are exported and
    supplies their directly-read values (label, equivalents, isPartOf, ...),
    while ``fullGraph`` is queried for comments and related terms (super/sub
    types, properties, domains, ranges, supersedes/supersededBy).

    All graph access is serialised through the module-level ``RDFLIBLOCK``.
    The SPARQL text uses the ``rdf:``, ``rdfs:`` and ``schema:`` prefixes
    without declaring them, so the graphs are assumed to have those
    namespaces bound -- TODO confirm against the code that builds the graphs.
    """

    def __init__(self, queryGraph=None, fullGraph=None, markdownComments=True, excludeAttic=False):
        """Store the graphs and output options.

        queryGraph: graph used to select terms and read their direct values.
        fullGraph: graph used for comments and related-term lookups.
        markdownComments: when true, comments are passed through Markdown.parse.
        excludeAttic: when true, terms that are part of the attic are left out.
        """
        self.setqueryGraph(queryGraph)
        self.setfullGraph(fullGraph)
        self.setexcludeAttic(excludeAttic)
        self.setmarkdownComments(markdownComments)

    def setqueryGraph(self, graph=None):
        """Set the graph used to select the terms to export."""
        self.queryGraph = graph

    def setfullGraph(self, graph=None):
        """Set the graph used for comments and related-term lookups."""
        self.fullGraph = graph

    def setexcludeAttic(self, state):
        """Enable or disable attic exclusion.

        The attic URI is read from api.SdoConfig.atticUri(); when that is
        empty there is nothing to filter on, so exclusion is forced off.
        """
        self.excludeAttic = state
        self.attic = api.SdoConfig.atticUri()
        if not self.attic:
            self.excludeAttic = False

    def setmarkdownComments(self, state):
        """Choose whether comments are run through Markdown.parse."""
        self.markdown = state

    def doQuery(self, graph=None, query=None):
        """Run ``query`` on ``graph`` under RDFLIBLOCK and return the rows as a list."""
        # Acquire outside the try block: if acquire() itself fails we must
        # not call release() on a lock we never obtained.
        RDFLIBLOCK.acquire()
        try:
            return list(graph.query(query))
        finally:
            RDFLIBLOCK.release()

    def _atticFilter(self, var):
        """Return a SPARQL filter excluding ``?var`` values in the attic ('' when disabled)."""
        if not self.excludeAttic:
            return ""
        return "FILTER NOT EXISTS {?%s schema:isPartOf <%s>.}" % (var, self.attic)

    def _joinedQuery(self, graph, query, var):
        """Run ``query`` and return the ``var`` binding of every row joined with ', '."""
        rows = self.doQuery(graph, query)
        return ', '.join([getattr(row, var) for row in rows])

    def _csvRow(self, row):
        """Return ``row`` in the form the csv writer of the running Python expects.

        Python 2's csv module works on byte strings, so unicode cells are
        encoded as UTF-8 there (cells that are already byte strings are left
        alone). On Python 3 the cells are written as text unchanged.
        """
        if str is bytes:  # true only on Python 2
            text_type = type(u"")
            return [s.encode('utf-8') if isinstance(s, text_type) else s for s in row]
        return list(row)

    def outputCSVtypes(self, file):
        """Write a header plus one CSV row per non-property term of the vocabulary to ``file``."""
        atticfilter = ""
        if self.excludeAttic:
            atticfilter = "FILTER NOT EXISTS {?term schema:isPartOf <%s>}." % self.attic
        query = ('''select ?term where {
            ?term a ?type.
            FILTER NOT EXISTS {?term a rdf:Property}.
            FILTER (strstarts(str(?term),'%s')).
            %s
            }
            ORDER BY ?term
            ''') % (api.SdoConfig.vocabUri(), atticfilter)
        types = self.doQuery(self.queryGraph, query)
        self.type2CSV(header=True, out=file)
        for t in types:
            self.type2CSV(term=t.term, header=False, out=file, graph=self.queryGraph)

    def outputCSVproperties(self, file):
        """Write a header plus one CSV row per labelled rdf:Property of the vocabulary to ``file``."""
        atticfilter = ""
        if self.excludeAttic:
            atticfilter = "FILTER NOT EXISTS {?term schema:isPartOf <%s>}." % self.attic
        query = ('''select ?term where {
            ?term a rdf:Property.
            FILTER EXISTS {?term rdfs:label ?l}.
            FILTER (strstarts(str(?term),'%s')).
            %s
            }
            ORDER BY ?term''') % (api.SdoConfig.vocabUri(), atticfilter)
        # Go through doQuery so this query is serialised by RDFLIBLOCK like
        # every other graph access in this class.
        props = self.doQuery(self.queryGraph, query)
        self.prop2CSV(header=True, out=file)
        for t in props:
            self.prop2CSV(term=t.term, header=False, out=file, graph=self.queryGraph)

    def prop2CSV(self, term=None, header=True, out=None, graph=None):
        """Write either the property header row or the row for ``term`` to ``out``.

        term: URI of the property (ignored when ``header`` is true).
        header: when true only the column names are written.
        out: writable file-like object; nothing happens when it is missing.
        graph: graph for directly-read values; falls back to self.queryGraph.
        """
        cols = ["id","label","comment","subPropertyOf","equivalentProperty","subproperties","domainIncludes","rangeIncludes","inverseOf","supersedes","supersededBy","isPartOf"]
        if not out:
            return
        writer = csv.writer(out, quoting=csv.QUOTE_ALL, lineterminator='\n')
        if header:
            writer.writerow(cols)
            return
        if not graph:
            graph = self.queryGraph
        if term is None or graph is None:
            return
        # Same coercion as type2CSV, so plain-string URIs are accepted too.
        if not isinstance(term, URIRef):
            term = URIRef(term)
        row = [str(term)]
        row.append(self.graphValueToCSV(subject=term, predicate=RDFS.label, graph=graph))
        row.append(self.getCSVComment(term, graph=self.fullGraph))
        row.append(self.getCSVSuperProperties(term, graph=self.fullGraph))
        row.append(self.graphValueToCSV(subject=term, predicate=OWL.equivalentProperty, graph=graph))
        row.append(self.getCSVSubProperties(term, graph=self.fullGraph))
        row.append(self.getCSVDomainIncludes(term, graph=self.fullGraph))
        row.append(self.getCSVRangeIncludes(term, graph=self.fullGraph))
        row.append(self.graphValueToCSV(subject=term, predicate=URIRef("http://schema.org/inverseOf"), graph=graph))
        row.append(self.getCSVsuperseds(term, graph=self.fullGraph))
        row.append(self.getCSVSupersededBy(term, graph=self.fullGraph))
        # The header declares a final "isPartOf" column; emit its value so
        # data rows have the same width as the header.
        row.append(self.graphValueToCSV(subject=term, predicate=URIRef("http://schema.org/isPartOf"), graph=graph))
        writer.writerow(self._csvRow(row))

    def type2CSV(self, term=None, header=True, out=None, graph=None):
        """Write either the type header row or the row for ``term`` to ``out``.

        term: URI of the type (ignored when ``header`` is true).
        header: when true only the column names are written.
        out: writable file-like object; nothing happens when it is missing.
        graph: graph for directly-read values; falls back to self.queryGraph.
        """
        cols = ["id","label","comment","subTypeOf","enumerationtype","equivalentClass","properties","subTypes","supersedes","supersededBy","isPartOf"]
        if not out:
            return
        writer = csv.writer(out, quoting=csv.QUOTE_ALL, lineterminator='\n')
        if header:
            writer.writerow(cols)
            return
        if not graph:
            graph = self.queryGraph
        if term is None or graph is None:
            return
        if not isinstance(term, URIRef):
            term = URIRef(term)
        # The "enumerationtype" column holds the term's rdf:type unless that
        # type's URI ends in "#Class", in which case the cell is left blank.
        enumType = self.graphValueToCSV(subject=term, predicate=RDF.type, graph=graph)
        if enumType.endswith("#Class"):
            enumType = ""
        row = [str(term)]
        row.append(self.graphValueToCSV(subject=term, predicate=RDFS.label, graph=graph))
        row.append(self.getCSVComment(term, graph=self.fullGraph))
        row.append(self.getCSVSupertypes(term, graph=self.fullGraph))
        row.append(enumType)
        row.append(self.graphValueToCSV(subject=term, predicate=OWL.equivalentClass, graph=graph))
        row.append(self.getCSVTypeProperties(term, graph=self.fullGraph))
        row.append(self.getCSVSubtypes(term, graph=self.fullGraph))
        row.append(self.getCSVsuperseds(term, graph=self.fullGraph))
        row.append(self.getCSVSupersededBy(term, graph=self.fullGraph))
        row.append(self.graphValueToCSV(subject=term, predicate=URIRef("http://schema.org/isPartOf"), graph=graph))
        writer.writerow(self._csvRow(row))

    def graphValueToCSV(self, subject=None, predicate=None, object=None, graph=None):
        """Return graph.value(...) for the given pattern as a string, or '' when there is none."""
        RDFLIBLOCK.acquire()
        try:
            value = graph.value(subject=subject, predicate=predicate, object=object)
        finally:
            RDFLIBLOCK.release()
        # Test for a missing value before converting: str(None) would
        # otherwise turn "no value" into the text "None".
        if value is None:
            return ""
        return str(value)

    def getCSVSupertypes(self, term=None, graph=None):
        """Return the in-vocabulary rdfs:subClassOf targets of ``term``, joined with ', '."""
        query = '''select ?sup where{
            <%s> rdfs:subClassOf ?sup.
            FILTER (strstarts(str(?sup),'%s')). }
            ORDER BY ?sup''' % (term, api.SdoConfig.vocabUri())
        return self._joinedQuery(graph, query, "sup")

    def getCSVTypeProperties(self, term=None, graph=None):
        """Return the properties whose domainIncludes is ``term`` or a class it
        reaches through rdfs:subClassOf (zero or more steps), joined with ', '."""
        query = '''select DISTINCT ?prop where{
            ?term (^rdfs:subClassOf*) <%s>.
            ?prop <http://schema.org/domainIncludes> ?term.
            %s
            }
            ORDER BY ?prop''' % (term, self._atticFilter("prop"))
        return self._joinedQuery(graph, query, "prop")

    def getCSVSubtypes(self, term=None, graph=None):
        """Return the direct rdfs:subClassOf children of ``term``, joined with ', '."""
        query = '''select ?sub where{
            ?sub rdfs:subClassOf <%s>.
            %s
            }
            ORDER BY ?sub''' % (term, self._atticFilter("sub"))
        return self._joinedQuery(graph, query, "sub")

    def getCSVSupersededBy(self, term=None, graph=None):
        """Return the terms that supersede ``term``, joined with ', '."""
        # The filter must name ?sup, the variable this query binds; filtering
        # on an unbound variable would not test the selected term at all.
        query = '''select ?sup where{
            <%s> schema:supersededBy ?sup.
            %s
            }
            ORDER BY ?sup''' % (term, self._atticFilter("sup"))
        return self._joinedQuery(graph, query, "sup")

    def getCSVsuperseds(self, term=None, graph=None):
        """Return the terms that ``term`` supersedes, joined with ', '."""
        query = '''select ?sup where{
            ?sup schema:supersededBy <%s>.
            %s
            }
            ORDER BY ?sup''' % (term, self._atticFilter("sup"))
        return self._joinedQuery(graph, query, "sup")

    def getCSVSuperProperties(self, term=None, graph=None):
        """Return the in-vocabulary rdfs:subPropertyOf targets of ``term``, joined with ', '."""
        query = '''select ?sup where{
            <%s> rdfs:subPropertyOf ?sup.
            FILTER (strstarts(str(?sup),'%s')) }
            ORDER BY ?sup''' % (term, api.SdoConfig.vocabUri())
        return self._joinedQuery(graph, query, "sup")

    def getCSVSubProperties(self, term=None, graph=None):
        """Return the direct rdfs:subPropertyOf children of ``term``, joined with ', '."""
        query = '''select ?sub where{
            ?sub rdfs:subPropertyOf <%s>.
            %s
            }
            ORDER BY ?sub''' % (term, self._atticFilter("sub"))
        return self._joinedQuery(graph, query, "sub")

    def getCSVDomainIncludes(self, term=None, graph=None):
        """Return the domainIncludes values of ``term``, joined with ', '."""
        query = '''select ?type where{
            <%s> <http://schema.org/domainIncludes> ?type.
            %s
            }
            ORDER BY ?type''' % (term, self._atticFilter("type"))
        return self._joinedQuery(graph, query, "type")

    def getCSVRangeIncludes(self, term=None, graph=None):
        """Return the rangeIncludes values of ``term``, joined with ', '."""
        query = '''select ?type where{
            <%s> <http://schema.org/rangeIncludes> ?type.
            %s
            }
            ORDER BY ?type''' % (term, self._atticFilter("type"))
        return self._joinedQuery(graph, query, "type")

    def getCSVComment(self, term=None, graph=None):
        """Return the rdfs:comment values of ``term`` joined with ', '.

        When markdown output is enabled the text is passed through
        Markdown.parse with the vocabulary URI set as its prefix.
        """
        query = '''select ?com where{
            <%s> rdfs:comment ?com.
            }''' % term
        ret = self._joinedQuery(graph, query, "com")
        if self.markdown:
            Markdown.setPre(api.SdoConfig.vocabUri())
            try:
                ret = Markdown.parse(ret)
            finally:
                # Markdown is shared state: always restore its prefix, even
                # when parsing raises.
                Markdown.setPre()
        return ret