Revision 3fe76ec539cccabcea18e0f150b9a9f7931320be authored by RichardWallis on 21 July 2020, 09:03:17 UTC, committed by RichardWallis on 21 July 2020, 09:03:17 UTC
1 parent 62316af
Raw File
rdfa2nq
#!/usr/bin/env python
#
# "rdfa2nq somefile someURI" - reads a UTF-8 RDFa file and emits n-quads using the specified URI.
# 
# Example usage:
#
# ./scripts/rdfa2nq data/releases/2.1/schema.rdfa http://schema.org/version/2.1/ 
# 
# Example output:
#
# <http://schema.org/deathPlace> <http://www.w3.org/2000/01/rdf-schema#label> "deathPlace" <http://schema.org/version/2.1/> .
# <http://schema.org/BreadcrumbList> <http://www.w3.org/2000/01/rdf-schema#subClassOf> <http://schema.org/ItemList> <http://schema.org/version/2.1/> .

# see also http://rdflib.readthedocs.org/en/latest/apidocs/rdflib.html#rdflib.graph.ConjunctiveGraph
# http://rdflib.readthedocs.org/en/latest/apidocs/rdflib.html#rdflib.graph.Dataset

import sys
from rdflib import Graph, ConjunctiveGraph, Dataset, URIRef


def _emit(data):
    """Write *data* to standard output as raw bytes on Python 2 and 3."""
    if not isinstance(data, bytes):
        data = data.encode("utf-8")
    # Python 3 text streams reject bytes; use the underlying binary buffer
    # when there is one, otherwise (Python 2) write to the stream directly.
    getattr(sys.stdout, "buffer", sys.stdout).write(data)


def main(argv):
    """Convert an RDFa file to n-quads on standard output.

    argv is the full argument vector: argv[1] is the RDFa file to read and
    argv[2] is the URI of the named graph the parsed triples are placed in.
    Additional arguments are ignored.

    Returns 0 on success, or 2 (after printing a usage line to standard
    error) when fewer than two arguments were supplied.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else "rdfa2nq"
        sys.stderr.write("usage: %s somefile someURI\n" % prog)
        return 2

    fn = argv[1]
    graph_uri = argv[2]

    dataset = Dataset()  # a kind of ConjunctiveGraph
    # pgraph: target for error and warning triples; the "processor graph" in
    # RDFa spec. parlance. If set to None, these triples are ignored.
    # To inspect them, serialize this graph (e.g. format="nt").
    processor_graph = Graph()

    sdo_graph = dataset.graph(URIRef(graph_uri))
    sdo_graph.parse(fn, format='rdfa', pgraph=processor_graph)

    # Same bytes the original print statements produced: a header line, then
    # the serialized dataset followed by a newline.
    _emit(b"# data: \n")
    _emit(dataset.serialize(format="nquads", encoding="utf-8"))
    _emit(b"\n")
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))



# Use with extension
# ./scripts/rdfa2nq data/ext/bib/bsdo-1.0.rdfa http://bib.schema.org/#v2.1 > data/releases/2.1/ext-bib.nq
# ./scripts/rdfa2nq data/ext/bib/comics.rdfa http://bib.schema.org/#v2.1 >> data/releases/2.1/ext-bib.nq
# ./scripts/rdfa2nq data/ext/auto/auto.rdfa http://auto.schema.org/#v2.1 > data/releases/2.1/ext-auto.nq

# data/releases/2.1[* master0|0] $ cat ext-* core.nq > all-layers.nq
back to top