exercisesite.py
#!/usr/bin/env python2.7
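"""Exercise a running schema.org site over HTTP.

Loads the full schema.org RDF graph, drops any graphs excluded via the
command-line options, then fetches the page for every remaining type and
property (plus a few static pages) from the target site, retrying on
HTTP 500 responses.
"""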
import unittest
import os
from os import path, getenv
from os.path import expanduser
import logging # https://docs.python.org/2/library/logging.html#logging-levels
import glob
import argparse
import sys
import csv
sys.path.append( os.getcwd() )
sys.path.insert( 1, 'lib' ) #Pick up libs, rdflib etc., from the shipped lib directory
sys.path.insert( 1, 'sdopythonapp' ) #Pick up sdopythonapp functionality
sys.path.insert( 1, 'sdopythonapp/lib' ) #Pick up sdopythonapp libs, rdflib etc., from its shipped lib directory
sys.path.insert( 1, 'sdopythonapp/site' ) #Pick up sdopythonapp from the shipped site
# Ensure that the google.appengine.* packages are available
# in tests as well as all bundled third-party packages.
sdk_path = getenv('APP_ENGINE', expanduser("~") + '/google-cloud-sdk/platform/google_appengine/')
sys.path.insert(0, sdk_path) # add AppEngine SDK to path
import dev_appserver
dev_appserver.fix_sys_path()
from testharness import *
#Setup testharness state BEFORE importing sdo libraries
setInTestHarness(True)
from api import *
import rdflib
from rdflib.term import URIRef, Literal
from rdflib.parser import Parser
from rdflib.serializer import Serializer
from rdflib.plugins.sparql import prepareQuery, processUpdate
from rdflib.compare import graph_diff
from rdflib.namespace import RDFS, RDF
import threading
from api import inLayer, read_file, full_path, read_schemas, read_extensions, read_examples, namespaces, DataCache, getMasterStore
from apirdflib import getNss, getRevNss
from apimarkdown import Markdown
os.environ["WARMUPSTATE"] = "off"
from sdoapp import *
from sdordf2csv import sdordf2csv
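# Register the JSON-LD serializer plugin so graph.serialize(format="json-ld") is available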
rdflib.plugin.register("json-ld", Serializer, "rdflib_jsonld.serializer", "JsonLDSerializer")
parser = argparse.ArgumentParser()
parser.add_argument("-e","--exclude", default= [[]],action='append',nargs='*', help="Exclude graph(s) [core|extensions|all|bib|auto|meta|{etc} (Repeatable) - 'attic' always excluded unless explictly included")
parser.add_argument("-p","--pausetime", default=0, help="Seconds between requests")
parser.add_argument("-i","--include", default= [[]],action='append',nargs='*', help="Include graph(s) [core|extensions|all|attic|bib|auto|meta|{etc} (Repeatable) overrides exclude - 'attic' always excluded unless explictly individually included")
parser.add_argument("-s","--site", required=True, help="site")
args = parser.parse_args()
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
#sdoapp was already imported above (after setInTestHarness); bind the module name too
import sdoapp
from sdoapp import ENABLED_EXTENSIONS
STATICPAGES = ["","docs/schemas.html","docs/full.html"] # site-relative pages fetched on every run ("" is the homepage)
class Exercise(object):
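    """Walks the schema.org term graph and fetches every term page from the target site."""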
def __init__(self):
self.setSkips()
self.getGraphs()
self.loadGraphs()
self.exercise(self.outGraph)
self.exerciseStatics("")
def setSkips(self):
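        """Build self.skiplist of graph namespaces to drop, from --exclude/--include.

        'attic' is always skipped unless explicitly included by name.
        """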
self.skiplist = ['']
for e in args.exclude:
for s in e:
if s == "core":
self.skiplist.append("http://schema.org/")
elif s == "extensions":
for i in sdoapp.ENABLED_EXTENSIONS:
self.skiplist.append(getNss(i))
elif s == "all":
self.skiplist.append("http://schema.org/")
for i in sdoapp.ENABLED_EXTENSIONS:
self.skiplist.append(getNss(i))
else:
self.skiplist.append(getNss(s))
        if getNss('attic') not in self.skiplist: #Always skip attic by default
self.skiplist.append(getNss('attic'))
for e in args.include:
for s in e:
if s == "core" and "http://schema.org/" in self.skiplist:
self.skiplist.remove("http://schema.org/")
elif s == "extensions":
for i in sdoapp.ENABLED_EXTENSIONS:
if getNss(i) in self.skiplist:
self.skiplist.remove(getNss(i))
elif s == "all":
self.skiplist.remove("http://schema.org/")
for i in sdoapp.ENABLED_EXTENSIONS:
if getNss(i) in self.skiplist and getNss(i) != "attic":
self.skiplist.remove(getNss(i))
elif getNss(s) in self.skiplist:
self.skiplist.remove(getNss(s))
def getGraphs(self):
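        """Obtain the master RDF store and the full query graph."""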
self.store = getMasterStore()
self.fullGraph = getQueryGraph()
def loadGraphs(self):
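        """Bind common namespace prefixes, then prune non-schema.org and skiplisted triples."""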
self.fullGraph = getQueryGraph()
self.fullGraph.bind('owl', 'http://www.w3.org/2002/07/owl#')
self.fullGraph.bind('rdfa', 'http://www.w3.org/ns/rdfa#')
self.fullGraph.bind('dct', 'http://purl.org/dc/terms/')
self.fullGraph.bind('schema', 'http://schema.org/')
self.skipOddTriples(self.fullGraph)
for s in self.skiplist:
#print(" SKIPPING: %s" % s)
self.skipTriples(s,self.fullGraph)
self.outGraph = self.fullGraph
def skipTriples(self,skip, graph):
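        """Delete the triples of every term that is part of the graph named by 'skip'."""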
        if not skip:
            return
        if skip.endswith("/"):
            skip = skip[:-1]
        print("skip %s" % skip)
delcore="""PREFIX schema: <http://schema.org/>
DELETE {?term ?p ?o}
WHERE {
?term ?p ?o.
?term a ?t.
FILTER NOT EXISTS {?term schema:isPartOf ?x}.
}"""
delext ="""PREFIX schema: <http://schema.org/>
DELETE {?s ?p ?o}
WHERE {
?s a ?t;
schema:isPartOf <%s>.
?s ?p ?o.
}""" % skip
if skip == "http://schema.org":
q = delcore
else:
q = delext
        before = len(graph)
        processUpdate(graph,q)
        print("  removed %d triples" % (before - len(graph)))
def skipOddTriples(self, graph):
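        """Delete any triples whose subject is outside the http://schema.org namespace."""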
delf = """
DELETE {?s ?p ?o}
WHERE {
?s ?p ?o.
FILTER (! strstarts(str(?s), "http://schema.org")).
}"""
processUpdate(graph,delf)
def exercise(self, graph):
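        """Fetch the site page for every schema.org type and property found in the graph."""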
        types = {}
        props = {}
for (s,p,o) in graph.triples((None,RDF.type,RDFS.Class)):
if s.startswith("http://schema.org"):
types.update({s:graph.identifier})
for t in sorted(types.keys()):
self.access(t,types[t])
for (s,p,o) in graph.triples((None,RDF.type,RDF.Property)):
if s.startswith("http://schema.org"):
props.update({s:"http://schema.org"})
for p in sorted(props.keys()):
self.access(p,props[p])
def exerciseStatics(self, graph):
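        """Fetch each of the STATICPAGES from the target site."""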
for s in STATICPAGES:
self.access(s,graph)
def access(self, id, ext):
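        """Construct the URL for term or page 'id' on the configured site and fetch it."""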
        if id.startswith("http://schema.org"):
            id = id[len("http://schema.org/"):] # strip the scheme and host, leaving the bare term name
ext = ""
#ext = getRevNss(str(ext))
#if ext == "core":
# ext = ""
#else:
# ext = ext + "."
site = args.site
scheme = "http://"
if site.startswith("http://"):
site = site[7:]
elif site.startswith("https://"):
site = site[8:]
scheme = "https://"
#log.info("%s %s %s %s" % (scheme,ext,site,id))
path = "%s%s%s/%s" % (scheme,ext,site,id)
self.fetch(path)
def fetch(self, url):
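        """GET 'url', printing status and timing; retry on HTTP 500, pausing between attempts."""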
import urllib2
import time,datetime
success = False
fivehundred = 0
while not success:
load_start = datetime.datetime.now()
try:
sys.stdout.write(url)
sys.stdout.flush()
r = urllib2.urlopen(url)
print(" %s %s" % (r.getcode(), str(datetime.datetime.now()-load_start)))
success = True
except urllib2.HTTPError as e:
print(" got error: {} - {}".format(e.code, e.reason))
if e.code == 500:
fivehundred += 1
                time.sleep(args.pausetime)
                # Stop on any non-500 error; otherwise retry, giving up after more than five consecutive 500s
                if not fivehundred or fivehundred > 5:
                    break
return
if __name__ == "__main__":
ex = Exercise()