https://github.com/shexSpec/shex.js
Raw File
Tip revision: e0072d7d1842b53234aff90dea73d2f7abf5a410 authored by Eric Prud'hommeaux on 29 September 2018, 07:05:24 UTC
Merge branch 'shex-next'
Tip revision: e0072d7
bnf
# Yacker grammar for (Sh)ape (Ex)pressions (C)ompact language
# 
# Copyright 2015, Eric Prud'hommeaux, Harold Solbrig, Iovka Boneva, Jose Labra Gayo
# All rights reserved; please contact copyright holders for any use outside of the ShEx semantics.
# http://www.w3.org/2005/01/yacker/uploads/ShEx2/bnf
#
# yacker: https://www.w3.org/2005/01/yacker/uploads/ShEx2?lang=perl
# repo: https://github.com/shexSpec/spec/
# branch: gh-pages
# path: index.html
# TODO:
#   annotations on shapes
#   look hard at EXTERNAL valueClassDefinitions
# 
# Changes from yacker ShEx2:
# 
# + '*' in 2nd pos of REPEAT_RANGE so e.g. {2,*} === {2,}
# ~ code names are iris. this changes existing semantic actions as follows:
#   <S> { :p1 . %GenX{ blah blah blah %} }
#   PREFIX GenX: <http://...GenX> <S> { :p1 . %GenX:{ blah blah blah %} }
#   empty code names %{ ... %} are parsed as empty relative URLs: %<>{ ... %}
# + ATPNAME_NS, ATPNAME_LN terminals to parse e.g. @ex:foo which looks like a LANGTAG
# ~ a is an iri (not just for predicates) enabling e.g. <reif> { rdf:predicate (a) }
# ~ groupShapeConstr can only have ORs (no ANDs) so no <S1> { :p1 @<S2> AND @<S3> }
# + includeSet parallels inclPropertySet (&<A><B><C>) e.g. <S3> &<S1> <S2> { ... }
# + factored out (labeled) productions for startActions and semanticActions
# ~ BNodes can have stringFacets
# - ANON terminal (not referenced)
# ~ tripleConstraint: swapped position of annotations and cardinality (EGP 20150727)
# + valueClass: add xsFacet* to datatype (EGP 20150727)
# + escapes (\\, \uxxxx, \Uxxxxxxxx) in CODE (EGP 20150730)
# ~ moved | RDF_TYPE from iri: to predicate: (disables e.g. %a{ ... %}) (EGP 20150807)
# + tried to intercept non-terminated STRING_LITERAL_LONG{1,2} (EGP 20150807)
# - unaryShape: id (EGP 20150818)
# + unaryShape: annotation* (EGP 20150818)
# + valueClassLabel, valueClassDefinition, valueClassExpr, valueClassOrRef (EGP 20150818)
# + factored out notStartAction (EGP 20150818)
# - oneOf (EGP 20150820)
# ~ split out encapsulatedShape (EGP 20150831)
# - numericRange takes a numericLiteral, was INTEGER (EGP 20150917)
# + innerShape ::= multiElementGroup | multiElementSomeOf (EGP 20151022)
# ~ annotation ::= ';' predicate (iri | literal) -- was iri (EGP 20151022)
# + generalized AND/OR expressions on valueClasses (EGP 20151031)
# - groupShapeConstr (shapeOrRef (OR shapeOrRef)*) (EGP 20151031)
# + stringFacet* on negatableValueClass ::= shapeOrRef (EGP 20151101)
# ~ vcand and vcor are mutually exclusive (i.e. not recursive) (EGP 20151111)
# ~ s/valueClassDefinition/valueExprDefinition/ (EGP 20151120)
# ~ s/valueClassLabel/valueExprLabel/ (EGP 20151120)
# ~ change valueSet delimiters from ()s to []s (EGP 20160104)
# ~ codeDecls can have no code (implies code ref) (EGP 20160128)
# + ()s around valueExprs (EGP 20160520)
# + shapeDefinition += nonLiteralKind? stringFacet* (EGP 20160520)
# - shape == "VIRTUAL"? (EGP 20160520)
# + ';' separator for groups (EGP 20160524)
# ~ s{;}{//} for annotations (EGP 20160524)
# + ("AND" shapeDefinition)* (EGP 20160524)
# + ("OR" shapeDefinition)* (EGP 20160615)
# ~ generalized shapeExpression to include valueExprDefinition (EGP 20160708)
# ~ reordered (EGP 20160913)
# ~ s/ShapeDisjunction/shapeOr/ (EGP 20160913)
# ~ s/shapeConjunction/shapeAnd/ (EGP 20160913)
# ~ s/negShapeAtom/shapeNot/ (EGP 20160913)
# ~ s/inclPropertySet/extraPropertySet/ (EGP 20160913)
# ~ s/value/valueSetValue/ (EGP 20160913)
# ~ updated production labels (EGP 20160913)
# ~ reorder to align with spec (EGP 20160930)
# ~ s/someOfShape/someOfTripleExpr/ (EGP 20160930)
# ~ s/innerShape/innerTripleExpr/ (EGP 20160930)
# ~ s/groupShape/groupTripleExpr/ (EGP 20160930)
# ~ s/unaryShape/unaryTripleExpr/ (EGP 20160930)
# ~ s/encapsulatedShape/bracketedTripleExpr/ (EGP 20160930)
# ~ s/SomeOf/OneOf/g (EGP 20161201)
# ~ s/shapeLabel/shapeExprLabel/? (EGP 20170228)
# + tripleExprLabel (EGP 20170228)
# - ^^-style facet arguments per shex#41 (EGP 20170303)
# ~ switch to ~/regexp/ (EGP 20170303)
# ~ s/shapeExpression/inlineShapeExpression/
# + NOTs on shapeExpression
# + shapeOr += ("AND" shapeNot)+
# + shapeAtomNoRef
# ~ nodeConstraint split into litNodeConstraint and nonLitNodeConstraint
# - tripleExprLabel -= '$'
# ~ align REGEXP with XML Schema

# Notation:
#   in-line terminals in ""s are case-insensitive
#   production numbers ending in t or s are from Turtle or SPARQL.

# --- Document structure ---
# A shexDoc is any number of directives, optionally followed by either one
# notStartAction or a run of startActions, and then any number of statements.
# Leading CODE (one or more codeDecl) is captured in startActions.
[1]     shexDoc               ::= directive* ((notStartAction | startActions) statement*)?
[2]     directive             ::= baseDecl | prefixDecl
[3]     baseDecl              ::= "BASE" IRIREF
[4]     prefixDecl            ::= "PREFIX" PNAME_NS IRIREF
[5]     notStartAction        ::= start | shapeExprDecl
[6]     start                 ::= "start" '=' inlineShapeExpression
[7]     startActions          ::= codeDecl+
[8]     statement             ::= directive | notStartAction
# A shape declaration is a label followed by a shape expression or the keyword "EXTERNAL".
[9]     shapeExprDecl         ::= shapeExprLabel (shapeExpression | "EXTERNAL")
# --- Shape expressions ---
# A shapeRef standing alone is not a shapeExpression: a reference must either
# be preceded by "NOT" or be followed by a shapeOr continuation. All other
# leading atoms go through shapeAtomNoRef.
[10]    shapeExpression       ::= "NOT"? shapeAtomNoRef shapeOr?
                                | "NOT" shapeRef shapeOr?
                                | shapeRef shapeOr
[11]    inlineShapeExpression ::= inlineShapeOr
# shapeOr is a continuation with no leading operand: either one or more
# "OR" shapeAnd, or one or more "AND" shapeNot followed by any number of
# "OR" shapeAnd.
[12]    shapeOr               ::= ("OR" shapeAnd)+
                                | ("AND" shapeNot)+ ("OR" shapeAnd)*
[13]    inlineShapeOr         ::= inlineShapeAnd ("OR" inlineShapeAnd)*
[14]    shapeAnd              ::= shapeNot ("AND" shapeNot)*
[15]    inlineShapeAnd        ::= inlineShapeNot ("AND" inlineShapeNot)*
[16]    shapeNot              ::= "NOT"? shapeAtom
[17]    inlineShapeNot        ::= "NOT"? inlineShapeAtom
# The three atom productions share the same five alternatives and differ only
# in which shape production they use: shapeAtom uses shapeOrRef, shapeAtomNoRef
# uses shapeDefinition as its third alternative (so it cannot start with a
# shapeRef), and inlineShapeAtom uses inlineShapeOrRef. All three take a full
# shapeExpression inside parentheses.
[18]    shapeAtom             ::= nonLitNodeConstraint shapeOrRef?
                                | litNodeConstraint
                                | shapeOrRef nonLitNodeConstraint?
                                | '(' shapeExpression ')'
                                | '.'
[19]    shapeAtomNoRef        ::= nonLitNodeConstraint shapeOrRef?
                                | litNodeConstraint
                                | shapeDefinition nonLitNodeConstraint?
                                | '(' shapeExpression ')'
                                | '.'
[20]    inlineShapeAtom       ::= nonLitNodeConstraint inlineShapeOrRef?
                                | litNodeConstraint
                                | inlineShapeOrRef nonLitNodeConstraint?
                                | '(' shapeExpression ')'
                                | '.'
[21]    shapeOrRef            ::= shapeDefinition | shapeRef
[22]    inlineShapeOrRef      ::= inlineShapeDefinition | shapeRef
# A reference is either one of the ATPNAME_* terminals (an '@' fused with a
# prefixed name, e.g. @ex:foo) or a separate '@' followed by a shapeExprLabel.
[23]    shapeRef              ::= ATPNAME_LN | ATPNAME_NS | '@' shapeExprLabel

# --- Node constraints ---
# A literal node constraint is "LITERAL", a datatype or a valueSet, each with
# any number of xsFacets, or else one or more numericFacets on their own.
[24]    litNodeConstraint     ::= "LITERAL" xsFacet*
                                | datatype xsFacet*
                                | valueSet xsFacet*
                                | numericFacet+
# A non-literal node constraint is a nonLiteralKind with any number of
# stringFacets, or else one or more stringFacets on their own.
[25]    nonLitNodeConstraint  ::= nonLiteralKind stringFacet*
                                | stringFacet+
[26]    nonLiteralKind        ::= "IRI" | "BNODE" | "NONLITERAL"
[27]    xsFacet               ::= stringFacet | numericFacet
# String facets: a length keyword with an INTEGER argument, or a REGEXP.
[28]    stringFacet           ::= stringLength INTEGER
                                | REGEXP
[29]    stringLength          ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
# Numeric facets: range keywords take any numericLiteral; digit-count
# keywords take an INTEGER.
[30]    numericFacet          ::= numericRange numericLiteral
                                | numericLength INTEGER
[31]    numericRange          ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
[32]    numericLength         ::= "TOTALDIGITS" | "FRACTIONDIGITS"
# --- Shape definitions and triple expressions ---
# shapeDefinition and inlineShapeDefinition share the same braces-delimited
# body with optional EXTRA/"CLOSED" qualifiers; only shapeDefinition may be
# followed by annotations and semanticActions.
[33]    shapeDefinition       ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
[34]    inlineShapeDefinition ::= (extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
[35]    extraPropertySet      ::= "EXTRA" predicate+
# Triple expressions nest as: '|'-separated alternatives (oneOf) of
# ';'-separated groups of unaryTripleExprs. A trailing ';' after a group is
# optional.
[36]    tripleExpression      ::= oneOfTripleExpr
[37]    oneOfTripleExpr       ::= groupTripleExpr | multiElementOneOf
[38]    multiElementOneOf     ::= groupTripleExpr ('|' groupTripleExpr)+
# innerTripleExpr is the content of a bracketedTripleExpr; both alternatives
# require at least two elements, so a lone unaryTripleExpr in parentheses is
# not derivable.
[39]    innerTripleExpr       ::= multiElementGroup | multiElementOneOf
[40]    groupTripleExpr       ::= singleElementGroup | multiElementGroup
[41]    singleElementGroup    ::= unaryTripleExpr ';'?
[42]    multiElementGroup     ::= unaryTripleExpr (';' unaryTripleExpr)+ ';'?
# A unaryTripleExpr is a tripleConstraint or bracketedTripleExpr, optionally
# labeled with '$' tripleExprLabel, or an include ('&' tripleExprLabel).
[43]    unaryTripleExpr       ::= ('$' tripleExprLabel)? (tripleConstraint | bracketedTripleExpr)
                                | include
[44]    bracketedTripleExpr   ::= '(' innerTripleExpr ')' cardinality? annotation* semanticActions
[45]    tripleConstraint      ::= senseFlags? predicate inlineShapeExpression cardinality? annotation* semanticActions
[46]    cardinality           ::= '*' | '+' | '?' | REPEAT_RANGE
[47]    senseFlags            ::= '^'
# Value sets are delimited by []s and hold any number of valueSetValues.
[48]    valueSet              ::= '[' valueSetValue* ']'
[49]    valueSetValue         ::= iriRange | literal
# An iriRange is an iri, optionally followed by '~' and any number of
# exclusions, or '.' followed by one or more exclusions.
[50]    iriRange              ::= iri ('~' exclusion*)?
                                | '.' exclusion+
[51]    exclusion             ::= '-' iri '~'?
[52]    include               ::= '&' tripleExprLabel
[53]    annotation            ::= "//" predicate (iri | literal)
# semanticActions may be empty. A codeDecl is '%', an iri naming the code,
# then either a CODE terminal or a bare '%' (a codeDecl with no code).
[54]    semanticActions       ::= codeDecl*
[55]    codeDecl              ::= '%' iri (CODE | '%')
# --- Literals, labels and IRIs ---
# Production numbers ending in t or s are taken from Turtle or SPARQL
# respectively; plain numbers are specific to this grammar.
[13t]   literal               ::= rdfLiteral | numericLiteral | booleanLiteral
# A predicate is an iri or the RDF_TYPE terminal ('a').
[56]    predicate             ::= iri | RDF_TYPE
[57]    datatype              ::= iri
[58]    shapeExprLabel        ::= iri | blankNode
[59]    tripleExprLabel       ::= iri | blankNode
[16t]   numericLiteral        ::= INTEGER | DECIMAL | DOUBLE
# An rdfLiteral is a langString (language tag included in the terminal) or a
# string with an optional '^^' datatype.
[60]  rdfLiteral              ::= langString | string ('^^' datatype)?
[134s]  booleanLiteral        ::= "true" | "false"
[135s]  string                ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
                                | STRING_LITERAL2 | STRING_LITERAL_LONG2
[61]   langString             ::= LANG_STRING_LITERAL1 | LANG_STRING_LITERAL_LONG1
                                | LANG_STRING_LITERAL2 | LANG_STRING_LITERAL_LONG2
[136s]  iri                   ::= IRIREF | prefixedName
[137s]  prefixedName          ::= PNAME_LN | PNAME_NS
[138s]  blankNode             ::= BLANK_NODE_LABEL

# --- Terminals ---
# Everything below @terminals defines lexical tokens rather than syntactic
# productions.
@terminals

# CODE is the body of a semantic action: '{', then any characters other than
# '%' and '\' (which must be escaped as \% and \\) or UCHAR escapes, closed
# by '%' '}'.
[62]    CODE                  ::= '{' ([^%\\] | '\\' [%\\] | UCHAR)* '%' '}'
# REPEAT_RANGE covers {m}, {m,}, {m,n} and {m,*}.
[63]    REPEAT_RANGE          ::= '{' INTEGER (',' (INTEGER | '*')?)? '}'
[64]    RDF_TYPE              ::= 'a'
[18t]   IRIREF                ::= '<' ([^#x00-#x20<>\"{}|^`\\] | UCHAR)* '>' /* #x00=NULL #x01-#x1F=control codes #x20=space */
[140s]  PNAME_NS              ::= PN_PREFIX? ':'
[141s]  PNAME_LN              ::= PNAME_NS PN_LOCAL
# ATPNAME_NS and ATPNAME_LN are PNAME_NS and PNAME_LN with a leading '@', so
# that input such as @ex:foo, which looks like a LANGTAG, is tokenized as a
# shape reference.
[65]    ATPNAME_NS            ::= '@' PN_PREFIX? ':'
[66]    ATPNAME_LN            ::= '@' PNAME_NS PN_LOCAL
# REGEXP is a non-empty '/'-delimited pattern followed by any of the flag
# letters s, m, i, x. Inside the pattern, '/', '\', line feed and carriage
# return may not appear unescaped.
[67]    REGEXP                ::= '/' ([^#x2f#x5C#xA#xD] | '\\' [nrt\\|.?*+(){}$#x2D#x5B#x5D^/] | UCHAR)+ '/' [smix]*

[142s]  BLANK_NODE_LABEL      ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
[145s]  LANGTAG               ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
[19t]   INTEGER               ::= [+-]? [0-9]+
[20t]   DECIMAL               ::= [+-]? [0-9]* '.' [0-9]+
[21t]   DOUBLE                ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.'? [0-9]+ EXPONENT)
[155s]  EXPONENT              ::= [eE] [+-]? [0-9]+
[156s]  STRING_LITERAL1       ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */
[157s]  STRING_LITERAL2       ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */
[158s]  STRING_LITERAL_LONG1  ::= "'''" (("'" | "''")? ([^\'\\] | ECHAR | UCHAR))* "'''"
[159s]  STRING_LITERAL_LONG2  ::= '"""' (('"' | '""')? ([^\"\\] | ECHAR | UCHAR))* '"""'
# The LANG_STRING_* terminals are the four string terminals above with a
# LANGTAG appended as part of the same token.
[68]    LANG_STRING_LITERAL1       ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'"  LANGTAG
[69]    LANG_STRING_LITERAL2       ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' LANGTAG
[70]    LANG_STRING_LITERAL_LONG1  ::= "'''" (("'" | "''")? ([^\'\\] | ECHAR | UCHAR))* "'''" LANGTAG
[71]    LANG_STRING_LITERAL_LONG2  ::= '"""' (('"' | '""')? ([^\"\\] | ECHAR | UCHAR))* '"""' LANGTAG
# UCHAR is a \u escape with four hex digits or a \U escape with eight.
[26t]   UCHAR                 ::= '\\u' HEX HEX HEX HEX
                                | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
[160s]  ECHAR                 ::= '\\' [tbnrf\\\"\']
[164s]  PN_CHARS_BASE         ::= [A-Z] | [a-z]
                                | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF]
                                | [#x0370-#x037D] | [#x037F-#x1FFF]
                                | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF]
                                | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD]
                                | [#x10000-#xEFFFF]
[165s]  PN_CHARS_U            ::= PN_CHARS_BASE | '_'
[167s]  PN_CHARS              ::= PN_CHARS_U | '-' | [0-9]
                                | [#x00B7] | [#x0300-#x036F] | [#x203F-#x2040]
# Prefixes and local names may contain '.' internally but may not end with one.
[168s]  PN_PREFIX             ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
[169s]  PN_LOCAL              ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
[170s]  PLX                   ::= PERCENT | PN_LOCAL_ESC
[171s]  PERCENT               ::= '%' HEX HEX
[172s]  HEX                   ::= [0-9] | [A-F] | [a-f]
[173s]  PN_LOCAL_ESC          ::= '\\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%')

# @pass matches runs of space/tab/CR/LF and '#' comments running to the end
# of the line. NOTE(review): presumably this is the input skipped between
# tokens -- confirm against the yacker documentation.
@pass ::= [ \t\r\n]+
        | '#' [^\r\n]*
back to top