https://github.com/shexSpec/shex.js
Tip revision: 44adfe5946bc6f13c3971787f452be094608db38 authored by Eric Prud'hommeaux on 18 August 2022, 16:52:11 UTC
Merge branch 'main' into extends
Merge branch 'main' into extends
Tip revision: 44adfe5
bnf
# Yacker grammar for (Sh)ape (Ex)pressions (C)ompact language
#
# Copyright 2015, Eric Prud'hommeaux, Harold Solbrig, Iovka Boneva, Jose Labra Gayo
# All rights reserved; please contact copyright holders for any use outside of the ShEx semantics.
# http://www.w3.org/2005/01/yacker/uploads/ShEx2/bnf
#
# yacker: https://www.w3.org/2005/01/yacker/uploads/ShEx2_EXTENDS?lang=perl
# repo: https://github.com/shexSpec/shex.js/
# branch: main
# path: doc/bnf
# TODO:
# annotations on shapes
# look hard at EXTERNAL valueClassDefinitions
#
# Changes from yacker ShEx2:
#
# + '*' in 2nd pos of REPEAT_RANGE so e.g. {2,*} === {2,}
# ~ code names are iris. this changes existing semantic actions as follows:
# <S> { :p1 . %GenX{ blah blah blah %} }
# PREFIX GenX: <http://...GenX> <S> { :p1 . %GenX:{ blah blah blah %} }
# empty code names %{ ... %} are parsed as empty relative URLs: %<>{ ... %}
# + ATPNAME_NS, ATPNAME_LN terminals to parse e.g. @ex:foo which looks like a LANGTAG
# ~ a is an iri (not just for predicates) enabling e.g. <reif> { rdf:predicate (a) }
# ~ groupShapeConstr can only have ORs (no ANDs) so no <S1> { :p1 @<S2> AND @<S3> }
# + includeSet parallels inclPropertySet (&<A><B><C>) e.g. <S3> &<S1> <S2> { ... }
# + factored out (labeled) productions for startActions and semanticActions
# ~ BNodes can have stringFacets
# - ANON terminal (not referenced)
# ~ tripleConstraint: swapped position of annotations and cardinality (EGP 20150727)
# + valueClass: add xsFacet* to datatype (EGP 20150727)
# + escapes (\\, \uxxxx, \Uxxxxxxxx) in CODE (EGP 20150730)
# ~ moved | RDF_TYPE from iri: to predicate: (disables e.g. %a{ ... %}) (EGP 20150807)
# + tried to intercept non-terminated STRING_LITERAL_LONG{1,2} (EGP 20150807)
# - unaryShape: id (EGP 20150818)
# + unaryShape: annotation* (EGP 20150818)
# + valueClassLabel, valueClassDefinition, valueClassExpr, valueClassOrRef (EGP 20150818)
# + factored out notStartAction (EGP 20150818)
# - oneOf (EGP 20150820)
# ~ split out encapsulatedShape (EGP 20150831)
# - numericRange takes a numericLiteral, was INTEGER (EGP 20150917)
# + innerShape ::= multiElementGroup | multiElementSomeOf (EGP 20151022)
# ~ annotation ::= ';' predicate (iri | literal) -- was iri (EGP 20151022)
# + generalized AND/OR expressions on valueClasses (EGP 20151031)
# - groupShapeConstr (shapeOrRef (OR shapeOrRef)*) (EGP 20151031)
# + stringFacet* on negatableValueClass ::= shapeOrRef (EGP 20151101)
# ~ vcand and vcor are mutually exclusive (i.e. not recursive) (EGP 20151111)
# ~ s/valueClassDefinition/valueExprDefinition/ (EGP 20151120)
# ~ s/valueClassLabel/valueExprLabel/ (EGP 20151120)
# ~ change valueSet delimiters from ()s to []s (EGP 20160104)
# ~ codeDecls can have no code (implies code ref) (EGP 20160128)
# + ()s around valueExprs (EGP 20160520)
# + shapeDefinition += nonLiteralKind? stringFacet* (EGP 20160520)
# - shape == "VIRTUAL"? (EGP 20160520)
# + ';' separator for groups (EGP 20160524)
# ~ s{;}{//} for annotations (EGP 20160524)
# + ("AND" shapeDefinition)* (EGP 20160524)
# + ("OR" shapeDefinition)* (EGP 20160615)
# ~ generalized shapeExpression to include valueExprDefinition (EGP 20160708)
# ~ reordered (EGP 20160913)
# ~ s/ShapeDisjunction/shapeOr/ (EGP 20160913)
# ~ s/shapeConjunction/shapeAnd/ (EGP 20160913)
# ~ s/negShapeAtom/shapeNot/ (EGP 20160913)
# ~ s/inclPropertySet/extraPropertySet/ (EGP 20160913)
# ~ s/value/valueSetValue/ (EGP 20160913)
# ~ updated production labels (EGP 20160913)
# ~ reorder to align with spec (EGP 20160930)
# ~ s/someOfShape/someOfTripleExpr/ (EGP 20160930)
# ~ s/innerShape/innerTripleExpr/ (EGP 20160930)
# ~ s/groupShape/groupTripleExpr/ (EGP 20160930)
# ~ s/unaryShape/unaryTripleExpr/ (EGP 20160930)
# ~ s/encapsulatedShape/bracketedTripleExpr/ (EGP 20160930)
# ~ s/SomeOf/OneOf/g (EGP 20161201)
# ~ s/shapeLabel/shapeExprLabel/? (EGP 20170228)
# + tripleExprLabel (EGP 20170228)
# - ^^-style facet arguments per shex#41 (EGP 20170303)
# ~ switch to ~/regexp/ (EGP 20170303)
# ~ s/shapeExpression/inlineShapeExpression/
# + NOTs on shapeExpression
# + shapeOr += ("AND" shapeNot)+
# + shapeAtomNoRef
# ~ nodeConstraint split into litNodeConstraint and nonLitNodeConstraint
# - tripleExprLabel -= '$'
# ~ align REGEXP with XML Schema
# ~ align with current EXTENDS/RESTRICTS consensus
# Notation:
# in-line terminals in ""s are case-insensitive
# production numbers ending in t or s are from Turtle or SPARQL.
# leading CODE is captured in startActions
# --- Document structure ---
# A document is any number of directives, optionally followed by either a first
# non-action statement or a run of leading code declarations, then further statements.
[1] shexDoc ::= directive* ((notStartAction | startActions) statement*)?
# A directive sets the base IRI or binds a prefix.
[2] directive ::= baseDecl | prefixDecl
[3] baseDecl ::= "BASE" IRIREF
[4] prefixDecl ::= "PREFIX" PNAME_NS IRIREF
# Factored out so shexDoc can tell leading codeDecls (startActions) apart from
# the start declaration and shape expression declarations.
[5] notStartAction ::= start | shapeExprDecl
[6] start ::= "start" '=' inlineShapeExpression
# Leading CODE at the top of the document is captured here.
[7] startActions ::= codeDecl+
[8] statement ::= directive | notStartAction
# --- Shape expression declarations and the NOT / AND / OR algebra ---
# A declaration is an optional "ABSTRACT", a label, zero or more restrictions,
# and then either a shape expression or the keyword "EXTERNAL".
[9] shapeExprDecl ::= "ABSTRACT"? shapeExprLabel restrictions* (shapeExpression | "EXTERNAL")
# A bare leading shapeRef is only accepted when a shapeOr follows (3rd alternative);
# otherwise the expression starts with a shapeAtomNoRef or with an explicit "NOT".
[10] shapeExpression ::= "NOT"? shapeAtomNoRef shapeOr?
| "NOT" shapeRef shapeOr?
| shapeRef shapeOr
[11] inlineShapeExpression ::= inlineShapeOr
# Continuation after the first atom of a shapeExpression: either ORs only,
# or one or more ANDs followed by optional ORs.
[12] shapeOr ::= ("OR" shapeAnd)+
| ("AND" shapeNot)+ ("OR" shapeAnd)*
[13] inlineShapeOr ::= inlineShapeAnd ("OR" inlineShapeAnd)*
[14] shapeAnd ::= shapeNot ("AND" shapeNot)*
[15] inlineShapeAnd ::= inlineShapeNot ("AND" inlineShapeNot)*
[16] shapeNot ::= "NOT"? shapeAtom
[17] inlineShapeNot ::= "NOT"? inlineShapeAtom
# The three atom productions share the same five alternatives and differ only in
# which shape form they accept: shapeOrRef, shapeDefinition (no leading reference),
# or inlineShapeOrRef. All three recurse through the non-inline shapeExpression
# inside parentheses.
[18] shapeAtom ::= nonLitNodeConstraint shapeOrRef?
| litNodeConstraint
| shapeOrRef nonLitNodeConstraint?
| '(' shapeExpression ')'
| '.'
[19] shapeAtomNoRef ::= nonLitNodeConstraint shapeOrRef?
| litNodeConstraint
| shapeDefinition nonLitNodeConstraint?
| '(' shapeExpression ')'
| '.'
[20] inlineShapeAtom ::= nonLitNodeConstraint inlineShapeOrRef?
| litNodeConstraint
| inlineShapeOrRef nonLitNodeConstraint?
| '(' shapeExpression ')'
| '.'
[21] shapeOrRef ::= shapeDefinition | shapeRef
[22] inlineShapeOrRef ::= inlineShapeDefinition | shapeRef
# ATPNAME_LN / ATPNAME_NS are dedicated terminals for references such as @ex:foo,
# which would otherwise look like a LANGTAG to the lexer.
[23] shapeRef ::= ATPNAME_LN | ATPNAME_NS | '@' shapeExprLabel
# --- Node constraints and facets ---
# Literal and non-literal node constraints are separate productions; in the shape
# atom productions only the non-literal form may be combined with a shape or reference.
# Literal forms: the LITERAL keyword, a datatype, or a value set, each with any
# xsFacets; or one or more numeric facets on their own.
[24] litNodeConstraint ::= "LITERAL" xsFacet*
| datatype xsFacet*
| valueSet xsFacet*
| numericFacet+
# Non-literal forms: a node kind with optional string facets, or string facets alone.
[25] nonLitNodeConstraint ::= nonLiteralKind stringFacet*
| stringFacet+
[26] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
[27] xsFacet ::= stringFacet | numericFacet
# A string facet is a length keyword with an INTEGER argument, or a REGEXP.
[28] stringFacet ::= stringLength INTEGER
| REGEXP
[29] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
# Range facets take any numericLiteral; digit-count facets take an INTEGER.
[30] numericFacet ::= numericRange numericLiteral
| numericLength INTEGER
[31] numericRange ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
[32] numericLength ::= "TOTALDIGITS" | "FRACTIONDIGITS"
# --- Shape definitions and triple expressions ---
# shapeDefinition and inlineShapeDefinition share the same prefix and braces; only the
# non-inline form may be followed by annotations and semanticActions.
[33] shapeDefinition ::= (extensions | extraPropertySet | "CLOSED")* '{' tripleExpression? '}' annotation* semanticActions
[34] inlineShapeDefinition ::= (extensions | extraPropertySet | "CLOSED")* '{' tripleExpression? '}'
[35] extraPropertySet ::= "EXTRA" predicate+
[36] tripleExpression ::= oneOfTripleExpr
# '|' separates the alternatives of a one-of; ';' separates the members of a group.
[37] oneOfTripleExpr ::= groupTripleExpr | multiElementOneOf
[38] multiElementOneOf ::= groupTripleExpr ('|' groupTripleExpr)+
# Contents of a bracketedTripleExpr: only the multi-element forms are accepted here.
[39] innerTripleExpr ::= multiElementGroup | multiElementOneOf
[40] groupTripleExpr ::= singleElementGroup | multiElementGroup
# A trailing ';' is optional in both group forms.
[41] singleElementGroup ::= unaryTripleExpr ';'?
[42] multiElementGroup ::= unaryTripleExpr (';' unaryTripleExpr)+ ';'?
# A unary expression is an optionally '$'-labelled triple constraint or bracketed
# expression, or an include of a labelled triple expression.
[43] unaryTripleExpr ::= ('$' tripleExprLabel)? (tripleConstraint | bracketedTripleExpr)
| include
[44] bracketedTripleExpr ::= '(' innerTripleExpr ')' cardinality? annotation* semanticActions
# Order is: optional sense flag, predicate, value expression, cardinality,
# then annotations, then semantic actions.
[45] tripleConstraint ::= senseFlags? predicate inlineShapeExpression cardinality? annotation* semanticActions
[46] cardinality ::= '*' | '+' | '?' | REPEAT_RANGE
[47] senseFlags ::= '^'
# Value sets are delimited by []s.
[48] valueSet ::= '[' valueSetValue* ']'
[49] valueSetValue ::= iriRange | literal
# An iriRange is an iri, optionally followed by '~' and exclusions, or '.' with
# at least one exclusion.
[50] iriRange ::= iri ('~' exclusion*)?
| '.' exclusion+
[51] exclusion ::= '-' iri '~'?
[52] include ::= '&' tripleExprLabel
# Annotations are introduced by "//" and pair a predicate with an iri or literal.
[53] annotation ::= "//" predicate (iri | literal)
[54] semanticActions ::= codeDecl*
# The code name is an iri; a codeDecl may carry CODE or end with a bare '%' (no code).
[55] codeDecl ::= '%' iri (CODE | '%')
# --- Literals, labels and IRIs ---
# Production numbers ending in t or s are taken from Turtle or SPARQL.
[13t] literal ::= rdfLiteral | numericLiteral | booleanLiteral
# RDF_TYPE (the keyword 'a') is accepted as a predicate but is not part of iri.
[56] predicate ::= iri | RDF_TYPE
[57] datatype ::= iri
# Shape expression labels and triple expression labels are both an iri or a blank node.
[58] shapeExprLabel ::= iri | blankNode
[59] tripleExprLabel ::= iri | blankNode
[16t] numericLiteral ::= INTEGER | DECIMAL | DOUBLE
# Language-tagged strings are single langString tokens; plain strings may carry
# a '^^' datatype.
[60] rdfLiteral ::= langString | string ('^^' datatype)?
[134s] booleanLiteral ::= "true" | "false"
[135s] string ::= STRING_LITERAL1 | STRING_LITERAL_LONG1
| STRING_LITERAL2 | STRING_LITERAL_LONG2
[61] langString ::= LANG_STRING_LITERAL1 | LANG_STRING_LITERAL_LONG1
| LANG_STRING_LITERAL2 | LANG_STRING_LITERAL_LONG2
[136s] iri ::= IRIREF | prefixedName
[137s] prefixedName ::= PNAME_LN | PNAME_NS
[138s] blankNode ::= BLANK_NODE_LABEL
# --- EXTENDS / RESTRICTS ---
# These two productions are labelled [72] and [73], the first numbers after the highest
# existing label ([71]): [57] already identifies datatype and [00] was a placeholder,
# so the previous labels could not be cited unambiguously.
# extensions: the "EXTENDS" keyword or '&', followed by one or more shape expression labels.
[72] extensions ::= "EXTENDS" shapeExprLabel+
| '&' shapeExprLabel+
# restrictions: the "RESTRICTS" keyword or '-', followed by exactly one shape expression label.
[73] restrictions ::= "RESTRICTS" shapeExprLabel
| '-' shapeExprLabel
# --- Terminals ---
# NOTE(review): productions after @terminals appear to be lexer tokens; confirm against yacker docs.
@terminals
# CODE runs from '{' to the closing '%' '}'; a literal '%' or '\' inside must be
# backslash-escaped, and UCHAR escapes are allowed.
[62] CODE ::= '{' ([^%\\] | '\\' [%\\] | UCHAR)* '%' '}'
# The second position may be an INTEGER, '*', or empty, so {2,*} is the same as {2,}.
[63] REPEAT_RANGE ::= '{' INTEGER (',' (INTEGER | '*')?)? '}'
[64] RDF_TYPE ::= 'a'
[18t] IRIREF ::= '<' ([^#x00-#x20<>\"{}|^`\\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */
[140s] PNAME_NS ::= PN_PREFIX? ':'
[141s] PNAME_LN ::= PNAME_NS PN_LOCAL
# '@'-prefixed prefixed names, used for shape references such as @ex:foo that
# would otherwise look like a LANGTAG.
[65] ATPNAME_NS ::= '@' PN_PREFIX? ':'
[66] ATPNAME_LN ::= '@' PNAME_NS PN_LOCAL
# Slash-delimited pattern with a fixed set of backslash escapes, followed by
# optional flag letters from s, m, i, x.
[67] REGEXP ::= '/' ([^#x2f#x5C#xA#xD] | '\\' [nrt\\|.?*+(){}$#x2D#x5B#x5D^/] | UCHAR)+ '/' [smix]*
[142s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
[145s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
[19t] INTEGER ::= [+-]? [0-9]+
[20t] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
[21t] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.'? [0-9]+ EXPONENT)
[155s] EXPONENT ::= [eE] [+-]? [0-9]+
[156s] STRING_LITERAL1 ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */
[157s] STRING_LITERAL2 ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */
[158s] STRING_LITERAL_LONG1 ::= "'''" (("'" | "''")? ([^\'\\] | ECHAR | UCHAR))* "'''"
[159s] STRING_LITERAL_LONG2 ::= '"""' (('"' | '""')? ([^\"\\] | ECHAR | UCHAR))* '"""'
# Each LANG_STRING_* terminal is the corresponding STRING_* pattern immediately
# followed by a LANGTAG, matched as a single token.
[68] LANG_STRING_LITERAL1 ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" LANGTAG
[69] LANG_STRING_LITERAL2 ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' LANGTAG
[70] LANG_STRING_LITERAL_LONG1 ::= "'''" (("'" | "''")? ([^\'\\] | ECHAR | UCHAR))* "'''" LANGTAG
[71] LANG_STRING_LITERAL_LONG2 ::= '"""' (('"' | '""')? ([^\"\\] | ECHAR | UCHAR))* '"""' LANGTAG
# UCHAR: \u with 4 hex digits or \U with 8 hex digits.
[26t] UCHAR ::= '\\u' HEX HEX HEX HEX
| '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
[160s] ECHAR ::= '\\' [tbnrf\\\"\']
[164s] PN_CHARS_BASE ::= [A-Z] | [a-z]
| [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF]
| [#x0370-#x037D] | [#x037F-#x1FFF]
| [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF]
| [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD]
| [#x10000-#xEFFFF]
[165s] PN_CHARS_U ::= PN_CHARS_BASE | '_'
[167s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9]
| [#x00B7] | [#x0300-#x036F] | [#x203F-#x2040]
[168s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
[169s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
[170s] PLX ::= PERCENT | PN_LOCAL_ESC
[171s] PERCENT ::= '%' HEX HEX
[172s] HEX ::= [0-9] | [A-F] | [a-f]
[173s] PN_LOCAL_ESC ::= '\\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%')
# @pass matches runs of whitespace and '#'-to-end-of-line comments in ShExC input.
# NOTE(review): presumably these matches are discarded by the lexer; confirm against yacker docs.
@pass ::= [ \t\r\n]+
| '#' [^\r\n]*