Raw File
# Yacker grammar for (Sh)ape (Ex)pressions (C)ompact language
# 
# Copyright 2015, Eric Prud'hommeaux, Harold Solbrig, Iovka Boneva, Jose Labra Gayo
# All rights reserved; please contact copyright holders for any use outside of the ShEx semantics.
# http://www.w3.org/2005/01/yacker/uploads/ShEx2/bnf
# 
# TODO:
#   annotations on shapes
#   look hard at EXTERNAL valueClassDefinitions
# 
# Changes from yacker ShEx2:
# 
# + '*' in 2nd pos of REPEAT_RANGE so e.g. {2,*} === {2,}
# ~ code names are iris. this changes existing semantic actions as follows:
#   <S> { :p1 . %GenX{ blah blah blah %} }
#   PREFIX GenX: <http://...GenX> <S> { :p1 . %GenX:{ blah blah blah %} }
#   empty code names %{ ... %} are parsed as empty relative URLs: %<>{ ... %}
# + ATPNAME_NS, ATPNAME_LN terminals to parse e.g. @ex:foo which looks like a LANGTAG
# ~ a is an iri (not just for predicates) enabling e.g. <reif> { rdf:predicate (a) }
# ~ groupShapeConstr can only have ORs (no ANDs) so no <S1> { :p1 @<S2> AND @<S3> }
# + includeSet parallels inclPropertySet (&<A><B><C>) e.g. <S3> &<S1> <S2> { ... }
# + factored out (labeled) productions for startActions and semanticActions
# ~ BNodes can have stringFacets
# - ANON terminal (not referenced)
# ~ tripleConstraint: swapped position of annotations and cardinality (EGP 20150727)
# + valueClass: add xsFacet* to datatype (EGP 20150727)
# + escapes (\\, \uxxxx, \Uxxxxxxxx) in CODE (EGP 20150730)
# ~ moved | RDF_TYPE from iri: to predicate: (disables e.g. %a{ ... %}) (EGP 20150807)
# + tried to intercept non-terminated STRING_LITERAL_LONG{1,2} (EGP 20150807)
# - unaryShape: id (EGP 20150818)
# + unaryShape: annotation* (EGP 20150818)
# + valueClassLabel, valueClassDefinition, valueClassExpr, valueClassOrRef (EGP 20150818)
# + factored out notStartAction (EGP 20150818)
# - oneOf (EGP 20150820)
# ~ split out encapsulatedShape (EGP 20150831)
# - numericRange takes a numericLiteral, was INTEGER (EGP 20150917)
# + innerShape ::= multiElementGroup | multiElementSomeOf (EGP 20151022)
# ~ annotation ::= ';' predicate (iri | literal) -- was iri (EGP 20151022)
# + generalized AND/OR expressions on valueClasses (EGP 20151031)
# - groupShapeConstr (shapeOrRef (OR shapeOrRef)*) (EGP 20151031)
# + stringFacet* on negatableValueClass ::= shapeOrRef (EGP 20151101)
# ~ vcand and vcor are mutually exclusive (i.e. not recursive) (EGP 20151111)
# ~ s/valueClassDefinition/valueExprDefinition/ (EGP 20151120)
# ~ s/valueClassLabel/valueExprLabel/ (EGP 20151120)
# ~ change valueSet delimiters from ()s to []s (EGP 20160104)
# ~ codeDecls can have no code (implies code ref) (EGP 20160128)

# Notation:
#   in-line terminals in ""s are case-insensitive

# Document structure.
# A document is zero or more directives, optionally followed by one
# notStartAction or a run of startActions and then any number of statements.
# An entirely empty document is therefore derivable.
[1]     shexDoc               ::= directive* ((notStartAction | startActions) statement*)? # leading CODE
# After the first non-directive item, directives and definitions may interleave.
[2]     statement             ::= directive | notStartAction
# Everything a statement can be other than a directive.
[2b]    notStartAction        ::= start | shape | valueExprDefinition
[3]     directive             ::= baseDecl | prefixDecl
# Named value expressions. All four productions share the label [3a].
# A definition is a label followed either by '=' and an expression with its
# semantic actions, or by the keyword EXTERNAL with no body.
[3a]    valueExprDefinition   ::= valueExprLabel ( '=' valueClassExpr semanticActions | "EXTERNAL" )
[3a]    valueClassExpr        ::= valueClass valueClassJuncts?
# Either an all-OR chain or an all-AND chain; the two cannot be mixed within
# one valueClassExpr.
[3a]    valueClassJuncts      ::= ( "OR" valueClass )+ | ( "AND" valueClass )+
# Labels are distinguished from shape labels by the leading '$'.
[3a]    valueExprLabel       ::= '$' (iri | blankNode)
# Directives (see directive): a base IRI declaration and a prefix-to-IRI
# binding. Keywords are in ""s and hence case-insensitive per the Notation note.
[5s]    baseDecl              ::= "BASE" IRIREF
[6s]    prefixDecl            ::= "PREFIX" PNAME_NS IRIREF
# Start declaration: names an existing shape by label, or supplies an inline
# shape definition followed by its semantic actions.
[4]     start                 ::= "start" '=' ( shapeLabel | shapeDefinition semanticActions )
# A labelled shape, optionally flagged ABSTRACT.
[5]     shape                 ::= "ABSTRACT"? shapeLabel shapeDefinition semanticActions
# Any number of includeSet / inclPropertySet / CLOSED modifiers, in any order
# and with repetition allowed, then a brace-delimited body. The body is
# optional, so '{' '}' is a valid shapeDefinition.
[6]     shapeDefinition       ::= (includeSet | inclPropertySet | "CLOSED")* '{' someOfShape? '}'
# '&' followed by one or more shape labels.
[7]     includeSet            ::= '&' shapeLabel+
# EXTRA followed by one or more predicates.
[8]     inclPropertySet       ::= "EXTRA" predicate+
# Shape bodies.
# The body of a shapeDefinition: one group, or two or more groups separated
# by '|'.
[10]    someOfShape           ::= groupShape | multiElementOneOf
[10a]   multiElementOneOf     ::= groupShape ('|' groupShape)+
# Content of a parenthesized encapsulatedShape. Both alternatives require at
# least two elements, so a parenthesized single unaryShape is not derivable.
[10b]   innerShape            ::= multiElementGroup | multiElementOneOf
[11]    groupShape            ::= singleElementGroup | multiElementGroup
# A trailing ',' is permitted after the last (or only) element of a group.
[11]    singleElementGroup    ::= unaryShape ','?
[11a]   multiElementGroup     ::= unaryShape ( ',' unaryShape )+ ','?
[12]    unaryShape            ::= tripleConstraint | include | encapsulatedShape
# Parenthesized group carrying its own cardinality, annotations and actions.
[13]    encapsulatedShape     ::= '(' innerShape ')' cardinality? annotation* semanticActions
# Inclusion of a single shape inside a body. Note that this production shares
# the label [7] with includeSet.
[7]     include               ::= '&' shapeLabel
[14]    shapeLabel            ::= iri | blankNode
# Optional sense flags, then predicate and value expression, then optional
# cardinality, zero or more annotations, and semantic actions, in that
# fixed order.
[15]    tripleConstraint      ::= senseFlags? predicate valueClassExpr cardinality? annotation* semanticActions
# '!' and '^' may each appear at most once, in either order.
[16]    senseFlags            ::= '!' '^'?
                                | '^' '!'?  # inverse not
# RDF_TYPE is the terminal 'a'; it is accepted here but is not part of iri.
[17]    predicate             ::= iri | RDF_TYPE
# A value class is a negatableValueClass with an optional leading '!'.
[18]    valueClass            ::= '!'? negatableValueClass
# Alternatives, in order: the LITERAL keyword with any facets; a node-kind
# keyword (IRI, BNODE, NONLITERAL) with an optional shape reference and string
# facets only; a datatype IRI with any facets; a shape reference with string
# facets only; a value set; a reference to a named value expression; or '.'.
# The final commented-out line is a disabled bracketed-expression alternative.
[18a]   negatableValueClass   ::= "LITERAL" xsFacet*
                                | ("IRI" | "BNODE" | "NONLITERAL") shapeOrRef? stringFacet*
                                | datatype xsFacet*
                                | shapeOrRef stringFacet*
                                | valueSet
                                | valueExprLabel
                                | '.'  # no constraint
#                                | '[' valueClassExpr ']'
# A reference to a shape ('@' form) or an inline shapeDefinition. The
# ATPNAME_* terminals cover '@' immediately followed by a prefixed name, which
# (per the change log) would otherwise look like a LANGTAG.
[20]    shapeOrRef            ::= ATPNAME_LN | ATPNAME_NS | '@' shapeLabel | shapeDefinition
# Facets. All productions in this group share the label [21].
[21]    xsFacet               ::= stringFacet
                                | numericFacet

# A length keyword with an INTEGER, or a pattern given as a string.
[21]    stringFacet           ::= stringLength INTEGER
                                | "PATTERN" string
                                | '~' string  # shortcut for "PATTERN"
[21]    stringLength          ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
# Range keywords take a numeric literal or a datatyped string
# (string '^^' datatype); digit-count keywords take an INTEGER.
[21]    numericFacet          ::= numericRange (numericLiteral | string '^^' datatype )
                                | numericLength INTEGER
[21]    numericRange          ::= "MININCLUSIVE" | "MINEXCLUSIVE" | "MAXINCLUSIVE" | "MAXEXCLUSIVE"
[21]    numericLength         ::= "TOTALDIGITS" | "FRACTIONDIGITS"
[22]    datatype              ::= iri
# ';' followed by a predicate and an object that is an IRI or a literal.
[23]    annotation            ::= ';' predicate (iri | literal)
# One of the single-character forms or a brace-delimited REPEAT_RANGE terminal.
[24]    cardinality           ::= '*' | '+' | '?' | REPEAT_RANGE
# Bracket-delimited list of values with no separator token between them;
# value* permits an empty set.
[25]    valueSet              ::= '[' value* ']'
[26]    value                 ::= iriRange | literal
# Either an iri, optionally followed by '~' and zero or more exclusions, or
# '.' followed by at least one exclusion.
[27]    iriRange              ::= iri ('~' exclusion*)? | '.' exclusion+
# '-' and an iri, itself optionally followed by '~'.
[28]    exclusion             ::= '-' iri '~'?
# Literals, IRIs and blank nodes.
[13t]   literal               ::= rdfLiteral | numericLiteral | booleanLiteral
[16t]   numericLiteral        ::= INTEGER | DECIMAL | DOUBLE
# A string, optionally followed by either a language tag or '^^' and a
# datatype (never both).
[129s]  rdfLiteral            ::= string (LANGTAG | '^^' datatype)?
# Written in ''s, so per the Notation note these keywords are not subject to
# the case-insensitivity that applies to ""-quoted terminals.
[134s]  booleanLiteral        ::= 'true' | 'false'
# Any of the four quoting forms: single or double quotes, short or long.
[135s]  string                ::= STRING_LITERAL1 | STRING_LITERAL2 | STRING_LITERAL_LONG1 | STRING_LITERAL_LONG2
[136s]  iri                   ::= IRIREF | prefixedName
[137s]  prefixedName          ::= PNAME_LN | PNAME_NS
# Only labelled blank nodes are accepted here.
[138s]  blankNode             ::= BLANK_NODE_LABEL
# Semantic actions. All three productions share the label [0].
# '%' and an iri naming the code, followed by either a CODE terminal
# ('{' ... '%}') or a bare '%' when there is no code body.
[0]     codeDecl              ::= '%' iri (CODE | "%")
# Document-level actions: at least one codeDecl.
[0]     startActions          ::= codeDecl+
# Actions attached to a construct: zero or more, so this can match empty.
[0]     semanticActions       ::= codeDecl*

@terminals

# Body of a semantic action: '{' ... '%' '}'. Inside, a character is anything
# other than '%' or '\', a backslash-escaped '%' or '\', or a UCHAR escape.
[29]    CODE                  ::= '{' ([^%\\] | '\\' [%\\] | UCHAR)* '%' '}'
# Accepted forms: {m}  {m,}  {m,n}  {m,*}
[30]    REPEAT_RANGE          ::= '{' INTEGER (',' (INTEGER | '*')?)? '}'
# The bare keyword 'a', referenced only from predicate.
[3y]    RDF_TYPE              ::= 'a'
[18t]   IRIREF                ::= '<' ([^#x00-#x20<>\"{}|^`\\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */
# Prefix part including the ':'; the prefix itself is optional, so ':' alone
# matches.
[140s]  PNAME_NS              ::= PN_PREFIX? ':'
[141s]  PNAME_LN              ::= PNAME_NS PN_LOCAL
# '@'-prefixed counterparts of PNAME_NS / PNAME_LN, used by shapeOrRef.
[140x]  ATPNAME_NS            ::= '@' PN_PREFIX? ':'
[141x]  ATPNAME_LN            ::= '@' PNAME_NS PN_LOCAL

# '_:' then a first character (PN_CHARS_U or digit); later characters may
# include '.', but the label cannot end with '.'.
[142s]  BLANK_NODE_LABEL      ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
# '@', a letters-only primary subtag, then zero or more '-'-separated
# alphanumeric subtags.
[145s]  LANGTAG               ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
# All three numeric forms accept an optional leading sign.
[19t]   INTEGER               ::= [+-]? [0-9]+
# Integer part optional; '.' and at least one fraction digit required.
[20t]   DECIMAL               ::= [+-]? [0-9]* '.' [0-9]+
# The exponent is mandatory in both alternatives.
[21t]   DOUBLE                ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.'? [0-9]+ EXPONENT)
[155s]  EXPONENT              ::= [eE] [+-]? [0-9]+
# Short strings: single-line, delimited by one quote character; the matching
# quote, backslash, LF and CR must be escaped (ECHAR / UCHAR) or are excluded.
[156s]  STRING_LITERAL1       ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */
[157s]  STRING_LITERAL2       ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */
# Long strings: triple-quoted. Each body unit is an optional run of one or
# two quote characters followed by a non-quote, non-backslash character or
# an escape.
# The ...X terminals match only the bare opening delimiter; see their own
# trailing comments for the intent.
[158s]  STRING_LITERAL_LONG1  ::= "'''" (("'" | "''")? ([^\'\\] | ECHAR | UCHAR))* "'''"
[98]    STRING_LITERAL_LONG1X ::= "'''" # This is *supposed* to disable non-terminated LONG1.
[159s]  STRING_LITERAL_LONG2  ::= '"""' (('"' | '""')? ([^\"\\] | ECHAR | UCHAR))* '"""'
[99]    STRING_LITERAL_LONG2X ::= '"""' # This is *supposed* to disable non-terminated LONG2.
# Unicode escapes: \u with exactly 4 hex digits or \U with exactly 8.
[26t]   UCHAR                 ::= '\\u' HEX HEX HEX HEX | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
# Single-character escapes: backslash followed by one of t b n r f \ " '
[160s]  ECHAR                 ::= '\\' [tbnrf\\\"\']
# A single whitespace character. Referenced only by the disabled ANON below.
[162s]  WS                    ::= #x20 | #x9 | #xD | #xA /* #x20=space #x9=character tabulation #xD=carriage return #xA=new line */
# ANON is disabled: no production references it (blankNode is BLANK_NODE_LABEL
# only) and the change log records its removal. Left active, a longest-match
# lexer would presumably tokenize "[]" / "[ ]" as a single ANON token, which
# would keep the empty valueSet ('[' value* ']') from ever parsing.
# [163s]  ANON                  ::= '[' WS* ']'
# Character classes for prefixed names.
# Base name characters: ASCII letters plus the listed code point ranges.
[164s]  PN_CHARS_BASE         ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
[165s]  PN_CHARS_U            ::= PN_CHARS_BASE | '_'
# Adds '-', digits and the listed combining/connector code points.
[167s]  PN_CHARS              ::= PN_CHARS_U | '-' | [0-9] | [#x00B7] | [#x0300-#x036F] | [#x203F-#x2040]
# Must start with a PN_CHARS_BASE character; may contain '.' but not end
# with it.
[168s]  PN_PREFIX             ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
# May start with PN_CHARS_U, ':', a digit or a PLX; may contain '.' but not
# end with it.
[169s]  PN_LOCAL              ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
# Either a percent-encoded byte or a backslash-escaped punctuation character.
[170s]  PLX                   ::= PERCENT | PN_LOCAL_ESC
[171s]  PERCENT               ::= '%' HEX HEX
# One hexadecimal digit, either case.
[172s]  HEX                   ::= [0-9] | [A-F] | [a-f]
[173s]  PN_LOCAL_ESC          ::= '\\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%')

# Patterns listed under @pass: runs of space, tab, CR and LF, and '#' comments
# extending to (but not including) the end of the line.
@pass ::= [ \t\r\n]+
 | "#" [^\r\n]* 
back to top