/*
XML Grammar abstracted from:
XML 1.0 (Fourth Edition) http://www.w3.org/TR/xml/
XML 1.1 (Second Edition) http://www.w3.org/TR/xml11/
Namespaces in XML 1.1 (Second Edition) http://www.w3.org/TR/xml-names11/
*/
// Parser directives.
// NOTE(review): presumably %entrypoint lists the start symbols (document and dtd_document)
// and %explicit_whitespace turns off implicit whitespace skipping, which would explain why
// the rules below spell out S everywhere - confirm against the parser generator's manual.
%entrypoint document dtd_document
%explicit_whitespace
// Start symbol for a stand-alone DTD: an extSubset (rule [30]) followed by <<EOF>>.
dtd_document ::= extSubset <<EOF>>
//[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )
// Re-write to be more efficient: the "- ( Char* RestrictedChar Char* )" exclusion is
// dropped, so this rule on its own no longer rejects RestrictedChar characters.
// The rule is anchored with <<EOF>> after the trailing Misc*.
[1] document ::= prolog element Misc* <<EOF>>
// Character classes. In the visible part of this grammar, Char and RestrictedChar are only
// mentioned in commented-out original productions, not in any active rule.
[2] Char ::= '[\x1-\xD7FF]' | '[\xE000-\xFFFD]' | '[\x10000-\x10FFFF]'
/* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
[2a] RestrictedChar ::= '[\x1-\x8]' | '[\xB-\xC]' | '[\xE-\x1F]' | '[\x7F-\x84]' | '[\x86-\x9F]'
// White space: one or more of space (x20), tab (x9), carriage return (xD), line feed (xA).
[3] S ::= ('\x20' | '\x9' | '\xD' | '\xA')+
// Name characters and the tokens built from them.
// NOTE(review): NameStartChar and NameChar are introduced with ":=" while every other
// rule visible here uses "::=" - confirm this is intentional for the parser generator.
[4] NameStartChar := ":" | '[A-Z]' | "_" | '[a-z]' |
'[\xC0-\xD6]' | '[\xD8-\xF6]' | '[\xF8-\x2FF]' | '[\x370-\x37D]' | '[\x37F-\x1FFF]' |
'[\x200C-\x200D]' | '[\x2070-\x218F]' | '[\x2C00-\x2FEF]' |
'[\x3001-\xD7FF]' | '[\xF900-\xFDCF]' | '[\xFDF0-\xFFFD]' | '[\x10000-\xEFFFF]'
// NameChar adds "-", ".", digits, xB7 and two combining/connector ranges to NameStartChar.
[4a] NameChar := NameStartChar | "-" | "." | '[0-9]' | '\xB7' |
'[\x0300-\x036F]' | '[\x203F-\x2040]'
// A Name must begin with a NameStartChar; an Nmtoken may begin with any NameChar.
[5] Name ::= NameStartChar NameChar*
// Names / Nmtokens: lists whose items are separated by S.
[6] Names ::= Name (S Name)*
[7] Nmtoken ::= NameChar+
[8] Nmtokens ::= Nmtoken (S Nmtoken)*
// Quoted literals. Each has a double-quoted and a single-quoted form; the enclosing quote
// character is excluded from the body.
// EntityValue: '%' and '&' may only appear as the start of a PEReference / Reference.
[9] EntityValue ::= '"' ('[^%&"]' | PEReference | Reference)* '"'
| "'" ('[^%&\']' | PEReference | Reference)* "'"
// AttValue: '<' is excluded and '&' may only appear as the start of a Reference.
[10] AttValue ::= '"' ('[^<&"]' | Reference)* '"'
| "'" ('[^<&\']' | Reference)* "'"
[11] SystemLiteral ::= ('"' '[^"]*' '"') | ("'" '[^\']*' "'")
//[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
// Re-write to be more efficient: the body accepts any character except the closing quote,
// i.e. it is no longer restricted to PubidChar.
[12] PubidLiteral ::= '"' '[^\"]*' '"' | "'" '[^\']*' "'"
// PubidChar is only referenced from the commented-out original of rule [12] in the visible
// part of this grammar.
[13] PubidChar ::= '\x20' | '\xD' | '\xA' | '[a-zA-Z0-9]' | '[-\'()+,./:=?;!*#@$_%]'
//[14] CharData ::= '[^<&]*' - ('[^<&]*' ']]>' '[^<&]*')
// Re-write to be more efficient: character data without '<' or '&'; a ']' is only consumed
// together with the one or two characters that follow it. Uses '+' (non-empty); rule [43]
// refers to it as "CharData?", so empty character data is still allowed there.
// NOTE(review): two cases look off - text ending in ']' directly before '<' or '&'
// (e.g. "x]<") is not covered by any alternative, and "]]]>" appears to be accepted
// (third alternative, then first) although it contains "]]>" - confirm.
[14] CharData ::= ('[^<&\]]' | '][^<&\]]' | ']][^<&>]')+
//[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
// Re-write to be more efficient: a single pattern in which a '-' inside the body is always
// followed by a non-'-' character, so "--" cannot occur before the closing "-->".
[15] Comment ::= '<!--([^-]|-[^-])*-->'
//[16] PI ::= '<\?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
// Re-write to be more efficient: the optional body after S is a run of characters in which
// a '?' is consumed only together with a following character that is not '>', so the body
// stops in front of the closing "?>".
// NOTE(review): a body that ends in '?' (e.g. "<?t a??>") is still not matched by this form.
[16] PI ::= '<\?' PITarget (S ('[^?]' | '?' '[^>]')* )? '?>'
//[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
// Re-write to be more efficient: the exclusion of "xml" (in any letter case) is dropped,
// so any Name is accepted as a processing-instruction target by this rule.
[17] PITarget ::= Name
// CDATA section: "<![CDATA[" CData "]]>".
[18] CDSect ::= CDStart CData CDEnd
[19] CDStart ::= '<!\[CDATA\['
//[20] CData ::= (Char* - (Char* ']]>' Char*))
// Re-write to be more efficient: a ']' is consumed only together with a following
// non-']' character, and "]]" only together with a following non-'>' character.
// NOTE(review): content ending in ']' right before the terminator (e.g. "a]]]>") looks
// like it is consumed past the "]]>" by the third and first alternatives - confirm.
[20] CData ::= ('[^\]]' | ']' '[^\]]' | ']]' '[^>]')*
[21] CDEnd ::= ']]>'
// Prolog: optional XML declaration, then Misc items, then an optional doctypedecl
// followed by more Misc items.
[22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
// XML declaration: VersionInfo is mandatory here; EncodingDecl and SDDecl are optional.
[23] XMLDecl ::= '<\?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')/* */
// '=' with optional white space on either side.
[25] Eq ::= S? '=' S?
// One or more of: letters, digits, '_', '.', ':' or '-'.
[26] VersionNum ::= ('[a-zA-Z0-9_\.:]' | '-')+
[27] Misc ::= Comment | PI | S
/* Replaced by Namespace Definition */
// The active doctypedecl is rule [16] of the Namespace Definitions section; it uses QName
// where the original below uses Name.
// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
/*VC: Root Element Type*/
/*WFC: External Subset*/
// Separator allowed between declarations: a parameter-entity reference or white space.
[28a] DeclSep ::= PEReference | S /*WFC: PE Between Declarations*/
// Internal subset: any mix of markup declarations and separators.
[28b] intSubset ::= (markupdecl | DeclSep)*
[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
/* [VC: Proper Declaration/PE Nesting] */
/* [WFC: PEs in Internal Subset] */
// External subset: optional TextDecl, then declarations that may additionally contain
// conditional sections (not available in intSubset).
[30] extSubset ::= TextDecl? extSubsetDecl
[31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
// standalone="yes" / standalone="no", with either quote style.
[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
/* [VC: Standalone Document Declaration] */
/* (Productions 33 through 38 have been removed.) */
// An element is either an empty-element tag or a start tag, content and an end tag.
// STag, ETag, EmptyElemTag and Attribute are defined in the Namespace Definitions section
// (rules [12]-[15] there); the originals are kept below as comments.
[39] element ::= EmptyElemTag | STag content ETag
/* [WFC: Element Type Match] */
/* [VC: Element Valid] */
/* Replaced by Namespace Definition */
// [40] STag ::= '<' Name (S Attribute)* S? '>' /* [WFC: Unique Att Spec] */
/* Replaced by Namespace Definition */
// [41] Attribute ::= Name Eq AttValue /* [VC: Attribute Value Type] */
/* [WFC: No External Entity References] */
/* [WFC: No < in Attribute Values] */
/* Replaced by Namespace Definition */
// [42] ETag ::= '<\/' Name S? '>'
// Element content: optional character data interleaved with child elements, references,
// CDATA sections, processing instructions and comments.
[43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
// Element type and attribute-list declarations. EmptyElemTag, elementdecl, cp, Mixed,
// AttlistDecl and AttDef are defined in the Namespace Definitions section; the originals
// are kept below as comments.
/* Replaced by Namespace Definition */
// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '\/>' /* [WFC: Unique Att Spec] */
/* Replaced by Namespace Definition */
// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
/* [VC: Unique Element Type Declaration] */
[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
// A choice or sequence group with an optional occurrence indicator.
[47] children ::= (choice | seq) ('?' | '*' | '+')?
/* Replaced by Namespace Definition */
// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
// choice requires at least two content particles ('+' on the repeated part);
// seq accepts a single content particle ('*' on the repeated part).
[49] choice ::= '\(' S? cp ( S? '|' S? cp )+ S? ')'
/* [VC: Proper Group/PE Nesting] */
[50] seq ::= '\(' S? cp ( S? ',' S? cp )* S? ')'
/* [VC: Proper Group/PE Nesting] */
/* Replaced by Namespace Definition */
// [51] Mixed ::= '\(' S? '#PCDATA' (S? '|' S? Name)* S? ')\*' | '\(' S? '#PCDATA' S? ')'
/* [VC: Proper Group/PE Nesting] */
/* [VC: No Duplicate Types] */
/* Replaced by Namespace Definition */
// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
/* Replaced by Namespace Definition */
// [53] AttDef ::= S Name S AttType S DefaultDecl
// Attribute types used by AttDef.
[54] AttType ::= StringType | TokenizedType | EnumeratedType
[55] StringType ::= 'CDATA'
// NOTE(review): several keywords are prefixes of later ones ('ID' / 'IDREF' / 'IDREFS',
// 'NMTOKEN' / 'NMTOKENS'); if the generator treats '|' as ordered choice, the order of
// these alternatives matters - confirm.
[56] TokenizedType ::= 'ID' /* [VC: ID] */ /* [VC: One ID per Element Type] */ /* [VC: ID Attribute Default] */
| 'IDREF' /* [VC: IDREF] */
| 'IDREFS' /* [VC: IDREF] */
| 'ENTITY' /* [VC: Entity Name] */
| 'ENTITIES' /* [VC: Entity Name] */
| 'NMTOKEN' /* [VC: Name Token] */
| 'NMTOKENS' /* [VC: Name Token] */
[57] EnumeratedType ::= NotationType | Enumeration
// 'NOTATION' followed by a parenthesised, '|'-separated list of Names.
[58] NotationType ::= 'NOTATION' S '\(' S? Name (S? '|' S? Name)* S? ')' /* [VC: Notation Attributes] */
/* [VC: One Notation Per Element Type] */
/* [VC: No Notation on Empty Element] */
// Parenthesised, '|'-separated list of Nmtokens.
[59] Enumeration ::= '\(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* [VC: Enumeration] */
// Attribute default: '#REQUIRED', '#IMPLIED', or a value optionally preceded by '#FIXED'.
[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
| (('#FIXED' S)? AttValue) /* [VC: Required Attribute] */
/* [VC: Attribute Default Legal] */
/* [WFC: No < in Attribute Values] */
/* [VC: Fixed Attribute Default] */
// Conditional sections; they are reachable only through extSubsetDecl (rule [31]).
[61] conditionalSect ::= includeSect | ignoreSect
// "<![INCLUDE[ ... ]]>": the body is parsed as further external-subset declarations.
[62] includeSect ::= '<!\[' S? 'INCLUDE' S? '\[' extSubsetDecl ']]>' /* */
/* [VC: Proper Conditional Section/PE Nesting] */
// "<![IGNORE[ ... ]]>": the body is skipped, but nested "<![" ... "]]>" pairs are tracked
// through ignoreSectContents.
[63] ignoreSect ::= '<!\[' S? 'IGNORE' S? '\[' ignoreSectContents* ']]>' /* */
/* [VC: Proper Conditional Section/PE Nesting] */
[64] ignoreSectContents ::= Ignore ('<!\[' ignoreSectContents ']]>' Ignore)*
//[65] Ignore ::= Char* - (Char* ('<!\[' | ']]>') Char*)
// Re-write to be more efficient: '<' is consumed only when not followed by "![", and ']'
// only when not followed by "]>" (each together with the character(s) that rule this out).
[65] Ignore ::= ('[^<\]]' | '<' ('[^!]' | '!' '[^\[]') | ']' ('[^\]]' | ']' '[^>]') )*
// Character reference: decimal "&#...;" or hexadecimal "&#x...;".
[66] CharRef ::= '&#' '[0-9]+' ';'
| '&#x' '[0-9a-fA-F]+' ';' /* [WFC: Legal Character] */
[67] Reference ::= EntityRef | CharRef
// General entity reference: "&" Name ";".
[68] EntityRef ::= '&' Name ';' /* [WFC: Entity Declared] */
/* [VC: Entity Declared] */
/* [WFC: Parsed Entity] */
/* [WFC: No Recursion] */
// Parameter-entity reference: "%" Name ";".
[69] PEReference ::= '%' Name ';' /* [VC: Entity Declared] */
/* [WFC: No Recursion] */
/* [WFC: In DTD] */
// Entity declarations: general (GEDecl) or parameter (PEDecl, marked by '%' after
// "<!ENTITY").
[70] EntityDecl ::= GEDecl | PEDecl
[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
// Only general entities may carry an NDataDecl after an ExternalID.
[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
[74] PEDef ::= EntityValue | ExternalID
// External identifier: SYSTEM with a system literal, or PUBLIC with a public literal
// followed by a system literal.
[75] ExternalID ::= 'SYSTEM' S SystemLiteral
| 'PUBLIC' S PubidLiteral S SystemLiteral
[76] NDataDecl ::= S 'NDATA' S Name /* [VC: Notation Declared] */
// Text declaration: unlike XMLDecl (rule [23]), VersionInfo is optional and EncodingDecl
// is mandatory.
[77] TextDecl ::= '<\?xml' VersionInfo? EncodingDecl S? '?>'
[78] extParsedEnt ::= TextDecl? content
// Encoding declaration with a semantic action that calls $set_encoding.
// NOTE(review): presumably $7 and $11 refer to the EncName in the double-quoted and the
// single-quoted alternative respectively, only one of which is present, with std::max
// selecting the one that matched - confirm the positional numbering with the generator.
[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
{ // switch encoding on the stream here:
$set_encoding(std::max($7,$11));
}
[81] EncName ::= '[A-Za-z]' ('[A-Za-z0-9\._]' | '-')* /* Encoding name contains only Latin characters */
// Notation declaration: the identifier is either a full ExternalID or a PUBLIC identifier
// without a system literal (PublicID).
[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' /* [VC: Unique Notation Name] */
[83] PublicID ::= 'PUBLIC' S PubidLiteral
/* Namespace Definitions
*/
// Rule numbers restart at [1] in this section; they are independent of the numbers used
// for the XML productions above.
// Attribute names that declare namespaces: "xmlns:prefix" or plain "xmlns".
[1] NSAttName ::= PrefixedAttName | DefaultAttName
[2] PrefixedAttName ::= 'xmlns:' NCName /* NSC: Reserved Prefixes and Namespace Names */
[3] DefaultAttName ::= 'xmlns'
[4] NCName ::= NCNameStartChar NCNameChar* /* An XML Name, minus the ":" */
//[5] NCNameChar ::= NameChar - ':'
// Re-write to be more efficient: the same extra characters as NameChar (rule [4a] above),
// but built on NCNameStartChar, which has no ":" alternative.
[5] NCNameChar ::= NCNameStartChar | "-" | "." | '[0-9]' | '\xB7' |
'[\x0300-\x036F]' | '[\x203F-\x2040]'
//[6] NCNameStartChar ::= NameStartChar - ':'
// Re-write to be more efficient: the same alternatives as NameStartChar with ":" removed.
// The ranges are deliberately kept split: merging them into [xC0-x2FF] and [xF900-xEFFFF]
// would also admit xD7, xF7, xFDD0-xFDEF, xFFFE and xFFFF, which NameStartChar excludes.
[6] NCNameStartChar ::= '[A-Z]' | "_" | '[a-z]' |
'[\xC0-\xD6]' | '[\xD8-\xF6]' | '[\xF8-\x2FF]' | '[\x370-\x37D]' | '[\x37F-\x1FFF]' |
'[\x200C-\x200D]' | '[\x2070-\x218F]' | '[\x2C00-\x2FEF]' |
'[\x3001-\xD7FF]' | '[\xF900-\xFDCF]' | '[\xFDF0-\xFFFD]' | '[\x10000-\xEFFFF]'
// Qualified names: "prefix:local" or just "local"; both parts are NCNames.
[7] QName ::= PrefixedName | UnprefixedName
[8] PrefixedName ::= Prefix ':' LocalPart
[9] UnprefixedName ::= LocalPart
[10] Prefix ::= NCName
[11] LocalPart ::= NCName
// Namespace-aware replacements for the commented-out XML productions above: each uses
// QName where the original used Name.
[12] STag ::= '<' QName (S Attribute)* S? '>' /*NSC: Prefix Declared*/
[13] ETag ::= '<\/' QName S? '>' /*NSC: Prefix Declared*/
[14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' /*NSC: Prefix Declared*/
// An attribute is either a namespace declaration (NSAttName) or an ordinary QName attribute.
[15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue /*NSC: Prefix Declared*/
[16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
[17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
// Content particle: a name or nested group with an optional occurrence indicator.
[18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?
// Mixed content: "(#PCDATA | a | b)*", or "(#PCDATA)" without the trailing '*'.
[19] Mixed ::= '\(' S? '#PCDATA' (S? '|' S? QName)* S? ')\*' | '\(' S? '#PCDATA' S? ')'
[20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'
[21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
|