Scarlet Line home page Scarlet Line - SOFTWARE DESIGN & DEVELOPMENT

[Home]->[Documentation]->[Syntac Universal Parser]->[Other Grammars]->[Extensible Markup Language (XML)]->[Grammar]

Expand All
Collapse All
Contents

XML Grammar Locate in Contents

/*
XML Grammar abstracted from:
XML 1.0 (Fourth Edition) http://www.w3.org/TR/xml/ 
XML v1.1(Second Edition) http://www.w3.org/TR/xml11/
Namespaces in XML 1.1 (Second Edition) http://www.w3.org/TR/xml-names11/
*/

%entrypoint document dtd_document

%explicit_whitespace

dtd_document	::=		extSubset <<EOF>>

//[1]   	document	   ::=   	  (  prolog  element  Misc*  )   -  (  Char* RestrictedChar    Char*  )  
// Re-write to be more efficient:
[1]    document    ::=    prolog element Misc* <<EOF>>

[2]   	Char	   ::=   	'[\x1-\xD7FF]' | '[\xE000-\xFFFD]' | '[\x10000-\x10FFFF]'
	/* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
[2a]   	RestrictedChar	   ::=   	'[\x1-\x8]' | '[\xB-\xC]' | '[\xE-\x1F]' | '[\x7F-\x84]' | '[\x86-\x9F]'

[3]    S    ::=    ('\x20' | '\x9' | '\xD' | '\xA')+ 

// First character of an XML Name: ":", "_", ASCII letters and the listed
// non-ASCII ranges.
// NOTE(review): this production and [4a] NameChar are written with ":=" while
// every other production in this grammar uses "::=". Confirm whether ":=" is an
// intentional, distinct operator in the grammar tool or a typo for "::=".
[4]     NameStartChar := ":" | '[A-Z]' | "_" | '[a-z]' |
         '[\xC0-\xD6]' | '[\xD8-\xF6]' | '[\xF8-\x2FF]' | '[\x370-\x37D]' | '[\x37F-\x1FFF]' |
         '[\x200C-\x200D]' | '[\x2070-\x218F]' | '[\x2C00-\x2FEF]' |
         '[\x3001-\xD7FF]' | '[\xF900-\xFDCF]' | '[\xFDF0-\xFFFD]' | '[\x10000-\xEFFFF]'

[4a]    NameChar := NameStartChar | "-" | "." | '[0-9]' | '\xB7' |
         '[\x0300-\x036F]' | '[\x203F-\x2040]'

[5]     Name    ::=   NameStartChar NameChar*

[6]    Names    ::=    Name (S Name)* 

[7]     Nmtoken    ::=   NameChar+

[8]    Nmtokens    ::=    Nmtoken (S Nmtoken)* 

[9]    EntityValue    ::=    '"' ('[^%&"]' | PEReference | Reference)* '"'  
   |  "'" ('[^%&\']' | PEReference | Reference)* "'"

    
[10]    AttValue    ::=    '"' ('[^<&"]' | Reference)* '"'  
   |  "'" ('[^<&\']' | Reference)* "'" 
   
[11]    SystemLiteral    ::=    ('"' '[^"]*' '"') | ("'" '[^\']*' "'")  

//[12]    PubidLiteral    ::=    '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 

// Re-write to be more efficient:
[12]    PubidLiteral    ::=    '"' '[^\"]*' '"' | "'" '[^\']*' "'" 

[13]    PubidChar    ::=    '\x20' | '\xD' | '\xA' | '[a-zA-Z0-9]' | '[-\'()+,./:=?;!*#@$_%]'
 
//[14]    CharData    ::=    '[^<&]*' - ('[^<&]*' ']]>' '[^<&]*') 
// Re-write to be more efficient:
// NOTE(review): every alternative that starts with ']' also requires a following
// character other than '<' or '&', so character data ending in ']' directly before
// markup (e.g. "a[0]" followed by "</x>") appears not to match. Separately, "]]]>"
// appears to be accepted (']][^<&>]' consumes "]]]", then '[^<&\]]' consumes ">")
// although it contains the forbidden "]]>". Confirm against the parser.
[14]    CharData    ::=    ('[^<&\]]' | '][^<&\]]' | ']][^<&>]')+

//[15]    Comment    ::=    '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 
// Re-write to be more efficient:
[15]    Comment    ::=    '<!--([^-]|-[^-])*-->' 

//[16]    PI    ::=    '<\?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 
// Re-write to be more efficient:
// Processing instruction: '<?' target, optional body, '?>'.
// Inside the body a '?' is allowed only when the next character is not '>'.
[16]    PI    ::=    '<\?' PITarget (S ('[^?]' | '?' '[^>]')* )? '?>'
 
//[17]    PITarget    ::=    Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 
// Re-write to be more efficient:
[17]    PITarget    ::=    Name

[18]    CDSect    ::=    CDStart CData CDEnd 

[19]    CDStart    ::=    '<!\[CDATA\[' 

//[20]    CData    ::=    (Char* - (Char* ']]>' Char*))  
// Re-write to be more efficient:
[20]    CData    ::=    ('[^\]]' | ']' '[^\]]' | ']]' '[^>]')*

[21]    CDEnd    ::=    ']]>' 

[22]    prolog    ::=    XMLDecl? Misc* (doctypedecl Misc*)? 

[23]    XMLDecl    ::=    '<\?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 

[24]    VersionInfo    ::=    S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')/* */ 

[25]    Eq    ::=    S? '=' S? 

[26]    VersionNum    ::=    ('[a-zA-Z0-9_\.:]' | '-')+ 

[27]    Misc    ::=    Comment | PI | S 

/* Replaced by Namespace Definition */
// [28]   	doctypedecl	   ::=   	'<!DOCTYPE' S  Name (S  ExternalID)? S? ('[' intSubset ']' S?)? '>'	
	/*VC: Root Element Type*/
	/*WFC: External Subset*/
[28a]   	DeclSep	   ::=   	PEReference | S 	/*WFC: PE Between Declarations*/
[28b]   	intSubset	   ::=   	(markupdecl | DeclSep)*

[29]    markupdecl    ::=    elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment  
	/* [VC: Proper Declaration/PE Nesting] */ 
    /* [WFC: PEs in Internal Subset] */ 

[30]    extSubset    ::=    TextDecl? extSubsetDecl
 
[31]    extSubsetDecl    ::=    ( markupdecl | conditionalSect | DeclSep)* 

[32]    SDDecl    ::=    S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))  
	/* [VC: Standalone Document Declaration] */ 

/* (Productions 33 through 38 have been removed.) */

[39]    element    ::=    EmptyElemTag | STag content ETag 
	/* [WFC: Element Type Match] */ 
    /* [VC: Element Valid] */ 

/* Replaced by Namespace Definition */
// [40]    STag    ::=    '<' Name (S Attribute)* S? '>' /* [WFC: Unique Att Spec] */
 
/* Replaced by Namespace Definition */
// [41]    Attribute    ::=    Name Eq AttValue /* [VC: Attribute Value Type] */ 
    /* [WFC: No External Entity References] */ 
    /* [WFC: No < in Attribute Values] */ 

/* Replaced by Namespace Definition */
// [42]    ETag    ::=    '<\/' Name S? '>' 

[43]    content    ::=    CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*  

/* Replaced by Namespace Definition */
// [44]    EmptyElemTag    ::=    '<' Name (S Attribute)* S? '\/>' /* [WFC: Unique Att Spec] */ 

/* Replaced by Namespace Definition */
// [45]    elementdecl    ::=    '<!ELEMENT' S Name S contentspec S? '>' 
	/* [VC: Unique Element Type Declaration] */ 
	
[46]    contentspec    ::=    'EMPTY' | 'ANY' | Mixed | children  

[47]    children    ::=    (choice | seq) ('?' | '*' | '+')? 

/* Replaced by Namespace Definition */
// [48]    cp    ::=    (Name | choice | seq) ('?' | '*' | '+')? 

[49]    choice    ::=    '\(' S? cp ( S? '|' S? cp )+ S? ')' 
    /* [VC: Proper Group/PE Nesting] */ 
	
[50]    seq    ::=    '\(' S? cp ( S? ',' S? cp )* S? ')'  
    /* [VC: Proper Group/PE Nesting] */ 

/* Replaced by Namespace Definition */
// [51]    Mixed    ::=    '\(' S? '#PCDATA' (S? '|' S? Name)* S? ')\*' | '\(' S? '#PCDATA' S? ')'
	/* [VC: Proper Group/PE Nesting] */ 
    /* [VC: No Duplicate Types] */ 

/* Replaced by Namespace Definition */
// [52]    AttlistDecl    ::=    '<!ATTLIST' S Name AttDef* S? '>' 

/* Replaced by Namespace Definition */
// [53]    AttDef    ::=    S Name S AttType S DefaultDecl 

[54]    AttType    ::=    StringType | TokenizedType | EnumeratedType  

[55]    StringType    ::=    'CDATA' 

[56]    TokenizedType    ::=    'ID' /* [VC: ID] */ /* [VC: One ID per Element Type] */ /* [VC: ID Attribute Default] */ 
   | 'IDREF' /* [VC: IDREF] */ 
   | 'IDREFS' /* [VC: IDREF] */ 
   | 'ENTITY' /* [VC: Entity Name] */ 
   | 'ENTITIES' /* [VC: Entity Name] */ 
   | 'NMTOKEN' /* [VC: Name Token] */ 
   | 'NMTOKENS' /* [VC: Name Token] */ 

[57]    EnumeratedType    ::=    NotationType | Enumeration 
 
[58]    NotationType    ::=    'NOTATION' S '\(' S? Name (S? '|' S? Name)* S? ')'  /* [VC: Notation Attributes] */ 
    /* [VC: One Notation Per Element Type] */ 
    /* [VC: No Notation on Empty Element] */ 

[59]    Enumeration    ::=    '\(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* [VC: Enumeration] */ 

[60]    DefaultDecl    ::=    '#REQUIRED' | '#IMPLIED'  
   | (('#FIXED' S)? AttValue) /* [VC: Required Attribute] */ 
    /* [VC: Attribute Default Legal] */ 
    /* [WFC: No < in Attribute Values] */ 
    /* [VC: Fixed Attribute Default] */ 

[61]    conditionalSect    ::=    includeSect | ignoreSect  

[62]    includeSect    ::=    '<!\[' S? 'INCLUDE' S? '\[' extSubsetDecl ']]>'  /* */ 
    /* [VC: Proper Conditional Section/PE Nesting] */ 

[63]    ignoreSect    ::=    '<!\[' S? 'IGNORE' S? '\[' ignoreSectContents* ']]>' /* */ 
    /* [VC: Proper Conditional Section/PE Nesting] */ 

[64]    ignoreSectContents    ::=    Ignore ('<!\[' ignoreSectContents ']]>' Ignore)* 

//[65]    Ignore    ::=    Char* - (Char* ('<!\[' | ']]>') Char*)  
// Re-write to be more efficient:
[65]    Ignore    ::=    ('[^<\]]' | '<' ('[^!]' | '!' '[^\[]') | ']' ('[^\]]' | ']' '[^>]') )*  

[66]    CharRef    ::=    '&#' '[0-9]+' ';'  
   | '&#x' '[0-9a-fA-F]+' ';' /* [WFC: Legal Character] */ 

[67]    Reference    ::=    EntityRef | CharRef 

[68]    EntityRef    ::=    '&' Name ';' /* [WFC: Entity Declared] */ 
    /* [VC: Entity Declared] */ 
    /* [WFC: Parsed Entity] */ 
    /* [WFC: No Recursion] */ 

[69]    PEReference    ::=    '%' Name ';' /* [VC: Entity Declared] */ 
    /* [WFC: No Recursion] */ 
    /* [WFC: In DTD] */ 

[70]    EntityDecl    ::=    GEDecl | PEDecl 

[71]    GEDecl    ::=    '<!ENTITY' S Name S EntityDef S? '>' 

[72]    PEDecl    ::=    '<!ENTITY' S '%' S Name S PEDef S? '>' 

[73]    EntityDef    ::=    EntityValue | (ExternalID NDataDecl?) 

[74]    PEDef    ::=    EntityValue | ExternalID 

[75]    ExternalID    ::=    'SYSTEM' S SystemLiteral 
   | 'PUBLIC' S PubidLiteral S SystemLiteral  

[76]    NDataDecl    ::=    S 'NDATA' S Name /* [VC: Notation Declared] */ 

[77]    TextDecl    ::=    '<\?xml' VersionInfo? EncodingDecl S? '?>' 

[78]    extParsedEnt    ::=    TextDecl? content 

[80]    EncodingDecl    ::=    S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
	{ // switch encoding on the stream here:
	$set_encoding(std::max($7,$11));
	}  

[81]    EncName    ::=    '[A-Za-z]' ('[A-Za-z0-9\._]' | '-')* /* Encoding name contains only Latin characters */ 

[82]    NotationDecl    ::=    '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' /* [VC: Unique Notation Name] */ 

[83]    PublicID    ::=    'PUBLIC' S PubidLiteral  

/* Namespace Definitions
*/

[1]   	NSAttName	   ::=   	PrefixedAttName	| DefaultAttName

[2]   	PrefixedAttName	   ::=   	'xmlns:' NCName	/* NSC: Reserved Prefixes and Namespace Names */

[3]   	DefaultAttName	   ::=   	'xmlns'

[4]   	NCName	   ::=   	NCNameStartChar NCNameChar*	/* An XML Name, minus the ":" */

//[5]   	NCNameChar	   ::=   	NameChar - ':'
// Re-write to be more efficient:
[5]   	NCNameChar	   ::=   	NCNameStartChar | "-" | "." | '[0-9]' | '\xB7' |
         '[\x0300-\x036F]' | '[\x203F-\x2040]'

//[6]   	NCNameStartChar	   ::=   	NameStartChar - ':'
// Re-write to be more efficient:
// Same character ranges as NameStartChar ([4] in the XML section) with ":" removed.
// The ranges are kept split so that U+00D7, U+00F7, U+FDD0-U+FDEF and
// U+FFFE-U+FFFF stay excluded, exactly as in NameStartChar.
[6]   	NCNameStartChar	   ::= '[A-Z]' | "_" | '[a-z]' |
         '[\xC0-\xD6]' | '[\xD8-\xF6]' | '[\xF8-\x2FF]' | '[\x370-\x37D]' | '[\x37F-\x1FFF]' |
         '[\x200C-\x200D]' | '[\x2070-\x218F]' | '[\x2C00-\x2FEF]' |
         '[\x3001-\xD7FF]' | '[\xF900-\xFDCF]' | '[\xFDF0-\xFFFD]' | '[\x10000-\xEFFFF]'

[7]   	QName	   ::=   	PrefixedName | UnprefixedName

[8]   	PrefixedName	   ::=   	Prefix ':' LocalPart

[9]   	UnprefixedName	   ::=   	LocalPart

[10]   	Prefix	   ::=   	NCName

[11]   	LocalPart	   ::=   	NCName

[12]   	STag	   ::=   	'<' QName (S Attribute)* S? '>' 	/*NSC: Prefix Declared*/

[13]   	ETag	   ::=   	'<\/' QName S? '>'	/*NSC: Prefix Declared*/

[14]   	EmptyElemTag	   ::=   	'<' QName (S Attribute)* S? '/>'	/*NSC: Prefix Declared*/

[15]   	Attribute	   ::=   	NSAttName Eq AttValue | QName Eq AttValue	/*NSC: Prefix Declared*/

// DOCTYPE declaration with a namespace-qualified root name, an optional external
// ID and an optional internal subset in square brackets.
// The opening bracket is escaped, as in CDStart and includeSect, because
// single-quoted tokens elsewhere in this grammar treat '[' as the start of a
// character class.
[16]   	doctypedecl	   ::=   	'<!DOCTYPE' S QName (S ExternalID)? S? ('\[' intSubset ']' S?)? '>'

[17]   	elementdecl	   ::=   	'<!ELEMENT' S QName S contentspec S? '>'

[18]   	cp	   ::=   	(QName | choice | seq) ('?' | '*' | '+')?

[19]   	Mixed	   ::=   	'\(' S? '#PCDATA' (S? '|' S? QName)* S? ')\*' | '\(' S? '#PCDATA' S? ')'
			
[20]   	AttlistDecl	   ::=   	'<!ATTLIST' S QName AttDef* S? '>'

[21]   	AttDef	   ::=   	S (QName | NSAttName) S AttType S DefaultDecl