Scarlet Line home page Scarlet Line - SOFTWARE DESIGN & DEVELOPMENT

[Home]->[Documentation]->[Syntac Universal Parser]->[Other Grammars]->[Extended Backus-Naur form (EBNF)]->[Grammar]

Expand All
Collapse All
Contents

Extended Backus-Naur form (EBNF) Grammar Locate in Contents

/* Grammar-wide directives.  Each one is a '%' body_declaration; these two use
   the 'encoding' and 'explicit[-_]?whitespace' forms listed in
   body_declaration. */
%encoding binary
%explicit_whitespace

/* this rule matches one syntac file: */
/* Layout: an optional leading code_block, an optional class_definition, then
   one or more entries (each a production_rule or a '%' body_declaration)
   separated by lwsnl, and finally optional lws before the <<EOF>> symbol. */
syntac_file ::= 
		(lws? code_block)? (lws? class_definition)? lws? (production_rule | '%' body_declaration) (lwsnl (production_rule | '%' body_declaration))* lws? <<EOF>>
	

/* Optional cross-reference tag that production_rule accepts in front of a
   rule: one digit followed by any mix of digits, dots and lower-case letters,
   wrapped either in square brackets or in parentheses. */
reference ::= /* e.g. [3] or (6.4.1) */
		"[" '[0-9][0-9\.a-z]*' "]"

	|	"(" '[0-9][0-9\.a-z]*' ")"
	

/* One grammar rule.  After an optional reference tag it takes one of three
   forms; in each, an optional code_block may sit between the rule head and
   its right-hand side:
     - symbol_declare       followed by a symbol_expression
     - symbol_append_union  followed by an append_symexp_union_list
     - symbol_append_concat followed by an additional_symexp_concat_list
   NOTE(review): judging by the names, the last two extend a symbol that was
   declared earlier (with more alternatives / more sequence items) -- confirm. */
production_rule ::= 
		(reference lws?)? 
			(
				symbol_declare ( lws? code_block )? symbol_expression
			|	symbol_append_union ( lws? code_block )? append_symexp_union_list
			|	symbol_append_concat ( lws? code_block )? additional_symexp_concat_list
			)
	

/* The three rule heads below share one shape: a label, an optional
   square-bracketed param_list, optional lws, and then the operator that
   distinguishes them (expands_to, expands_to_union or expands_to_concat).
   The action attached to param_list passes $0 to _param_list.assign();
   $0 is presumably the text matched by param_list, and _param_list is
   declared outside this grammar text -- TODO confirm both. */
symbol_declare ::= 
		label ( "[" (param_list {_param_list.assign($0); } ) "]" )? lws? expands_to

/* Head of a rule whose operator ends in '|' or '/' (see expands_to_union). */
symbol_append_union ::= 
		label ( "[" (param_list {_param_list.assign($0); } ) "]" )? lws? expands_to_union

/* Head of a rule whose operator ends in '+' (see expands_to_concat). */
symbol_append_concat ::= 
		label ( "[" (param_list {_param_list.assign($0); } ) "]" )? lws? expands_to_concat

/* The "is defined as" operator: either one or more colons followed by zero or
   more equals signs (so ':', ':=' and '::=' all match), or one or more equals
   signs on their own. */
expands_to ::=
		':+=*'
	|	'=+'
	
/* expands_to followed, after optional lws, by '|' or '/'. */
expands_to_union ::=
		expands_to lws? '[|/]'

/* expands_to followed, after optional lws, by '+'. */
expands_to_concat ::=
		expands_to lws? "+"

	
/* Right-hand side of a rule: a first alternative, optionally followed by
   further alternatives. */
symbol_expression ::=
		symexp_union_element additional_symexp_union_list?
	

/* One alternative: a sequence of compounds with an optional trailing
   code_block. */
symexp_union_element ::=
		symexp_concat_list ( lws? code_block )?
	

/* One or more further alternatives, each introduced by '|' or '/'. */
additional_symexp_union_list ::=
		( lws? '[|/]' symexp_union_element )+
	
/* Right-hand side used by production_rule after symbol_append_union: one
   alternative followed by zero or more '|' or '/' separated alternatives. */
append_symexp_union_list ::=
		symexp_union_element ( lws? '[|/]' symexp_union_element )*
	

/* A sequence: optional lws, one compound, then optionally more compounds. */
symexp_concat_list ::=
		lws? symexp_compound additional_symexp_concat_list?
	
/* One or more compounds, each preceded by optional lws.  production_rule also
   uses this directly as the right-hand side after symbol_append_concat. */
additional_symexp_concat_list ::=
		( lws? symexp_compound )+
	
/* One item of a sequence: either an element with an optional postfix
   expression_modifier, or one of the prefix-count forms in
   symexp_compound_rfc. */
symexp_compound ::=
		symexp_element ( lws? expression_modifier )?
	|	symexp_compound_rfc
	

/* required white-space before this: */
/* Prefix-count notations:
     - optional digits, optional '*' plus digits, then a square-bracketed
       symbol_expression;
     - a count in front of an element: digits, digits '*' digits, or a bare
       '*' optionally followed by digits;
     - optional digits, '#', optional digits, then an element.
   NOTE(review): the rule name suggests these mirror the repetition and list
   notations used in RFC grammars -- confirm the intended semantics. */
symexp_compound_rfc ::=
		digit* ("*" digit*)? "[" symbol_expression lws? "]"

	|	(digit+ ("*" digit*)? | "*" digit*) symexp_element
	|	digit* "#" digit* symexp_element
	

/* Postfix modifier that symexp_compound allows after an element: '?', '+',
   '*', or a brace-delimited count.  The count is either digits with an
   optional comma and optional further digits, or a leading comma followed by
   optional digits. */
expression_modifier ::=
		"?"	
	|	"+"	
	|	"*"	
	|	"{" ( ( digit+ ("," digit*)? ) | ( "," digit* ) ) "}" 
	


/* Smallest unit of a symbol expression: a parenthesised sub-expression (which
   may carry a code_block right after the opening parenthesis and another one
   right before the closing parenthesis), a symbol reference, or a terminal. */
symexp_element ::=
		"(" ( lws? code_block )? symbol_expression lws? ( code_block lws? )? ")"
	|	symbol
	|	terminal
	

/* Reference to a symbol: a label with an optional square-bracketed
   param_list. */
symbol ::=
		label ( "[" param_list "]" )?
	

/* A terminal of the grammar being described:
     - a single-quoted regular expression with an optional tail;
     - a single-quoted item holding only an optional tail (so an empty pair of
       single quotes also matches);
     - a double-quoted quoted_string;
     - one of the single characters '@', '!' or '~';
     - '%' followed by an abnf_value.
   The quoted forms accept an optional trailing "i"; NOTE(review): presumably
   a case-insensitivity flag, as used on the encoding names in
   body_declaration -- confirm. */
terminal ::=	
		"'" regular_expression regexp_tail_end? "'" "i"?
	|	"'" regexp_tail? "'" "i"?
	|	'\"' quoted_string '\"' "i"?
	|	"@"

	|	"!"
	|	'%' abnf_value
	|	"~"
	

/* Tail that follows a regular expression inside a terminal; simply a
   regexp_tail under a separate name. */
regexp_tail_end ::=
		regexp_tail
	

/* Either '$', or '/' followed by a regular expression.
   NOTE(review): the '/' form looks like trailing context that must follow
   the match (label uses a tail-only terminal this way) -- confirm. */
regexp_tail ::=
		"$" /* [\n\z] */
	|	"/"	regular_expression
	

/* A regular expression: one concatenation, optionally followed by further
   '|' separated concatenations. */
regular_expression ::=
		regexp_concat_list additional_regexp_union_list?
	

/* One or more further branches, each introduced by '|'. */
additional_regexp_union_list ::=
		( "|" regexp_concat_list )+
	

/* A concatenation.  The first item has its own rule because a different set
   of literal characters is accepted in first position. */
regexp_concat_list ::=
		regexp_compound_first additional_regexp_concat_list?
	

/* Everything after the first item of a concatenation. */
additional_regexp_concat_list ::=
		regexp_compound+
	

/* First item of a concatenation: an element with an optional modifier, or a
   bare '^'. */
regexp_compound_first ::=
		regexp_element_first regexp_modifier?
	|	"^"  
	


/* Any later item of a concatenation: an element with an optional modifier. */
regexp_compound ::=
		regexp_element regexp_modifier?
	

/* Postfix modifier inside a regular expression: '?', '+', '*', or a
   brace-delimited count.  Unlike expression_modifier, the count here must
   start with at least one digit. */
regexp_modifier ::=
		"?"  
	|	"+"  
	|	"*"  
	|	"{" digit+ ("," digit*)? "}" 
	


/* Element allowed in first position of a concatenation: a parenthesised
   sub-expression, a first-position literal character, or a character range. */
regexp_element_first ::= 
		"(" regular_expression ")"
	|	regexp_elem_char_first 
	|	regexp_range

/* Element allowed in later positions; differs from regexp_element_first only
   in which literal-character rule it uses. */
regexp_element ::= 
		"(" regular_expression ")"
	|	regexp_elem_char 
	|	regexp_range
	
/* A character class: either a square-bracketed regexp_list or one of the
   predefined ranges. */
regexp_range ::=
	(	"[" regexp_list "]"

	|	regexp_predef_range
	)

/* A predefined character class.  Three spellings are accepted:
     1. "."  -- the attached action stores "any" in _char_class;
     2. a backslash followed by d, D, w, W, s or S, or by p or P with a
        brace-wrapped class name;
     3. "[:" name ":]", where name is one of the listed one- or two-letter
        codes or, as a catch-all, any run of letters, digits, '_' and '&'.
   The listed codes coincide with the Unicode General_Category abbreviations
   (L = letter, M = mark, N = number, Z = separator, C = other,
   P = punctuation, S = symbol); NOTE(review): confirm that this is how the
   parser interprets them.
   Fix: the catch-all alternative opened a parenthesis that was never closed,
   which left the parentheses of this rule unbalanced; the group is now
   closed on the same line. */
regexp_predef_range ::=
	(
		"."		{ _char_class.assign("any"); }
	|	"\\"
		(	"d" 
		|	"D" 
		|	"w" 
		|	"W" 
		|	"s" 
		|	"S" 
		|	"p{" '[A-Za-z_0-9&]+' "}" 
		|	"P{" '[A-Za-z_0-9&]+' "}" 
		)
	|	"[:"

		(	"L" 
		|	"Lu"
		|	"Ll"
		|	"Lt"
		|	"L&"
		|	"Lm"
		|	"Lo"

		|	"M" 
		|	"Mn"
		|	"Me"
		|	"Mc"

		|	"N" 
		|	"Nd"
		|	"Nl"
		|	"No"

		|	"Z" 
		|	"Zs"
		|	"Zl"
		|	"Zp"

		|	"C" 
		|	"Cc"
		|	"Cf"
		|	"Co"
		|	"Cs"
		|	"Cn"

		|	"P" 
		|	"Pd"
		|	"Ps"
		|	"Pe"
		|	"Pc"
		|	"Pi"
		|	"Pf"
		|	"Po"

		|	"S" 
		|	"Sm"
		|	"Sc"
		|	"Sk"
		|	"So"

		|	('[A-Za-z_0-9&]+')	/* catch-all: any other class name */
		)
		":]"
	)

/* Contents of a square-bracketed character class: a plain list, or '^'
   followed by a list. */
regexp_list ::=
		regexp_oneof
	|	"^" regexp_not_oneof
	

/* List without a leading '^': a first item followed by any number of further
   items. */
regexp_oneof ::=
		character_set_elem_first_oneof character_set_elem*
	

/* List that follows a leading '^'; same shape, but the first item uses its
   own literal-character rule. */
regexp_not_oneof ::=
	(	character_set_elem_first_not_oneof character_set_elem* )

/* Nested classes inside a bracket list: one intersect term followed by any
   number of '-' terms. */
regexp_sub_range ::=
	regexp_sub_intersect additional_regexp_sub_range*

/* '-' followed by an intersect term. */
additional_regexp_sub_range ::=
		"-" regexp_sub_intersect

/* One sub-element optionally followed by '&' and another intersect term
   (the rule recurses on its right-hand side). */
regexp_sub_intersect ::=
	regexp_sub_element additional_regexp_sub_intersect?

/* '&' followed by an intersect term. */
additional_regexp_sub_intersect ::=
		"&" regexp_sub_intersect

/* A nested bracketed list or a predefined range; the right-hand side is
   identical to that of regexp_range. */
regexp_sub_element ::=
	(	"[" regexp_list "]"

	|	regexp_predef_range
	)

/* One item of a bracket list in any position after the first: a literal
   character with an optional '-' upper bound, or a nested sub-range. */
character_set_elem ::= 
		regexp_range_char ("-" regexp_range_char)?
	|	( 
		regexp_sub_range
		)
	

/* First item of a list without '^'; differs from character_set_elem only in
   the rule used for the leading literal character. */
character_set_elem_first_oneof ::= 
		regexp_range_char_first_oneof ("-" regexp_range_char)?
	|	( 
		regexp_sub_range
		)
	

/* First item of a list that follows '^'; again only the leading literal rule
   differs. */
character_set_elem_first_not_oneof ::= 
		regexp_range_char_first_not_oneof ("-" regexp_range_char)?
	|	( 
		regexp_sub_range
		)
	

/* The five rules below list which characters count as literals in a regular
   expression, depending on position.  Each one accepts alpha_digit_spacebar,
   a backslash escape (escaped_character), or one character from a
   position-specific punctuation class. */

/* These characters are allowed as literal in regexps outside [ .. ] in position 1 */
regexp_elem_char_first ::=
		alpha_digit_spacebar	
	|	"\\" escaped_character		
	|	'[!\"#%&\')*+,\-:;<=>?@\]_`{|}~]'	
	
/* These characters are allowed as literal in regexps outside [ .. ] in positions greater than 1 */
regexp_elem_char ::=
		alpha_digit_spacebar	
	|	"\\" escaped_character		
	|	'[!\"#%&,\-:;<=>@\]^_`}~]'	

/* These characters are allowed as literal within '[ ... ]' in position 1 */
regexp_range_char_first_oneof ::=
		alpha_digit_spacebar	
	|	"\\" escaped_character
	|	'[!\"#$%&\'()*+,\-/;<=>?@\]_`{|}~]'	
	
/* These characters are allowed as literal within '[^ ... ]' in position 1 */
regexp_range_char_first_not_oneof ::=
		alpha_digit_spacebar	
	|	"\\" escaped_character
	|	'[!\"#$%&\'()*+,\-/:;<=>?@\]^_`{|}~]'	
	
/* These characters are allowed as literal within '[ ... ]' in positions greater than 1 */
/* Note: this class contains neither '-' nor ']', so in later positions those
   two characters can only appear as literals through a backslash escape. */
regexp_range_char ::=
		alpha_digit_spacebar	
	|	"\\" escaped_character
	|	'[!\"#$%&\'()*+,/:;<=>?@^_`{|}~]'	


/* Numeric terminal value; terminal places a '%' in front of it.  A base
   letter (b, d or x) is followed by one or more values of that base,
   separated by '.'. */
abnf_value ::=
		'b' bin_value ( "." bin_value )*
	|	'd' dec_value ( "." dec_value )*
	|	'x' hex_value ( "." hex_value )*
	

/* A binary value, or two of them joined by '-'. */
bin_value ::=
		bin_char ('-' bin_char)?
	

/* A decimal value, or two of them joined by '-'. */
dec_value ::=
		dec_char ("-" dec_char)?
	

/* A hexadecimal value, or two of them joined by '-'. */
hex_value ::=
		hex_char ("-" hex_char)?
	

/* Contents of a double-quoted terminal: at least one quoted_char. */
quoted_string ::=
		quoted_string_elem quoted_string_rest?
	

/* One element of a quoted string; currently just a quoted_char. */
quoted_string_elem ::= 
		quoted_char 
	

/* All elements after the first. */
quoted_string_rest ::=
		quoted_string_elem+
	

/* A character allowed inside a quoted string: a letter, digit or space, a
   backslash escape, or one of the quoted_punct characters. */
quoted_char ::=
		alpha_digit_spacebar	
	|	"\\" escaped_character		
	|	quoted_punct		
	

/* Punctuation accepted unescaped inside a quoted string.  The class contains
   neither the double quote nor the backslash. */
quoted_punct ::=
		'[!#$%&\'()*+,\-\./:;<=>?@\[\]^_`{|}~]'
	

/* Parameter text between the square brackets of a label: bracket-free text,
   optionally interleaved with bracketed groups. */
param_list ::=
		param_list_in ( param_list_sub param_list_in )*
	
/* A bracketed group inside a parameter list.  Its content is param_list_in,
   so as written a group cannot itself contain another bracketed group. */
param_list_sub ::=
		"[" param_list_in "]"

	
/* Any run of characters, possibly empty, that contains no square bracket. */
param_list_in ::=
		'[^\[\]]*'

/* A symbol name, in one of three spellings:
     - a bare identifier: a letter or underscore, then letters, digits and
       underscores in which each ':' or '-' must be followed directly by one
       of those characters; the closing tail-only terminal names a character
       that is not a letter, digit or underscore;
     - a name wrapped in single angle brackets;
     - a name wrapped in double angle brackets (syntac_file uses <<EOF>>).
   The bracketed spellings allow spaces and tabs around and inside the name. */
label ::=
		'[a-zA-Z_]' '([\:\-][a-zA-Z_0-9]|[a-zA-Z_0-9])*'	'/[^a-zA-Z_0-9]'
	|	"<" '[ \t]*' label_name_first label_name_rest* '[ \t]*' ">"
	|	"<<" '[ \t]*' label_name_first label_name_rest* '[ \t]*' ">>"
	

/* First character of a bracketed name: a letter, underscore, digit, or one of
   the listed punctuation characters.  The class contains neither '<' nor
   '>'. */
label_name_first ::= 
	( '[A-Z_a-z]' | '[0-9]' | '[!\"#$%&\'()*+,\-\./:;=?@\[\\\]^`{|}~]' )
	

/* Each later character of a bracketed name, optionally preceded by spaces or
   tabs.  Unlike the first character it may be '<', but still not '>'. */
label_name_rest ::=
	'[ \t]*' ( '[A-Z_a-z0-9]' | '[!\"#$%&\'()*+,\-\./:;<=?@\[\\\]^`{|}~]' )


/* Separator between top-level entries in syntac_file: optional ws, then one
   or more newlines, each optionally followed by ws.  (ws is not defined in
   this grammar text.) */
lwsnl ::= /* linear white space ( must go past new line ) */
		ws? ( '\n' ws? )+
	
/* What may follow a backslash in regular expressions and quoted strings.
   The action after '::=' sets _value to 0 (_value is declared outside this
   grammar text).  Alternatives: a single-letter control escape, a numeric
   escape (hexadecimal, octal, decimal or unicode form), a punct character,
   or a space.  punct is not defined in this grammar text. */
escaped_character ::= { _value = 0; }
		'a'	 /* BEL alert */
	|	'b'	 /* BS  backspace */
	|	't'	 /* HT  horizontal tab */
	|	'n'	 /* NL, LF  newline */
	|	'v'	 /* VT  vertical tab */
	|	'f'	 /* FF  form feed */
	|	'r'	 /* CR  carriage return */
	|	'z'	 /* EOF  end of file */
	|	hex_character
	|	oct_character
	|	dec_character
	|	unicode_character
	|	punct 
	|	' '	
	

/* Hexadecimal escape: 'x', one to eight hex digits, then a tail-only terminal
   naming a character that is not a hex digit. */
hex_character ::=
		'x' xdigit_roll{1,8} '/[^0-9A-Fa-f]'
	 
/* Unicode escape: same shape as hex_character but introduced by 'u'. */
unicode_character ::=
		'u' xdigit_roll{1,8} '/[^0-9A-Fa-f]'
	 
/* Single hex digit, wrapped in its own rule for use with a repeat count. */
xdigit_roll ::=
		xdigit
	 

/* One or two hex digits; used by hex_value. */
hex_char ::=
		xdigit xdigit?
	 

/* Octal escape: exactly three octal digits. */
oct_character ::=
		odigit odigit odigit
	 

/* Decimal escape: 'd' followed by exactly two digits. */
dec_character ::=
		'd' digit digit
	 

/* One or two decimal digits; used by dec_value. */
dec_char ::=
		digit digit?
	 

/* 'b' followed by exactly eight binary digits.  No other rule in this grammar
   text refers to bin_character. */
bin_character ::=
		'b' bdigit bdigit bdigit bdigit bdigit bdigit bdigit bdigit
	 

/* One to eight binary digits; used by bin_value. */
bin_char ::=
		bdigit_roll{1,8}
	 

/* Single binary digit, wrapped in its own rule for use with a repeat count. */
bdigit_roll ::=
		bdigit
	 
/* One ASCII letter, digit or space. */
alpha_digit_spacebar ::= '[A-Za-z0-9 ]'

/* C++ stuff follows to enable necessary code recognition:
   much of this is truncated to be syntactically correct
   without preserving semantics ( let the C++ compiler deal with it )
*/
/* A brace-delimited block of embedded code.  Nested brace pairs are handled
   by code_block_sub and everything else by code_block_in.  The action after
   '::=' stores $line in _start_line; NOTE(review): $line is presumably the
   current input line number -- confirm. */
code_block ::=	{ std::streamoff _start_line = $line; }
		'{' code_block_in ( code_block_sub code_block_in )* '}'
	

/* A nested brace pair inside a code block; same right-hand side as
   code_block, without the action, and recursive through itself. */
code_block_sub ::=
		'{' code_block_in ( code_block_sub code_block_in )* '}'
	

/* Brace-free content of a code block: any number of
     - single characters other than a brace, a quote character or a slash;
     - double-quoted string literals, with backslash escapes;
     - single-quoted character literals, with a backslash escape;
     - C-style block comments;
     - line comments running to the end of the line.
   Braces inside literals and comments are consumed here, so they do not
   disturb the brace matching done by code_block and code_block_sub.
   Fix: the block-comment pattern could not match a comment in which the
   closing slash is preceded by two asterisks (nor the three-asterisk empty
   comment), because the inner loop took each asterisk together with the
   character after it and left nothing for the terminator.  The loop now takes
   a run of asterisks only when the next character is neither an asterisk nor
   a slash, and the terminator accepts one or more asterisks before the slash.
   NOTE(review): no alternative accepts a lone slash, so a division operator
   in the embedded code would not match -- confirm whether that is intended. */
code_block_in ::=
	(		'[^{}"\'\/]'
		|	('"' ('[^"\\]'|'\\[.\n]')* '"')
		|	('\'' ('[^\'\\]'|'\\[.\n]') '\'') 
		|	'\/\*([^*]|\*+[^*/])*\*+\/'
		|	'\/\/[^\n]*$'
	)*

/* Optional class definition near the top of a syntac file: a class head with
   an optional code_block as its body. */
class_definition ::= 
		class-head (lws? code_block)?
	

/* Head of the class, with an optional leading 'template' plus template_block.
   The first alternative takes an optional plain identifier as the class name;
   the second requires a name qualified by a nested-name-specifier.  Both
   allow an optional base-clause. */
class-head ::=
		( 'template' lws? template_block lws? )? 'class' (lws identifier)? (lws? base-clause)?
	|	( 'template' lws? template_block lws? )? 'class' lws ( nested-name-specifier identifier ) (lws? base-clause)?
	

/* An identifier: csymf followed by any number of csym.  Neither is defined in
   this grammar text. */
identifier ::=
		csymf csym*
	

/* Base class list: ':' followed by one or more comma-separated
   base-specifiers. */
base-clause ::=
		':' lws? ( base-specifier ( lws? ',' lws? base-specifier )* )
	

/* One base class in a base-clause.  Three forms:
     - an optionally qualified class name with no access keyword;
     - 'virtual', an optional access-specifier, then the class name;
     - an access-specifier, an optional 'virtual', then the class name.
   In every form the class name may start with '::' and may be qualified by a
   nested-name-specifier.
   Fix: the first form demanded lws after the nested-name-specifier, so a
   qualified base such as ns::Base written without an access keyword did not
   match, although the other two forms accept it.  nested-name-specifier
   already ends in optional lws, so the extra requirement is dropped and
   spaced input is still accepted. */
base-specifier ::=
		'::'? lws? nested-name-specifier? class-name
	|	'virtual' (lws access-specifier)? (lws? '::')? lws? nested-name-specifier? class-name
	|	access-specifier (lws 'virtual')? (lws? '::')? lws? nested-name-specifier? class-name
	

/* One of the three access keywords. */
access-specifier ::=
		'private'
	|	'protected'
	|	'public'
	

/* A class name: an identifier, optionally followed by a template_block. */
class-name ::=
		identifier lws? template_block?
	

/* Angle-bracketed template text; nested angle-bracket pairs are handled by
   sub_template_block. */
template_block ::=
		'<' template_in_block ( sub_template_block template_in_block )* '>'
	

/* A nested angle-bracket pair; same right-hand side as template_block and
   recursive through itself. */
sub_template_block ::=
		'<' template_in_block ( sub_template_block template_in_block )* '>'
	

/* Any run of characters, possibly empty, that contains no angle bracket. */
template_in_block ::=
		'[^<>]*'
	

/* One or more class names, each followed by '::' with optional lws on both
   sides of the '::'. */
nested-name-specifier ::=
		(class-name lws? '::' lws?)+
	

/* A directive; syntac_file places a '%' in front of it.  The keywords accept
   an optional '-' or '_' between their two words.  Forms:
     include <filepath>
     entry point <label> ...         one or more entry points
     implicit whitespace [<label>]
     explicit whitespace
     list seperator [<label>]
     case sensitive / case insensitive
     encoding <name>                 three groups of names, flagged with 'i'
     type <typedefinition> <label> ...
   NOTE(review): 'seperator' is spelled this way in the keyword itself; it is
   kept as is because changing it would change which inputs match.
   Fix: the ISO-8859-1 name used the class [ -_], which this grammar's own
   character-set rules read as the range from space to underscore, so for
   example a digit or an upper-case letter was accepted between 'iso' and
   '8859'.  The hyphen is now escaped, making the class exactly space, hyphen
   and underscore. */
body_declaration ::= 
	'include' lws filepath
	|	'entry[-_]?point' (lws entrypoint)+
	|	'implicit[-_]?whitespace' (lws label)?
	|	'explicit[-_]?whitespace'	
	|	'list[-_]?seperator' (lws label)?
	|	'case[-_]?sensitive'	
	|	'case[-_]?insensitive'	
	|	'encoding' lws
		( 
			'(binary|8-?bit|latin-?1|iso[ \-_]?8859-?1)'i	 
		|	'(ascii|text|7-?bit)'i							
		|	('(utf-?8|unicode|31-?bit)'i					)
		)
	|	('type' lws typedefinition (lws labeldef)*
		)
	
/* Argument of the include directive: a single-quoted path, a double-quoted
   path, or an unquoted run of characters containing no quote character, no
   space and nothing in the range \x09 to \x0D. */
filepath ::=
		'\'' '[^\']'+ '\''	
	|	'\"' '[^\"]'+ '\"'	
	|	'[^\'\"\x09-\x0D ]'+
	

/* Argument of the entry point directive: a label. */
entrypoint ::=	
		label
	

/* '%' followed by an identifier and optional lws.  No other rule in this
   grammar text refers to head_declaration. */
head_declaration ::=
		('%' identifier) lws?
	

/* Type argument of the type directive: text in angle brackets, or a run of
   characters containing no space, tab or angle bracket. */
typedefinition ::=
		'<' '[^>]'+ '>'	
	|	'[^ \t<>]'+		
	

/* A label named in a type directive. */
labeldef ::= 
		label