/** XML parser by Oliver Zeigermann October 10, 2005 */
lexer grammar t012lexerXML;

// A lexer-only XML recognizer. DOCUMENT is the one rule here that is not
// marked `fragment`; every other rule is reached only through it. Embedded
// actions report what was recognized by appending text lines to `this.lout`.

options {
  language = JavaScript;
}

@lexer::members {
// Lines reported by the rule actions, in the order the actions ran.
this.lout = [];
// Append one report line to this.lout.
this.output = function(line) {
    this.lout.push(line);
};
}

// Whole document: optional XML declaration, optional DOCTYPE, then exactly one
// root element, with optional whitespace between and after the parts.
DOCUMENT
    :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
    ;

// Document type declaration. Reports the root element name, then the
// SYSTEM / PUBLIC identifiers and the internal DTD subset when present.
// NOTE(review): the WS after the root element name is mandatory here, so
// `<!DOCTYPE root>` is not matched. XML 1.0 makes that whitespace optional;
// left unchanged in case existing expectations depend on the current form.
fragment DOCTYPE
    :
        '<!DOCTYPE' WS rootElementName=GENERIC_ID
        {this.output("ROOTELEMENT: "+$rootElementName.text)}
        WS
        (
            ( 'SYSTEM' WS sys1=VALUE
                {this.output("SYSTEM: "+$sys1.text)}

            | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                {this.output("PUBLIC: "+$pub.text)}
                {this.output("SYSTEM: "+$sys2.text)}
            )
            ( WS )?
        )?
        ( dtd=INTERNAL_DTD
            {this.output("INTERNAL DTD: "+$dtd.text)}
        )?
        '>'
    ;

// Internal DTD subset: '[' followed by any characters up to the first ']'
// (the loop is non-greedy).
fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

// Processing instruction: '<?' target, optional attributes, '?>'.
// The target name is reported before the attributes are matched.
fragment PI :
        '<?' target=GENERIC_ID WS?
          {this.output("PI: "+$target.text)}
        ( ATTRIBUTE WS? )*  '?>'
    ;

// XML declaration: '<?' followed by "xml" in any letter case, optional
// attributes, '?>'.
fragment XMLDECL :
        '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
          {this.output("XML declaration")}
        ( ATTRIBUTE WS? )*  '?>'
    ;


// An element: either a start tag, any mix of nested elements, character data,
// CDATA sections, comments and processing instructions, then an end tag; or a
// single empty-element tag. Start and end tag names are not compared here.
fragment ELEMENT
    : ( START_TAG
            (ELEMENT
            | t=PCDATA
                {this.output("PCDATA: \""+$t.text+"\"")}
            | t=CDATA
                {this.output("CDATA: \""+$t.text+"\"")}
            | t=COMMENT
                {this.output("Comment: \""+$t.text+"\"")}
            | pi=PI  // label is not used by any action; PI reports itself
            )*
            END_TAG
        | EMPTY_ELEMENT
        )
    ;

// Opening tag '<name attr...>'; the name is reported before its attributes.
fragment START_TAG
    : '<' WS? name=GENERIC_ID WS?
          {this.output("Start Tag: "+$name.text)}
        ( ATTRIBUTE WS? )* '>'
    ;

// Self-closing tag '<name attr.../>'; the name is reported before its
// attributes.
fragment EMPTY_ELEMENT
    : '<' WS? name=GENERIC_ID WS?
          {this.output("Empty Element: "+$name.text)}
        ( ATTRIBUTE WS? )* '/>'
    ;

// Attribute 'name = value'. The reported value is the text matched by VALUE,
// which includes its surrounding quote characters.
fragment ATTRIBUTE
    : name=GENERIC_ID WS? '=' WS? value=VALUE
        {this.output("Attr: "+$name.text+"="+$value.text)}
    ;

// Closing tag '</name>'; reported once the closing '>' has been matched.
fragment END_TAG
    : '</' WS? name=GENERIC_ID WS? '>'
        {this.output("End Tag: "+$name.text)}
    ;

// Comment: '<!--' up to the first '-->' (non-greedy).
fragment COMMENT
    :   '<!--' (options {greedy=false;} : .)* '-->'
    ;

// CDATA section: '<![CDATA[' up to the first ']]>' (non-greedy).
fragment CDATA
    :   '<![CDATA[' (options {greedy=false;} : .)* ']]>'
    ;

// Character data: one or more characters other than '<'.
fragment PCDATA : (~'<')+ ;

// Quoted literal in double or single quotes. The body may contain any
// character except the delimiting quote; there is no escape mechanism.
fragment VALUE :
        ( '\"' (~'\"')* '\"'
        | '\'' (~'\'')* '\''
        )
    ;

// Name: starts with a letter, '_' or ':', then greedily continues with
// letters, digits, '.', '-', '_' or ':'.
fragment GENERIC_ID
    : ( LETTER | '_' | ':')
        ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
    ;

// ASCII letters only.
fragment LETTER
    : 'a'..'z'
    | 'A'..'Z'
    ;

// One or more whitespace characters: space, tab, or a line break written as
// LF, CRLF or CR.
fragment WS  :
        (   ' '
        |   '\t'
        |  ( '\n'
            |   '\r\n'
            |   '\r'
            )
        )+
    ;
    128