// Home | History | Annotate | Download | only in tests  -- code-browser navigation residue, not part of the grammar
      1 lexer grammar t012lexerXMLLexer;
      2 options {
      3   language = Python;
      4 }
      5 
      6 @header {
      7 from cStringIO import StringIO
      8 }
      9 
     10 @lexer::init {
     11 self.outbuf = StringIO()
     12 }
     13 
     14 @lexer::members {
     15 def output(self, line):
     16     self.outbuf.write(line.encode('utf-8') + "\n")
     17 }
     18 
     19 DOCUMENT
     20     :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS? 
     21     ;
     22 
     23 fragment DOCTYPE
     24     :
     25         '<!DOCTYPE' WS rootElementName=GENERIC_ID 
     26         {self.output("ROOTELEMENT: "+rootElementName.text)}
     27         WS
     28         ( 
     29             ( 'SYSTEM' WS sys1=VALUE
     30                 {self.output("SYSTEM: "+sys1.text)}
     31                 
     32             | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
     33                 {self.output("PUBLIC: "+pub.text)}
     34                 {self.output("SYSTEM: "+sys2.text)}   
     35             )
     36             ( WS )?
     37         )?
     38         ( dtd=INTERNAL_DTD
     39             {self.output("INTERNAL DTD: "+dtd.text)}
     40         )?
     41 		'>'
     42 	;
     43 
     44 fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;
     45 
     46 fragment PI :
     47         '<?' target=GENERIC_ID WS? 
     48           {self.output("PI: "+target.text)}
     49         ( ATTRIBUTE WS? )*  '?>'
     50 	;
     51 
     52 fragment XMLDECL :
     53         '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS? 
     54           {self.output("XML declaration")}
     55         ( ATTRIBUTE WS? )*  '?>'
     56 	;
     57 
     58 
     59 fragment ELEMENT
     60     : ( START_TAG
     61             (ELEMENT
     62             | t=PCDATA
     63                 {self.output("PCDATA: \""+$t.text+"\"")}
     64             | t=CDATA
     65                 {self.output("CDATA: \""+$t.text+"\"")}
     66             | t=COMMENT
     67                 {self.output("Comment: \""+$t.text+"\"")}
     68             | pi=PI
     69             )*
     70             END_TAG
     71         | EMPTY_ELEMENT
     72         )
     73     ;
     74 
     75 fragment START_TAG 
     76     : '<' WS? name=GENERIC_ID WS?
     77           {self.output("Start Tag: "+name.text)}
     78         ( ATTRIBUTE WS? )* '>'
     79     ;
     80 
     81 fragment EMPTY_ELEMENT 
     82     : '<' WS? name=GENERIC_ID WS?
     83           {self.output("Empty Element: "+name.text)}
     84         ( ATTRIBUTE WS? )* '/>'
     85     ;
     86 
     87 fragment ATTRIBUTE 
     88     : name=GENERIC_ID WS? '=' WS? value=VALUE
     89         {self.output("Attr: "+name.text+"="+value.text)}
     90     ;
     91 
     92 fragment END_TAG 
     93     : '</' WS? name=GENERIC_ID WS? '>'
     94         {self.output("End Tag: "+name.text)}
     95     ;
     96 
     97 fragment COMMENT
     98 	:	'<!--' (options {greedy=false;} : .)* '-->'
     99 	;
    100 
    101 fragment CDATA
    102 	:	'<![CDATA[' (options {greedy=false;} : .)* ']]>'
    103 	;
    104 
    105 fragment PCDATA : (~'<')+ ; 
    106 
    107 fragment VALUE : 
    108         ( '\"' (~'\"')* '\"'
    109         | '\'' (~'\'')* '\''
    110         )
    111 	;
    112 
    113 fragment GENERIC_ID 
    114     : ( LETTER | '_' | ':') 
    115         ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
    116 	;
    117 
    118 fragment LETTER
    119 	: 'a'..'z' 
    120 	| 'A'..'Z'
    121 	;
    122 
    123 fragment WS  :
    124         (   ' '
    125         |   '\t'
    126         |  ( '\n'
    127             |	'\r\n'
    128             |	'\r'
    129             )
    130         )+
    131     ;    
    132 
    133