#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

# Functional test for a lexer-only ANTLR grammar that recognises a small XML
# document in a single DOCUMENT token. Every fragment rule reports what it
# matched through +say+ from an embedded action, and the spec compares the
# accumulated text with the expected trace.
#
# NOTE(review): +say+ and +lexer.output+ are presumably supplied by
# ANTLR3::Test::CaptureOutput, and lexing errors are presumably turned into
# exceptions by ANTLR3::Test::RaiseErrors -- confirm against the test helpers.
class XMLLexerTest < ANTLR3::Test::Functional
  # The grammar text is handed to ANTLR verbatim (single-quoted heredoc, so
  # backslashes and '#{...}' reach the generated lexer untouched).
  inline_grammar( <<-'END' )
    lexer grammar XML;
    options { language = Ruby; }

    @members {
      include ANTLR3::Test::CaptureOutput
      include ANTLR3::Test::RaiseErrors

      def quote(text)
        text = text.gsub(/\"/, '\\"')
        \%("#{ text }")
      end
    }

    DOCUMENT
        :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
        ;

    fragment DOCTYPE
        :
            '<!DOCTYPE' WS rootElementName=GENERIC_ID
            {say("ROOTELEMENT: " + $rootElementName.text)}
            WS
            (
                ( 'SYSTEM' WS sys1=VALUE
                    {say("SYSTEM: " + $sys1.text)}

                | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                    {say("PUBLIC: " + $pub.text)}
                    {say("SYSTEM: " + $sys2.text)}
                )
                ( WS )?
            )?
            ( dtd=INTERNAL_DTD
                {say("INTERNAL DTD: " + $dtd.text)}
            )?
            '>'
        ;

    fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

    fragment PI :
            '<?' target=GENERIC_ID WS?
              {say("PI: " + $target.text)}
            ( ATTRIBUTE WS? )*  '?>'
        ;

    fragment XMLDECL :
            '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
              {say("XML declaration")}
            ( ATTRIBUTE WS? )*  '?>'
        ;


    fragment ELEMENT
        : ( START_TAG
                (ELEMENT
                | t=PCDATA
                    {say("PCDATA: " << quote($t.text))}
                | t=CDATA
                    {say("CDATA: " << quote($t.text))}
                | t=COMMENT
                    {say("Comment: " << quote($t.text))}
                | pi=PI
                )*
                END_TAG
            | EMPTY_ELEMENT
            )
        ;

    fragment START_TAG
        : '<' WS? name=GENERIC_ID WS?
              {say("Start Tag: " + $name.text)}
            ( ATTRIBUTE WS? )* '>'
        ;

    fragment EMPTY_ELEMENT
        : '<' WS? name=GENERIC_ID WS?
              {say("Empty Element: " + $name.text)}
            ( ATTRIBUTE WS? )* '/>'
        ;

    fragment ATTRIBUTE
        : name=GENERIC_ID WS? '=' WS? value=VALUE
            {say("Attr: " + $name.text + " = "+ $value.text)}
        ;

    fragment END_TAG
        : '</' WS? name=GENERIC_ID WS? '>'
            {say("End Tag: " + $name.text)}
        ;

    fragment COMMENT
        :   '<!--' (options {greedy=false;} : .)* '-->'
        ;

    fragment CDATA
        :   '<![CDATA[' (options {greedy=false;} : .)* ']]>'
        ;

    fragment PCDATA : (~'<')+ ;

    fragment VALUE :
            ( '\"' (~'\"')* '\"'
            | '\'' (~'\'')* '\''
            )
        ;

    fragment GENERIC_ID
        : ( LETTER | '_' | ':')
            ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
        ;

    fragment LETTER
        : 'a'..'z'
        | 'A'..'Z'
        ;

    fragment WS  :
            (   ' '
            |   '\t'
            |  ( '\n'
                |  '\r\n'
                |  '\r'
                )
            )+
        ;
  END

  it "should be valid" do
    # Sample document. The ampersand and less-than inside the element content
    # are written as character references: PCDATA is defined as (~'<')+, so a
    # literal '<' there could not be lexed and the expected PCDATA block below
    # could never contain one.
    # NOTE(review): fixed_indent( 0 ) is assumed to strip the common leading
    # indentation of the non-blank lines; the relative indentation of the
    # ATTLIST lines is mirrored in the expected output so both stay in sync.
    lexer = XML::Lexer.new( <<-'END'.fixed_indent( 0 ) )
      <?xml version='1.0'?>
      <!DOCTYPE component [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
                attr CDATA #IMPLIED
                attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>

      ]>
      <component attr="val'ue" attr2='val"ue'>
      <!-- This is a comment -->
      Text
      <![CDATA[huhu]]>

      &amp;
      &lt;
      <?xtal cursor='11'?>
      <sub/>
      <sub></sub>
      </component>
    END

    # Drain the token stream; the tokens themselves are not inspected, the
    # iteration only drives the lexer so that the embedded actions run.
    lexer.map { |tk| tk }

    # One entry per +say+ call, in the order the fragment rules fire. Quoted
    # PCDATA blocks span several lines because the matched text includes the
    # newlines between the surrounding tags.
    lexer.output.should == <<-'END'.fixed_indent( 0 )
      XML declaration
      Attr: version = '1.0'
      ROOTELEMENT: component
      INTERNAL DTD: [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
                attr CDATA #IMPLIED
                attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>

      ]
      Start Tag: component
      Attr: attr = "val'ue"
      Attr: attr2 = 'val"ue'
      PCDATA: "
      "
      Comment: "<!-- This is a comment -->"
      PCDATA: "
      Text
      "
      CDATA: "<![CDATA[huhu]]>"
      PCDATA: "

      &amp;
      &lt;
      "
      PI: xtal
      Attr: cursor = '11'
      PCDATA: "
      "
      Empty Element: sub
      PCDATA: "
      "
      Start Tag: sub
      End Tag: sub
      PCDATA: "
      "
      End Tag: component
    END
  end

end