"""Tests for the lexer generated from the grammar t012lexerXMLLexer.g."""

import antlr3
import testbase
import unittest
import os
import sys
from cStringIO import StringIO
import difflib
import textwrap


class t012lexerXML(testbase.ANTLRTest):
    """Run the t012lexerXMLLexer grammar over valid and malformed XML."""

    def setUp(self):
        """Compile the lexer grammar before every test."""
        self.compileGrammar('t012lexerXMLLexer.g')


    def lexerClass(self, base):
        """Return a subclass of `base` that raises on the first lexer error."""
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer


    def _readUnicode(self, path):
        """Return the content of `path` decoded from UTF-8.

        The file is opened in a `with` block so the handle is closed
        deterministically instead of being left to the garbage collector.
        """
        with open(path) as fp:
            return unicode(fp.read(), 'utf-8')


    def _consumeAllTokens(self, lexer, eofType):
        """Pull tokens from `lexer` until one of type `eofType` is seen."""
        while True:
            if lexer.nextToken().type == eofType:
                break


    def _assertLexerError(self, source, excClass, unexpectedType,
                          charPositionInLine, line):
        """Lex `source` and require that it fails with `excClass`.

        The raised exception must carry the given `unexpectedType`,
        `charPositionInLine` and `line`. The test fails if the lexer
        reaches EOF without raising `excClass`.
        """
        lexer = self.getLexer(antlr3.StringStream(source))

        try:
            self._consumeAllTokens(lexer, antlr3.EOF)

        except excClass as exc:
            # unittest assertions instead of bare `assert`: the latter is
            # stripped under `python -O`, which would make the test vacuous.
            self.assertEqual(exc.unexpectedType, unexpectedType)
            self.assertEqual(exc.charPositionInLine, charPositionInLine)
            self.assertEqual(exc.line, line)

        else:
            self.fail(
                'lexer reached EOF without raising %s' % excClass.__name__
                )


    def testValid(self):
        """Lex the sibling .input file and compare with the .output file.

        The actual text is taken from `lexer.outbuf` (presumably filled by
        the grammar's actions -- not visible from this file). On mismatch
        the failure message is an ASCII-escaped line diff.
        """
        basePath = os.path.splitext(__file__)[0]

        stream = antlr3.StringStream(self._readUnicode(basePath + '.input'))
        lexer = self.getLexer(stream)
        self._consumeAllTokens(lexer, self.lexerModule.EOF)

        output = unicode(lexer.outbuf.getvalue(), 'utf-8')
        testOutput = self._readUnicode(basePath + '.output')

        if output != testOutput:
            diff = difflib.Differ().compare(
                output.splitlines(True), testOutput.splitlines(True)
                )
            self.fail(
                ''.join(
                    [entry.encode('ascii', 'backslashreplace')
                     for entry in diff]
                    )
                )


    def testMalformedInput1(self):
        """`<document d>` must fail at the '>' on line 2, column 11."""
        self._assertLexerError(
            textwrap.dedent("""\
            <?xml version='1.0'?>
            <document d>
            </document>
            """),
            antlr3.NoViableAltException,
            unexpectedType='>',
            charPositionInLine=11,
            line=2
            )


    def testMalformedInput2(self):
        """`<?tml ...` must fail at the 't' on line 1, column 2."""
        self._assertLexerError(
            textwrap.dedent("""\
            <?tml version='1.0'?>
            <document>
            </document>
            """),
            antlr3.MismatchedSetException,
            unexpectedType='t',
            charPositionInLine=2,
            line=1
            )


    def testMalformedInput3(self):
        """`<docu ment attr="foo">` must fail at 'a' on line 2, column 11."""
        self._assertLexerError(
            textwrap.dedent("""\
            <?xml version='1.0'?>
            <docu ment attr="foo">
            </document>
            """),
            antlr3.NoViableAltException,
            unexpectedType='a',
            charPositionInLine=11,
            line=2
            )



if __name__ == '__main__':
    unittest.main()


## # run an infinite loop with randomly mangled input
## while True:
##     print "ping"

##     input = """\
## <?xml version='1.0'?>
## <!DOCTYPE component [
## <!ELEMENT component (PCDATA|sub)*>
## <!ATTLIST component
##           attr CDATA #IMPLIED
##           attr2 CDATA #IMPLIED
## >
## <!ELMENT sub EMPTY>

## ]>
## <component attr="val'ue" attr2='val"ue'>
## <!-- This is a comment -->
## Text
## <![CDATA[huhu]]>
## &
## <
## <?xtal cursor='11'?>
## <sub/>
## <sub></sub>
## </component>
## """

##     import random
##     input = list(input) # make it mutable
##     for _ in range(3):
##         p1 = random.randrange(len(input))
##         p2 = random.randrange(len(input))

##         c1 = input[p1]
##         input[p1] = input[p2]
##         input[p2] = c1
##     input = ''.join(input) # back to string

##     stream = antlr3.StringStream(input)
##     lexer = Lexer(stream)

##     try:
##         while True:
##             token = lexer.nextToken()
##             if token.type == EOF:
##                 break

##     except antlr3.RecognitionException, exc:
##         print exc
##         for l in input.splitlines()[0:exc.line]:
##             print l
##         print ' '*exc.charPositionInLine + '^'

##     except BaseException, exc:
##         print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
##         print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
##         print

##         raise
