Home | History | Annotate | Download | only in tests
      1 import antlr3
      2 import testbase
      3 import unittest
      4 import os
      5 import sys
      6 from cStringIO import StringIO
      7 import difflib
      8 import textwrap
      9 
     10 class t012lexerXML(testbase.ANTLRTest):
     11     def setUp(self):
     12         self.compileGrammar('t012lexerXMLLexer.g')
     13         
     14         
     15     def lexerClass(self, base):
     16         class TLexer(base):
     17             def emitErrorMessage(self, msg):
     18                 # report errors to /dev/null
     19                 pass
     20 
     21             def reportError(self, re):
     22                 # no error recovery yet, just crash!
     23                 raise re
     24 
     25         return TLexer
     26     
     27         
     28     def testValid(self):
     29         inputPath = os.path.splitext(__file__)[0] + '.input'
     30         stream = antlr3.StringStream(unicode(open(inputPath).read(), 'utf-8'))
     31         lexer = self.getLexer(stream)
     32 
     33         while True:
     34             token = lexer.nextToken()
     35             if token.type == self.lexerModule.EOF:
     36                 break
     37 
     38 
     39         output = unicode(lexer.outbuf.getvalue(), 'utf-8')
     40 
     41         outputPath = os.path.splitext(__file__)[0] + '.output'
     42         testOutput = unicode(open(outputPath).read(), 'utf-8')
     43 
     44         success = (output == testOutput)
     45         if not success:
     46             d = difflib.Differ()
     47             r = d.compare(output.splitlines(1), testOutput.splitlines(1))
     48             self.fail(
     49                 ''.join([l.encode('ascii', 'backslashreplace') for l in r])
     50                 )
     51 
     52 
     53     def testMalformedInput1(self):
     54         input = textwrap.dedent("""\
     55         <?xml version='1.0'?>
     56         <document d>
     57         </document>
     58         """)
     59 
     60         stream = antlr3.StringStream(input)
     61         lexer = self.getLexer(stream)
     62 
     63         try:
     64             while True:
     65                 token = lexer.nextToken()
     66                 if token.type == antlr3.EOF:
     67                     break
     68 
     69             raise AssertionError
     70 
     71         except antlr3.NoViableAltException, exc:
     72             assert exc.unexpectedType == '>', repr(exc.unexpectedType)
     73             assert exc.charPositionInLine == 11, repr(exc.charPositionInLine)
     74             assert exc.line == 2, repr(exc.line)
     75 
     76 
     77     def testMalformedInput2(self):
     78         input = textwrap.dedent("""\
     79         <?tml version='1.0'?>
     80         <document>
     81         </document>
     82         """)
     83 
     84         stream = antlr3.StringStream(input)
     85         lexer = self.getLexer(stream)
     86 
     87         try:
     88             while True:
     89                 token = lexer.nextToken()
     90                 if token.type == antlr3.EOF:
     91                     break
     92 
     93             raise AssertionError
     94 
     95         except antlr3.MismatchedSetException, exc:
     96             assert exc.unexpectedType == 't', repr(exc.unexpectedType)
     97             assert exc.charPositionInLine == 2, repr(exc.charPositionInLine)
     98             assert exc.line == 1, repr(exc.line)
     99 
    100 
    101     def testMalformedInput3(self):
    102         input = textwrap.dedent("""\
    103         <?xml version='1.0'?>
    104         <docu ment attr="foo">
    105         </document>
    106         """)
    107 
    108         stream = antlr3.StringStream(input)
    109         lexer = self.getLexer(stream)
    110 
    111         try:
    112             while True:
    113                 token = lexer.nextToken()
    114                 if token.type == antlr3.EOF:
    115                     break
    116 
    117             raise AssertionError
    118 
    119         except antlr3.NoViableAltException, exc:
    120             assert exc.unexpectedType == 'a', repr(exc.unexpectedType)
    121             assert exc.charPositionInLine == 11, repr(exc.charPositionInLine)
    122             assert exc.line == 2, repr(exc.line)
    123 
    124             
    125 
# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
    128 
    129 
    130 ## # run an infinite loop with randomly mangled input
    131 ## while True:
    132 ##     print "ping"
    133 
    134 ##     input = """\
    135 ## <?xml version='1.0'?>
    136 ## <!DOCTYPE component [
    137 ## <!ELEMENT component (PCDATA|sub)*>
    138 ## <!ATTLIST component
    139 ##           attr CDATA #IMPLIED
    140 ##           attr2 CDATA #IMPLIED
    141 ## >
    142 ## <!ELMENT sub EMPTY>
    143 
    144 ## ]>
    145 ## <component attr="val'ue" attr2='val"ue'>
    146 ## <!-- This is a comment -->
    147 ## Text
    148 ## <![CDATA[huhu]]>
    149 ## &amp;
    150 ## &lt;
    151 ## <?xtal cursor='11'?>
    152 ## <sub/>
    153 ## <sub></sub>
    154 ## </component>
    155 ## """
    156 
    157 ##     import random
    158 ##     input = list(input) # make it mutable
    159 ##     for _ in range(3):
    160 ##         p1 = random.randrange(len(input))
    161 ##         p2 = random.randrange(len(input))
    162 
    163 ##         c1 = input[p1]
    164 ##         input[p1] = input[p2]
    165 ##         input[p2] = c1
    166 ##     input = ''.join(input) # back to string
    167         
    168 ##     stream = antlr3.StringStream(input)
    169 ##     lexer = Lexer(stream)
    170 
    171 ##     try:
    172 ##         while True:
    173 ##             token = lexer.nextToken()
    174 ##             if token.type == EOF:
    175 ##                 break
    176 
    177 ##     except antlr3.RecognitionException, exc:
    178 ##         print exc
    179 ##         for l in input.splitlines()[0:exc.line]:
    180 ##             print l
    181 ##         print ' '*exc.charPositionInLine + '^'
    182 
    183 ##     except BaseException, exc:
    184 ##         print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
    185 ##         print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
    186 ##         print
    187         
    188 ##         raise
    189     
    190