# Home | History | Annotate | Download | only in tests
      1 #!/usr/bin/env python3
      2 
      3 #
      4 # Copyright (C) 2018 The Android Open Source Project
      5 #
      6 # Licensed under the Apache License, Version 2.0 (the "License");
      7 # you may not use this file except in compliance with the License.
      8 # You may obtain a copy of the License at
      9 #
     10 #      http://www.apache.org/licenses/LICENSE-2.0
     11 #
     12 # Unless required by applicable law or agreed to in writing, software
     13 # distributed under the License is distributed on an "AS IS" BASIS,
     14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 # See the License for the specific language governing permissions and
     16 # limitations under the License.
     17 #
     18 
     19 """This module contains the unit tests to check the Lexer class."""
     20 
     21 import sys
     22 import unittest
     23 
     24 from blueprint import Lexer, LexerError, Token
     25 
     26 
     27 #------------------------------------------------------------------------------
     28 # Python 2 compatibility
     29 #------------------------------------------------------------------------------
     30 
     31 if sys.version_info >= (3,):
     32     py3_str = str  # pylint: disable=invalid-name
     33 else:
     34     def py3_str(string):
     35         """Convert a string into a utf-8 encoded string."""
     36         return unicode(string).encode('utf-8')
     37 
     38 
     39 #------------------------------------------------------------------------------
     40 # LexerError
     41 #------------------------------------------------------------------------------
     42 
     43 class LexerErrorTest(unittest.TestCase):
     44     """Unit tests for LexerError class."""
     45 
     46     def test_lexer_error(self):
     47         """Test LexerError __init__(), __str__(), line, column, and message."""
     48 
     49         exc = LexerError('a %', 2, 'unexpected character')
     50         self.assertEqual(exc.line, 1)
     51         self.assertEqual(exc.column, 3)
     52         self.assertEqual(exc.message, 'unexpected character')
     53         self.assertEqual(str(exc), 'LexerError: 1:3: unexpected character')
     54 
     55         exc = LexerError('a\nb\ncde %', 8, 'unexpected character')
     56         self.assertEqual(exc.line, 3)
     57         self.assertEqual(exc.column, 5)
     58         self.assertEqual(exc.message, 'unexpected character')
     59         self.assertEqual(str(exc), 'LexerError: 3:5: unexpected character')
     60 
     61 
     62     def test_hierarchy(self):
     63         """Test the hierarchy of LexerError."""
     64         with self.assertRaises(ValueError):
     65             raise LexerError('a', 0, 'error')
     66 
     67 
     68 class LexComputeLineColumn(unittest.TestCase):
     69     """Unit tests for Lexer.compute_line_column() method."""
     70 
     71     def test_compute_line_column(self):
     72         """Test the line and column computation."""
     73 
     74         # Line 1
     75         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 0)
     76         self.assertEqual(line, 1)
     77         self.assertEqual(column, 1)
     78 
     79         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 1)
     80         self.assertEqual(line, 1)
     81         self.assertEqual(column, 2)
     82 
     83         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 2)
     84         self.assertEqual(line, 1)
     85         self.assertEqual(column, 3)
     86 
     87         # Line 2
     88         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 3)
     89         self.assertEqual(line, 2)
     90         self.assertEqual(column, 1)
     91 
     92         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 4)
     93         self.assertEqual(line, 2)
     94         self.assertEqual(column, 2)
     95 
     96         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 5)
     97         self.assertEqual(line, 2)
     98         self.assertEqual(column, 3)
     99 
    100         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 6)
    101         self.assertEqual(line, 2)
    102         self.assertEqual(column, 4)
    103 
    104         # Line 3
    105         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 7)
    106         self.assertEqual(line, 3)
    107         self.assertEqual(column, 1)
    108 
    109         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 8)
    110         self.assertEqual(line, 3)
    111         self.assertEqual(column, 2)
    112 
    113         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 9)
    114         self.assertEqual(line, 3)
    115         self.assertEqual(column, 3)
    116 
    117         # Line 4 (empty line)
    118         line, column = Lexer.compute_line_column('ab\ncde\nfg\n', 10)
    119         self.assertEqual(line, 4)
    120         self.assertEqual(column, 1)
    121 
    122 
    123 #------------------------------------------------------------------------------
    124 # Lex.lex_string()
    125 #------------------------------------------------------------------------------
    126 
    127 class LexStringTest(unittest.TestCase):
    128     """Unit tests for the Lexer.lex_string() method."""
    129 
    130     def test_raw_string_lit(self):
    131         """Test whether Lexer.lex_string() can tokenize raw string literal."""
    132 
    133         end, lit = Lexer.lex_string('`a`', 0)
    134         self.assertEqual(end, 3)
    135         self.assertEqual(lit, 'a')
    136 
    137         end, lit = Lexer.lex_string('`a\nb`', 0)
    138         self.assertEqual(end, 5)
    139         self.assertEqual(lit, 'a\nb')
    140 
    141         end, lit = Lexer.lex_string('"a""b"', 3)
    142         self.assertEqual(end, 6)
    143         self.assertEqual(lit, 'b')
    144 
    145         with self.assertRaises(LexerError) as ctx:
    146             Lexer.lex_string('`a', 0)
    147         self.assertEqual(ctx.exception.line, 1)
    148         self.assertEqual(ctx.exception.column, 3)
    149 
    150         with self.assertRaises(LexerError) as ctx:
    151             Lexer.lex_string('"a\nb"', 0)
    152         self.assertEqual(ctx.exception.line, 1)
    153         self.assertEqual(ctx.exception.column, 3)
    154 
    155 
    156     def test_interpreted_string_literal(self):
    157         """Test whether Lexer.lex_string() can tokenize interpreted string
    158         literal."""
    159 
    160         end, lit = Lexer.lex_string('"a"', 0)
    161         self.assertEqual(end, 3)
    162         self.assertEqual(lit, 'a')
    163 
    164         end, lit = Lexer.lex_string('"n"', 0)
    165         self.assertEqual(end, 3)
    166         self.assertEqual(lit, 'n')
    167 
    168         with self.assertRaises(LexerError) as ctx:
    169             Lexer.lex_string('"\\', 0)
    170         self.assertEqual(ctx.exception.line, 1)
    171         self.assertEqual(ctx.exception.column, 2)
    172 
    173 
    174     def test_literal_escape_char(self):
    175         """Test whether Lexer.lex_string() can tokenize interpreted string
    176         literal with a escaped character."""
    177 
    178         end, lit = Lexer.lex_string('"\\a"', 0)
    179         self.assertEqual(end, 4)
    180         self.assertEqual(lit, '\a')
    181 
    182         end, lit = Lexer.lex_string('"\\b"', 0)
    183         self.assertEqual(end, 4)
    184         self.assertEqual(lit, '\b')
    185 
    186         end, lit = Lexer.lex_string('"\\f"', 0)
    187         self.assertEqual(end, 4)
    188         self.assertEqual(lit, '\f')
    189 
    190         end, lit = Lexer.lex_string('"\\n"', 0)
    191         self.assertEqual(end, 4)
    192         self.assertEqual(lit, '\n')
    193 
    194         end, lit = Lexer.lex_string('"\\r"', 0)
    195         self.assertEqual(end, 4)
    196         self.assertEqual(lit, '\r')
    197 
    198         end, lit = Lexer.lex_string('"\\t"', 0)
    199         self.assertEqual(end, 4)
    200         self.assertEqual(lit, '\t')
    201 
    202         end, lit = Lexer.lex_string('"\\v"', 0)
    203         self.assertEqual(end, 4)
    204         self.assertEqual(lit, '\v')
    205 
    206         end, lit = Lexer.lex_string('"\\\\"', 0)
    207         self.assertEqual(end, 4)
    208         self.assertEqual(lit, '\\')
    209 
    210         end, lit = Lexer.lex_string('"\\\'"', 0)
    211         self.assertEqual(end, 4)
    212         self.assertEqual(lit, '\'')
    213 
    214         end, lit = Lexer.lex_string('"\\\""', 0)
    215         self.assertEqual(end, 4)
    216         self.assertEqual(lit, '\"')
    217 
    218         with self.assertRaises(LexerError) as ctx:
    219             Lexer.lex_string('"\\?"', 0)
    220         self.assertEqual(ctx.exception.line, 1)
    221         self.assertEqual(ctx.exception.column, 2)
    222 
    223 
    224     def test_literal_escape_octal(self):
    225         """Test whether Lexer.lex_string() can tokenize interpreted string
    226         literal with an octal escape sequence."""
    227 
    228         end, lit = Lexer.lex_string('"\\000"', 0)
    229         self.assertEqual(end, 6)
    230         self.assertEqual(lit, '\0')
    231 
    232         end, lit = Lexer.lex_string('"\\377"', 0)
    233         self.assertEqual(end, 6)
    234         self.assertEqual(lit, '\377')
    235 
    236         tests = [
    237             '"\\0',
    238             '"\\0"  ',
    239             '"\\09" ',
    240             '"\\009"',
    241         ]
    242 
    243         for test in tests:
    244             with self.assertRaises(LexerError) as ctx:
    245                 Lexer.lex_string(test, 0)
    246             self.assertEqual(ctx.exception.line, 1)
    247             self.assertEqual(ctx.exception.column, 2)
    248 
    249 
    250     def test_literal_escape_hex(self):
    251         """Test whether Lexer.lex_string() can tokenize interpreted string
    252         literal with a hexadecimal escape sequence."""
    253 
    254         end, lit = Lexer.lex_string('"\\x00"', 0)
    255         self.assertEqual(end, 6)
    256         self.assertEqual(lit, '\0')
    257 
    258         end, lit = Lexer.lex_string('"\\xff"', 0)
    259         self.assertEqual(end, 6)
    260         self.assertEqual(lit, '\xff')
    261 
    262         tests = [
    263             '"\\x',
    264             '"\\x"  ',
    265             '"\\x0" ',
    266             '"\\xg" ',
    267             '"\\x0g"',
    268         ]
    269 
    270         for test in tests:
    271             with self.assertRaises(LexerError) as ctx:
    272                 Lexer.lex_string(test, 0)
    273             self.assertEqual(ctx.exception.line, 1)
    274             self.assertEqual(ctx.exception.column, 2)
    275 
    276 
    277     def test_literal_escape_little_u(self):
    278         """Test whether Lexer.lex_string() can tokenize interpreted string
    279         literal with a little u escape sequence."""
    280 
    281         end, lit = Lexer.lex_string('"\\u0000"', 0)
    282         self.assertEqual(end, 8)
    283         self.assertEqual(lit, '\0')
    284 
    285         end, lit = Lexer.lex_string('"\\uffff"', 0)
    286         self.assertEqual(end, 8)
    287         self.assertEqual(lit, py3_str(u'\uffff'))
    288 
    289         tests = [
    290             '"\\u',
    291             '"\\u"    ',
    292             '"\\u0"   ',
    293             '"\\ug"   ',
    294             '"\\u0g"  ',
    295             '"\\u00g" ',
    296             '"\\u000g"',
    297         ]
    298 
    299         for test in tests:
    300             with self.assertRaises(LexerError) as ctx:
    301                 Lexer.lex_string(test, 0)
    302             self.assertEqual(ctx.exception.line, 1)
    303             self.assertEqual(ctx.exception.column, 2)
    304 
    305 
    306     def test_literal_escape_big_u(self):
    307         """Test whether Lexer.lex_string() can tokenize interpreted string
    308         literal with a big u escape sequence."""
    309 
    310         end, lit = Lexer.lex_string('"\\U00000000"', 0)
    311         self.assertEqual(end, 12)
    312         self.assertEqual(lit, '\0')
    313 
    314         end, lit = Lexer.lex_string('"\\U0001ffff"', 0)
    315         self.assertEqual(end, 12)
    316         self.assertEqual(lit, py3_str(u'\U0001ffff'))
    317 
    318         tests = [
    319             '"\\U',
    320             '"\\U"        ',
    321             '"\\U0"       ',
    322             '"\\Ug"       ',
    323             '"\\U0g"      ',
    324             '"\\U00g"     ',
    325             '"\\U000g"    ',
    326             '"\\U000g"    ',
    327             '"\\U0000g"   ',
    328             '"\\U00000g"  ',
    329             '"\\U000000g" ',
    330             '"\\U0000000g"',
    331         ]
    332 
    333         for test in tests:
    334             with self.assertRaises(LexerError) as ctx:
    335                 Lexer.lex_string(test, 0)
    336             self.assertEqual(ctx.exception.line, 1)
    337             self.assertEqual(ctx.exception.column, 2)
    338 
    339 
    340 #------------------------------------------------------------------------------
    341 # Lexer.lex()
    342 #------------------------------------------------------------------------------
    343 
    344 class LexTest(unittest.TestCase):
    345     """Unit tests for the Lexer.lex() method."""
    346 
    347     def test_lex_char(self):
    348         """Test whether Lexer.lex() can lex a character."""
    349 
    350         token, end, lit = Lexer.lex('(', 0)
    351         self.assertEqual(token, Token.LPAREN)
    352         self.assertEqual(end, 1)
    353         self.assertEqual(lit, None)
    354 
    355         token, end, lit = Lexer.lex(')', 0)
    356         self.assertEqual(token, Token.RPAREN)
    357         self.assertEqual(end, 1)
    358         self.assertEqual(lit, None)
    359 
    360         token, end, lit = Lexer.lex('[', 0)
    361         self.assertEqual(token, Token.LBRACKET)
    362         self.assertEqual(end, 1)
    363         self.assertEqual(lit, None)
    364 
    365         token, end, lit = Lexer.lex(']', 0)
    366         self.assertEqual(token, Token.RBRACKET)
    367         self.assertEqual(end, 1)
    368         self.assertEqual(lit, None)
    369 
    370         token, end, lit = Lexer.lex('{', 0)
    371         self.assertEqual(token, Token.LBRACE)
    372         self.assertEqual(end, 1)
    373         self.assertEqual(lit, None)
    374 
    375         token, end, lit = Lexer.lex('}', 0)
    376         self.assertEqual(token, Token.RBRACE)
    377         self.assertEqual(end, 1)
    378         self.assertEqual(lit, None)
    379 
    380         token, end, lit = Lexer.lex(':', 0)
    381         self.assertEqual(token, Token.COLON)
    382         self.assertEqual(end, 1)
    383         self.assertEqual(lit, None)
    384 
    385         token, end, lit = Lexer.lex('=', 0)
    386         self.assertEqual(token, Token.ASSIGN)
    387         self.assertEqual(end, 1)
    388         self.assertEqual(lit, None)
    389 
    390         token, end, lit = Lexer.lex('+', 0)
    391         self.assertEqual(token, Token.PLUS)
    392         self.assertEqual(end, 1)
    393         self.assertEqual(lit, None)
    394 
    395         token, end, lit = Lexer.lex(',', 0)
    396         self.assertEqual(token, Token.COMMA)
    397         self.assertEqual(end, 1)
    398         self.assertEqual(lit, None)
    399 
    400 
    401     def test_lex_assign_plus(self):
    402         """Test whether Lexer.lex() can lex `+=` without problems."""
    403 
    404         token, end, lit = Lexer.lex('+=', 0)
    405         self.assertEqual(token, Token.ASSIGNPLUS)
    406         self.assertEqual(end, 2)
    407         self.assertEqual(lit, None)
    408 
    409 
    410     def test_lex_space(self):
    411         """Test whether Lexer.lex() can lex whitespaces."""
    412 
    413         token, end, lit = Lexer.lex(' ', 0)
    414         self.assertEqual(token, Token.SPACE)
    415         self.assertEqual(end, 1)
    416         self.assertEqual(lit, None)
    417 
    418         token, end, lit = Lexer.lex('\t', 0)
    419         self.assertEqual(token, Token.SPACE)
    420         self.assertEqual(end, 1)
    421         self.assertEqual(lit, None)
    422 
    423         token, end, lit = Lexer.lex('\r', 0)
    424         self.assertEqual(token, Token.SPACE)
    425         self.assertEqual(end, 1)
    426         self.assertEqual(lit, None)
    427 
    428         token, end, lit = Lexer.lex('\n', 0)
    429         self.assertEqual(token, Token.SPACE)
    430         self.assertEqual(end, 1)
    431         self.assertEqual(lit, None)
    432 
    433         token, end, lit = Lexer.lex('\n \r\t\n', 0)
    434         self.assertEqual(token, Token.SPACE)
    435         self.assertEqual(end, 5)
    436         self.assertEqual(lit, None)
    437 
    438 
    439     def test_lex_comment(self):
    440         """Test whether Lexer.lex() can lex comments."""
    441 
    442         token, end, lit = Lexer.lex('// abcd', 0)
    443         self.assertEqual(token, Token.COMMENT)
    444         self.assertEqual(end, 7)
    445         self.assertEqual(lit, None)
    446 
    447         token, end, lit = Lexer.lex('// abcd\nnext', 0)
    448         self.assertEqual(token, Token.COMMENT)
    449         self.assertEqual(end, 7)
    450         self.assertEqual(lit, None)
    451 
    452         token, end, lit = Lexer.lex('/*a\nb*/', 0)
    453         self.assertEqual(token, Token.COMMENT)
    454         self.assertEqual(end, 7)
    455         self.assertEqual(lit, None)
    456 
    457         token, end, lit = Lexer.lex('/*a\n *b*/', 0)
    458         self.assertEqual(token, Token.COMMENT)
    459         self.assertEqual(end, 9)
    460         self.assertEqual(lit, None)
    461 
    462         token, end, lit = Lexer.lex('/*a**b*/', 0)
    463         self.assertEqual(token, Token.COMMENT)
    464         self.assertEqual(end, 8)
    465         self.assertEqual(lit, None)
    466 
    467         token, end, lit = Lexer.lex('/*a***b*/', 0)
    468         self.assertEqual(token, Token.COMMENT)
    469         self.assertEqual(end, 9)
    470         self.assertEqual(lit, None)
    471 
    472         token, end, lit = Lexer.lex('/**/', 0)
    473         self.assertEqual(token, Token.COMMENT)
    474         self.assertEqual(end, 4)
    475         self.assertEqual(lit, None)
    476 
    477         token, end, lit = Lexer.lex('/***/', 0)
    478         self.assertEqual(token, Token.COMMENT)
    479         self.assertEqual(end, 5)
    480         self.assertEqual(lit, None)
    481 
    482         token, end, lit = Lexer.lex('/**a*/', 0)
    483         self.assertEqual(token, Token.COMMENT)
    484         self.assertEqual(end, 6)
    485         self.assertEqual(lit, None)
    486 
    487         token, end, lit = Lexer.lex('/*a**/', 0)
    488         self.assertEqual(token, Token.COMMENT)
    489         self.assertEqual(end, 6)
    490         self.assertEqual(lit, None)
    491 
    492         token, end, lit = Lexer.lex('/***a*/', 0)
    493         self.assertEqual(token, Token.COMMENT)
    494         self.assertEqual(end, 7)
    495         self.assertEqual(lit, None)
    496 
    497         token, end, lit = Lexer.lex('/*a***/', 0)
    498         self.assertEqual(token, Token.COMMENT)
    499         self.assertEqual(end, 7)
    500         self.assertEqual(lit, None)
    501 
    502 
    503     def test_lex_string(self):
    504         """Test whether Lexer.lex() can lex a string."""
    505 
    506         token, end, lit = Lexer.lex('"a"', 0)
    507         self.assertEqual(token, Token.STRING)
    508         self.assertEqual(end, 3)
    509         self.assertEqual(lit, 'a')
    510 
    511         token, end, lit = Lexer.lex('`a\nb`', 0)
    512         self.assertEqual(token, Token.STRING)
    513         self.assertEqual(end, 5)
    514         self.assertEqual(lit, 'a\nb')
    515 
    516 
    517     def test_lex_ident(self):
    518         """Test whether Lexer.lex() can lex an identifier."""
    519 
    520         token, end, lit = Lexer.lex('ident', 0)
    521         self.assertEqual(token, Token.IDENT)
    522         self.assertEqual(end, 5)
    523         self.assertEqual(lit, 'ident')
    524 
    525 
    526     def test_lex_offset(self):
    527         """Test the offset argument of Lexer.lex()."""
    528 
    529         token, end, lit = Lexer.lex('a "b"', 0)
    530         self.assertEqual(token, Token.IDENT)
    531         self.assertEqual(end, 1)
    532         self.assertEqual(lit, 'a')
    533 
    534         token, end, lit = Lexer.lex('a "b"', end)
    535         self.assertEqual(token, Token.SPACE)
    536         self.assertEqual(end, 2)
    537         self.assertEqual(lit, None)
    538 
    539         token, end, lit = Lexer.lex('a "b"', end)
    540         self.assertEqual(token, Token.STRING)
    541         self.assertEqual(end, 5)
    542         self.assertEqual(lit, 'b')
    543 
    544 
    545 #------------------------------------------------------------------------------
    546 # Lexer class test
    547 #------------------------------------------------------------------------------
    548 
    549 class LexerTest(unittest.TestCase):
    550     """Unit tests for the Lexer class."""
    551 
    552     def test_lexer(self):
    553         """Test token, start, end, literal, and consume()."""
    554 
    555         lexer = Lexer('a b //a\n "c"', 0)
    556 
    557         self.assertEqual(lexer.start, 0)
    558         self.assertEqual(lexer.end, 1)
    559         self.assertEqual(lexer.token, Token.IDENT)
    560         self.assertEqual(lexer.literal, 'a')
    561         lexer.consume(Token.IDENT)
    562 
    563         self.assertEqual(lexer.start, 2)
    564         self.assertEqual(lexer.end, 3)
    565         self.assertEqual(lexer.token, Token.IDENT)
    566         self.assertEqual(lexer.literal, 'b')
    567         lexer.consume(Token.IDENT)
    568 
    569         self.assertEqual(lexer.start, 9)
    570         self.assertEqual(lexer.end, 12)
    571         self.assertEqual(lexer.token, Token.STRING)
    572         self.assertEqual(lexer.literal, 'c')
    573         lexer.consume(Token.STRING)
    574 
    575         self.assertEqual(lexer.start, 12)
    576         self.assertEqual(lexer.end, 12)
    577         self.assertEqual(lexer.token, Token.EOF)
    578         self.assertEqual(lexer.literal, None)
    579 
    580 
    581     def test_lexer_offset(self):
    582         """Test the offset argument of Lexer.__init__()."""
    583 
    584         lexer = Lexer('a b', 2)
    585 
    586         self.assertEqual(lexer.start, 2)
    587         self.assertEqual(lexer.end, 3)
    588         self.assertEqual(lexer.token, Token.IDENT)
    589         self.assertEqual(lexer.literal, 'b')
    590         lexer.consume(Token.IDENT)
    591 
    592         self.assertEqual(lexer.start, 3)
    593         self.assertEqual(lexer.end, 3)
    594         self.assertEqual(lexer.token, Token.EOF)
    595         self.assertEqual(lexer.literal, None)
    596         lexer.consume(Token.EOF)
    597 
    598 
if __name__ == '__main__':
    # Discover and run every TestCase defined in this module.
    unittest.main()
    601