Home | History | Annotate | Download | only in testing
      1 #!/usr/bin/python2.4
      2 #
      3 # Copyright 2008 The RE2 Authors.  All Rights Reserved.
      4 # Use of this source code is governed by a BSD-style
      5 # license that can be found in the LICENSE file.
      6 
      7 """Unittest for the util/regexp/re2/unicode.py module."""
      8 
      9 import os
     10 import StringIO
     11 from google3.pyglib import flags
     12 from google3.testing.pybase import googletest
     13 from google3.util.regexp.re2 import unicode
     14 
# Directory of Unicode data files, located under the test source tree given
# by the test_srcdir flag.  Passed as unicode_dir to unicode.CaseGroups,
# unicode.Scripts and unicode.Categories in the tests below.
_UNICODE_DIR = os.path.join(flags.FLAGS.test_srcdir, "google3", "third_party",
                            "unicode", "ucd-5.1.0")
     17 
     18 
     19 class ConvertTest(googletest.TestCase):
     20   """Test the conversion functions."""
     21 
     22   def testUInt(self):
     23     self.assertEquals(0x0000, unicode._UInt("0000"))
     24     self.assertEquals(0x263A, unicode._UInt("263A"))
     25     self.assertEquals(0x10FFFF, unicode._UInt("10FFFF"))
     26     self.assertRaises(unicode.InputError, unicode._UInt, "263")
     27     self.assertRaises(unicode.InputError, unicode._UInt, "263AAAA")
     28     self.assertRaises(unicode.InputError, unicode._UInt, "110000")
     29 
     30   def testURange(self):
     31     self.assertEquals([1, 2, 3], unicode._URange("0001..0003"))
     32     self.assertEquals([1], unicode._URange("0001"))
     33     self.assertRaises(unicode.InputError, unicode._URange, "0001..0003..0005")
     34     self.assertRaises(unicode.InputError, unicode._URange, "0003..0001")
     35     self.assertRaises(unicode.InputError, unicode._URange, "0001..0001")
     36 
     37   def testUStr(self):
     38     self.assertEquals("0x263A", unicode._UStr(0x263a))
     39     self.assertEquals("0x10FFFF", unicode._UStr(0x10FFFF))
     40     self.assertRaises(unicode.InputError, unicode._UStr, 0x110000)
     41     self.assertRaises(unicode.InputError, unicode._UStr, -1)
     42 
     43 
# Well-formed sample table consumed by testSimpleTable.  The third field of
# each data line ("Line N") is the order in which the callback is expected
# to receive it.  The <Greek, First>/<Greek, Last> pair is expected to arrive
# as one entry with fields[0] == "1F00..1FFE" and fields[1] == "Greek",
# carrying the third field of the Last line ("Line 3").
_UNICODE_TABLE = """# Commented line, should be ignored.
# The next line is blank and should be ignored.

0041;Capital A;Line 1
0061..007A;Lowercase;Line 2
1F00;<Greek, First>;Ignored
1FFE;<Greek, Last>;Line 3
10FFFF;Runemax;Line 4
0000;Zero;Line 5
"""
     54 
# The three tables below are each expected to make ReadUnicodeTable raise
# unicode.InputError when read with 3 fields (see testErrorTables).

# First field 111111 is above 0x10FFFF.
_BAD_TABLE1 = """
111111;Not a code point;
"""

# A <..., First> line with no matching <..., Last> line.
_BAD_TABLE2 = """
0000;<Zero, First>;Missing <Zero, Last>
"""

# A range whose start (0010) is greater than its end (0001).
_BAD_TABLE3 = """
0010..0001;Bad range;
"""
     66 
     67 
     68 class AbortError(Exception):
     69   """Function should not have been called."""
     70 
     71 
     72 def Abort():
     73   raise AbortError("Abort")
     74 
     75 
     76 def StringTable(s, n, f):
     77   unicode.ReadUnicodeTable(StringIO.StringIO(s), n, f)
     78 
     79 
     80 class ReadUnicodeTableTest(googletest.TestCase):
     81   """Test the ReadUnicodeTable function."""
     82 
     83   def testSimpleTable(self):
     84 
     85     ncall = [0]  # can't assign to ordinary int in DoLine
     86 
     87     def DoLine(codes, fields):
     88       self.assertEquals(3, len(fields))
     89       ncall[0] += 1
     90       self.assertEquals("Line %d" % (ncall[0],), fields[2])
     91       if ncall[0] == 1:
     92         self.assertEquals([0x0041], codes)
     93         self.assertEquals("0041", fields[0])
     94         self.assertEquals("Capital A", fields[1])
     95       elif ncall[0] == 2:
     96         self.assertEquals(range(0x0061, 0x007A + 1), codes)
     97         self.assertEquals("0061..007A", fields[0])
     98         self.assertEquals("Lowercase", fields[1])
     99       elif ncall[0] == 3:
    100         self.assertEquals(range(0x1F00, 0x1FFE + 1), codes)
    101         self.assertEquals("1F00..1FFE", fields[0])
    102         self.assertEquals("Greek", fields[1])
    103       elif ncall[0] == 4:
    104         self.assertEquals([0x10FFFF], codes)
    105         self.assertEquals("10FFFF", fields[0])
    106         self.assertEquals("Runemax", fields[1])
    107       elif ncall[0] == 5:
    108         self.assertEquals([0x0000], codes)
    109         self.assertEquals("0000", fields[0])
    110         self.assertEquals("Zero", fields[1])
    111 
    112     StringTable(_UNICODE_TABLE, 3, DoLine)
    113     self.assertEquals(5, ncall[0])
    114 
    115   def testErrorTables(self):
    116     self.assertRaises(unicode.InputError, StringTable, _UNICODE_TABLE, 4, Abort)
    117     self.assertRaises(unicode.InputError, StringTable, _UNICODE_TABLE, 2, Abort)
    118     self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE1, 3, Abort)
    119     self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE2, 3, Abort)
    120     self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE3, 3, Abort)
    121 
    122 
    123 class ParseContinueTest(googletest.TestCase):
    124   """Test the ParseContinue function."""
    125 
    126   def testParseContinue(self):
    127     self.assertEquals(("Private Use", "First"),
    128                       unicode._ParseContinue("<Private Use, First>"))
    129     self.assertEquals(("Private Use", "Last"),
    130                       unicode._ParseContinue("<Private Use, Last>"))
    131     self.assertEquals(("<Private Use, Blah>", None),
    132                       unicode._ParseContinue("<Private Use, Blah>"))
    133 
    134 
    135 class CaseGroupsTest(googletest.TestCase):
    136   """Test the CaseGroups function (and the CaseFoldingReader)."""
    137 
    138   def FindGroup(self, c):
    139     if type(c) == str:
    140       c = ord(c)
    141     for g in self.groups:
    142       if c in g:
    143         return g
    144     return None
    145 
    146   def testCaseGroups(self):
    147     self.groups = unicode.CaseGroups(unicode_dir=_UNICODE_DIR)
    148     self.assertEquals([ord("A"), ord("a")], self.FindGroup("a"))
    149     self.assertEquals(None, self.FindGroup("0"))
    150 
    151 
    152 class ScriptsTest(googletest.TestCase):
    153   """Test the Scripts function (and the ScriptsReader)."""
    154 
    155   def FindScript(self, c):
    156     if type(c) == str:
    157       c = ord(c)
    158     for script, codes in self.scripts.items():
    159       for code in codes:
    160         if c == code:
    161           return script
    162     return None
    163 
    164   def testScripts(self):
    165     self.scripts = unicode.Scripts(unicode_dir=_UNICODE_DIR)
    166     self.assertEquals("Latin", self.FindScript("a"))
    167     self.assertEquals("Common", self.FindScript("0"))
    168     self.assertEquals(None, self.FindScript(0xFFFE))
    169 
    170 
    171 class CategoriesTest(googletest.TestCase):
    172   """Test the Categories function (and the UnicodeDataReader)."""
    173 
    174   def FindCategory(self, c):
    175     if type(c) == str:
    176       c = ord(c)
    177     short = None
    178     for category, codes in self.categories.items():
    179       for code in codes:
    180         if code == c:
    181           # prefer category Nd over N
    182           if len(category) > 1:
    183             return category
    184           if short == None:
    185             short = category
    186     return short
    187 
    188   def testCategories(self):
    189     self.categories = unicode.Categories(unicode_dir=_UNICODE_DIR)
    190     self.assertEquals("Ll", self.FindCategory("a"))
    191     self.assertEquals("Nd", self.FindCategory("0"))
    192     self.assertEquals("Lo", self.FindCategory(0xAD00))  # in First, Last range
    193     self.assertEquals(None, self.FindCategory(0xFFFE))
    194     self.assertEquals("Lo", self.FindCategory(0x8B5A))
    195     self.assertEquals("Lo", self.FindCategory(0x6C38))
    196     self.assertEquals("Lo", self.FindCategory(0x92D2))
    197     self.assertTrue(ord("a") in self.categories["L"])
    198     self.assertTrue(ord("0") in self.categories["N"])
    199     self.assertTrue(0x8B5A in self.categories["L"])
    200     self.assertTrue(0x6C38 in self.categories["L"])
    201     self.assertTrue(0x92D2 in self.categories["L"])
    202 
def main():
  """Hands control to googletest.main()."""
  googletest.main()

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
  main()
    208