#!/usr/bin/python2.4
#
# Copyright 2008 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

"""Unittest for the util/regexp/re2/unicode.py module."""

import os
import StringIO

from google3.pyglib import flags
from google3.testing.pybase import googletest
from google3.util.regexp.re2 import unicode

# Directory holding the Unicode data files read by the CaseGroups, Scripts
# and Categories tests below.
_UNICODE_DIR = os.path.join(flags.FLAGS.test_srcdir, "google3", "third_party",
                            "unicode", "ucd-5.1.0")


def _CodePoint(c):
  """Returns c as an integer code point.

  Args:
    c: either a one-character str or an integer code point.

  Returns:
    ord(c) when c is a str, otherwise c unchanged.
  """
  if isinstance(c, str):
    return ord(c)
  return c


class ConvertTest(googletest.TestCase):
  """Test the conversion functions."""

  def testUInt(self):
    """_UInt converts hex strings to ints and rejects the bad inputs below."""
    self.assertEqual(0x0000, unicode._UInt("0000"))
    self.assertEqual(0x263A, unicode._UInt("263A"))
    self.assertEqual(0x10FFFF, unicode._UInt("10FFFF"))
    # Three digits, seven digits, and a value just past 0x10FFFF.
    self.assertRaises(unicode.InputError, unicode._UInt, "263")
    self.assertRaises(unicode.InputError, unicode._UInt, "263AAAA")
    self.assertRaises(unicode.InputError, unicode._UInt, "110000")

  def testURange(self):
    """_URange expands "lo..hi" (or a single value) into a list of ints."""
    self.assertEqual([1, 2, 3], unicode._URange("0001..0003"))
    self.assertEqual([1], unicode._URange("0001"))
    # Too many endpoints, a descending range, and a range whose endpoints
    # are equal are all expected to be rejected.
    self.assertRaises(unicode.InputError, unicode._URange, "0001..0003..0005")
    self.assertRaises(unicode.InputError, unicode._URange, "0003..0001")
    self.assertRaises(unicode.InputError, unicode._URange, "0001..0001")

  def testUStr(self):
    """_UStr formats a code point as "0x" plus upper-case hex digits."""
    self.assertEqual("0x263A", unicode._UStr(0x263a))
    self.assertEqual("0x10FFFF", unicode._UStr(0x10FFFF))
    # One past 0x10FFFF, and a negative value.
    self.assertRaises(unicode.InputError, unicode._UStr, 0x110000)
    self.assertRaises(unicode.InputError, unicode._UStr, -1)


# Well-formed table: a comment, a blank line, a single code point, an explicit
# range, a <First>/<Last> pair, and the largest and smallest code points.
# The third field of each row names the callback invocation expected for it.
_UNICODE_TABLE = """# Commented line, should be ignored.
# The next line is blank and should be ignored.

0041;Capital A;Line 1
0061..007A;Lowercase;Line 2
1F00;<Greek, First>;Ignored
1FFE;<Greek, Last>;Line 3
10FFFF;Runemax;Line 4
0000;Zero;Line 5
"""

# 111111 is larger than 0x10FFFF.
_BAD_TABLE1 = """
111111;Not a code point;
"""

# A <..., First> row that is never followed by its <..., Last> row.
_BAD_TABLE2 = """
0000;<Zero, First>;Missing <Zero, Last>
"""

# A range whose upper bound is below its lower bound.
_BAD_TABLE3 = """
0010..0001;Bad range;
"""


class AbortError(Exception):
  """Function should not have been called."""


def Abort(*unused_args, **unused_kwargs):
  """Callback for tables that must be rejected before any row is delivered.

  It is passed where a per-row callback is expected, so it accepts (and
  ignores) whatever arguments it is given; that way an unexpected call fails
  with AbortError rather than with a TypeError about the argument count.

  Raises:
    AbortError: always.
  """
  raise AbortError("Abort")


def StringTable(s, n, f):
  """Runs unicode.ReadUnicodeTable over the string s instead of a file.

  Args:
    s: the table contents.
    n: passed through to ReadUnicodeTable as its second argument.
    f: passed through to ReadUnicodeTable as its third argument (the
      callback).
  """
  unicode.ReadUnicodeTable(StringIO.StringIO(s), n, f)


class ReadUnicodeTableTest(googletest.TestCase):
  """Test the ReadUnicodeTable function."""

  def testSimpleTable(self):
    """Each data row of _UNICODE_TABLE reaches the callback once, in order."""
    # (codes, fields[0], fields[1]) expected for calls 1..5.  The First/Last
    # pair is expected to arrive as one call covering the whole range.
    expected = [
        ([0x0041], "0041", "Capital A"),
        (range(0x0061, 0x007A + 1), "0061..007A", "Lowercase"),
        (range(0x1F00, 0x1FFE + 1), "1F00..1FFE", "Greek"),
        ([0x10FFFF], "10FFFF", "Runemax"),
        ([0x0000], "0000", "Zero"),
    ]

    ncall = [0]  # can't assign to ordinary int in DoLine

    def DoLine(codes, fields):
      self.assertEqual(3, len(fields))
      ncall[0] += 1
      self.assertEqual("Line %d" % (ncall[0],), fields[2])
      if ncall[0] <= len(expected):
        want_codes, want_field0, want_field1 = expected[ncall[0] - 1]
        self.assertEqual(want_codes, codes)
        self.assertEqual(want_field0, fields[0])
        self.assertEqual(want_field1, fields[1])

    StringTable(_UNICODE_TABLE, 3, DoLine)
    self.assertEqual(len(expected), ncall[0])

  def testErrorTables(self):
    """Malformed input raises InputError without invoking the callback."""
    # The valid table read with a count other than the 3 that
    # testSimpleTable uses.
    self.assertRaises(unicode.InputError, StringTable, _UNICODE_TABLE, 4, Abort)
    self.assertRaises(unicode.InputError, StringTable, _UNICODE_TABLE, 2, Abort)
    # Tables whose contents are themselves invalid.
    self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE1, 3, Abort)
    self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE2, 3, Abort)
    self.assertRaises(unicode.InputError, StringTable, _BAD_TABLE3, 3, Abort)


class ParseContinueTest(googletest.TestCase):
  """Test the ParseContinue function."""

  def testParseContinue(self):
    """"<Name, First>" and "<Name, Last>" are split; other text is not."""
    self.assertEqual(("Private Use", "First"),
                     unicode._ParseContinue("<Private Use, First>"))
    self.assertEqual(("Private Use", "Last"),
                     unicode._ParseContinue("<Private Use, Last>"))
    # An unrecognized suffix leaves the input intact, paired with None.
    self.assertEqual(("<Private Use, Blah>", None),
                     unicode._ParseContinue("<Private Use, Blah>"))


class CaseGroupsTest(googletest.TestCase):
  """Test the CaseGroups function (and the CaseFoldingReader)."""

  def FindGroup(self, c):
    """Returns the first group in self.groups containing c, or None.

    Args:
      c: a one-character str or an integer code point.
    """
    c = _CodePoint(c)
    for g in self.groups:
      if c in g:
        return g
    return None

  def testCaseGroups(self):
    """"a" is grouped with "A"; "0" belongs to no case group."""
    self.groups = unicode.CaseGroups(unicode_dir=_UNICODE_DIR)
    self.assertEqual([ord("A"), ord("a")], self.FindGroup("a"))
    self.assertEqual(None, self.FindGroup("0"))


class ScriptsTest(googletest.TestCase):
  """Test the Scripts function (and the ScriptsReader)."""

  def FindScript(self, c):
    """Returns the first script in self.scripts whose codes include c.

    Args:
      c: a one-character str or an integer code point.

    Returns:
      The script name, or None if no script lists c.
    """
    c = _CodePoint(c)
    for script, codes in self.scripts.items():
      if c in codes:
        return script
    return None

  def testScripts(self):
    """Spot-checks the script of "a", of "0", and of 0xFFFE (none)."""
    self.scripts = unicode.Scripts(unicode_dir=_UNICODE_DIR)
    self.assertEqual("Latin", self.FindScript("a"))
    self.assertEqual("Common", self.FindScript("0"))
    self.assertEqual(None, self.FindScript(0xFFFE))


class CategoriesTest(googletest.TestCase):
  """Test the Categories function (and the UnicodeDataReader)."""

  def FindCategory(self, c):
    """Returns a category in self.categories whose codes include c.

    A category whose name is longer than one character is returned as soon
    as it is found; otherwise the first one-character category seen is used.

    Args:
      c: a one-character str or an integer code point.

    Returns:
      The category name, or None if no category lists c.
    """
    c = _CodePoint(c)
    short = None
    for category, codes in self.categories.items():
      if c not in codes:
        continue
      # prefer category Nd over N
      if len(category) > 1:
        return category
      if short is None:
        short = category
    return short

  def testCategories(self):
    """Spot-checks two-letter categories and their one-letter parents."""
    self.categories = unicode.Categories(unicode_dir=_UNICODE_DIR)
    self.assertEqual("Ll", self.FindCategory("a"))
    self.assertEqual("Nd", self.FindCategory("0"))
    self.assertEqual("Lo", self.FindCategory(0xAD00))  # in First, Last range
    self.assertEqual(None, self.FindCategory(0xFFFE))
    self.assertEqual("Lo", self.FindCategory(0x8B5A))
    self.assertEqual("Lo", self.FindCategory(0x6C38))
    self.assertEqual("Lo", self.FindCategory(0x92D2))
    # The same code points must also be listed under the one-letter category.
    self.assertTrue(ord("a") in self.categories["L"])
    self.assertTrue(ord("0") in self.categories["N"])
    self.assertTrue(0x8B5A in self.categories["L"])
    self.assertTrue(0x6C38 in self.categories["L"])
    self.assertTrue(0x92D2 in self.categories["L"])


def main():
  """Runs every test case in this module via googletest."""
  googletest.main()


if __name__ == "__main__":
  main()