1 """ Test script for the Unicode implementation. 2 3 Written by Bill Tutt. 4 Modified for Python 2.0 by Fredrik Lundh (fredrik (at] pythonware.com) 5 6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 7 8 """#" 9 10 import unittest 11 12 from test import test_support 13 14 class UnicodeNamesTest(unittest.TestCase): 15 16 def checkletter(self, name, code): 17 # Helper that put all \N escapes inside eval'd raw strings, 18 # to make sure this script runs even if the compiler 19 # chokes on \N escapes 20 res = eval(ur'u"\N{%s}"' % name) 21 self.assertEqual(res, code) 22 return res 23 24 def test_general(self): 25 # General and case insensitivity test: 26 chars = [ 27 "LATIN CAPITAL LETTER T", 28 "LATIN SMALL LETTER H", 29 "LATIN SMALL LETTER E", 30 "SPACE", 31 "LATIN SMALL LETTER R", 32 "LATIN CAPITAL LETTER E", 33 "LATIN SMALL LETTER D", 34 "SPACE", 35 "LATIN SMALL LETTER f", 36 "LATIN CAPITAL LeTtEr o", 37 "LATIN SMaLl LETTER x", 38 "SPACE", 39 "LATIN SMALL LETTER A", 40 "LATIN SMALL LETTER T", 41 "LATIN SMALL LETTER E", 42 "SPACE", 43 "LATIN SMALL LETTER T", 44 "LATIN SMALL LETTER H", 45 "LATIN SMALL LETTER E", 46 "SpAcE", 47 "LATIN SMALL LETTER S", 48 "LATIN SMALL LETTER H", 49 "LATIN small LETTER e", 50 "LATIN small LETTER e", 51 "LATIN SMALL LETTER P", 52 "FULL STOP" 53 ] 54 string = u"The rEd fOx ate the sheep." 55 56 self.assertEqual( 57 u"".join([self.checkletter(*args) for args in zip(chars, string)]), 58 string 59 ) 60 61 def test_ascii_letters(self): 62 import unicodedata 63 64 for char in "".join(map(chr, xrange(ord("a"), ord("z")))): 65 name = "LATIN SMALL LETTER %s" % char.upper() 66 code = unicodedata.lookup(name) 67 self.assertEqual(unicodedata.name(code), name) 68 69 def test_hangul_syllables(self): 70 self.checkletter("HANGUL SYLLABLE GA", u"\uac00") 71 self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8") 72 self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0") 73 self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8") 74 self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0") 75 self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88") 76 self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370") 77 self.checkletter("HANGUL SYLLABLE YI", u"\uc758") 78 self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40") 79 self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28") 80 self.checkletter("HANGUL SYLLABLE PAN", u"\ud310") 81 self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8") 82 self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3") 83 84 import unicodedata 85 self.assertRaises(ValueError, unicodedata.name, u"\ud7a4") 86 87 def test_cjk_unified_ideographs(self): 88 self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400") 89 self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5") 90 self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00") 91 self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5") 92 self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000") 93 self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6") 94 95 def test_bmp_characters(self): 96 import unicodedata 97 count = 0 98 for code in xrange(0x10000): 99 char = unichr(code) 100 name = unicodedata.name(char, None) 101 if name is not None: 102 self.assertEqual(unicodedata.lookup(name), char) 103 count += 1 104 105 def test_misc_symbols(self): 106 self.checkletter("PILCROW SIGN", u"\u00b6") 107 self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD") 108 self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F") 109 self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41") 110 111 def test_errors(self): 112 import unicodedata 113 self.assertRaises(TypeError, unicodedata.name) 114 self.assertRaises(TypeError, unicodedata.name, u'xx') 115 self.assertRaises(TypeError, unicodedata.lookup) 116 self.assertRaises(KeyError, unicodedata.lookup, u'unknown') 117 118 def test_strict_eror_handling(self): 119 # bogus character name 120 self.assertRaises( 121 UnicodeError, 122 unicode, "\\N{blah}", 'unicode-escape', 'strict' 123 ) 124 # long bogus character name 125 self.assertRaises( 126 UnicodeError, 127 unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict' 128 ) 129 # missing closing brace 130 self.assertRaises( 131 UnicodeError, 132 unicode, "\\N{SPACE", 'unicode-escape', 'strict' 133 ) 134 # missing opening brace 135 self.assertRaises( 136 UnicodeError, 137 unicode, "\\NSPACE", 'unicode-escape', 'strict' 138 ) 139 140 def test_main(): 141 test_support.run_unittest(UnicodeNamesTest) 142 143 if __name__ == "__main__": 144 test_main() 145