Home | History | Annotate | Download | only in test
      1 """ Test script for the Unicode implementation.
      2 
      3 Written by Bill Tutt.
      4 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
      5 
      6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
      7 
      8 """#"

      9 
     10 import unittest
     11 
     12 from test import test_support
     13 
     14 class UnicodeNamesTest(unittest.TestCase):
     15 
     16     def checkletter(self, name, code):
     17         # Helper that put all \N escapes inside eval'd raw strings,

     18         # to make sure this script runs even if the compiler

     19         # chokes on \N escapes

     20         res = eval(ur'u"\N{%s}"' % name)
     21         self.assertEqual(res, code)
     22         return res
     23 
     24     def test_general(self):
     25         # General and case insensitivity test:

     26         chars = [
     27             "LATIN CAPITAL LETTER T",
     28             "LATIN SMALL LETTER H",
     29             "LATIN SMALL LETTER E",
     30             "SPACE",
     31             "LATIN SMALL LETTER R",
     32             "LATIN CAPITAL LETTER E",
     33             "LATIN SMALL LETTER D",
     34             "SPACE",
     35             "LATIN SMALL LETTER f",
     36             "LATIN CAPITAL LeTtEr o",
     37             "LATIN SMaLl LETTER x",
     38             "SPACE",
     39             "LATIN SMALL LETTER A",
     40             "LATIN SMALL LETTER T",
     41             "LATIN SMALL LETTER E",
     42             "SPACE",
     43             "LATIN SMALL LETTER T",
     44             "LATIN SMALL LETTER H",
     45             "LATIN SMALL LETTER E",
     46             "SpAcE",
     47             "LATIN SMALL LETTER S",
     48             "LATIN SMALL LETTER H",
     49             "LATIN small LETTER e",
     50             "LATIN small LETTER e",
     51             "LATIN SMALL LETTER P",
     52             "FULL STOP"
     53         ]
     54         string = u"The rEd fOx ate the sheep."
     55 
     56         self.assertEqual(
     57             u"".join([self.checkletter(*args) for args in zip(chars, string)]),
     58             string
     59         )
     60 
     61     def test_ascii_letters(self):
     62         import unicodedata
     63 
     64         for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
     65             name = "LATIN SMALL LETTER %s" % char.upper()
     66             code = unicodedata.lookup(name)
     67             self.assertEqual(unicodedata.name(code), name)
     68 
     69     def test_hangul_syllables(self):
     70         self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
     71         self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
     72         self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
     73         self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
     74         self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
     75         self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
     76         self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
     77         self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
     78         self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
     79         self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
     80         self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
     81         self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
     82         self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
     83 
     84         import unicodedata
     85         self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
     86 
     87     def test_cjk_unified_ideographs(self):
     88         self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
     89         self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
     90         self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
     91         self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
     92         self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
     93         self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
     94 
     95     def test_bmp_characters(self):
     96         import unicodedata
     97         count = 0
     98         for code in xrange(0x10000):
     99             char = unichr(code)
    100             name = unicodedata.name(char, None)
    101             if name is not None:
    102                 self.assertEqual(unicodedata.lookup(name), char)
    103                 count += 1
    104 
    105     def test_misc_symbols(self):
    106         self.checkletter("PILCROW SIGN", u"\u00b6")
    107         self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
    108         self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
    109         self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
    110 
    111     def test_errors(self):
    112         import unicodedata
    113         self.assertRaises(TypeError, unicodedata.name)
    114         self.assertRaises(TypeError, unicodedata.name, u'xx')
    115         self.assertRaises(TypeError, unicodedata.lookup)
    116         self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
    117 
    118     def test_strict_eror_handling(self):
    119         # bogus character name

    120         self.assertRaises(
    121             UnicodeError,
    122             unicode, "\\N{blah}", 'unicode-escape', 'strict'
    123         )
    124         # long bogus character name

    125         self.assertRaises(
    126             UnicodeError,
    127             unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
    128         )
    129         # missing closing brace

    130         self.assertRaises(
    131             UnicodeError,
    132             unicode, "\\N{SPACE", 'unicode-escape', 'strict'
    133         )
    134         # missing opening brace

    135         self.assertRaises(
    136             UnicodeError,
    137             unicode, "\\NSPACE", 'unicode-escape', 'strict'
    138         )
    139 
    140 def test_main():
    141     test_support.run_unittest(UnicodeNamesTest)
    142 
    143 if __name__ == "__main__":
    144     test_main()
    145