Home | History | Annotate | Download | only in test
      1 """ Test script for the Unicode implementation.
      2 
      3 Written by Marc-Andre Lemburg (mal (at] lemburg.com).
      4 
      5 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
      6 
      7 """#"

      8 import sys, struct, codecs
      9 from test import test_support, string_tests
     10 
     11 # Error handling (bad decoder return)

     12 def search_function(encoding):
     13     def decode1(input, errors="strict"):
     14         return 42 # not a tuple

     15     def encode1(input, errors="strict"):
     16         return 42 # not a tuple

     17     def encode2(input, errors="strict"):
     18         return (42, 42) # no unicode

     19     def decode2(input, errors="strict"):
     20         return (42, 42) # no unicode

     21     if encoding=="test.unicode1":
     22         return (encode1, decode1, None, None)
     23     elif encoding=="test.unicode2":
     24         return (encode2, decode2, None, None)
     25     else:
     26         return None
     27 codecs.register(search_function)
     28 
     29 class UnicodeTest(
     30     string_tests.CommonTest,
     31     string_tests.MixinStrUnicodeUserStringTest,
     32     string_tests.MixinStrUnicodeTest,
     33     ):
     34     type2test = unicode
     35 
     36     def assertEqual(self, first, second, msg=None):
     37         # strict assertEqual method: reject implicit bytes/unicode equality

     38         super(UnicodeTest, self).assertEqual(first, second, msg)
     39         if isinstance(first, unicode) or isinstance(second, unicode):
     40             self.assertIsInstance(first, unicode)
     41             self.assertIsInstance(second, unicode)
     42         elif isinstance(first, str) or isinstance(second, str):
     43             self.assertIsInstance(first, str)
     44             self.assertIsInstance(second, str)
     45 
     46     def checkequalnofix(self, result, object, methodname, *args):
     47         method = getattr(object, methodname)
     48         realresult = method(*args)
     49         self.assertEqual(realresult, result)
     50         self.assertTrue(type(realresult) is type(result))
     51 
     52         # if the original is returned make sure that

     53         # this doesn't happen with subclasses

     54         if realresult is object:
     55             class usub(unicode):
     56                 def __repr__(self):
     57                     return 'usub(%r)' % unicode.__repr__(self)
     58             object = usub(object)
     59             method = getattr(object, methodname)
     60             realresult = method(*args)
     61             self.assertEqual(realresult, result)
     62             self.assertTrue(object is not realresult)
     63 
     64     def test_literals(self):
     65         self.assertEqual(u'\xff', u'\u00ff')
     66         self.assertEqual(u'\uffff', u'\U0000ffff')
     67         self.assertRaises(SyntaxError, eval, 'u\'\\Ufffffffe\'')
     68         self.assertRaises(SyntaxError, eval, 'u\'\\Uffffffff\'')
     69         self.assertRaises(SyntaxError, eval, 'u\'\\U%08x\'' % 0x110000)
     70 
    def test_repr(self):
        """Check repr() of unicode objects.

        Covers backslash and control-character escaping, the choice of
        quote character, and the repr of all code points 0..255.  Nothing
        is checked when sys.platform starts with 'java'.
        """
        if not sys.platform.startswith('java'):
            # Test basic sanity of repr()
            self.assertEqual(repr(u'abc'), "u'abc'")
            self.assertEqual(repr(u'ab\\c'), "u'ab\\\\c'")
            self.assertEqual(repr(u'ab\\'), "u'ab\\\\'")
            self.assertEqual(repr(u'\\c'), "u'\\\\c'")
            self.assertEqual(repr(u'\\'), "u'\\\\'")
            self.assertEqual(repr(u'\n'), "u'\\n'")
            self.assertEqual(repr(u'\r'), "u'\\r'")
            self.assertEqual(repr(u'\t'), "u'\\t'")
            self.assertEqual(repr(u'\b'), "u'\\x08'")
            self.assertEqual(repr(u"'\""), """u'\\'"'""")
            self.assertEqual(repr(u"'\""), """u'\\'"'""")
            self.assertEqual(repr(u"'"), '''u"'"''')
            self.assertEqual(repr(u'"'), """u'"'""")
            # Expected repr of the 256 characters built below with unichr().
            latin1repr = (
                "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
                "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
                "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
                "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
                "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
                "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
                "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
                "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
                "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
                "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
                "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
                "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
                "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
                "\\xfe\\xff'")
            testrepr = repr(u''.join(map(unichr, xrange(256))))
            self.assertEqual(testrepr, latin1repr)
            # Test repr works on wide unicode escapes without overflow.
            # Both sides are the same expression, so this only verifies
            # that repr() completes on the long input.
            self.assertEqual(repr(u"\U00010000" * 39 + u"\uffff" * 4096),
                             repr(u"\U00010000" * 39 + u"\uffff" * 4096))
    107 
    108 
    def test_count(self):
        """Run the shared count() tests, then count() with mixed str/unicode arguments."""
        string_tests.CommonTest.test_count(self)
        # check mixed argument types
        # (expected count, receiver, arguments passed to count())
        mixed_cases = (
            (3,  'aaa', (u'a',)),
            (0,  'aaa', (u'b',)),
            (3, u'aaa', ('a',)),
            (0, u'aaa', ('b',)),
            (0, u'aaa', ('b',)),
            (1, u'aaa', ('a', -1)),
            (3, u'aaa', ('a', -10)),
            (2, u'aaa', ('a', 0, -1)),
            (0, u'aaa', ('a', 0, -10)),
        )
        for expected, receiver, call_args in mixed_cases:
            self.checkequalnofix(expected, receiver, 'count', *call_args)
    121 
    def test_find(self):
        """Check unicode.find() results and its argument type errors."""
        haystack = u'abcdefghiabc'
        # (expected index, arguments passed to find())
        for expected, call_args in (
            (0,  (u'abc',)),
            (9,  (u'abc', 1)),
            (-1, (u'def', 4)),
        ):
            self.checkequalnofix(expected, haystack, 'find', *call_args)

        # Missing or non-string search argument.
        for bad_args in ((), (42,)):
            self.assertRaises(TypeError, u'hello'.find, *bad_args)
    129 
    def test_rfind(self):
        """Run the shared rfind() tests, then rfind() with mixed str/unicode arguments."""
        string_tests.CommonTest.test_rfind(self)
        # check mixed argument types
        for expected, haystack, needle in (
            (9,   'abcdefghiabc', u'abc'),
            (12,  'abcdefghiabc', u''),
            (12, u'abcdefghiabc',  ''),
        ):
            self.checkequalnofix(expected, haystack, 'rfind', needle)
    136 
    def test_index(self):
        """Run the shared index() tests, then index() with mixed str/unicode arguments."""
        string_tests.CommonTest.test_index(self)
        # check mixed argument types
        # (expected index, substring, extra positional arguments)
        found = (
            (0, '', ()),
            (3, 'def', ()),
            (0, 'abc', ()),
            (9, 'abc', (1,)),
        )
        # (receiver text, substring, extra positional arguments)
        missing = (
            ('abcdefghiabc', 'hib', ()),
            ('abcdefghiab', 'abc', (1,)),
            ('abcdefghi', 'ghi', (8,)),
            ('abcdefghi', 'ghi', (-1,)),
        )
        for (t1, t2) in ((str, unicode), (unicode, str)):
            for expected, sub, extra in found:
                self.checkequalnofix(expected, t1('abcdefghiabc'), 'index',
                                     t2(sub), *extra)
            for text, sub, extra in missing:
                self.assertRaises(ValueError, t1(text).index, t2(sub), *extra)
    149 
    def test_rindex(self):
        """Run the shared rindex() tests, then rindex() with mixed str/unicode arguments."""
        string_tests.CommonTest.test_rindex(self)
        # check mixed argument types
        # (expected index, substring, extra positional arguments)
        found = (
            (12, '', ()),
            (3,  'def', ()),
            (9,  'abc', ()),
            (0,  'abc', (0, -1)),
        )
        # (receiver text, substring, extra positional arguments)
        missing = (
            ('abcdefghiabc', 'hib', ()),
            ('defghiabc', 'def', (1,)),
            ('defghiabc', 'abc', (0, -1)),
            ('abcdefghi', 'ghi', (0, 8)),
            ('abcdefghi', 'ghi', (0, -1)),
        )
        for (t1, t2) in ((str, unicode), (unicode, str)):
            for expected, sub, extra in found:
                self.checkequalnofix(expected, t1('abcdefghiabc'), 'rindex',
                                     t2(sub), *extra)

            for text, sub, extra in missing:
                self.assertRaises(ValueError, t1(text).rindex, t2(sub), *extra)
    164 
    def test_translate(self):
        """Check unicode.translate() with deletions, ordinals and string replacements."""
        a, b, c = ord('a'), ord('b'), ord('c')
        # (expected result, input text, translation table)
        cases = (
            (u'bbbc', u'abababc', {a: None}),
            (u'iiic', u'abababc', {a: None, b: ord('i')}),
            (u'iiix', u'abababc', {a: None, b: ord('i'), c: u'x'}),
            (u'<i><i><i>c', u'abababc', {a: None, b: u'<i>'}),
            (u'c', u'abababc', {a: None, b: u''}),
            (u'xyyx', u'xzx', {ord('z'): u'yy'}),
        )
        for expected, text, table in cases:
            self.checkequalnofix(expected, text, 'translate', table)

        # A missing table and a byte-string replacement are both rejected.
        self.assertRaises(TypeError, u'hello'.translate)
        self.assertRaises(TypeError, u'abababc'.translate, {a: ''})
    175 
    def test_split(self):
        """Run the shared split() tests, then split() with mixed str/unicode arguments."""
        string_tests.CommonTest.test_split(self)

        # Mixed arguments
        for expected, text, separator in (
            ([u'a', u'b', u'c', u'd'], u'a//b//c//d', '//'),
            ([u'a', u'b', u'c', u'd'], 'a//b//c//d', u'//'),
            ([u'endcase ', u''], u'endcase test', 'test'),
        ):
            self.checkequalnofix(expected, text, 'split', separator)
    183 
    def test_join(self):
        """Run the shared join() tests, then join() with mixed str/unicode arguments."""
        string_tests.MixinStrUnicodeUserStringTest.test_join(self)

        # mixed arguments
        # (expected result, separator, items to join)
        mixed_cases = (
            (u'a b c d', u' ', ['a', 'b', u'c', u'd']),
            (u'abcd', u'', (u'a', u'b', u'c', u'd')),
            (u'w x y z', u' ', string_tests.Sequence('wxyz')),
            (u'a b c d', ' ', [u'a', u'b', u'c', u'd']),
            (u'a b c d', ' ', ['a', 'b', u'c', u'd']),
            (u'abcd', '', (u'a', u'b', u'c', u'd')),
            (u'w x y z', ' ', string_tests.Sequence(u'wxyz')),
        )
        for expected, separator, items in mixed_cases:
            self.checkequalnofix(expected, separator, 'join', items)
    195 
    def test_strip(self):
        """Run the shared strip() tests, then strip() with a non-ASCII byte string."""
        string_tests.CommonTest.test_strip(self)
        # The byte string "\xff" must be rejected with a UnicodeError.
        with self.assertRaises(UnicodeError):
            u"hello".strip("\xff")
    199 
    def test_replace(self):
        """Run the shared replace() tests, then str.replace() with unicode arguments."""
        string_tests.CommonTest.test_replace(self)

        # method call forwarded from str implementation because of unicode argument
        source_text = 'one!two!three!'
        self.checkequalnofix(u'one@two!three!', source_text, 'replace', u'!', u'@', 1)
        with self.assertRaises(TypeError):
            'replace'.replace(u"r", 42)
    206 
    def test_comparison(self):
        """Check equality and ordering between str and unicode operands."""
        # Comparisons:
        # assertTrue is used deliberately: this class's assertEqual rejects
        # str/unicode pairs, and mixed pairs are what is compared here.
        self.assertTrue(u'abc' == 'abc')
        self.assertTrue('abc' == u'abc')
        self.assertTrue(u'abc' == u'abc')
        self.assertTrue(u'abcd' > 'abc')
        self.assertTrue('abcd' > u'abc')
        self.assertTrue(u'abcd' > u'abc')
        self.assertTrue(u'abc' < 'abcd')
        self.assertTrue('abc' < u'abcd')
        self.assertTrue(u'abc' < u'abcd')

        if 0:
            # Move these tests to a Unicode collation module test...
            # Testing UTF-16 code point order comparisons...

            # No surrogates, no fixup required.
            self.assertTrue(u'\u0061' < u'\u20ac')
            # Non surrogate below surrogate value, no fixup required
            self.assertTrue(u'\u0061' < u'\ud800\udc02')

            # Non surrogate above surrogate value, fixup required
            def test_lecmp(s, s2):
                self.assertTrue(s < s2)

            def test_fixup(s):
                # Compare s against a spread of surrogate pairs.
                for s2 in (
                    u'\ud800\udc01', u'\ud900\udc01',
                    u'\uda00\udc01', u'\udb00\udc01',
                    u'\ud800\udd01', u'\ud900\udd01',
                    u'\uda00\udd01', u'\udb00\udd01',
                    u'\ud800\ude01', u'\ud900\ude01',
                    u'\uda00\ude01', u'\udb00\ude01',
                    u'\ud800\udfff', u'\ud900\udfff',
                    u'\uda00\udfff', u'\udb00\udfff',
                ):
                    test_lecmp(s, s2)

            # These calls used to be indented inside test_fixup itself, so
            # the helper was never invoked and would have recursed without
            # end had this block been enabled.
            test_fixup(u'\ue000')
            test_fixup(u'\uff61')

        # Surrogates on both sides, no fixup required
        self.assertTrue(u'\ud800\udc02' < u'\ud84d\udc56')
    271 
    def test_islower(self):
        """Run the shared islower() tests, then check U+1FFC is not lowercase."""
        string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
        sample = u'\u1FFc'
        self.checkequalnofix(False, sample, 'islower')
    275 
    def test_isupper(self):
        """Run the shared isupper() tests, then check U+1FFC is not uppercase.

        The U+1FFC check is not performed when sys.platform starts with
        'java'.
        """
        string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
        if sys.platform.startswith('java'):
            return
        self.checkequalnofix(False, u'\u1FFc', 'isupper')
    280 
    def test_istitle(self):
        """Run the inherited istitle() tests, then unicode-specific istitle() checks."""
        # This override previously delegated to test_title(), so the
        # inherited istitle() checks were never run for unicode.  super()
        # picks up the inherited test_istitle from whichever base class
        # defines it.
        super(UnicodeTest, self).test_istitle()
        self.checkequalnofix(True, u'\u1FFc', 'istitle')
        self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
    285 
    def test_isspace(self):
        """Run the shared isspace() tests, then check some non-ASCII characters."""
        string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
        for expected, char in (
            (True, u'\u2000'),   # EN QUAD
            (True, u'\u200a'),   # HAIR SPACE
            (False, u'\u2014'),  # EM DASH
        ):
            self.checkequalnofix(expected, char, 'isspace')
    291 
    def test_isalpha(self):
        """Run the shared isalpha() tests, then check U+1FFC is alphabetic."""
        string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
        sample = u'\u1FFc'
        self.checkequalnofix(True, sample, 'isalpha')
    295 
    def test_isdecimal(self):
        """Check unicode.isdecimal() results and its argument type error."""
        for expected, text in (
            (False, u''),
            (False, u'a'),
            (True, u'0'),
            (False, u'\u2460'),  # CIRCLED DIGIT ONE
            (False, u'\xbc'),    # VULGAR FRACTION ONE QUARTER
            (True, u'\u0660'),   # ARABIC-INDIC DIGIT ZERO
            (True, u'0123456789'),
            (False, u'0123456789a'),
        ):
            self.checkequalnofix(expected, text, 'isdecimal')

        # isdecimal() takes no arguments.
        self.checkraises(TypeError, 'abc', 'isdecimal', 42)
    307 
    def test_isdigit(self):
        """Run the shared isdigit() tests, then check some non-ASCII characters."""
        string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
        for expected, char in (
            (True, u'\u2460'),
            (False, u'\xbc'),
            (True, u'\u0660'),
        ):
            self.checkequalnofix(expected, char, 'isdigit')
    313 
    def test_isnumeric(self):
        """Check unicode.isnumeric() results and its argument type error."""
        for expected, text in (
            (False, u''),
            (False, u'a'),
            (True, u'0'),
            (True, u'\u2460'),
            (True, u'\xbc'),
            (True, u'\u0660'),
            (True, u'0123456789'),
            (False, u'0123456789a'),
        ):
            self.checkequalnofix(expected, text, 'isnumeric')

        # isnumeric() takes no arguments.
        self.assertRaises(TypeError, u"abc".isnumeric, 42)
    325 
    def test_contains(self):
        """Check the "in" operator for str/unicode operands in every combination."""
        # Testing Unicode contains method
        def check(needle, container, present):
            # Assert membership or non-membership as requested.
            if present:
                self.assertIn(needle, container)
            else:
                self.assertNotIn(needle, container)

        # (needle, container, expected membership)
        for needle, container, present in (
            ('a', u'abdb', True),
            ('a', u'bdab', True),
            ('a', u'bdaba', True),
            ('a', u'bdba', True),
            ('a', u'bdba', True),
            (u'a', u'bdba', True),
            (u'a', u'bdb', False),
            (u'a', 'bdb', False),
            (u'a', 'bdba', True),
            (u'a', ('a',1,None), True),
            (u'a', (1,None,'a'), True),
            (u'a', (1,None,u'a'), True),
            ('a', ('a',1,None), True),
            ('a', (1,None,'a'), True),
            ('a', (1,None,u'a'), True),
            ('a', ('x',1,u'y'), False),
            ('a', ('x',1,None), False),
            (u'abcd', u'abcxxxx', False),
            (u'ab', u'abcd', True),
            ('ab', u'abc', True),
            (u'ab', 'abc', True),
            (u'ab', (1,None,u'ab'), True),
            (u'', u'abc', True),
            ('', u'abc', True),
        ):
            check(needle, container, present)

        # If the following fails either
        # the contains operator does not propagate UnicodeErrors or
        # someone has changed the default encoding
        self.assertRaises(UnicodeDecodeError, 'g\xe2teau'.__contains__, u'\xe2')
        self.assertRaises(UnicodeDecodeError, u'g\xe2teau'.__contains__, '\xe2')

        # Each ASCII pair is tried as unicode-in-str, str-in-unicode and
        # unicode-in-unicode, in that order.
        for needle, haystack, present in (
            ('', '', True),
            ('', 'abc', True),
            ('\0', 'abc', False),
            ('\0', '\0abc', True),
            ('\0', 'abc\0', True),
            ('a', '\0abc', True),
            ('asdf', 'asdf', True),
            ('asdf', 'asd', False),
            ('asdf', '', False),
        ):
            check(unicode(needle), haystack, present)
            check(needle, unicode(haystack), present)
            check(unicode(needle), unicode(haystack), present)

        # Missing argument and non-string argument.
        self.assertRaises(TypeError, u"abc".__contains__)
        self.assertRaises(TypeError, u"abc".__contains__, object())
    389 
    def test_formatting(self):
        """Run the shared %-formatting tests, then unicode-specific formatting checks."""
        string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
        # Testing Unicode formatting strings...
        self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
        template = u"%s, %s, %i, %f, %5.2f"
        for values, expected in (
            ((u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00'),
            ((u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00'),
            ((u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50'),
            ((u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57'),
            ((u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57'),
        ):
            self.assertEqual(template % values, expected)
        if not sys.platform.startswith('java'):
            self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
        self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
        self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')

        self.assertEqual(u'%c' % 0x1234, u'\u1234')
        self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
        self.assertRaises(ValueError, u"%.1\u1032f".__mod__, (1.0/3))

        # ASCII range: %c accepts both the byte character and its ordinal.
        for code in range(0x00,0x80):
            byte_char = chr(code)
            as_unicode = unicode(byte_char)
            self.assertEqual(u"%c" % byte_char, as_unicode)
            self.assertEqual(u"%c" % code, as_unicode)
            self.assertTrue(byte_char == u"%c" % byte_char)
            self.assertTrue(byte_char == u"%c" % code)
        # Issue 7649
        for code in range(0x80,0x100):
            uchar = unichr(code)
            self.assertEqual(uchar, u"%c" % code)   # works only with ints
            self.assertEqual(uchar, u"%c" % uchar) # and unicode chars
            # the implicit decoding should fail for non-ascii chars
            for spec in (u"%c", u"%s"):
                self.assertRaises(UnicodeDecodeError, spec.__mod__, chr(code))

        # formatting jobs delegated from the string implementation:
        for mapping, expected in (
            ({'foo':u"abc"}, u'...abc...'),
            ({'foo':"abc"}, '...abc...'),
            ({u'foo':"abc"}, '...abc...'),
            ({u'foo':u"abc"}, u'...abc...'),
            ({u'foo':u"abc",'def':123},  u'...abc...'),
            ({u'foo':u"abc",u'def':123}, u'...abc...'),
        ):
            self.assertEqual('...%(foo)s...' % mapping, expected)
        # (str format, right-hand operand of %, expected unicode result)
        for fmt, operand, expected in (
            ('...%s...%s...%s...%s...', (1,2,3,u"abc"), u'...1...2...3...abc...'),
            ('...%%...%%s...%s...%s...%s...%s...', (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...'),
            ('...%s...', u"abc", u'...abc...'),
            ('%*s', (5,u'abc',), u'  abc'),
            ('%*s', (-5,u'abc',), u'abc  '),
            ('%*.*s', (5,2,u'abc',), u'   ab'),
            ('%*.*s', (5,3,u'abc',), u'  abc'),
            ('%i %*.*s', (10, 5,3,u'abc',), u'10   abc'),
            ('%i%s %*.*s', (10, 3, 5, 3, u'abc',), u'103   abc'),
            ('%c', u'a', u'a'),
        ):
            self.assertEqual(fmt % operand, expected)
        class Wrapper:
            def __str__(self):
                return u'\u1234'
        self.assertEqual('%s' % Wrapper(), u'\u1234')
    444 
    def test_startswith_endswith_errors(self):
        """Check the errors raised by startswith()/endswith() for bad arguments."""
        for meth in (u'foo'.startswith, u'foo'.endswith):
            # A non-ASCII byte string cannot be decoded implicitly.
            self.assertRaises(UnicodeDecodeError, meth, '\xff')
            # A list is rejected; the message must name the accepted types.
            with self.assertRaises(TypeError) as cm:
                meth(['f'])
            exc = str(cm.exception)
            for type_name in ('unicode', 'str', 'tuple'):
                self.assertIn(type_name, exc)
    455 
    @test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
    def test_format_float(self):
        """Check that %-formatting a float uses '.' as the decimal separator."""
        # should not format with a comma, but always with C locale
        formatted = u'%.1f' % 1.0
        self.assertEqual(u'1.0', formatted)
    460 
    461     def test_constructor(self):
    462         # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
    463 
    464         self.assertEqual(
    465             unicode(u'unicode remains unicode'),
    466             u'unicode remains unicode'
    467         )
    468 
    469         class UnicodeSubclass(unicode):
    470             pass
    471 
    472         self.assertEqual(
    473             unicode(UnicodeSubclass('unicode subclass becomes unicode')),
    474             u'unicode subclass becomes unicode'
    475         )
    476 
    477         self.assertEqual(
    478             unicode('strings are converted to unicode'),
    479             u'strings are converted to unicode'
    480         )
    481 
    482         class UnicodeCompat:
    483             def __init__(self, x):
    484                 self.x = x
    485             def __unicode__(self):
    486                 return self.x
    487 
    488         self.assertEqual(
    489             unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
    490             u'__unicode__ compatible objects are recognized')
    491 
    492         class StringCompat:
    493             def __init__(self, x):
    494                 self.x = x
    495             def __str__(self):
    496                 return self.x
    497 
    498         self.assertEqual(
    499             unicode(StringCompat('__str__ compatible objects are recognized')),
    500             u'__str__ compatible objects are recognized'
    501         )
    502 
    503         # unicode(obj) is compatible to str():
    504 
    505         o = StringCompat('unicode(obj) is compatible to str()')
    506         self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
    507         self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
    508 
    509         # %-formatting and .__unicode__()
    510         self.assertEqual(u'%s' %
    511                          UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"),
    512                          u"u'%s' % obj uses obj.__unicode__()")
    513         self.assertEqual(u'%s' %
    514                          UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"),
    515                          u"u'%s' % obj falls back to obj.__str__()")
    516 
    517         for obj in (123, 123.45, 123L):
    518             self.assertEqual(unicode(obj), unicode(str(obj)))
    519 
    520         # unicode(obj, encoding, error) tests (this maps to
    521         # PyUnicode_FromEncodedObject() at C level)
    522 
    523         if not sys.platform.startswith('java'):
    524             self.assertRaises(
    525                 TypeError,
    526                 unicode,
    527                 u'decoding unicode is not supported',
    528                 'utf-8',
    529                 'strict'
    530             )
    531 
    532         self.assertEqual(
    533             unicode('strings are decoded to unicode', 'utf-8', 'strict'),
    534             u'strings are decoded to unicode'
    535         )
    536 
    537         if not sys.platform.startswith('java'):
    538             with test_support.check_py3k_warnings():
    539                 buf = buffer('character buffers are decoded to unicode')
    540             self.assertEqual(
    541                 unicode(
    542                     buf,
    543                     'utf-8',
    544                     'strict'
    545                 ),
    546                 u'character buffers are decoded to unicode'
    547             )
    548 
    549         self.assertRaises(TypeError, unicode, 42, 42, 42)
    550 
    551     def test_codecs_utf7(self):
    552         utfTests = [
    553             (u'A\u2262\u0391.', 'A+ImIDkQ.'),             # RFC2152 example
    554             (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'),     # RFC2152 example
    555             (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'),        # RFC2152 example
    556             (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
    557             (u'+', '+-'),
    558             (u'+-', '+--'),
    559             (u'+?', '+-?'),
    560             (u'\?', '+AFw?'),
    561             (u'+?', '+-?'),
    562             (ur'\\?', '+AFwAXA?'),
    563             (ur'\\\?', '+AFwAXABc?'),
    564             (ur'++--', '+-+---'),
    565             (u'\U000abcde', '+2m/c3g-'),                  # surrogate pairs
    566             (u'/', '/'),
    567         ]
    568 
    569         for (x, y) in utfTests:
    570             self.assertEqual(x.encode('utf-7'), y)
    571 
    572         # Unpaired surrogates not supported
    573         self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
    574 
    575         self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd\ufffd')
    576 
    577         # Direct encoded characters
    578         set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
    579         # Optional direct characters
    580         set_o = '!"#$%&*;<=>@[]^_`{|}'

    581         for c in set_d:
    582             self.assertEqual(c.encode('utf7'), c.encode('ascii'))
    583             self.assertEqual(c.encode('ascii').decode('utf7'), unicode(c))
    584             self.assertTrue(c == c.encode('ascii').decode('utf7'))
    585         for c in set_o:
    586             self.assertEqual(c.encode('ascii').decode('utf7'), unicode(c))
    587             self.assertTrue(c == c.encode('ascii').decode('utf7'))
    588 
    def test_codecs_utf8(self):
        """Check UTF-8 encoding of sample texts and decoding of multi-byte sequences."""
        # (unicode text, expected UTF-8 bytes)
        for text, encoded in (
            (u'', ''),
            (u'\u20ac', '\xe2\x82\xac'),
            (u'\ud800\udc02', '\xf0\x90\x80\x82'),
            (u'\ud84d\udc56', '\xf0\xa3\x91\x96'),
            (u'\ud800', '\xed\xa0\x80'),
            (u'\udc00', '\xed\xb0\x80'),
        ):
            self.assertEqual(text.encode('utf-8'), encoded)
        repeat = 1000
        self.assertEqual(
            (u'\ud800\udc02'*repeat).encode('utf-8'),
            '\xf0\x90\x80\x82'*repeat
        )
        long_text = (
            u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
            u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
            u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
            u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
            u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
            u' Nunstuck git und'
        )
        long_encoded = (
            '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
            '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
            '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
            '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
            '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
            '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
            '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
            '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
            '\xe3\x80\x8cWenn ist das Nunstuck git und'
        )
        self.assertEqual(long_text.encode('utf-8'), long_encoded)

        # UTF-8 specific decoding tests
        for raw, expected in (
            ('\xf0\xa3\x91\x96', u'\U00023456'),
            ('\xf0\x90\x80\x82', u'\U00010002'),
            ('\xe2\x82\xac', u'\u20ac'),
        ):
            self.assertEqual(unicode(raw, 'utf-8'), expected)

        # Other possible utf-8 test cases:
        # * strict decoding testing for all of the
        #   UTF8_ERROR cases in PyUnicode_DecodeUTF8

    627 
    def test_utf8_decode_valid_sequences(self):
        # Decodes the boundary sequences of each UTF-8 length class, then
        # round-trips every code point the build supports through UTF-8.
        sequences = [
            # single byte
            ('\x00', u'\x00'), ('a', u'a'), ('\x7f', u'\x7f'),
            # 2 bytes
            ('\xc2\x80', u'\x80'), ('\xdf\xbf', u'\u07ff'),
            # 3 bytes
            ('\xe0\xa0\x80', u'\u0800'), ('\xed\x9f\xbf', u'\ud7ff'),
            ('\xee\x80\x80', u'\uE000'), ('\xef\xbf\xbf', u'\uffff'),
            # 4 bytes
            ('\xF0\x90\x80\x80', u'\U00010000'),
            ('\xf4\x8f\xbf\xbf', u'\U0010FFFF')
        ]
        for seq, res in sequences:
            self.assertEqual(seq.decode('utf-8'), res)

        # The upper bound is sys.maxunicode + 1 so that the highest code
        # point is exercised too; xrange avoids materialising a list with
        # one entry per code point before the loop starts.
        for codepoint in xrange(sys.maxunicode + 1):
            ch = unichr(codepoint)
            self.assertEqual(ch, ch.encode('utf-8').decode('utf-8'))
    646 
    def test_utf8_decode_invalid_sequences(self):
        # Strict UTF-8 decoding must raise UnicodeDecodeError for the
        # ill-formed byte sequences built below.
        def byte_range(start, stop):
            # list of 1-character byte strings for start <= value < stop
            return [chr(value) for value in range(start, stop)]

        def assert_undecodable(data):
            self.assertRaises(UnicodeDecodeError, data.decode, 'utf-8')

        # continuation bytes in a sequence of 2, 3, or 4 bytes
        continuation_bytes = byte_range(0x80, 0xC0)
        # start bytes of a 2-byte sequence equivalent to codepoints <= 0x7F
        invalid_2B_seq_start_bytes = byte_range(0xC0, 0xC2)
        # start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF
        invalid_4B_seq_start_bytes = byte_range(0xF5, 0xF8)
        invalid_start_bytes = (
            continuation_bytes + invalid_2B_seq_start_bytes +
            invalid_4B_seq_start_bytes + byte_range(0xF7, 0x100)
        )

        # none of these bytes may stand on its own
        for lone_byte in invalid_start_bytes:
            assert_undecodable(lone_byte)

        # an invalid 2-byte start byte stays invalid with any continuation
        for start in invalid_2B_seq_start_bytes:
            for cont in continuation_bytes:
                assert_undecodable(start + cont)

        # same for the invalid 4-byte start bytes (sampling only the first
        # three continuation bytes in the second and fourth position)
        sampled = continuation_bytes[:3]
        for start in invalid_4B_seq_start_bytes:
            for second in sampled:
                for fourth in sampled:
                    assert_undecodable(start + second + '\x80' + fourth)

        # '\xE0' followed by a second byte below 0xA0
        for second in byte_range(0x80, 0xA0):
            assert_undecodable('\xE0' + second + '\x80')
            assert_undecodable('\xE0' + second + '\xBF')

        # XXX: surrogates shouldn't be valid UTF-8!
        # see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
        # (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt
        # Ideally '\xED' + <0xA0..0xBF> + '\x80' (or '\xBF') would raise
        # UnicodeDecodeError, but since they are valid on Python 2 add a
        # test for that: each second byte maps to a surrogate 64 code
        # points after the previous one, starting at U+D800.
        for index, second in enumerate(byte_range(0xA0, 0xC0)):
            surrogate = unichr(0xd800 + 64 * index)
            encoded = '\xED' + second + '\x80'
            self.assertEqual(encoded.decode('utf-8'), surrogate)
            self.assertEqual(surrogate.encode('utf-8'), encoded)

        # '\xF0' followed by a second byte below 0x90
        for second in byte_range(0x80, 0x90):
            assert_undecodable('\xF0' + second + '\x80\x80')
            assert_undecodable('\xF0' + second + '\xBF\xBF')
        # '\xF4' followed by a second byte of 0x90 or above
        for second in byte_range(0x90, 0xC0):
            assert_undecodable('\xF4' + second + '\x80\x80')
            assert_undecodable('\xF4' + second + '\xBF\xBF')
    702 
    def test_issue8271(self):
        # Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
        # only the start byte and the continuation byte(s) are now considered
        # invalid, instead of the number of bytes specified by the start byte.
        # See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95,
        # table 3-8, Row 2) for more information about the algorithm used.
        #
        # Each entry below is (invalid byte string, expected result of
        # decoding it with the 'replace' error handler).  In the comments,
        # "cb" means continuation byte; a "valid cb" is a byte such as
        # '\x80', an "invalid cb" is a byte such as '\x41' ('A') found where
        # a continuation byte was expected.
        FFFD = u'\ufffd'
        sequences = [
            # invalid start bytes
            ('\x80', FFFD), # continuation byte
            ('\x80\x80', FFFD*2), # 2 continuation bytes
            ('\xc0', FFFD),
            ('\xc0\xc0', FFFD*2),
            ('\xc1', FFFD),
            ('\xc1\xc0', FFFD*2),
            ('\xc0\xc1', FFFD*2),
            # with start byte of a 2-byte sequence
            ('\xc2', FFFD), # only the start byte
            ('\xc2\xc2', FFFD*2), # 2 start bytes
            ('\xc2\xc2\xc2', FFFD*3), # 3 start bytes
            ('\xc2\x41', FFFD+'A'), # invalid continuation byte
            # with start byte of a 3-byte sequence
            ('\xe1', FFFD), # only the start byte
            ('\xe1\xe1', FFFD*2), # 2 start bytes
            ('\xe1\xe1\xe1', FFFD*3), # 3 start bytes
            ('\xe1\xe1\xe1\xe1', FFFD*4), # 4 start bytes
            ('\xe1\x80', FFFD), # only 1 continuation byte
            ('\xe1\x41', FFFD+'A'), # invalid continuation byte
            ('\xe1\x41\x80', FFFD+'A'+FFFD), # invalid cb followed by valid cb
            ('\xe1\x41\x41', FFFD+'AA'), # 2 invalid continuation bytes
            ('\xe1\x80\x41', FFFD+'A'), # only 1 valid continuation byte
            ('\xe1\x80\xe1\x41', FFFD*2+'A'), # 1 valid and the other invalid
            ('\xe1\x41\xe1\x80', FFFD+'A'+FFFD), # 1 invalid and the other valid
            # with start byte of a 4-byte sequence
            ('\xf1', FFFD), # only the start byte
            ('\xf1\xf1', FFFD*2), # 2 start bytes
            ('\xf1\xf1\xf1', FFFD*3), # 3 start bytes
            ('\xf1\xf1\xf1\xf1', FFFD*4), # 4 start bytes
            ('\xf1\xf1\xf1\xf1\xf1', FFFD*5), # 5 start bytes
            ('\xf1\x80', FFFD), # only 1 continuation byte
            ('\xf1\x80\x80', FFFD), # only 2 continuation bytes
            ('\xf1\x80\x41', FFFD+'A'), # 1 valid cb and 1 invalid
            ('\xf1\x80\x41\x41', FFFD+'AA'), # 1 valid cb and 2 invalid
            ('\xf1\x80\x80\x41', FFFD+'A'), # 2 valid cb and 1 invalid
            ('\xf1\x41\x80', FFFD+'A'+FFFD), # 1 invalid cb and 1 valid
            ('\xf1\x41\x80\x80', FFFD+'A'+FFFD*2), # 1 invalid cb and 2 valid
            ('\xf1\x41\x80\x41', FFFD+'A'+FFFD+'A'), # invalid, valid, invalid cb
            ('\xf1\x41\x41\x80', FFFD+'AA'+FFFD), # 2 invalid cb and 1 valid
            ('\xf1\x41\xf1\x80', FFFD+'A'+FFFD),
            ('\xf1\x41\x80\xf1', FFFD+'A'+FFFD*2),
            ('\xf1\xf1\x80\x41', FFFD*2+'A'),
            ('\xf1\x41\xf1\xf1', FFFD+'A'+FFFD*2),
            # with invalid start byte of a 4-byte sequence (rfc2279)
            ('\xf5', FFFD), # only the start byte
            ('\xf5\xf5', FFFD*2), # 2 start bytes
            ('\xf5\x80', FFFD*2), # only 1 continuation byte
            ('\xf5\x80\x80', FFFD*3), # only 2 continuation bytes
            ('\xf5\x80\x80\x80', FFFD*4), # 3 continuation bytes
            ('\xf5\x80\x41', FFFD*2+'A'), #  1 valid cb and 1 invalid
            ('\xf5\x80\x41\xf5', FFFD*2+'A'+FFFD),
            ('\xf5\x41\x80\x80\x41', FFFD+'A'+FFFD*2+'A'),
            # with invalid start byte of a 5-byte sequence (rfc2279)
            ('\xf8', FFFD), # only the start byte
            ('\xf8\xf8', FFFD*2), # 2 start bytes
            ('\xf8\x80', FFFD*2), # only one continuation byte
            ('\xf8\x80\x41', FFFD*2 + 'A'), # 1 valid cb and 1 invalid
            ('\xf8\x80\x80\x80\x80', FFFD*5), # invalid 5 bytes seq with 5 bytes
            # with invalid start byte of a 6-byte sequence (rfc2279)
            ('\xfc', FFFD), # only the start byte
            ('\xfc\xfc', FFFD*2), # 2 start bytes
            ('\xfc\x80\x80', FFFD*3), # only 2 continuation bytes
            ('\xfc\x80\x80\x80\x80\x80', FFFD*6), # 6 continuation bytes
            # invalid start byte
            ('\xfe', FFFD),
            ('\xfe\x80\x80', FFFD*3),
            # other sequences
            ('\xf1\x80\x41\x42\x43', u'\ufffd\x41\x42\x43'),
            ('\xf1\x80\xff\x42\x43', u'\ufffd\ufffd\x42\x43'),
            ('\xf1\x80\xc2\x81\x43', u'\ufffd\x81\x43'),
            ('\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64',
             u'\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'),
        ]
        for n, (seq, res) in enumerate(sequences):
            # every sequence is rejected outright in strict mode
            self.assertRaises(UnicodeDecodeError, seq.decode, 'utf-8', 'strict')
            self.assertEqual(seq.decode('utf-8', 'replace'), res)
            # a valid byte appended after the sequence must still be decoded
            self.assertEqual((seq+'b').decode('utf-8', 'replace'), res+'b')
            # 'ignore' yields the 'replace' result minus the U+FFFD markers
            self.assertEqual(seq.decode('utf-8', 'ignore'),
                             res.replace(u'\uFFFD', ''))
    791 
    def test_codecs_idna(self):
        # Test whether trailing dot is preserved
        hostname = u"www.python.org."
        encoded = hostname.encode("idna")
        self.assertEqual(encoded, "www.python.org.")
    795 
    def test_codecs_errors(self):
        # Walks through the error paths of encoding and decoding: error
        # handler names, keyword arguments, broken codecs and bad arguments.
        text = u'Andr\202 x'    # unicode with one non-ASCII character
        raw = 'Andr\202 x'      # byte string with one non-ASCII byte

        # Error handling (encoding)
        encode = text.encode
        self.assertRaises(UnicodeError, encode, 'ascii')
        self.assertRaises(UnicodeError, encode, 'ascii','strict')
        self.assertEqual(encode('ascii','ignore'), "Andr x")
        self.assertEqual(encode('ascii','replace'), "Andr? x")
        # positional and keyword spellings must agree
        self.assertEqual(encode('ascii', 'replace'),
                         encode('ascii', errors='replace'))
        self.assertEqual(encode('ascii', 'ignore'),
                         encode(encoding='ascii', errors='ignore'))

        # Error handling (decoding)
        self.assertRaises(UnicodeError, unicode, raw, 'ascii')
        self.assertRaises(UnicodeError, unicode, raw, 'ascii','strict')
        self.assertEqual(unicode(raw,'ascii','ignore'), u"Andr x")
        self.assertEqual(unicode(raw,'ascii','replace'), u'Andr\uFFFD x')
        # positional and keyword spellings must agree
        decode = u'abcde'.decode
        self.assertEqual(decode('ascii', 'ignore'),
                         decode('ascii', errors='ignore'))
        self.assertEqual(decode('ascii', 'replace'),
                         decode(encoding='ascii', errors='replace'))

        # Error handling (unknown character names)
        unknown_name = "\\N{foo}xx"
        self.assertEqual(unknown_name.decode("unicode-escape", "ignore"), u"xx")

        # Error handling (truncated escape sequence)
        self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")

        # The test.unicode1 / test.unicode2 codecs return malformed results
        # (not a tuple / no unicode), which must surface as TypeError.
        self.assertRaises(TypeError, "hello".decode, "test.unicode1")
        self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
        self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
        self.assertRaises(TypeError, u"hello".encode, "test.unicode2")
        # executes PyUnicode_Encode()
        import imp
        search_path = [u"non-existing dir"]
        self.assertRaises(ImportError, imp.find_module,
                          "non-existing module", search_path)

        # Error handling (wrong arguments)
        self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)

        # Error handling (PyUnicode_EncodeDecimal())
        self.assertRaises(UnicodeError, int, u"\u0200")
    841 
    def test_codecs(self):
        # Checks a handful of known encodings of u'hello', then verifies
        # that encode followed by decode returns the original text for
        # several codecs and code point ranges.
        def assert_roundtrip(text, codecs_to_try):
            for codec in codecs_to_try:
                self.assertEqual(unicode(text.encode(codec), codec), text)

        # Encoding
        greeting = u'hello'
        expected_bytes = [
            ('ascii', 'hello'),
            ('utf-7', 'hello'),
            ('utf-8', 'hello'),
            ('utf8', 'hello'),
            ('utf-16-le', 'h\000e\000l\000l\000o\000'),
            ('utf-16-be', '\000h\000e\000l\000l\000o'),
            ('latin-1', 'hello'),
        ]
        for codec, encoded in expected_bytes:
            self.assertEqual(greeting.encode(codec), encoded)

        # Roundtrip safety for BMP (just the first 1024 chars)
        unicode_codecs = ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
                          'utf-16-be', 'raw_unicode_escape',
                          'unicode_escape', 'unicode_internal')
        for code in xrange(1024):
            assert_roundtrip(unichr(code), unicode_codecs)

        # Roundtrip safety for BMP (just the first 256 chars)
        for code in xrange(256):
            assert_roundtrip(unichr(code), ('latin-1',))

        # Roundtrip safety for BMP (just the first 128 chars)
        for code in xrange(128):
            assert_roundtrip(unichr(code), ('ascii',))

        # Roundtrip safety for non-BMP (just a few chars)
        astral = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
        assert_roundtrip(astral, ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                                  #'raw_unicode_escape',
                                  'unicode_escape', 'unicode_internal'))

        # UTF-8 must be roundtrip safe for all UCS-2 code points
        # This excludes surrogates: in the full range, there would be
        # a surrogate pair (\udbff\udc00), which gets converted back
        # to a non-BMP character (\U0010fc00)
        code_points = range(0, 0xd800) + range(0xe000, 0x10000)
        bmp_text = u''.join([unichr(code) for code in code_points])
        assert_roundtrip(bmp_text, ('utf-8',))
    886 
    def test_codecs_charmap(self):
        # For each listed charmap codec, decoding a block of bytes and
        # encoding the result again must give back the same bytes.
        def assert_bytes_roundtrip(data, codec_names):
            for codec in codec_names:
                self.assertEqual(unicode(data, codec).encode(codec), data)

        # 0-127
        low_half = ''.join([chr(value) for value in xrange(128)])
        low_half_codecs = (
            'cp037', 'cp1026',
            'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
            'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
            'cp863', 'cp865', 'cp866',
            'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
            'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
            'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
            'mac_cyrillic', 'mac_latin2',

            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
            'cp1256', 'cp1257', 'cp1258',
            'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

            'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
            'cp1006', 'iso8859_8',

            ### These have undefined mappings:
            #'cp424',

            ### These fail the round-trip:
            #'cp875'
        )
        assert_bytes_roundtrip(low_half, low_half_codecs)

        # 128-255
        high_half = ''.join([chr(value) for value in xrange(128, 256)])
        high_half_codecs = (
            'cp037', 'cp1026',
            'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
            'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
            'cp863', 'cp865', 'cp866',
            'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
            'iso8859_2', 'iso8859_4', 'iso8859_5',
            'iso8859_9', 'koi8_r', 'latin_1',
            'mac_cyrillic', 'mac_latin2',

            ### These have undefined mappings:
            #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
            #'cp1256', 'cp1257', 'cp1258',
            #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
            #'iso8859_3', 'iso8859_6', 'iso8859_7',
            #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',

            ### These fail the round-trip:
            #'cp1006', 'cp875', 'iso8859_8',
        )
        assert_bytes_roundtrip(high_half, high_half_codecs)
    940 
    def test_concatenation(self):
        # Adjacent string literals, in any mix of str and unicode, must
        # combine into a single unicode string.
        two_parts = u"abcdef"
        three_parts = u"abcdefghi"

        self.assertEqual(u"abc" u"def", two_parts)
        self.assertEqual("abc" u"def", two_parts)
        self.assertEqual(u"abc" "def", two_parts)
        self.assertEqual(u"abc" u"def" "ghi", three_parts)
        self.assertEqual("abc" "def" u"ghi", three_parts)
    947 
    def test_printing(self):
        # Smoke test: printing unicode (alone, mixed with str, with and
        # without a trailing comma) to a file-like object must not raise.
        class NullWriter:
            # file-like object that discards everything written to it
            def write(self, text):
                pass

        sink = NullWriter()
        # The statement order is kept as is: a trailing comma affects how
        # the following print statement behaves.
        print >>sink, u'abc'
        print >>sink, u'abc', u'def'
        print >>sink, u'abc', 'def'
        print >>sink, 'abc', u'def'
        print >>sink, u'abc\n'
        print >>sink, u'abc\n',
        print >>sink, u'abc\n',
        print >>sink, u'def\n'
        print >>sink, u'def\n'
    963 
    964     def test_ucs4(self):
    965         x = u'\U00100000'
    966         y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
    967         self.assertEqual(x, y)
    968 
    969         y = r'\U00100000'
    970         x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
    971         self.assertEqual(x, y)
    972         y = r'\U00010000'
    973         x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
    974         self.assertEqual(x, y)
    975 
    976         try:
    977             '\U11111111'.decode("raw-unicode-escape")
    978         except UnicodeDecodeError as e:
    979             self.assertEqual(e.start, 0)
    980             self.assertEqual(e.end, 10)
    981         else:
    982             self.fail("Should have raised UnicodeDecodeError")
    983 
    def test_conversion(self):
        # Make sure __unicode__() works properly
        # Each helper class below covers one combination of base class
        # (old-style, object, str, unicode) and conversion hooks.

        class OldStyleWithStr:
            # old-style class, only __str__
            def __str__(self):
                return "foo"

        class OldStyleWithUnicode:
            # old-style class, __unicode__ returning unicode
            def __unicode__(self):
                return u"foo"

        class NewStyleWithUnicode(object):
            # new-style class, __unicode__ returning unicode
            def __unicode__(self):
                return u"foo"

        class NewStyleUnicodeReturnsStr(object):
            # new-style class, __unicode__ returning a byte string
            def __unicode__(self):
                return "foo"

        class StrSubclassUnicodeReturnsStr(str):
            def __unicode__(self):
                return "foo"

        class UnicodeSubclassUnicodeReturnsStr(unicode):
            def __unicode__(self):
                return "foo"

        class StrSubclassWithBoth(str):
            def __str__(self):
                return "foos"

            def __unicode__(self):
                return u"foou"

        class UnicodeSubclassWithBoth(unicode):
            def __str__(self):
                return "foos"
            def __unicode__(self):
                return u"foou"

        class UnicodeSubclassReturnsSelf(unicode):
            # doubles its content on construction; __unicode__ returns self
            def __new__(cls, content=""):
                return unicode.__new__(cls, 2*content)
            def __unicode__(self):
                return self

        class UnicodeSubclassDistinctHooks(unicode):
            def __str__(self):
                return "string"
            def __unicode__(self):
                return "not unicode"

        # (object to convert, expected unicode() result)
        unicode_cases = [
            (OldStyleWithStr(), u"foo"),
            (OldStyleWithUnicode(), u"foo"),
            (NewStyleWithUnicode(), u"foo"),
            (NewStyleUnicodeReturnsStr(), u"foo"),
            (StrSubclassUnicodeReturnsStr("bar"), u"foo"),
            (UnicodeSubclassUnicodeReturnsStr("bar"), u"foo"),
            (StrSubclassWithBoth("bar"), u"foou"),
            (UnicodeSubclassWithBoth("bar"), u"foou"),
            (UnicodeSubclassReturnsSelf("foo"), u"foofoo"),
        ]
        for obj, expected in unicode_cases:
            self.assertEqual(unicode(obj), expected)

        # str() and unicode() must each use their own hook
        self.assertEqual(str(UnicodeSubclassDistinctHooks("foo")), "string")
        self.assertEqual(unicode(UnicodeSubclassDistinctHooks("foo")),
                         u"not unicode")
   1046 
   1047     def test_unicode_repr(self):
   1048         class s1:
   1049             def __repr__(self):
   1050                 return '\\n'
   1051 
   1052         class s2:
   1053             def __repr__(self):
   1054                 return u'\\n'
   1055 
   1056         self.assertEqual(repr(s1()), '\\n')
   1057         self.assertEqual(repr(s2()), '\\n')
   1058 
   1059     def test_expandtabs_overflows_gracefully(self):
   1060         # This test only affects 32-bit platforms because expandtabs can only take

   1061         # an int as the max value, not a 64-bit C long.  If expandtabs is changed

   1062         # to take a 64-bit long, this test should apply to all platforms.

   1063         if sys.maxint > (1 << 32) or struct.calcsize('P') != 4:
   1064             return
   1065         self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint)
   1066 
   1067     def test__format__(self):
   1068         def test(value, format, expected):
   1069             # test both with and without the trailing 's'

   1070             self.assertEqual(value.__format__(format), expected)
   1071             self.assertEqual(value.__format__(format + u's'), expected)
   1072 
   1073         test(u'', u'', u'')
   1074         test(u'abc', u'', u'abc')
   1075         test(u'abc', u'.3', u'abc')
   1076         test(u'ab', u'.3', u'ab')
   1077         test(u'abcdef', u'.3', u'abc')
   1078         test(u'abcdef', u'.0', u'')
   1079         test(u'abc', u'3.3', u'abc')
   1080         test(u'abc', u'2.3', u'abc')
   1081         test(u'abc', u'2.2', u'ab')
   1082         test(u'abc', u'3.2', u'ab ')
   1083         test(u'result', u'x<0', u'result')
   1084         test(u'result', u'x<5', u'result')
   1085         test(u'result', u'x<6', u'result')
   1086         test(u'result', u'x<7', u'resultx')
   1087         test(u'result', u'x<8', u'resultxx')
   1088         test(u'result', u' <7', u'result ')
   1089         test(u'result', u'<7', u'result ')
   1090         test(u'result', u'>7', u' result')
   1091         test(u'result', u'>8', u'  result')
   1092         test(u'result', u'^8', u' result ')
   1093         test(u'result', u'^9', u' result  ')
   1094         test(u'result', u'^10', u'  result  ')
   1095         test(u'a', u'10000', u'a' + u' ' * 9999)
   1096         test(u'', u'10000', u' ' * 10000)
   1097         test(u'', u'10000000', u' ' * 10000000)
   1098 
   1099         # test mixing unicode and str

   1100         self.assertEqual(u'abc'.__format__('s'), u'abc')
   1101         self.assertEqual(u'abc'.__format__('->10s'), u'-------abc')
   1102 
   1103     def test_format(self):
   1104         self.assertEqual(u''.format(), u'')
   1105         self.assertEqual(u'a'.format(), u'a')
   1106         self.assertEqual(u'ab'.format(), u'ab')
   1107         self.assertEqual(u'a{{'.format(), u'a{')
   1108         self.assertEqual(u'a}}'.format(), u'a}')
   1109         self.assertEqual(u'{{b'.format(), u'{b')
   1110         self.assertEqual(u'}}b'.format(), u'}b')
   1111         self.assertEqual(u'a{{b'.format(), u'a{b')
   1112 
   1113         # examples from the PEP:

   1114         import datetime
   1115         self.assertEqual(u"My name is {0}".format(u'Fred'), u"My name is Fred")
   1116         self.assertEqual(u"My name is {0[name]}".format(dict(name=u'Fred')),
   1117                          u"My name is Fred")
   1118         self.assertEqual(u"My name is {0} :-{{}}".format(u'Fred'),
   1119                          u"My name is Fred :-{}")
   1120 
   1121         # datetime.__format__ doesn't work with unicode

   1122         #d = datetime.date(2007, 8, 18)

   1123         #self.assertEqual("The year is {0.year}".format(d),

   1124         #                 "The year is 2007")

   1125 
   1126         # classes we'll use for testing

   1127         class C:
   1128             def __init__(self, x=100):
   1129                 self._x = x
   1130             def __format__(self, spec):
   1131                 return spec
   1132 
   1133         class D:
   1134             def __init__(self, x):
   1135                 self.x = x
   1136             def __format__(self, spec):
   1137                 return str(self.x)
   1138 
   1139         # class with __str__, but no __format__

   1140         class E:
   1141             def __init__(self, x):
   1142                 self.x = x
   1143             def __str__(self):
   1144                 return u'E(' + self.x + u')'
   1145 
   1146         # class with __repr__, but no __format__ or __str__

   1147         class F:
   1148             def __init__(self, x):
   1149                 self.x = x
   1150             def __repr__(self):
   1151                 return u'F(' + self.x + u')'
   1152 
   1153         # class with __format__ that forwards to string, for some format_spec's

   1154         class G:
   1155             def __init__(self, x):
   1156                 self.x = x
   1157             def __str__(self):
   1158                 return u"string is " + self.x
   1159             def __format__(self, format_spec):
   1160                 if format_spec == 'd':
   1161                     return u'G(' + self.x + u')'
   1162                 return object.__format__(self, format_spec)
   1163 
   1164         # class that returns a bad type from __format__

   1165         class H:
   1166             def __format__(self, format_spec):
   1167                 return 1.0
   1168 
   1169         class I(datetime.date):
   1170             def __format__(self, format_spec):
   1171                 return self.strftime(format_spec)
   1172 
   1173         class J(int):
   1174             def __format__(self, format_spec):
   1175                 return int.__format__(self * 2, format_spec)
   1176 
   1177 
   1178         self.assertEqual(u''.format(), u'')
   1179         self.assertEqual(u'abc'.format(), u'abc')
   1180         self.assertEqual(u'{0}'.format(u'abc'), u'abc')
   1181         self.assertEqual(u'{0:}'.format(u'abc'), u'abc')
   1182         self.assertEqual(u'X{0}'.format(u'abc'), u'Xabc')
   1183         self.assertEqual(u'{0}X'.format(u'abc'), u'abcX')
   1184         self.assertEqual(u'X{0}Y'.format(u'abc'), u'XabcY')
   1185         self.assertEqual(u'{1}'.format(1, u'abc'), u'abc')
   1186         self.assertEqual(u'X{1}'.format(1, u'abc'), u'Xabc')
   1187         self.assertEqual(u'{1}X'.format(1, u'abc'), u'abcX')
   1188         self.assertEqual(u'X{1}Y'.format(1, u'abc'), u'XabcY')
   1189         self.assertEqual(u'{0}'.format(-15), u'-15')
   1190         self.assertEqual(u'{0}{1}'.format(-15, u'abc'), u'-15abc')
   1191         self.assertEqual(u'{0}X{1}'.format(-15, u'abc'), u'-15Xabc')
   1192         self.assertEqual(u'{{'.format(), u'{')
   1193         self.assertEqual(u'}}'.format(), u'}')
   1194         self.assertEqual(u'{{}}'.format(), u'{}')
   1195         self.assertEqual(u'{{x}}'.format(), u'{x}')
   1196         self.assertEqual(u'{{{0}}}'.format(123), u'{123}')
   1197         self.assertEqual(u'{{{{0}}}}'.format(), u'{{0}}')
   1198         self.assertEqual(u'}}{{'.format(), u'}{')
   1199         self.assertEqual(u'}}x{{'.format(), u'}x{')
   1200 
   1201         # weird field names

   1202         self.assertEqual(u"{0[foo-bar]}".format({u'foo-bar':u'baz'}), u'baz')
   1203         self.assertEqual(u"{0[foo bar]}".format({u'foo bar':u'baz'}), u'baz')
   1204         self.assertEqual(u"{0[ ]}".format({u' ':3}), u'3')
   1205 
   1206         self.assertEqual(u'{foo._x}'.format(foo=C(20)), u'20')
   1207         self.assertEqual(u'{1}{0}'.format(D(10), D(20)), u'2010')
   1208         self.assertEqual(u'{0._x.x}'.format(C(D(u'abc'))), u'abc')
   1209         self.assertEqual(u'{0[0]}'.format([u'abc', u'def']), u'abc')
   1210         self.assertEqual(u'{0[1]}'.format([u'abc', u'def']), u'def')
   1211         self.assertEqual(u'{0[1][0]}'.format([u'abc', [u'def']]), u'def')
   1212         self.assertEqual(u'{0[1][0].x}'.format(['abc', [D(u'def')]]), u'def')
   1213 
   1214         # strings

   1215         self.assertEqual(u'{0:.3s}'.format(u'abc'), u'abc')
   1216         self.assertEqual(u'{0:.3s}'.format(u'ab'), u'ab')
   1217         self.assertEqual(u'{0:.3s}'.format(u'abcdef'), u'abc')
   1218         self.assertEqual(u'{0:.0s}'.format(u'abcdef'), u'')
   1219         self.assertEqual(u'{0:3.3s}'.format(u'abc'), u'abc')
   1220         self.assertEqual(u'{0:2.3s}'.format(u'abc'), u'abc')
   1221         self.assertEqual(u'{0:2.2s}'.format(u'abc'), u'ab')
   1222         self.assertEqual(u'{0:3.2s}'.format(u'abc'), u'ab ')
   1223         self.assertEqual(u'{0:x<0s}'.format(u'result'), u'result')
   1224         self.assertEqual(u'{0:x<5s}'.format(u'result'), u'result')
   1225         self.assertEqual(u'{0:x<6s}'.format(u'result'), u'result')
   1226         self.assertEqual(u'{0:x<7s}'.format(u'result'), u'resultx')
   1227         self.assertEqual(u'{0:x<8s}'.format(u'result'), u'resultxx')
   1228         self.assertEqual(u'{0: <7s}'.format(u'result'), u'result ')
   1229         self.assertEqual(u'{0:<7s}'.format(u'result'), u'result ')
   1230         self.assertEqual(u'{0:>7s}'.format(u'result'), u' result')
   1231         self.assertEqual(u'{0:>8s}'.format(u'result'), u'  result')
   1232         self.assertEqual(u'{0:^8s}'.format(u'result'), u' result ')
   1233         self.assertEqual(u'{0:^9s}'.format(u'result'), u' result  ')
   1234         self.assertEqual(u'{0:^10s}'.format(u'result'), u'  result  ')
   1235         self.assertEqual(u'{0:10000}'.format(u'a'), u'a' + u' ' * 9999)
   1236         self.assertEqual(u'{0:10000}'.format(u''), u' ' * 10000)
   1237         self.assertEqual(u'{0:10000000}'.format(u''), u' ' * 10000000)
   1238 
   1239         # format specifiers for user defined type

   1240         self.assertEqual(u'{0:abc}'.format(C()), u'abc')
   1241 
   1242         # !r and !s coercions

   1243         self.assertEqual(u'{0!s}'.format(u'Hello'), u'Hello')
   1244         self.assertEqual(u'{0!s:}'.format(u'Hello'), u'Hello')
   1245         self.assertEqual(u'{0!s:15}'.format(u'Hello'), u'Hello          ')
   1246         self.assertEqual(u'{0!s:15s}'.format(u'Hello'), u'Hello          ')
   1247         self.assertEqual(u'{0!r}'.format(u'Hello'), u"u'Hello'")
   1248         self.assertEqual(u'{0!r:}'.format(u'Hello'), u"u'Hello'")
   1249         self.assertEqual(u'{0!r}'.format(F(u'Hello')), u'F(Hello)')
   1250 
   1251         # test fallback to object.__format__

   1252         self.assertEqual(u'{0}'.format({}), u'{}')
   1253         self.assertEqual(u'{0}'.format([]), u'[]')
   1254         self.assertEqual(u'{0}'.format([1]), u'[1]')
   1255         self.assertEqual(u'{0}'.format(E(u'data')), u'E(data)')
   1256         self.assertEqual(u'{0:d}'.format(G(u'data')), u'G(data)')
   1257         self.assertEqual(u'{0!s}'.format(G(u'data')), u'string is data')
   1258 
   1259         msg = 'object.__format__ with a non-empty format string is deprecated'
   1260         with test_support.check_warnings((msg, PendingDeprecationWarning)):
   1261             self.assertEqual(u'{0:^10}'.format(E(u'data')), u' E(data)  ')
   1262             self.assertEqual(u'{0:^10s}'.format(E(u'data')), u' E(data)  ')
   1263             self.assertEqual(u'{0:>15s}'.format(G(u'data')), u' string is data')
   1264 
   1265         self.assertEqual(u"{0:date: %Y-%m-%d}".format(I(year=2007,
   1266                                                         month=8,
   1267                                                         day=27)),
   1268                          u"date: 2007-08-27")
   1269 
   1270         # test deriving from a builtin type and overriding __format__

   1271         self.assertEqual(u"{0}".format(J(10)), u"20")
   1272 
   1273 
   1274         # string format specifiers

   1275         self.assertEqual(u'{0:}'.format('a'), u'a')
   1276 
   1277         # computed format specifiers

   1278         self.assertEqual(u"{0:.{1}}".format(u'hello world', 5), u'hello')
   1279         self.assertEqual(u"{0:.{1}s}".format(u'hello world', 5), u'hello')
   1280         self.assertEqual(u"{0:.{precision}s}".format('hello world', precision=5), u'hello')
   1281         self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width=10, precision=5), u'hello     ')
   1282         self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), u'hello     ')
   1283 
   1284         # test various errors

   1285         self.assertRaises(ValueError, u'{'.format)
   1286         self.assertRaises(ValueError, u'}'.format)
   1287         self.assertRaises(ValueError, u'a{'.format)
   1288         self.assertRaises(ValueError, u'a}'.format)
   1289         self.assertRaises(ValueError, u'{a'.format)
   1290         self.assertRaises(ValueError, u'}a'.format)
   1291         self.assertRaises(IndexError, u'{0}'.format)
   1292         self.assertRaises(IndexError, u'{1}'.format, u'abc')
   1293         self.assertRaises(KeyError,   u'{x}'.format)
   1294         self.assertRaises(ValueError, u"}{".format)
   1295         self.assertRaises(ValueError, u"{".format)
   1296         self.assertRaises(ValueError, u"}".format)
   1297         self.assertRaises(ValueError, u"abc{0:{}".format)
   1298         self.assertRaises(ValueError, u"{0".format)
   1299         self.assertRaises(IndexError, u"{0.}".format)
   1300         self.assertRaises(ValueError, u"{0.}".format, 0)
   1301         self.assertRaises(IndexError, u"{0[}".format)
   1302         self.assertRaises(ValueError, u"{0[}".format, [])
   1303         self.assertRaises(KeyError,   u"{0]}".format)
   1304         self.assertRaises(ValueError, u"{0.[]}".format, 0)
   1305         self.assertRaises(ValueError, u"{0..foo}".format, 0)
   1306         self.assertRaises(ValueError, u"{0[0}".format, 0)
   1307         self.assertRaises(ValueError, u"{0[0:foo}".format, 0)
   1308         self.assertRaises(KeyError,   u"{c]}".format)
   1309         self.assertRaises(ValueError, u"{{ {{{0}}".format, 0)
   1310         self.assertRaises(ValueError, u"{0}}".format, 0)
   1311         self.assertRaises(KeyError,   u"{foo}".format, bar=3)
   1312         self.assertRaises(ValueError, u"{0!x}".format, 3)
   1313         self.assertRaises(ValueError, u"{0!}".format, 0)
   1314         self.assertRaises(ValueError, u"{0!rs}".format, 0)
   1315         self.assertRaises(ValueError, u"{!}".format)
   1316         self.assertRaises(IndexError, u"{:}".format)
   1317         self.assertRaises(IndexError, u"{:s}".format)
   1318         self.assertRaises(IndexError, u"{}".format)
   1319         big = u"23098475029384702983476098230754973209482573"
   1320         self.assertRaises(ValueError, (u"{" + big + u"}").format)
   1321         self.assertRaises(ValueError, (u"{[" + big + u"]}").format, [0])
   1322 
   1323         # issue 6089

   1324         self.assertRaises(ValueError, u"{0[0]x}".format, [None])
   1325         self.assertRaises(ValueError, u"{0[0](10)}".format, [None])
   1326 
   1327         # can't have a replacement on the field name portion

   1328         self.assertRaises(TypeError, u'{0[{1}]}'.format, u'abcdefg', 4)
   1329 
   1330         # exceed maximum recursion depth

   1331         self.assertRaises(ValueError, u"{0:{1:{2}}}".format, u'abc', u's', u'')
   1332         self.assertRaises(ValueError, u"{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
   1333                           0, 1, 2, 3, 4, 5, 6, 7)
   1334 
   1335         # string format spec errors

   1336         self.assertRaises(ValueError, u"{0:-s}".format, u'')
   1337         self.assertRaises(ValueError, format, u"", u"-")
   1338         self.assertRaises(ValueError, u"{0:=s}".format, u'')
   1339 
   1340         # test combining string and unicode

   1341         self.assertEqual(u"foo{0}".format('bar'), u'foobar')
   1342         # This will try to convert the argument from unicode to str, which

   1343         #  will succeed

   1344         self.assertEqual("foo{0}".format(u'bar'), 'foobar')
   1345         # This will try to convert the argument from unicode to str, which

   1346         #  will fail

   1347         self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar')
   1348 
   1349     def test_format_auto_numbering(self):
   1350         class C:
   1351             def __init__(self, x=100):
   1352                 self._x = x
   1353             def __format__(self, spec):
   1354                 return spec
   1355 
   1356         self.assertEqual(u'{}'.format(10), u'10')
   1357         self.assertEqual(u'{:5}'.format('s'), u's    ')
   1358         self.assertEqual(u'{!r}'.format('s'), u"'s'")
   1359         self.assertEqual(u'{._x}'.format(C(10)), u'10')
   1360         self.assertEqual(u'{[1]}'.format([1, 2]), u'2')
   1361         self.assertEqual(u'{[a]}'.format({'a':4, 'b':2}), u'4')
   1362         self.assertEqual(u'a{}b{}c'.format(0, 1), u'a0b1c')
   1363 
   1364         self.assertEqual(u'a{:{}}b'.format('x', '^10'), u'a    x     b')
   1365         self.assertEqual(u'a{:{}x}b'.format(20, '#'), u'a0x14b')
   1366 
   1367         # can't mix and match numbering and auto-numbering

   1368         self.assertRaises(ValueError, u'{}{1}'.format, 1, 2)
   1369         self.assertRaises(ValueError, u'{1}{}'.format, 1, 2)
   1370         self.assertRaises(ValueError, u'{:{1}}'.format, 1, 2)
   1371         self.assertRaises(ValueError, u'{0:{}}'.format, 1, 2)
   1372 
   1373         # can mix and match auto-numbering and named

   1374         self.assertEqual(u'{f}{}'.format(4, f='test'), u'test4')
   1375         self.assertEqual(u'{}{f}'.format(4, f='test'), u'4test')
   1376         self.assertEqual(u'{:{f}}{g}{}'.format(1, 3, g='g', f=2), u' 1g3')
   1377         self.assertEqual(u'{f:{}}{}{g}'.format(2, 4, f=1, g='g'), u' 14g')
   1378 
   1379     def test_raiseMemError(self):
   1380         # Ensure that the freelist contains a consistent object, even

   1381         # when a string allocation fails with a MemoryError.

   1382         # This used to crash the interpreter,

   1383         # or leak references when the number was smaller.

   1384         charwidth = 4 if sys.maxunicode >= 0x10000 else 2
   1385         # Note: sys.maxsize is half of the actual max allocation because of

   1386         # the signedness of Py_ssize_t.

   1387         alloc = lambda: u"a" * (sys.maxsize // charwidth * 2)
   1388         self.assertRaises(MemoryError, alloc)
   1389         self.assertRaises(MemoryError, alloc)
   1390 
   1391     def test_format_subclass(self):
   1392         class U(unicode):
   1393             def __unicode__(self):
   1394                 return u'__unicode__ overridden'
   1395         u = U(u'xxx')
   1396         self.assertEqual("%s" % u, u'__unicode__ overridden')
   1397         self.assertEqual("{}".format(u), '__unicode__ overridden')
   1398 
   1399 
   1400 def test_main():
   1401     test_support.run_unittest(__name__)
   1402 
   1403 if __name__ == "__main__":
   1404     test_main()
   1405