# From the CPython 2 test suite: Lib/test/test_codecs.py (partial view).
      1 from test import test_support
      2 import unittest
      3 import codecs
      4 import locale
      5 import sys, StringIO
      6 
      7 def coding_checker(self, coder):
      8     def check(input, expect):
      9         self.assertEqual(coder(input), (expect, len(input)))
     10     return check
     11 
     12 class Queue(object):
     13     """
     14     queue: write bytes at one end, read bytes from the other end
     15     """
     16     def __init__(self):
     17         self._buffer = ""
     18 
     19     def write(self, chars):
     20         self._buffer += chars
     21 
     22     def read(self, size=-1):
     23         if size<0:
     24             s = self._buffer
     25             self._buffer = ""
     26             return s
     27         else:
     28             s = self._buffer[:size]
     29             self._buffer = self._buffer[size:]
     30             return s
     31 
class ReadTest(unittest.TestCase):
    """Decoding tests shared by all stateful codecs.

    Mix-in: concrete subclasses must define an ``encoding`` class
    attribute naming the codec under test.
    """

    def check_partial(self, input, partialresults):
        """Feed the encoded form of *input* one byte at a time.

        ``partialresults[i]`` is the total decoded text expected after
        the first ``i + 1`` bytes have been seen.  The same expectation
        is checked against a StreamReader, an incremental decoder, the
        incremental decoder after reset(), and codecs.iterdecode().
        """
        # get a StreamReader for the encoding and feed the bytestring version
        # of input to the reader byte by byte. Read everything available from
        # the StreamReader and check that the results equal the appropriate
        # entries from partialresults.
        q = Queue()
        r = codecs.getreader(self.encoding)(q)
        result = u""
        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
            q.write(c)
            result += r.read()
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(r.read(), u"")
        self.assertEqual(r.bytebuffer, "")
        self.assertEqual(r.charbuffer, u"")

        # do the check again, this time using an incremental decoder
        d = codecs.getincrementaldecoder(self.encoding)()
        result = u""
        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
            result += d.decode(c)
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(d.decode("", True), u"")
        self.assertEqual(d.buffer, "")

        # Check whether the reset method works properly
        d.reset()
        result = u""
        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
            result += d.decode(c)
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
        self.assertEqual(d.decode("", True), u"")
        self.assertEqual(d.buffer, "")

        # check iterdecode()
        encoded = input.encode(self.encoding)
        self.assertEqual(
            input,
            u"".join(codecs.iterdecode(encoded, self.encoding))
        )

    def test_readline(self):
        """readline() must honour \\n, \\r\\n, \\r and U+2028 line ends,
        with and without keepends, and across multiple internal reads."""
        def getreader(input):
            stream = StringIO.StringIO(input.encode(self.encoding))
            return codecs.getreader(self.encoding)(stream)

        def readalllines(input, keepends=True, size=None):
            # Join all lines with "|" so missing/extra line breaks show up.
            reader = getreader(input)
            lines = []
            while True:
                line = reader.readline(size=size, keepends=keepends)
                if not line:
                    break
                lines.append(line)
            return "|".join(lines)

        s = u"foo\nbar\r\nbaz\rspam\u2028eggs"
        sexpected = u"foo\n|bar\r\n|baz\r|spam\u2028|eggs"
        sexpectednoends = u"foo|bar|baz|spam|eggs"
        self.assertEqual(readalllines(s, True), sexpected)
        self.assertEqual(readalllines(s, False), sexpectednoends)
        self.assertEqual(readalllines(s, True, 10), sexpected)
        self.assertEqual(readalllines(s, False, 10), sexpectednoends)

        lineends = ("\n", "\r\n", "\r", u"\u2028")
        # Test long lines (multiple calls to read() in readline())
        vw = []
        vwo = []
        for (i, lineend) in enumerate(lineends):
            vw.append((i*200+200)*u"\u3042" + lineend)
            vwo.append((i*200+200)*u"\u3042")
        self.assertEqual(readalllines("".join(vw), True), "|".join(vw))
        self.assertEqual(readalllines("".join(vw), False), "|".join(vwo))

        # Test lines where the first read might end with \r, so the
        # reader has to look ahead whether this is a lone \r or a \r\n
        for size in xrange(80):
            for lineend in lineends:
                s = 10*(size*u"a" + lineend + u"xxx\n")
                reader = getreader(s)
                for i in xrange(10):
                    self.assertEqual(
                        reader.readline(keepends=True),
                        size*u"a" + lineend,
                    )
                    self.assertEqual(
                        reader.readline(keepends=True),
                        "xxx\n",
                    )
                reader = getreader(s)
                for i in xrange(10):
                    self.assertEqual(
                        reader.readline(keepends=False),
                        size*u"a",
                    )
                    self.assertEqual(
                        reader.readline(keepends=False),
                        "xxx",
                    )

    def test_mixed_readline_and_read(self):
        """Mixing readline()/read()/readlines() must not lose or
        duplicate buffered data (issues #8260, #32110, #16636, #12446)."""
        lines = ["Humpty Dumpty sat on a wall,\n",
                 "Humpty Dumpty had a great fall.\r\n",
                 "All the king's horses and all the king's men\r",
                 "Couldn't put Humpty together again."]
        data = ''.join(lines)
        def getreader():
            stream = StringIO.StringIO(data.encode(self.encoding))
            return codecs.getreader(self.encoding)(stream)

        # Issue #8260: Test readline() followed by read()
        f = getreader()
        self.assertEqual(f.readline(), lines[0])
        self.assertEqual(f.read(), ''.join(lines[1:]))
        self.assertEqual(f.read(), '')

        # Issue #32110: Test readline() followed by read(n)
        f = getreader()
        self.assertEqual(f.readline(), lines[0])
        self.assertEqual(f.read(1), lines[1][0])
        self.assertEqual(f.read(0), '')
        self.assertEqual(f.read(100), data[len(lines[0]) + 1:][:100])

        # Issue #16636: Test readline() followed by readlines()
        f = getreader()
        self.assertEqual(f.readline(), lines[0])
        self.assertEqual(f.readlines(), lines[1:])
        self.assertEqual(f.read(), '')

        # Test read(n) followed by read()
        f = getreader()
        self.assertEqual(f.read(size=40, chars=5), data[:5])
        self.assertEqual(f.read(), data[5:])
        self.assertEqual(f.read(), '')

        # Issue #32110: Test read(n) followed by read(n)
        f = getreader()
        self.assertEqual(f.read(size=40, chars=5), data[:5])
        self.assertEqual(f.read(1), data[5])
        self.assertEqual(f.read(0), '')
        self.assertEqual(f.read(100), data[6:106])

        # Issue #12446: Test read(n) followed by readlines()
        f = getreader()
        self.assertEqual(f.read(size=40, chars=5), data[:5])
        self.assertEqual(f.readlines(), [lines[0][5:]] + lines[1:])
        self.assertEqual(f.read(), '')

    def test_bug1175396(self):
        """Iterating a StreamReader over realistic multi-line data must
        yield exactly the original lines (bug #1175396)."""
        s = [
            '<%!--===================================================\r\n',
            '    BLOG index page: show recent articles,\r\n',
            '    today\'s articles, or articles of a specific date.\r\n',
            '========================================================--%>\r\n',
            '<%@inputencoding="ISO-8859-1"%>\r\n',
            '<%@pagetemplate=TEMPLATE.y%>\r\n',
            '<%@import=import frog.util, frog%>\r\n',
            '<%@import=import frog.objects%>\r\n',
            '<%@import=from frog.storageerrors import StorageError%>\r\n',
            '<%\r\n',
            '\r\n',
            'import logging\r\n',
            'log=logging.getLogger("Snakelets.logger")\r\n',
            '\r\n',
            '\r\n',
            'user=self.SessionCtx.user\r\n',
            'storageEngine=self.SessionCtx.storageEngine\r\n',
            '\r\n',
            '\r\n',
            'def readArticlesFromDate(date, count=None):\r\n',
            '    entryids=storageEngine.listBlogEntries(date)\r\n',
            '    entryids.reverse() # descending\r\n',
            '    if count:\r\n',
            '        entryids=entryids[:count]\r\n',
            '    try:\r\n',
            '        return [ frog.objects.BlogEntry.load(storageEngine, date, Id) for Id in entryids ]\r\n',
            '    except StorageError,x:\r\n',
            '        log.error("Error loading articles: "+str(x))\r\n',
            '        self.abort("cannot load articles")\r\n',
            '\r\n',
            'showdate=None\r\n',
            '\r\n',
            'arg=self.Request.getArg()\r\n',
            'if arg=="today":\r\n',
            '    #-------------------- TODAY\'S ARTICLES\r\n',
            '    self.write("<h2>Today\'s articles</h2>")\r\n',
            '    showdate = frog.util.isodatestr() \r\n',
            '    entries = readArticlesFromDate(showdate)\r\n',
            'elif arg=="active":\r\n',
            '    #-------------------- ACTIVE ARTICLES redirect\r\n',
            '    self.Yredirect("active.y")\r\n',
            'elif arg=="login":\r\n',
            '    #-------------------- LOGIN PAGE redirect\r\n',
            '    self.Yredirect("login.y")\r\n',
            'elif arg=="date":\r\n',
            '    #-------------------- ARTICLES OF A SPECIFIC DATE\r\n',
            '    showdate = self.Request.getParameter("date")\r\n',
            '    self.write("<h2>Articles written on %s</h2>"% frog.util.mediumdatestr(showdate))\r\n',
            '    entries = readArticlesFromDate(showdate)\r\n',
            'else:\r\n',
            '    #-------------------- RECENT ARTICLES\r\n',
            '    self.write("<h2>Recent articles</h2>")\r\n',
            '    dates=storageEngine.listBlogEntryDates()\r\n',
            '    if dates:\r\n',
            '        entries=[]\r\n',
            '        SHOWAMOUNT=10\r\n',
            '        for showdate in dates:\r\n',
            '            entries.extend( readArticlesFromDate(showdate, SHOWAMOUNT-len(entries)) )\r\n',
            '            if len(entries)>=SHOWAMOUNT:\r\n',
            '                break\r\n',
            '                \r\n',
        ]
        stream = StringIO.StringIO("".join(s).encode(self.encoding))
        reader = codecs.getreader(self.encoding)(stream)
        for (i, line) in enumerate(reader):
            self.assertEqual(line, s[i])

    def test_readlinequeue(self):
        """readline() must cope with data arriving in pieces, including a
        line break split across writes (lone \\r followed by \\n)."""
        q = Queue()
        writer = codecs.getwriter(self.encoding)(q)
        reader = codecs.getreader(self.encoding)(q)

        # No lineends
        writer.write(u"foo\r")
        self.assertEqual(reader.readline(keepends=False), u"foo")
        writer.write(u"\nbar\r")
        self.assertEqual(reader.readline(keepends=False), u"")
        self.assertEqual(reader.readline(keepends=False), u"bar")
        writer.write(u"baz")
        self.assertEqual(reader.readline(keepends=False), u"baz")
        self.assertEqual(reader.readline(keepends=False), u"")

        # Lineends
        writer.write(u"foo\r")
        self.assertEqual(reader.readline(keepends=True), u"foo\r")
        writer.write(u"\nbar\r")
        self.assertEqual(reader.readline(keepends=True), u"\n")
        self.assertEqual(reader.readline(keepends=True), u"bar\r")
        writer.write(u"baz")
        self.assertEqual(reader.readline(keepends=True), u"baz")
        self.assertEqual(reader.readline(keepends=True), u"")
        writer.write(u"foo\r\n")
        self.assertEqual(reader.readline(keepends=True), u"foo\r\n")

    def test_bug1098990_a(self):
        """Long lines followed by short ones must decode cleanly
        (bug #1098990, case a)."""
        s1 = u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
        s2 = u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
        s3 = u"next line.\r\n"

        s = (s1+s2+s3).encode(self.encoding)
        stream = StringIO.StringIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), u"")

    def test_bug1098990_b(self):
        """Lines straddling internal read-chunk boundaries must decode
        cleanly (bug #1098990, case b)."""
        s1 = u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
        s2 = u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
        s3 = u"stillokay:bbbbxx\r\n"
        s4 = u"broken!!!!badbad\r\n"
        s5 = u"againokay.\r\n"

        s = (s1+s2+s3+s4+s5).encode(self.encoding)
        stream = StringIO.StringIO(s)
        reader = codecs.getreader(self.encoding)(stream)
        self.assertEqual(reader.readline(), s1)
        self.assertEqual(reader.readline(), s2)
        self.assertEqual(reader.readline(), s3)
        self.assertEqual(reader.readline(), s4)
        self.assertEqual(reader.readline(), s5)
        self.assertEqual(reader.readline(), u"")
    309 
class UTF32Test(ReadTest):
    """Tests for the byte-order-detecting "utf-32" codec (with BOM)."""
    encoding = "utf-32"

    # "spamspam" encoded with exactly one little-/big-endian BOM prefix.
    spamle = ('\xff\xfe\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
    spambe = ('\x00\x00\xfe\xff'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')

    def test_only_one_bom(self):
        """Writing in two chunks must emit exactly one BOM, and the
        result must round-trip through the matching reader."""
        _,_,reader,writer = codecs.lookup(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        """A stream starting with an invalid BOM must raise UnicodeError."""
        s = StringIO.StringIO(4*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO(8*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        """Byte-at-a-time decoding: nothing is produced until each full
        4-byte unit (after the 4-byte BOM) has arrived."""
        self.check_partial(
            u"\x00\xff\u0100\uffff\U00010000",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read
                u"", # third byte of BOM read
                u"", # fourth byte of BOM read => byteorder known
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff\U00010000",
            ]
        )

    def test_handlers(self):
        """'replace' and 'ignore' handlers must consume a truncated unit."""
        self.assertEqual((u'\ufffd', 1),
                         codecs.utf_32_decode('\x01', 'replace', True))
        self.assertEqual((u'', 1),
                         codecs.utf_32_decode('\x01', 'ignore', True))

    def test_errors(self):
        """'strict' decoding of a truncated unit must raise."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
                          "\xff", "strict", True)

    def test_issue8941(self):
        # Issue #8941: insufficient result allocation when decoding into
        # surrogate pairs on UCS-2 builds.
        encoded_le = '\xff\xfe\x00\x00' + '\x00\x00\x01\x00' * 1024
        self.assertEqual(u'\U00010000' * 1024,
                         codecs.utf_32_decode(encoded_le)[0])
        encoded_be = '\x00\x00\xfe\xff' + '\x00\x01\x00\x00' * 1024
        self.assertEqual(u'\U00010000' * 1024,
                         codecs.utf_32_decode(encoded_be)[0])
    394 
class UTF32LETest(ReadTest):
    """Tests for the explicit little-endian UTF-32 codec (no BOM)."""
    encoding = "utf-32-le"

    def test_partial(self):
        """Byte-at-a-time decoding: output appears only on every 4th byte."""
        self.check_partial(
            u"\x00\xff\u0100\uffff\U00010000",
            [
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff\U00010000",
            ]
        )

    def test_simple(self):
        """Little-endian byte order: least significant byte first."""
        self.assertEqual(u"\U00010203".encode(self.encoding), "\x03\x02\x01\x00")

    def test_errors(self):
        """'strict' decoding of a truncated unit must raise."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
                          "\xff", "strict", True)

    def test_issue8941(self):
        # Issue #8941: insufficient result allocation when decoding into
        # surrogate pairs on UCS-2 builds.
        encoded = '\x00\x00\x01\x00' * 1024
        self.assertEqual(u'\U00010000' * 1024,
                         codecs.utf_32_le_decode(encoded)[0])
    438 
class UTF32BETest(ReadTest):
    """Tests for the explicit big-endian UTF-32 codec (no BOM)."""
    encoding = "utf-32-be"

    def test_partial(self):
        """Byte-at-a-time decoding: output appears only on every 4th byte."""
        self.check_partial(
            u"\x00\xff\u0100\uffff\U00010000",
            [
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff\U00010000",
            ]
        )

    def test_simple(self):
        """Big-endian byte order: most significant byte first."""
        self.assertEqual(u"\U00010203".encode(self.encoding), "\x00\x01\x02\x03")

    def test_errors(self):
        """'strict' decoding of a truncated unit must raise."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
                          "\xff", "strict", True)

    def test_issue8941(self):
        # Issue #8941: insufficient result allocation when decoding into
        # surrogate pairs on UCS-2 builds.
        encoded = '\x00\x01\x00\x00' * 1024
        self.assertEqual(u'\U00010000' * 1024,
                         codecs.utf_32_be_decode(encoded)[0])
    482 
    483 
class UTF16Test(ReadTest):
    """Tests for the byte-order-detecting "utf-16" codec (with BOM)."""
    encoding = "utf-16"

    # "spamspam" encoded with exactly one little-/big-endian BOM prefix.
    spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
    spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'

    def test_only_one_bom(self):
        """Writing in two chunks must emit exactly one BOM, and the
        result must round-trip through the matching reader."""
        _,_,reader,writer = codecs.lookup(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        """A stream starting with an invalid BOM must raise UnicodeError."""
        s = StringIO.StringIO("\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO("\xff\xff\xff\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        """Byte-at-a-time decoding: nothing is produced until each full
        2-byte unit (after the 2-byte BOM) has arrived; the surrogate
        pair for U+10000 only appears once all four bytes are in."""
        self.check_partial(
            u"\x00\xff\u0100\uffff\U00010000",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read => byteorder known
                u"",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff\U00010000",
            ]
        )

    def test_handlers(self):
        """'replace' and 'ignore' handlers must consume a truncated unit."""
        self.assertEqual((u'\ufffd', 1),
                         codecs.utf_16_decode('\x01', 'replace', True))
        self.assertEqual((u'', 1),
                         codecs.utf_16_decode('\x01', 'ignore', True))

    def test_errors(self):
        """'strict' decoding of a truncated unit must raise."""
        self.assertRaises(UnicodeDecodeError, codecs.utf_16_decode, "\xff", "strict", True)

    def test_bug691291(self):
        # Files are always opened in binary mode, even if no binary mode was
        # specified.  This means that no automatic conversion of '\n' is done
        # on reading and writing.
        s1 = u'Hello\r\nworld\r\n'

        s = s1.encode(self.encoding)
        self.addCleanup(test_support.unlink, test_support.TESTFN)
        with open(test_support.TESTFN, 'wb') as fp:
            fp.write(s)
        with codecs.open(test_support.TESTFN, 'U', encoding=self.encoding) as reader:
            self.assertEqual(reader.read(), s1)
    556 
class UTF16LETest(ReadTest):
    """Tests for the explicit little-endian UTF-16 codec (no BOM)."""
    encoding = "utf-16-le"

    def test_partial(self):
        """Byte-at-a-time decoding: output appears only on every 2nd byte."""
        self.check_partial(
            u"\x00\xff\u0100\uffff\U00010000",
            [
                u"",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff\U00010000",
            ]
        )

    def test_errors(self):
        """Malformed input (odd length, lone/unpaired surrogates) must
        raise under 'strict' and map to U+FFFD under 'replace'."""
        # Each pair: raw malformed bytes -> expected 'replace' output.
        tests = [
            (b'\xff', u'\ufffd'),
            (b'A\x00Z', u'A\ufffd'),
            (b'A\x00B\x00C\x00D\x00Z', u'ABCD\ufffd'),
            (b'\x00\xd8', u'\ufffd'),
            (b'\x00\xd8A', u'\ufffd'),
            (b'\x00\xd8A\x00', u'\ufffdA'),
            (b'\x00\xdcA\x00', u'\ufffdA'),
        ]
        for raw, expected in tests:
            try:
                with self.assertRaises(UnicodeDecodeError):
                    codecs.utf_16_le_decode(raw, 'strict', True)
                self.assertEqual(raw.decode('utf-16le', 'replace'), expected)
            except:
                # Report which input failed before re-raising.
                print 'raw=%r' % raw
                raise
    597 
class UTF16BETest(ReadTest):
    """Tests for the explicit big-endian UTF-16 codec (no BOM)."""
    encoding = "utf-16-be"

    def test_partial(self):
        """Byte-at-a-time decoding: output appears only on every 2nd byte."""
        self.check_partial(
            u"\x00\xff\u0100\uffff\U00010000",
            [
                u"",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff",
                u"\x00\xff\u0100\uffff\U00010000",
            ]
        )

    def test_errors(self):
        """Malformed input (odd length, lone/unpaired surrogates) must
        raise under 'strict' and map to U+FFFD under 'replace'."""
        # Each pair: raw malformed bytes -> expected 'replace' output.
        tests = [
            (b'\xff', u'\ufffd'),
            (b'\x00A\xff', u'A\ufffd'),
            (b'\x00A\x00B\x00C\x00DZ', u'ABCD\ufffd'),
            (b'\xd8\x00', u'\ufffd'),
            (b'\xd8\x00\xdc', u'\ufffd'),
            (b'\xd8\x00\x00A', u'\ufffdA'),
            (b'\xdc\x00\x00A', u'\ufffdA'),
        ]
        for raw, expected in tests:
            try:
                with self.assertRaises(UnicodeDecodeError):
                    codecs.utf_16_be_decode(raw, 'strict', True)
                self.assertEqual(raw.decode('utf-16be', 'replace'), expected)
            except:
                # Report which input failed before re-raising.
                print 'raw=%r' % raw
                raise
    638 
class UTF8Test(ReadTest):
    """Tests for the UTF-8 codec."""
    encoding = "utf-8"

    def test_partial(self):
        """Byte-at-a-time decoding: each character appears only once its
        full 1-, 2-, 3- or 4-byte sequence has arrived."""
        self.check_partial(
            u"\x00\xff\u07ff\u0800\uffff\U00010000",
            [
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u07ff",
                u"\x00\xff\u07ff",
                u"\x00\xff\u07ff",
                u"\x00\xff\u07ff\u0800",
                u"\x00\xff\u07ff\u0800",
                u"\x00\xff\u07ff\u0800",
                u"\x00\xff\u07ff\u0800\uffff",
                u"\x00\xff\u07ff\u0800\uffff",
                u"\x00\xff\u07ff\u0800\uffff",
                u"\x00\xff\u07ff\u0800\uffff",
                u"\x00\xff\u07ff\u0800\uffff\U00010000",
            ]
        )
    663 
    664 class UTF7Test(ReadTest):
    665     encoding = "utf-7"
    666 
    def test_ascii(self):
        """ASCII handling per RFC 2152: set D and set O pass through
        directly, '+' escapes as '+-', and the remaining ASCII characters
        are base64-encoded in a single '+...-' run."""
        # Set D (directly encoded characters)
        set_d = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                 'abcdefghijklmnopqrstuvwxyz'
                 '0123456789'
                 '\'(),-./:?')
        self.assertEqual(set_d.encode(self.encoding), set_d)
        self.assertEqual(set_d.decode(self.encoding), set_d)
        # Set O (optional direct characters)
        set_o = ' !"#$%&*;<=>@[]^_`{|}'
        self.assertEqual(set_o.encode(self.encoding), set_o)
        self.assertEqual(set_o.decode(self.encoding), set_o)
        # +
        self.assertEqual(u'a+b'.encode(self.encoding), 'a+-b')
        self.assertEqual('a+-b'.decode(self.encoding), u'a+b')
        # White spaces
        ws = ' \t\n\r'
        self.assertEqual(ws.encode(self.encoding), ws)
        self.assertEqual(ws.decode(self.encoding), ws)
        # Other ASCII characters
        other_ascii = ''.join(sorted(set(chr(i) for i in range(0x80)) -
                                     set(set_d + set_o + '+' + ws)))
        self.assertEqual(other_ascii.encode(self.encoding),
                         '+AAAAAQACAAMABAAFAAYABwAIAAsADAAOAA8AEAARABIAEwAU'
                         'ABUAFgAXABgAGQAaABsAHAAdAB4AHwBcAH4Afw-')
    692 
    def test_partial(self):
        """Byte-at-a-time decoding of the escaped-plus form 'a+-b'."""
        self.check_partial(
            u"a+-b",
            [
                u"a",
                u"a",
                u"a+",
                u"a+-",
                u"a+-b",
            ]
        )
    704 
    def test_errors(self):
        """Malformed UTF-7 (stray high bytes, truncated or ill-formed
        base64 runs) must raise under 'strict' and become U+FFFD under
        'replace'."""
        # Each pair: raw malformed bytes -> expected 'replace' output.
        tests = [
            ('\xe1b', u'\ufffdb'),
            ('a\xe1b', u'a\ufffdb'),
            ('a\xe1\xe1b', u'a\ufffd\ufffdb'),
            ('a+IK', u'a\ufffd'),
            ('a+IK-b', u'a\ufffdb'),
            ('a+IK,b', u'a\ufffdb'),
            ('a+IKx', u'a\u20ac\ufffd'),
            ('a+IKx-b', u'a\u20ac\ufffdb'),
            ('a+IKwgr', u'a\u20ac\ufffd'),
            ('a+IKwgr-b', u'a\u20ac\ufffdb'),
            ('a+IKwgr,', u'a\u20ac\ufffd'),
            ('a+IKwgr,-b', u'a\u20ac\ufffd-b'),
            ('a+IKwgrB', u'a\u20ac\u20ac\ufffd'),
            ('a+IKwgrB-b', u'a\u20ac\u20ac\ufffdb'),
            ('a+/,+IKw-b', u'a\ufffd\u20acb'),
            ('a+//,+IKw-b', u'a\ufffd\u20acb'),
            ('a+///,+IKw-b', u'a\uffff\ufffd\u20acb'),
            ('a+////,+IKw-b', u'a\uffff\ufffd\u20acb'),
            ('a+IKw-b\xe1', u'a\u20acb\ufffd'),
            ('a+IKw\xe1b', u'a\u20ac\ufffdb'),
        ]
        for raw, expected in tests:
            try:
                with self.assertRaises(UnicodeDecodeError):
                    codecs.utf_7_decode(raw, 'strict', True)
                self.assertEqual(raw.decode('utf-7', 'replace'), expected)
            except:
                # Report which input failed before re-raising.
                print 'raw=%r' % raw
                raise
    736 
    def test_nonbmp(self):
        # Characters outside the BMP are encoded as UTF-16 surrogate pairs
        # inside the base64 run; decoding accepts the run with and without
        # the explicit '-' terminator.
        enc = self.encoding
        self.assertEqual(u'\U000104A0'.encode(enc), '+2AHcoA-')
        # An explicit surrogate pair encodes identically.
        self.assertEqual(u'\ud801\udca0'.encode(enc), '+2AHcoA-')
        for tail in ('-', ''):
            self.assertEqual(('+2AHcoA' + tail).decode(enc), u'\U000104A0')
        self.assertEqual(u'\u20ac\U000104A0'.encode(enc), '+IKzYAdyg-')
        for tail in ('-', ''):
            self.assertEqual(('+IKzYAdyg' + tail).decode(enc),
                             u'\u20ac\U000104A0')
        self.assertEqual(u'\u20ac\u20ac\U000104A0'.encode(enc),
                         '+IKwgrNgB3KA-')
        for tail in ('-', ''):
            self.assertEqual(('+IKwgrNgB3KA' + tail).decode(enc),
                             u'\u20ac\u20ac\U000104A0')
    751 
    752     def test_lone_surrogates(self):
    753         tests = [
    754             ('a+2AE-b', u'a\ud801b'),
    755             ('a+2AE\xe1b', u'a\ufffdb'),
    756             ('a+2AE', u'a\ufffd'),
    757             ('a+2AEA-b', u'a\ufffdb'),
    758             ('a+2AH-b', u'a\ufffdb'),
    759             ('a+IKzYAQ-b', u'a\u20ac\ud801b'),
    760             ('a+IKzYAQ\xe1b', u'a\u20ac\ufffdb'),
    761             ('a+IKzYAQA-b', u'a\u20ac\ufffdb'),
    762             ('a+IKzYAd-b', u'a\u20ac\ufffdb'),
    763             ('a+IKwgrNgB-b', u'a\u20ac\u20ac\ud801b'),
    764             ('a+IKwgrNgB\xe1b', u'a\u20ac\u20ac\ufffdb'),
    765             ('a+IKwgrNgB', u'a\u20ac\u20ac\ufffd'),
    766             ('a+IKwgrNgBA-b', u'a\u20ac\u20ac\ufffdb'),
    767         ]
    768         for raw, expected in tests:
    769             try:
    770                 self.assertEqual(raw.decode('utf-7', 'replace'), expected)
    771             except:
    772                 print 'raw=%r' % raw
    773                 raise
    774 
    775 class UTF16ExTest(unittest.TestCase):
    776 
    777     def test_errors(self):
    778         self.assertRaises(UnicodeDecodeError, codecs.utf_16_ex_decode, "\xff", "strict", 0, True)
    779 
    780     def test_bad_args(self):
    781         self.assertRaises(TypeError, codecs.utf_16_ex_decode)
    782 
    783 class ReadBufferTest(unittest.TestCase):
    784 
    785     def test_array(self):
    786         import array
    787         self.assertEqual(
    788             codecs.readbuffer_encode(array.array("c", "spam")),
    789             ("spam", 4)
    790         )
    791 
    792     def test_empty(self):
    793         self.assertEqual(codecs.readbuffer_encode(""), ("", 0))
    794 
    795     def test_bad_args(self):
    796         self.assertRaises(TypeError, codecs.readbuffer_encode)
    797         self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
    798 
    799 class CharBufferTest(unittest.TestCase):
    800 
    801     def test_string(self):
    802         self.assertEqual(codecs.charbuffer_encode("spam"), ("spam", 4))
    803 
    804     def test_empty(self):
    805         self.assertEqual(codecs.charbuffer_encode(""), ("", 0))
    806 
    807     def test_bad_args(self):
    808         self.assertRaises(TypeError, codecs.charbuffer_encode)
    809         self.assertRaises(TypeError, codecs.charbuffer_encode, 42)
    810 
    811 class UTF8SigTest(ReadTest):
    812     encoding = "utf-8-sig"
    813 
    814     def test_partial(self):
    815         self.check_partial(
    816             u"\ufeff\x00\xff\u07ff\u0800\uffff\U00010000",
    817             [
    818                 u"",
    819                 u"",
    820                 u"", # First BOM has been read and skipped
    821                 u"",
    822                 u"",
    823                 u"\ufeff", # Second BOM has been read and emitted
    824                 u"\ufeff\x00", # "\x00" read and emitted
    825                 u"\ufeff\x00", # First byte of encoded u"\xff" read
    826                 u"\ufeff\x00\xff", # Second byte of encoded u"\xff" read
    827                 u"\ufeff\x00\xff", # First byte of encoded u"\u07ff" read
    828                 u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read
    829                 u"\ufeff\x00\xff\u07ff",
    830                 u"\ufeff\x00\xff\u07ff",
    831                 u"\ufeff\x00\xff\u07ff\u0800",
    832                 u"\ufeff\x00\xff\u07ff\u0800",
    833                 u"\ufeff\x00\xff\u07ff\u0800",
    834                 u"\ufeff\x00\xff\u07ff\u0800\uffff",
    835                 u"\ufeff\x00\xff\u07ff\u0800\uffff",
    836                 u"\ufeff\x00\xff\u07ff\u0800\uffff",
    837                 u"\ufeff\x00\xff\u07ff\u0800\uffff",
    838                 u"\ufeff\x00\xff\u07ff\u0800\uffff\U00010000",
    839             ]
    840         )
    841 
    842     def test_bug1601501(self):
    843         # SF bug #1601501: check that the codec works with a buffer
    844         unicode("\xef\xbb\xbf", "utf-8-sig")
    845 
    846     def test_bom(self):
    847         d = codecs.getincrementaldecoder("utf-8-sig")()
    848         s = u"spam"
    849         self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
    850 
    851     def test_stream_bom(self):
    852         unistring = u"ABC\u00A1\u2200XYZ"
    853         bytestring = codecs.BOM_UTF8 + "ABC\xC2\xA1\xE2\x88\x80XYZ"
    854 
    855         reader = codecs.getreader("utf-8-sig")
    856         for sizehint in [None] + range(1, 11) + \
    857                         [64, 128, 256, 512, 1024]:
    858             istream = reader(StringIO.StringIO(bytestring))
    859             ostream = StringIO.StringIO()
    860             while 1:
    861                 if sizehint is not None:
    862                     data = istream.read(sizehint)
    863                 else:
    864                     data = istream.read()
    865 
    866                 if not data:
    867                     break
    868                 ostream.write(data)
    869 
    870             got = ostream.getvalue()
    871             self.assertEqual(got, unistring)
    872 
    873     def test_stream_bare(self):
    874         unistring = u"ABC\u00A1\u2200XYZ"
    875         bytestring = "ABC\xC2\xA1\xE2\x88\x80XYZ"
    876 
    877         reader = codecs.getreader("utf-8-sig")
    878         for sizehint in [None] + range(1, 11) + \
    879                         [64, 128, 256, 512, 1024]:
    880             istream = reader(StringIO.StringIO(bytestring))
    881             ostream = StringIO.StringIO()
    882             while 1:
    883                 if sizehint is not None:
    884                     data = istream.read(sizehint)
    885                 else:
    886                     data = istream.read()
    887 
    888                 if not data:
    889                     break
    890                 ostream.write(data)
    891 
    892             got = ostream.getvalue()
    893             self.assertEqual(got, unistring)
    894 
    895 class EscapeDecodeTest(unittest.TestCase):
    896     def test_empty(self):
    897         self.assertEqual(codecs.escape_decode(""), ("", 0))
    898 
    899     def test_raw(self):
    900         decode = codecs.escape_decode
    901         for b in range(256):
    902             b = chr(b)
    903             if b != '\\':
    904                 self.assertEqual(decode(b + '0'), (b + '0', 2))
    905 
    906     def test_escape(self):
    907         decode = codecs.escape_decode
    908         check = coding_checker(self, decode)
    909         check(b"[\\\n]", b"[]")
    910         check(br'[\"]', b'["]')
    911         check(br"[\']", b"[']")
    912         check(br"[\\]", br"[\]")
    913         check(br"[\a]", b"[\x07]")
    914         check(br"[\b]", b"[\x08]")
    915         check(br"[\t]", b"[\x09]")
    916         check(br"[\n]", b"[\x0a]")
    917         check(br"[\v]", b"[\x0b]")
    918         check(br"[\f]", b"[\x0c]")
    919         check(br"[\r]", b"[\x0d]")
    920         check(br"[\7]", b"[\x07]")
    921         check(br"[\8]", br"[\8]")
    922         check(br"[\78]", b"[\x078]")
    923         check(br"[\41]", b"[!]")
    924         check(br"[\418]", b"[!8]")
    925         check(br"[\101]", b"[A]")
    926         check(br"[\1010]", b"[A0]")
    927         check(br"[\501]", b"[A]")
    928         check(br"[\x41]", b"[A]")
    929         check(br"[\X41]", br"[\X41]")
    930         check(br"[\x410]", b"[A0]")
    931         for b in range(256):
    932             b = chr(b)
    933             if b not in '\n"\'\\abtnvfr01234567x':
    934                 check('\\' + b, '\\' + b)
    935 
    936     def test_errors(self):
    937         decode = codecs.escape_decode
    938         self.assertRaises(ValueError, decode, br"\x")
    939         self.assertRaises(ValueError, decode, br"[\x]")
    940         self.assertEqual(decode(br"[\x]\x", "ignore"), (b"[]", 6))
    941         self.assertEqual(decode(br"[\x]\x", "replace"), (b"[?]?", 6))
    942         self.assertRaises(ValueError, decode, br"\x0")
    943         self.assertRaises(ValueError, decode, br"[\x0]")
    944         self.assertEqual(decode(br"[\x0]\x0", "ignore"), (b"[]", 8))
    945         self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))
    946 
    947 class RecodingTest(unittest.TestCase):
    948     def test_recoding(self):
    949         f = StringIO.StringIO()
    950         f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8")
    951         f2.write(u"a")
    952         f2.close()
    953         # Python used to crash on this at exit because of a refcount
    954         # bug in _codecsmodule.c
    955 
# Punycode sample strings from RFC 3492, section 7.1.
# Each entry is a (unicode, punycode) pair.
punycode_testcases = [
    # (A) Arabic (Egyptian):
    (u"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
     u"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
     "egbpdaj6bu4bxfgehfvwxn"),
    # (B) Chinese (simplified):
    (u"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
     "ihqwcrb4cv8a8dqg056pqjye"),
    # (C) Chinese (traditional):
    (u"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
     "ihqwctvzc91f659drss3x8bo0yb"),
    # (D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
    (u"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
     u"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
     u"\u0065\u0073\u006B\u0079",
     "Proprostnemluvesky-uyb24dma41a"),
    # (E) Hebrew:
    (u"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
     u"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
     u"\u05D1\u05E8\u05D9\u05EA",
     "4dbcagdahymbxekheh6e0a7fei0b"),
    # (F) Hindi (Devanagari):
    (u"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
    u"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
    u"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
    u"\u0939\u0948\u0902",
    "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

    # (G) Japanese (kanji and hiragana):
    (u"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
    u"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
     "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

    # (H) Korean (Hangul syllables):
    (u"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
     u"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
     u"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
     "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j"
     "psd879ccm6fea98c"),

    # (I) Russian (Cyrillic):
    (u"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
     u"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
     u"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
     u"\u0438",
     "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

    # (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
    (u"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
     u"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
     u"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
     u"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
     u"\u0061\u00F1\u006F\u006C",
     "PorqunopuedensimplementehablarenEspaol-fmd56a"),

    # (K) Vietnamese:
    #  T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\
    #   <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
    (u"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
     u"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
     u"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
     u"\u0056\u0069\u1EC7\u0074",
     "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

    # (L) 3<nen>B<gumi><kinpachi><sensei>
    (u"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
     "3B-ww4c5e180e575a65lsy2b"),

    # (M) <amuro><namie>-with-SUPER-MONKEYS
    (u"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
     u"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
     u"\u004F\u004E\u004B\u0045\u0059\u0053",
     "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

    # (N) Hello-Another-Way-<sorezore><no><basho>
    (u"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
     u"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
     u"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
     "Hello-Another-Way--fc4qua05auwb3674vfr0b"),

    # (O) <hitotsu><yane><no><shita>2
    (u"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
     "2-u9tlzr9756bt3uc0v"),

    # (P) Maji<de>Koi<suru>5<byou><mae>
    (u"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
     u"\u308B\u0035\u79D2\u524D",
     "MajiKoi5-783gue6qz075azm5e"),

    # (Q) <pafii>de<runba>
    (u"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
     "de-jg4avhby1noc0d"),

    # (R) <sono><supiido><de>
    (u"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
     "d9juau41awczczp"),

    # (S) -> $1.00 <-
    (u"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
     u"\u003C\u002D",
     "-> $1.00 <--")
    ]
   1059 
   1060 for i in punycode_testcases:
   1061     if len(i)!=2:
   1062         print repr(i)
   1063 
   1064 class PunycodeTest(unittest.TestCase):
   1065     def test_encode(self):
   1066         for uni, puny in punycode_testcases:
   1067             # Need to convert both strings to lower case, since
   1068             # some of the extended encodings use upper case, but our
   1069             # code produces only lower case. Converting just puny to
   1070             # lower is also insufficient, since some of the input characters
   1071             # are upper case.
   1072             self.assertEqual(uni.encode("punycode").lower(), puny.lower())
   1073 
   1074     def test_decode(self):
   1075         for uni, puny in punycode_testcases:
   1076             self.assertEqual(uni, puny.decode("punycode"))
   1077 
   1078 class UnicodeInternalTest(unittest.TestCase):
   1079     def test_bug1251300(self):
   1080         # Decoding with unicode_internal used to not correctly handle "code
   1081         # points" above 0x10ffff on UCS-4 builds.
   1082         if sys.maxunicode > 0xffff:
   1083             ok = [
   1084                 ("\x00\x10\xff\xff", u"\U0010ffff"),
   1085                 ("\x00\x00\x01\x01", u"\U00000101"),
   1086                 ("", u""),
   1087             ]
   1088             not_ok = [
   1089                 "\x7f\xff\xff\xff",
   1090                 "\x80\x00\x00\x00",
   1091                 "\x81\x00\x00\x00",
   1092                 "\x00",
   1093                 "\x00\x00\x00\x00\x00",
   1094             ]
   1095             for internal, uni in ok:
   1096                 if sys.byteorder == "little":
   1097                     internal = "".join(reversed(internal))
   1098                 self.assertEqual(uni, internal.decode("unicode_internal"))
   1099             for internal in not_ok:
   1100                 if sys.byteorder == "little":
   1101                     internal = "".join(reversed(internal))
   1102                 self.assertRaises(UnicodeDecodeError, internal.decode,
   1103                     "unicode_internal")
   1104 
   1105     def test_decode_error_attributes(self):
   1106         if sys.maxunicode > 0xffff:
   1107             try:
   1108                 "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
   1109             except UnicodeDecodeError, ex:
   1110                 self.assertEqual("unicode_internal", ex.encoding)
   1111                 self.assertEqual("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
   1112                 self.assertEqual(4, ex.start)
   1113                 self.assertEqual(8, ex.end)
   1114             else:
   1115                 self.fail()
   1116 
   1117     def test_decode_callback(self):
   1118         if sys.maxunicode > 0xffff:
   1119             codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
   1120             decoder = codecs.getdecoder("unicode_internal")
   1121             ab = u"ab".encode("unicode_internal")
   1122             ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
   1123                 "UnicodeInternalTest")
   1124             self.assertEqual((u"ab", 12), ignored)
   1125 
   1126     def test_encode_length(self):
   1127         # Issue 3739
   1128         encoder = codecs.getencoder("unicode_internal")
   1129         self.assertEqual(encoder(u"a")[1], 1)
   1130         self.assertEqual(encoder(u"\xe9\u0142")[1], 2)
   1131 
   1132         encoder = codecs.getencoder("string-escape")
   1133         self.assertEqual(encoder(r'\x00')[1], 4)
   1134 
# Nameprep test vectors, from
# http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
# Each entry is an (input, expected) pair of UTF-8 byte strings; an
# expected value of None means the input must be rejected, and a
# (None, None) entry marks a deliberately skipped vector.
nameprep_tests = [
    # 3.1 Map to nothing.
    ('foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
     '\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
     '\xb8\x8f\xef\xbb\xbf',
     'foobarbaz'),
    # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
    ('CAFE',
     'cafe'),
    # 3.3 Case folding 8bit U+00DF (german sharp s).
    # The original test case is bogus; it says \xc3\xdf
    ('\xc3\x9f',
     'ss'),
    # 3.4 Case folding U+0130 (turkish capital I with dot).
    ('\xc4\xb0',
     'i\xcc\x87'),
    # 3.5 Case folding multibyte U+0143 U+037A.
    ('\xc5\x83\xcd\xba',
     '\xc5\x84 \xce\xb9'),
    # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
    # XXX: skip this as it fails in UCS-2 mode
    #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
    # 'telc\xe2\x88\x95kg\xcf\x83'),
    (None, None),
    # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
    ('j\xcc\x8c\xc2\xa0\xc2\xaa',
     '\xc7\xb0 a'),
    # 3.8 Case folding U+1FB7 and normalization.
    ('\xe1\xbe\xb7',
     '\xe1\xbe\xb6\xce\xb9'),
    # 3.9 Self-reverting case folding U+01F0 and normalization.
    # The original test case is bogus, it says `\xc7\xf0'
    ('\xc7\xb0',
     '\xc7\xb0'),
    # 3.10 Self-reverting case folding U+0390 and normalization.
    ('\xce\x90',
     '\xce\x90'),
    # 3.11 Self-reverting case folding U+03B0 and normalization.
    ('\xce\xb0',
     '\xce\xb0'),
    # 3.12 Self-reverting case folding U+1E96 and normalization.
    ('\xe1\xba\x96',
     '\xe1\xba\x96'),
    # 3.13 Self-reverting case folding U+1F56 and normalization.
    ('\xe1\xbd\x96',
     '\xe1\xbd\x96'),
    # 3.14 ASCII space character U+0020.
    (' ',
     ' '),
    # 3.15 Non-ASCII 8bit space character U+00A0.
    ('\xc2\xa0',
     ' '),
    # 3.16 Non-ASCII multibyte space character U+1680.
    ('\xe1\x9a\x80',
     None),
    # 3.17 Non-ASCII multibyte space character U+2000.
    ('\xe2\x80\x80',
     ' '),
    # 3.18 Zero Width Space U+200b.
    ('\xe2\x80\x8b',
     ''),
    # 3.19 Non-ASCII multibyte space character U+3000.
    ('\xe3\x80\x80',
     ' '),
    # 3.20 ASCII control characters U+0010 U+007F.
    ('\x10\x7f',
     '\x10\x7f'),
    # 3.21 Non-ASCII 8bit control character U+0085.
    ('\xc2\x85',
     None),
    # 3.22 Non-ASCII multibyte control character U+180E.
    ('\xe1\xa0\x8e',
     None),
    # 3.23 Zero Width No-Break Space U+FEFF.
    ('\xef\xbb\xbf',
     ''),
    # 3.24 Non-ASCII control character U+1D175.
    ('\xf0\x9d\x85\xb5',
     None),
    # 3.25 Plane 0 private use character U+F123.
    ('\xef\x84\xa3',
     None),
    # 3.26 Plane 15 private use character U+F1234.
    ('\xf3\xb1\x88\xb4',
     None),
    # 3.27 Plane 16 private use character U+10F234.
    ('\xf4\x8f\x88\xb4',
     None),
    # 3.28 Non-character code point U+8FFFE.
    ('\xf2\x8f\xbf\xbe',
     None),
    # 3.29 Non-character code point U+10FFFF.
    ('\xf4\x8f\xbf\xbf',
     None),
    # 3.30 Surrogate code U+DF42.
    ('\xed\xbd\x82',
     None),
    # 3.31 Non-plain text character U+FFFD.
    ('\xef\xbf\xbd',
     None),
    # 3.32 Ideographic description character U+2FF5.
    ('\xe2\xbf\xb5',
     None),
    # 3.33 Display property character U+0341.
    ('\xcd\x81',
     '\xcc\x81'),
    # 3.34 Left-to-right mark U+200E.
    ('\xe2\x80\x8e',
     None),
    # 3.35 Deprecated U+202A.
    ('\xe2\x80\xaa',
     None),
    # 3.36 Language tagging character U+E0001.
    ('\xf3\xa0\x80\x81',
     None),
    # 3.37 Language tagging character U+E0042.
    ('\xf3\xa0\x81\x82',
     None),
    # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
    ('foo\xd6\xbebar',
     None),
    # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
    ('foo\xef\xb5\x90bar',
     None),
    # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
    ('foo\xef\xb9\xb6bar',
     'foo \xd9\x8ebar'),
    # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
    ('\xd8\xa71',
     None),
    # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
    ('\xd8\xa71\xd8\xa8',
     '\xd8\xa71\xd8\xa8'),
    # 3.43 Unassigned code point U+E0002.
    # Skip this test as we allow unassigned
    #('\xf3\xa0\x80\x82',
    # None),
    (None, None),
    # 3.44 Larger test (shrinking).
    # Original test case reads \xc3\xdf
    ('X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
     '\xaa\xce\xb0\xe2\x80\x80',
     'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
    # 3.45 Larger test (expanding).
    # Original test case reads \xc3\x9f
    ('X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
     '\x80',
     'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
     '\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
     '\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88')
    ]
   1287 
   1288 
   1289 class NameprepTest(unittest.TestCase):
   1290     def test_nameprep(self):
   1291         from encodings.idna import nameprep
   1292         for pos, (orig, prepped) in enumerate(nameprep_tests):
   1293             if orig is None:
   1294                 # Skipped
   1295                 continue
   1296             # The Unicode strings are given in UTF-8
   1297             orig = unicode(orig, "utf-8")
   1298             if prepped is None:
   1299                 # Input contains prohibited characters
   1300                 self.assertRaises(UnicodeError, nameprep, orig)
   1301             else:
   1302                 prepped = unicode(prepped, "utf-8")
   1303                 try:
   1304                     self.assertEqual(nameprep(orig), prepped)
   1305                 except Exception,e:
   1306                     raise test_support.TestFailed("Test 3.%d: %s" % (pos+1, str(e)))
   1307 
   1308 class IDNACodecTest(unittest.TestCase):
   1309     def test_builtin_decode(self):
   1310         self.assertEqual(unicode("python.org", "idna"), u"python.org")
   1311         self.assertEqual(unicode("python.org.", "idna"), u"python.org.")
   1312         self.assertEqual(unicode("xn--pythn-mua.org", "idna"), u"pyth\xf6n.org")
   1313         self.assertEqual(unicode("xn--pythn-mua.org.", "idna"), u"pyth\xf6n.org.")
   1314 
   1315     def test_builtin_encode(self):
   1316         self.assertEqual(u"python.org".encode("idna"), "python.org")
   1317         self.assertEqual("python.org.".encode("idna"), "python.org.")
   1318         self.assertEqual(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org")
   1319         self.assertEqual(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.")
   1320 
   1321     def test_stream(self):
   1322         import StringIO
   1323         r = codecs.getreader("idna")(StringIO.StringIO("abc"))
   1324         r.read(3)
   1325         self.assertEqual(r.read(), u"")
   1326 
   1327     def test_incremental_decode(self):
   1328         self.assertEqual(
   1329             "".join(codecs.iterdecode("python.org", "idna")),
   1330             u"python.org"
   1331         )
   1332         self.assertEqual(
   1333             "".join(codecs.iterdecode("python.org.", "idna")),
   1334             u"python.org."
   1335         )
   1336         self.assertEqual(
   1337             "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
   1338             u"pyth\xf6n.org."
   1339         )
   1340         self.assertEqual(
   1341             "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
   1342             u"pyth\xf6n.org."
   1343         )
   1344 
   1345         decoder = codecs.getincrementaldecoder("idna")()
   1346         self.assertEqual(decoder.decode("xn--xam", ), u"")
   1347         self.assertEqual(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
   1348         self.assertEqual(decoder.decode(u"rg"), u"")
   1349         self.assertEqual(decoder.decode(u"", True), u"org")
   1350 
   1351         decoder.reset()
   1352         self.assertEqual(decoder.decode("xn--xam", ), u"")
   1353         self.assertEqual(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
   1354         self.assertEqual(decoder.decode("rg."), u"org.")
   1355         self.assertEqual(decoder.decode("", True), u"")
   1356 
   1357     def test_incremental_encode(self):
   1358         self.assertEqual(
   1359             "".join(codecs.iterencode(u"python.org", "idna")),
   1360             "python.org"
   1361         )
   1362         self.assertEqual(
   1363             "".join(codecs.iterencode(u"python.org.", "idna")),
   1364             "python.org."
   1365         )
   1366         self.assertEqual(
   1367             "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
   1368             "xn--pythn-mua.org."
   1369         )
   1370         self.assertEqual(
   1371             "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
   1372             "xn--pythn-mua.org."
   1373         )
   1374 
   1375         encoder = codecs.getincrementalencoder("idna")()
   1376         self.assertEqual(encoder.encode(u"\xe4x"), "")
   1377         self.assertEqual(encoder.encode(u"ample.org"), "xn--xample-9ta.")
   1378         self.assertEqual(encoder.encode(u"", True), "org")
   1379 
   1380         encoder.reset()
   1381         self.assertEqual(encoder.encode(u"\xe4x"), "")
   1382         self.assertEqual(encoder.encode(u"ample.org."), "xn--xample-9ta.org.")
   1383         self.assertEqual(encoder.encode(u"", True), "")
   1384 
   1385 class CodecsModuleTest(unittest.TestCase):
   1386 
   1387     def test_decode(self):
   1388         self.assertEqual(codecs.decode('\xe4\xf6\xfc', 'latin-1'),
   1389                           u'\xe4\xf6\xfc')
   1390         self.assertRaises(TypeError, codecs.decode)
   1391         self.assertEqual(codecs.decode('abc'), u'abc')
   1392         self.assertRaises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii')
   1393 
   1394     def test_encode(self):
   1395         self.assertEqual(codecs.encode(u'\xe4\xf6\xfc', 'latin-1'),
   1396                           '\xe4\xf6\xfc')
   1397         self.assertRaises(TypeError, codecs.encode)
   1398         self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
   1399         self.assertEqual(codecs.encode(u'abc'), 'abc')
   1400         self.assertRaises(UnicodeEncodeError, codecs.encode, u'\xffff', 'ascii')
   1401 
   1402     def test_register(self):
   1403         self.assertRaises(TypeError, codecs.register)
   1404         self.assertRaises(TypeError, codecs.register, 42)
   1405 
   1406     def test_lookup(self):
   1407         self.assertRaises(TypeError, codecs.lookup)
   1408         self.assertRaises(LookupError, codecs.lookup, "__spam__")
   1409         self.assertRaises(LookupError, codecs.lookup, " ")
   1410 
   1411     def test_getencoder(self):
   1412         self.assertRaises(TypeError, codecs.getencoder)
   1413         self.assertRaises(LookupError, codecs.getencoder, "__spam__")
   1414 
   1415     def test_getdecoder(self):
   1416         self.assertRaises(TypeError, codecs.getdecoder)
   1417         self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
   1418 
   1419     def test_getreader(self):
   1420         self.assertRaises(TypeError, codecs.getreader)
   1421         self.assertRaises(LookupError, codecs.getreader, "__spam__")
   1422 
   1423     def test_getwriter(self):
   1424         self.assertRaises(TypeError, codecs.getwriter)
   1425         self.assertRaises(LookupError, codecs.getwriter, "__spam__")
   1426 
   1427     def test_lookup_issue1813(self):
   1428         # Issue #1813: under Turkish locales, lookup of some codecs failed
   1429         # because 'I' is lowercased as a dotless "i"
   1430         oldlocale = locale.getlocale(locale.LC_CTYPE)
   1431         self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
   1432         try:
   1433             locale.setlocale(locale.LC_CTYPE, 'tr_TR')
   1434         except locale.Error:
   1435             # Unsupported locale on this system
   1436             self.skipTest('test needs Turkish locale')
   1437         c = codecs.lookup('ASCII')
   1438         self.assertEqual(c.name, 'ascii')
   1439 
   1440     def test_all(self):
   1441         api = (
   1442             "encode", "decode",
   1443             "register", "CodecInfo", "Codec", "IncrementalEncoder",
   1444             "IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
   1445             "getencoder", "getdecoder", "getincrementalencoder",
   1446             "getincrementaldecoder", "getreader", "getwriter",
   1447             "register_error", "lookup_error",
   1448             "strict_errors", "replace_errors", "ignore_errors",
   1449             "xmlcharrefreplace_errors", "backslashreplace_errors",
   1450             "open", "EncodedFile",
   1451             "iterencode", "iterdecode",
   1452             "BOM", "BOM_BE", "BOM_LE",
   1453             "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
   1454             "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
   1455             "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",  # Undocumented
   1456             "StreamReaderWriter", "StreamRecoder",
   1457         )
   1458         self.assertEqual(sorted(api), sorted(codecs.__all__))
   1459         for api in codecs.__all__:
   1460             getattr(codecs, api)
   1461 
   1462 class StreamReaderTest(unittest.TestCase):
   1463 
   1464     def setUp(self):
   1465         self.reader = codecs.getreader('utf-8')
   1466         self.stream = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
   1467 
   1468     def test_readlines(self):
   1469         f = self.reader(self.stream)
   1470         self.assertEqual(f.readlines(), [u'\ud55c\n', u'\uae00'])
   1471 
   1472 class EncodedFileTest(unittest.TestCase):
   1473 
   1474     def test_basic(self):
   1475         f = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
   1476         ef = codecs.EncodedFile(f, 'utf-16-le', 'utf-8')
   1477         self.assertEqual(ef.read(), '\\\xd5\n\x00\x00\xae')
   1478 
   1479         f = StringIO.StringIO()
   1480         ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
   1481         ef.write('\xc3\xbc')
   1482         self.assertEqual(f.getvalue(), '\xfc')
   1483 
   1484 class Str2StrTest(unittest.TestCase):
   1485 
   1486     def test_read(self):
   1487         sin = codecs.encode("\x80", "base64_codec")
   1488         reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
   1489         sout = reader.read()
   1490         self.assertEqual(sout, "\x80")
   1491         self.assertIsInstance(sout, str)
   1492 
   1493     def test_readline(self):
   1494         sin = codecs.encode("\x80", "base64_codec")
   1495         reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
   1496         sout = reader.readline()
   1497         self.assertEqual(sout, "\x80")
   1498         self.assertIsInstance(sout, str)
   1499 
# Every codec in this list is expected to round-trip the small unicode
# sample used by BasicUnicodeTest (plain ASCII letters and digits).
all_unicode_encodings = [
    "ascii",
    "base64_codec",
    "big5",
    "big5hkscs",
    "charmap",
    "cp037",
    "cp1006",
    "cp1026",
    "cp1140",
    "cp1250",
    "cp1251",
    "cp1252",
    "cp1253",
    "cp1254",
    "cp1255",
    "cp1256",
    "cp1257",
    "cp1258",
    "cp424",
    "cp437",
    "cp500",
    "cp720",
    "cp737",
    "cp775",
    "cp850",
    "cp852",
    "cp855",
    "cp856",
    "cp857",
    "cp858",
    "cp860",
    "cp861",
    "cp862",
    "cp863",
    "cp864",
    "cp865",
    "cp866",
    "cp869",
    "cp874",
    "cp875",
    "cp932",
    "cp949",
    "cp950",
    "euc_jis_2004",
    "euc_jisx0213",
    "euc_jp",
    "euc_kr",
    "gb18030",
    "gb2312",
    "gbk",
    "hex_codec",
    "hp_roman8",
    "hz",
    "idna",
    "iso2022_jp",
    "iso2022_jp_1",
    "iso2022_jp_2",
    "iso2022_jp_2004",
    "iso2022_jp_3",
    "iso2022_jp_ext",
    "iso2022_kr",
    "iso8859_1",
    "iso8859_10",
    "iso8859_11",
    "iso8859_13",
    "iso8859_14",
    "iso8859_15",
    "iso8859_16",
    "iso8859_2",
    "iso8859_3",
    "iso8859_4",
    "iso8859_5",
    "iso8859_6",
    "iso8859_7",
    "iso8859_8",
    "iso8859_9",
    "johab",
    "koi8_r",
    "koi8_u",
    "latin_1",
    "mac_cyrillic",
    "mac_greek",
    "mac_iceland",
    "mac_latin2",
    "mac_roman",
    "mac_turkish",
    "palmos",
    "ptcp154",
    "punycode",
    "raw_unicode_escape",
    "rot_13",
    "shift_jis",
    "shift_jis_2004",
    "shift_jisx0213",
    "tis_620",
    "unicode_escape",
    "unicode_internal",
    "utf_16",
    "utf_16_be",
    "utf_16_le",
    "utf_7",
    "utf_8",
]

# mbcs exists only on builds that expose the codec (Windows).
if hasattr(codecs, "mbcs_encode"):
    all_unicode_encodings.append("mbcs")

# The following encodings work only with str, not unicode
all_string_encodings = [
    "quopri_codec",
    "string_escape",
    "uu_codec",
]

# The following encoding is not tested, because it's not supposed
# to work:
#    "undefined"

# The following encodings don't work in stateful mode
broken_unicode_with_streams = [
    "base64_codec",
    "hex_codec",
    "punycode",
    "unicode_internal"
]
# The same codecs also lack usable incremental encoders/decoders.
broken_incremental_coders = broken_unicode_with_streams[:]

# Under -3, rot_13 emits a Py3k deprecation warning that breaks the
# stream round-trip checks.
if sys.flags.py3k_warning:
    broken_unicode_with_streams.append("rot_13")

# The following encodings only support "strict" mode
only_strict_mode = [
    "idna",
    "zlib_codec",
    "bz2_codec",
]

# The compression codecs are exercised only when the underlying modules
# are available on this build.
try:
    import bz2
except ImportError:
    pass
else:
    all_unicode_encodings.append("bz2_codec")
    broken_unicode_with_streams.append("bz2_codec")

try:
    import zlib
except ImportError:
    pass
else:
    all_unicode_encodings.append("zlib_codec")
    broken_unicode_with_streams.append("zlib_codec")
   1653 
class BasicUnicodeTest(unittest.TestCase):
    """Round-trip every codec in all_unicode_encodings through the
    stateless, stream, and incremental codec APIs.
    """

    def test_basics(self):
        # Stateless encode/decode, stream reader/writer fed one character
        # at a time, and the incremental encoder/decoder machinery.
        s = u"abc123"  # all codecs should be able to encode these
        for encoding in all_unicode_encodings:
            name = codecs.lookup(encoding).name
            # The *_codec aliases and latin_1 report a canonical name that
            # differs from the lookup key; normalize before comparing.
            if encoding.endswith("_codec"):
                name += "_codec"
            elif encoding == "latin_1":
                name = "latin_1"
            self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
            (bytes, size) = codecs.getencoder(encoding)(s)
            self.assertEqual(size, len(s), "encoding=%r" % encoding)
            (chars, size) = codecs.getdecoder(encoding)(bytes)
            self.assertEqual(chars, s, "encoding=%r" % encoding)

            if encoding not in broken_unicode_with_streams:
                # check stream reader/writer
                q = Queue()
                writer = codecs.getwriter(encoding)(q)
                encodedresult = ""
                for c in s:
                    writer.write(c)
                    encodedresult += q.read()
                q = Queue()
                reader = codecs.getreader(encoding)(q)
                decodedresult = u""
                for c in encodedresult:
                    q.write(c)
                    decodedresult += reader.read()
                self.assertEqual(decodedresult, s, "encoding=%r" % encoding)

            if encoding not in broken_incremental_coders:
                # check incremental decoder/encoder and iterencode()/iterdecode()
                try:
                    encoder = codecs.getincrementalencoder(encoding)()
                except LookupError:  # no IncrementalEncoder
                    pass
                else:
                    # check incremental decoder/encoder
                    encodedresult = ""
                    for c in s:
                        encodedresult += encoder.encode(c)
                    # final=True flushes any state held by the encoder
                    encodedresult += encoder.encode(u"", True)
                    decoder = codecs.getincrementaldecoder(encoding)()
                    decodedresult = u""
                    for c in encodedresult:
                        decodedresult += decoder.decode(c)
                    decodedresult += decoder.decode("", True)
                    self.assertEqual(decodedresult, s,
                                     "encoding=%r" % encoding)

                    # check iterencode()/iterdecode()
                    result = u"".join(codecs.iterdecode(
                            codecs.iterencode(s, encoding), encoding))
                    self.assertEqual(result, s, "encoding=%r" % encoding)

                    # check iterencode()/iterdecode() with empty string
                    result = u"".join(codecs.iterdecode(
                            codecs.iterencode(u"", encoding), encoding))
                    self.assertEqual(result, u"")

                if encoding not in only_strict_mode:
                    # check incremental decoder/encoder with errors argument
                    try:
                        encoder = codecs.getincrementalencoder(encoding)("ignore")
                    except LookupError:  # no IncrementalEncoder
                        pass
                    else:
                        encodedresult = "".join(encoder.encode(c) for c in s)
                        decoder = codecs.getincrementaldecoder(encoding)("ignore")
                        decodedresult = u"".join(decoder.decode(c)
                                                 for c in encodedresult)
                        self.assertEqual(decodedresult, s,
                                         "encoding=%r" % encoding)

    @test_support.cpython_only
    def test_basics_capi(self):
        # Same incremental round-trip as test_basics, but through the
        # C API entry points exposed by _testcapi.
        from _testcapi import codec_incrementalencoder, codec_incrementaldecoder
        s = u"abc123"  # all codecs should be able to encode these
        for encoding in all_unicode_encodings:
            if encoding not in broken_incremental_coders:
                # check incremental decoder/encoder and iterencode()/iterdecode()
                try:
                    cencoder = codec_incrementalencoder(encoding)
                except LookupError:  # no IncrementalEncoder
                    pass
                else:
                    # check C API
                    encodedresult = ""
                    for c in s:
                        encodedresult += cencoder.encode(c)
                    encodedresult += cencoder.encode(u"", True)
                    cdecoder = codec_incrementaldecoder(encoding)
                    decodedresult = u""
                    for c in encodedresult:
                        decodedresult += cdecoder.decode(c)
                    decodedresult += cdecoder.decode("", True)
                    self.assertEqual(decodedresult, s,
                                     "encoding=%r" % encoding)

                if encoding not in only_strict_mode:
                    # check incremental decoder/encoder with errors argument
                    try:
                        cencoder = codec_incrementalencoder(encoding, "ignore")
                    except LookupError:  # no IncrementalEncoder
                        pass
                    else:
                        encodedresult = "".join(cencoder.encode(c) for c in s)
                        cdecoder = codec_incrementaldecoder(encoding, "ignore")
                        decodedresult = u"".join(cdecoder.decode(c)
                                                 for c in encodedresult)
                        self.assertEqual(decodedresult, s,
                                         "encoding=%r" % encoding)

    def test_seek(self):
        # seek() on a StreamReader must reset codec state and buffers so
        # that re-reading from the start produces the same text.
        # all codecs should be able to encode these
        s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456")
        for encoding in all_unicode_encodings:
            if encoding == "idna": # FIXME: See SF bug #1163178
                continue
            if encoding in broken_unicode_with_streams:
                continue
            reader = codecs.getreader(encoding)(StringIO.StringIO(s.encode(encoding)))
            for t in xrange(5):
                # Test that calling seek resets the internal codec state and buffers
                reader.seek(0, 0)
                line = reader.readline()
                self.assertEqual(s[:len(line)], line)

    def test_bad_decode_args(self):
        # Decoders must raise TypeError when called without arguments, and
        # (for most codecs) when given a non-string argument.
        for encoding in all_unicode_encodings:
            decoder = codecs.getdecoder(encoding)
            self.assertRaises(TypeError, decoder)
            if encoding not in ("idna", "punycode"):
                self.assertRaises(TypeError, decoder, 42)

    def test_bad_encode_args(self):
        # Encoders must raise TypeError when called without arguments.
        for encoding in all_unicode_encodings:
            encoder = codecs.getencoder(encoding)
            self.assertRaises(TypeError, encoder)

    def test_encoding_map_type_initialized(self):
        from encodings import cp1140
        # This used to crash, we are only verifying there's no crash.
        table_type = type(cp1140.encoding_table)
        self.assertEqual(table_type, table_type)
   1800 
   1801 class BasicStrTest(unittest.TestCase):
   1802     def test_basics(self):
   1803         s = "abc123"
   1804         for encoding in all_string_encodings:
   1805             (bytes, size) = codecs.getencoder(encoding)(s)
   1806             self.assertEqual(size, len(s))
   1807             (chars, size) = codecs.getdecoder(encoding)(bytes)
   1808             self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
   1809 
class CharmapTest(unittest.TestCase):
    """Tests for codecs.charmap_decode with the three mapping flavours:
    a unicode string, an int->unicode-string dict, and an int->int dict.
    """

    def test_decode_with_string_map(self):
        """Decode using a unicode string as the mapping table."""
        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"),
            (u"abc", 3)
        )

        # A byte beyond the end of the map is an error in strict mode.
        self.assertRaises(UnicodeDecodeError,
            codecs.charmap_decode, b"\x00\x01\x02", "strict", u"ab"
        )

        # U+FFFE in the map marks an undefined position.
        self.assertRaises(UnicodeDecodeError,
            codecs.charmap_decode, "\x00\x01\x02", "strict", u"ab\ufffe"
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
            (u"ab\ufffd", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"),
            (u"ab\ufffd", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"),
            (u"ab", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
            (u"ab", 3)
        )

        # With an empty map and "ignore", everything is dropped but the
        # whole input must still be reported as consumed.
        allbytes = "".join(chr(i) for i in xrange(256))
        self.assertEqual(
            codecs.charmap_decode(allbytes, "ignore", u""),
            (u"", len(allbytes))
        )

    def test_decode_with_int2str_map(self):
        """Decode using a dict mapping byte values to unicode strings."""
        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "strict",
                                  {0: u'a', 1: u'b', 2: u'c'}),
            (u"abc", 3)
        )

        # A single byte may expand to multiple characters.
        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "strict",
                                  {0: u'Aa', 1: u'Bb', 2: u'Cc'}),
            (u"AaBbCc", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "strict",
                                  {0: u'\U0010FFFF', 1: u'b', 2: u'c'}),
            (u"\U0010FFFFbc", 3)
        )

        # Mapping to an empty string deletes the byte.
        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "strict",
                                  {0: u'a', 1: u'b', 2: u''}),
            (u"ab", 3)
        )

        # A missing key or a None value is undefined in strict mode.
        self.assertRaises(UnicodeDecodeError,
            codecs.charmap_decode, "\x00\x01\x02", "strict",
                                   {0: u'a', 1: u'b'}
        )

        self.assertRaises(UnicodeDecodeError,
            codecs.charmap_decode, "\x00\x01\x02", "strict",
                                   {0: u'a', 1: u'b', 2: None}
        )

        # Issue #14850
        self.assertRaises(UnicodeDecodeError,
            codecs.charmap_decode, "\x00\x01\x02", "strict",
                                   {0: u'a', 1: u'b', 2: u'\ufffe'}
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "replace",
                                  {0: u'a', 1: u'b'}),
            (u"ab\ufffd", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "replace",
                                  {0: u'a', 1: u'b', 2: None}),
            (u"ab\ufffd", 3)
        )

        # Issue #14850
        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "replace",
                                  {0: u'a', 1: u'b', 2: u'\ufffe'}),
            (u"ab\ufffd", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "ignore",
                                  {0: u'a', 1: u'b'}),
            (u"ab", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "ignore",
                                  {0: u'a', 1: u'b', 2: None}),
            (u"ab", 3)
        )

        # Issue #14850
        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "ignore",
                                  {0: u'a', 1: u'b', 2: u'\ufffe'}),
            (u"ab", 3)
        )

        allbytes = "".join(chr(i) for i in xrange(256))
        self.assertEqual(
            codecs.charmap_decode(allbytes, "ignore", {}),
            (u"", len(allbytes))
        )

    def test_decode_with_int2int_map(self):
        """Decode using a dict mapping byte values to code points."""
        a = ord(u'a')
        b = ord(u'b')
        c = ord(u'c')

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "strict",
                                  {0: a, 1: b, 2: c}),
            (u"abc", 3)
        )

        # Issue #15379
        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "strict",
                                  {0: 0x10FFFF, 1: b, 2: c}),
            (u"\U0010FFFFbc", 3)
        )

        # Values above the unicode range raise TypeError, not a decode error.
        self.assertRaises(TypeError,
            codecs.charmap_decode, "\x00\x01\x02", "strict",
                                   {0: 0x110000, 1: b, 2: c}
        )

        self.assertRaises(UnicodeDecodeError,
            codecs.charmap_decode, "\x00\x01\x02", "strict",
                                   {0: a, 1: b},
        )

        # 0xFFFE marks an undefined mapping.
        self.assertRaises(UnicodeDecodeError,
            codecs.charmap_decode, "\x00\x01\x02", "strict",
                                   {0: a, 1: b, 2: 0xFFFE},
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "replace",
                                  {0: a, 1: b}),
            (u"ab\ufffd", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "replace",
                                  {0: a, 1: b, 2: 0xFFFE}),
            (u"ab\ufffd", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "ignore",
                                  {0: a, 1: b}),
            (u"ab", 3)
        )

        self.assertEqual(
            codecs.charmap_decode("\x00\x01\x02", "ignore",
                                  {0: a, 1: b, 2: 0xFFFE}),
            (u"ab", 3)
        )
   1992 
   1993 
   1994 class WithStmtTest(unittest.TestCase):
   1995     def test_encodedfile(self):
   1996         f = StringIO.StringIO("\xc3\xbc")
   1997         with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
   1998             self.assertEqual(ef.read(), "\xfc")
   1999 
   2000     def test_streamreaderwriter(self):
   2001         f = StringIO.StringIO("\xc3\xbc")
   2002         info = codecs.lookup("utf-8")
   2003         with codecs.StreamReaderWriter(f, info.streamreader,
   2004                                        info.streamwriter, 'strict') as srw:
   2005             self.assertEqual(srw.read(), u"\xfc")
   2006 
   2007 
class UnicodeEscapeTest(unittest.TestCase):
    """Tests for the unicode-escape codec."""

    def test_empty(self):
        """Empty input encodes/decodes to empty output."""
        self.assertEqual(codecs.unicode_escape_encode(u""), ("", 0))
        self.assertEqual(codecs.unicode_escape_decode(""), (u"", 0))

    def test_raw_encode(self):
        """Printable ASCII (except the backslash) passes through as-is."""
        encode = codecs.unicode_escape_encode
        for b in range(32, 127):
            if b != ord('\\'):
                self.assertEqual(encode(unichr(b)), (chr(b), 1))

    def test_raw_decode(self):
        """Bytes that do not start an escape decode to themselves."""
        decode = codecs.unicode_escape_decode
        for b in range(256):
            if b != ord('\\'):
                self.assertEqual(decode(chr(b) + '0'), (unichr(b) + u'0', 2))

    def test_escape_encode(self):
        """Control and non-ASCII characters are escaped on encoding."""
        encode = codecs.unicode_escape_encode
        check = coding_checker(self, encode)
        check(u'\t', r'\t')
        check(u'\n', r'\n')
        check(u'\r', r'\r')
        check(u'\\', r'\\')
        # Other control and high-bit characters use the generic \xNN form.
        for b in range(32):
            if chr(b) not in '\t\n\r':
                check(unichr(b), '\\x%02x' % b)
        for b in range(127, 256):
            check(unichr(b), '\\x%02x' % b)
        # BMP and non-BMP characters use \uNNNN and \UNNNNNNNN.
        check(u'\u20ac', r'\u20ac')
        check(u'\U0001d120', r'\U0001d120')

    def test_escape_decode(self):
        """All escape forms recognized by the decoder."""
        decode = codecs.unicode_escape_decode
        check = coding_checker(self, decode)
        # A backslash-newline is a line continuation and disappears.
        check("[\\\n]", u"[]")
        check(r'[\"]', u'["]')
        check(r"[\']", u"[']")
        check(r"[\\]", ur"[\]")
        check(r"[\a]", u"[\x07]")
        check(r"[\b]", u"[\x08]")
        check(r"[\t]", u"[\x09]")
        check(r"[\n]", u"[\x0a]")
        check(r"[\v]", u"[\x0b]")
        check(r"[\f]", u"[\x0c]")
        check(r"[\r]", u"[\x0d]")
        # Octal escapes take at most three digits; \8 is not an escape.
        check(r"[\7]", u"[\x07]")
        check(r"[\8]", ur"[\8]")
        check(r"[\78]", u"[\x078]")
        check(r"[\41]", u"[!]")
        check(r"[\418]", u"[!8]")
        check(r"[\101]", u"[A]")
        check(r"[\1010]", u"[A0]")
        # Hex escapes take exactly two digits.
        check(r"[\x41]", u"[A]")
        check(r"[\x410]", u"[A0]")
        check(r"\u20ac", u"\u20ac")
        check(r"\U0001d120", u"\U0001d120")
        # Unknown escapes pass through with the backslash preserved.
        for b in range(256):
            if chr(b) not in '\n"\'\\abtnvfr01234567xuUN':
                check('\\' + chr(b), u'\\' + unichr(b))

    def test_decode_errors(self):
        """Truncated \\x, \\u and \\U escapes honour the errors argument."""
        decode = codecs.unicode_escape_decode
        for c, d in ('x', 2), ('u', 4), ('U', 4):
            for i in range(d):
                # Truncated escape at end of input and in mid-string.
                self.assertRaises(UnicodeDecodeError, decode,
                                  "\\" + c + "0"*i)
                self.assertRaises(UnicodeDecodeError, decode,
                                  "[\\" + c + "0"*i + "]")
                data = "[\\" + c + "0"*i + "]\\" + c + "0"*i
                self.assertEqual(decode(data, "ignore"), (u"[]", len(data)))
                self.assertEqual(decode(data, "replace"),
                                 (u"[\ufffd]\ufffd", len(data)))
        # \U escapes beyond U+10FFFF are decode errors as well.
        self.assertRaises(UnicodeDecodeError, decode, r"\U00110000")
        self.assertEqual(decode(r"\U00110000", "ignore"), (u"", 10))
        self.assertEqual(decode(r"\U00110000", "replace"), (u"\ufffd", 10))
   2084 
   2085 
   2086 class RawUnicodeEscapeTest(unittest.TestCase):
   2087     def test_empty(self):
   2088         self.assertEqual(codecs.raw_unicode_escape_encode(u""), ("", 0))
   2089         self.assertEqual(codecs.raw_unicode_escape_decode(""), (u"", 0))
   2090 
   2091     def test_raw_encode(self):
   2092         encode = codecs.raw_unicode_escape_encode
   2093         for b in range(256):
   2094             self.assertEqual(encode(unichr(b)), (chr(b), 1))
   2095 
   2096     def test_raw_decode(self):
   2097         decode = codecs.raw_unicode_escape_decode
   2098         for b in range(256):
   2099             self.assertEqual(decode(chr(b) + '0'), (unichr(b) + u'0', 2))
   2100 
   2101     def test_escape_encode(self):
   2102         encode = codecs.raw_unicode_escape_encode
   2103         check = coding_checker(self, encode)
   2104         for b in range(256):
   2105             if chr(b) not in 'uU':
   2106                 check(u'\\' + unichr(b), '\\' + chr(b))
   2107         check(u'\u20ac', r'\u20ac')
   2108         check(u'\U0001d120', r'\U0001d120')
   2109 
   2110     def test_escape_decode(self):
   2111         decode = codecs.raw_unicode_escape_decode
   2112         check = coding_checker(self, decode)
   2113         for b in range(256):
   2114             if chr(b) not in 'uU':
   2115                 check('\\' + chr(b), u'\\' + unichr(b))
   2116         check(r"\u20ac", u"\u20ac")
   2117         check(r"\U0001d120", u"\U0001d120")
   2118 
   2119     def test_decode_errors(self):
   2120         decode = codecs.raw_unicode_escape_decode
   2121         for c, d in ('u', 4), ('U', 4):
   2122             for i in range(d):
   2123                 self.assertRaises(UnicodeDecodeError, decode,
   2124                                   "\\" + c + "0"*i)
   2125                 self.assertRaises(UnicodeDecodeError, decode,
   2126                                   "[\\" + c + "0"*i + "]")
   2127                 data = "[\\" + c + "0"*i + "]\\" + c + "0"*i
   2128                 self.assertEqual(decode(data, "ignore"), (u"[]", len(data)))
   2129                 self.assertEqual(decode(data, "replace"),
   2130                                  (u"[\ufffd]\ufffd", len(data)))
   2131         self.assertRaises(UnicodeDecodeError, decode, r"\U00110000")
   2132         self.assertEqual(decode(r"\U00110000", "ignore"), (u"", 10))
   2133         self.assertEqual(decode(r"\U00110000", "replace"), (u"\ufffd", 10))
   2134 
   2135 
class BomTest(unittest.TestCase):
    def test_seek0(self):
        """The BOM belongs to stream position 0 only.

        Writing, seeking back to 0, and writing again must emit the BOM
        again (it is overwritten in place, not duplicated); seeking to any
        other position must never produce an extra BOM.
        """
        data = u"1234567890"
        tests = ("utf-16",
                 "utf-16-le",
                 "utf-16-be",
                 "utf-32",
                 "utf-32-le",
                 "utf-32-be")
        self.addCleanup(test_support.unlink, test_support.TESTFN)
        for encoding in tests:
            # Check if the BOM is written only once
            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
                f.write(data)
                f.write(data)
                f.seek(0)
                self.assertEqual(f.read(), data * 2)
                f.seek(0)
                self.assertEqual(f.read(), data * 2)

            # Check that the BOM is written after a seek(0)
            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
                f.write(data[0])
                self.assertNotEqual(f.tell(), 0)
                f.seek(0)
                f.write(data)
                f.seek(0)
                self.assertEqual(f.read(), data)

            # (StreamWriter) Check that the BOM is written after a seek(0)
            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
                f.writer.write(data[0])
                self.assertNotEqual(f.writer.tell(), 0)
                f.writer.seek(0)
                f.writer.write(data)
                f.seek(0)
                self.assertEqual(f.read(), data)

            # Check that the BOM is not written after a seek() at a position
            # different than the start
            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
                f.write(data)
                f.seek(f.tell())
                f.write(data)
                f.seek(0)
                self.assertEqual(f.read(), data * 2)

            # (StreamWriter) Check that the BOM is not written after a seek()
            # at a position different than the start
            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
                f.writer.write(data)
                f.writer.seek(f.writer.tell())
                f.writer.write(data)
                f.seek(0)
                self.assertEqual(f.read(), data * 2)
   2191 
   2192 
   2193 class TransformCodecTest(unittest.TestCase):
   2194 
   2195     def test_quopri_stateless(self):
   2196         # Should encode with quotetabs=True
   2197         encoded = codecs.encode(b"space tab\teol \n", "quopri-codec")
   2198         self.assertEqual(encoded, b"space=20tab=09eol=20\n")
   2199         # But should still support unescaped tabs and spaces
   2200         unescaped = b"space tab eol\n"
   2201         self.assertEqual(codecs.decode(unescaped, "quopri-codec"), unescaped)
   2202 
   2203     def test_uu_invalid(self):
   2204         # Missing "begin" line
   2205         self.assertRaises(ValueError, codecs.decode, "", "uu-codec")
   2206 
   2207 
   2208 def test_main():
   2209     test_support.run_unittest(
   2210         UTF32Test,
   2211         UTF32LETest,
   2212         UTF32BETest,
   2213         UTF16Test,
   2214         UTF16LETest,
   2215         UTF16BETest,
   2216         UTF8Test,
   2217         UTF8SigTest,
   2218         UTF7Test,
   2219         UTF16ExTest,
   2220         ReadBufferTest,
   2221         CharBufferTest,
   2222         EscapeDecodeTest,
   2223         RecodingTest,
   2224         PunycodeTest,
   2225         UnicodeInternalTest,
   2226         NameprepTest,
   2227         IDNACodecTest,
   2228         CodecsModuleTest,
   2229         StreamReaderTest,
   2230         EncodedFileTest,
   2231         Str2StrTest,
   2232         BasicUnicodeTest,
   2233         BasicStrTest,
   2234         CharmapTest,
   2235         WithStmtTest,
   2236         UnicodeEscapeTest,
   2237         RawUnicodeEscapeTest,
   2238         BomTest,
   2239         TransformCodecTest,
   2240     )
   2241 
   2242 
   2243 if __name__ == "__main__":
   2244     test_main()
   2245