# -*- coding: koi8-r -*-

import importlib
import os
import subprocess
import sys
import tempfile
import unittest
import warnings
from test.support import TESTFN, unlink, unload, rmtree, script_helper, captured_stdout

     11 class MiscSourceEncodingTest(unittest.TestCase):
     12 
     13     def test_pep263(self):
     14         self.assertEqual(
     15             "".encode("utf-8"),
     16             b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
     17         )
     18         self.assertEqual(
     19             "\".encode("utf-8"),
     20             b'\\\xd0\x9f'
     21         )
     22 
     23     def test_compilestring(self):
     24         # see #1882
     25         c = compile(b"\n# coding: utf-8\nu = '\xc3\xb3'\n", "dummy", "exec")
     26         d = {}
     27         exec(c, d)
     28         self.assertEqual(d['u'], '\xf3')
     29 
     30     def test_issue2301(self):
     31         try:
     32             compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
     33         except SyntaxError as v:
     34             self.assertEqual(v.text, "print '\u5e74'\n")
     35         else:
     36             self.fail()
     37 
     38     def test_issue4626(self):
     39         c = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")
     40         d = {}
     41         exec(c, d)
     42         self.assertEqual(d['\xc6'], '\xc6')
     43 
     44     def test_issue3297(self):
     45         c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
     46         d = {}
     47         exec(c, d)
     48         self.assertEqual(d['a'], d['b'])
     49         self.assertEqual(len(d['a']), len(d['b']))
     50         self.assertEqual(ascii(d['a']), ascii(d['b']))
     51 
     52     def test_issue7820(self):
     53         # Ensure that check_bom() restores all bytes in the right order if
     54         # check_bom() fails in pydebug mode: a buffer starts with the first
     55         # byte of a valid BOM, but next bytes are different
     56 
     57         # one byte in common with the UTF-16-LE BOM
     58         self.assertRaises(SyntaxError, eval, b'\xff\x20')
     59 
     60         # two bytes in common with the UTF-8 BOM
     61         self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')
     62 
     63     def test_20731(self):
     64         sub = subprocess.Popen([sys.executable,
     65                         os.path.join(os.path.dirname(__file__),
     66                                      'coding20731.py')],
     67                         stderr=subprocess.PIPE)
     68         err = sub.communicate()[1]
     69         self.assertEqual(sub.returncode, 0)
     70         self.assertNotIn(b'SyntaxError', err)
     71 
     72     def test_error_message(self):
     73         compile(b'# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec')
     74         compile(b'\xef\xbb\xbf\n', 'dummy', 'exec')
     75         compile(b'\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec')
     76         with self.assertRaisesRegex(SyntaxError, 'fake'):
     77             compile(b'# -*- coding: fake -*-\n', 'dummy', 'exec')
     78         with self.assertRaisesRegex(SyntaxError, 'iso-8859-15'):
     79             compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
     80                     'dummy', 'exec')
     81         with self.assertRaisesRegex(SyntaxError, 'BOM'):
     82             compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
     83                     'dummy', 'exec')
     84         with self.assertRaisesRegex(SyntaxError, 'fake'):
     85             compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
     86         with self.assertRaisesRegex(SyntaxError, 'BOM'):
     87             compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
     88 
     89     def test_bad_coding(self):
     90         module_name = 'bad_coding'
     91         self.verify_bad_module(module_name)
     92 
     93     def test_bad_coding2(self):
     94         module_name = 'bad_coding2'
     95         self.verify_bad_module(module_name)
     96 
     97     def verify_bad_module(self, module_name):
     98         self.assertRaises(SyntaxError, __import__, 'test.' + module_name)
     99 
    100         path = os.path.dirname(__file__)
    101         filename = os.path.join(path, module_name + '.py')
    102         with open(filename, "rb") as fp:
    103             bytes = fp.read()
    104         self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')
    105 
    106     def test_exec_valid_coding(self):
    107         d = {}
    108         exec(b'# coding: cp949\na = "\xaa\xa7"\n', d)
    109         self.assertEqual(d['a'], '\u3047')
    110 
    111     def test_file_parse(self):
    112         # issue1134: all encodings outside latin-1 and utf-8 fail on
    113         # multiline strings and long lines (>512 columns)
    114         unload(TESTFN)
    115         filename = TESTFN + ".py"
    116         f = open(filename, "w", encoding="cp1252")
    117         sys.path.insert(0, os.curdir)
    118         try:
    119             with f:
    120                 f.write("# -*- coding: cp1252 -*-\n")
    121                 f.write("'''A short string\n")
    122                 f.write("'''\n")
    123                 f.write("'A very long string %s'\n" % ("X" * 1000))
    124 
    125             importlib.invalidate_caches()
    126             __import__(TESTFN)
    127         finally:
    128             del sys.path[0]
    129             unlink(filename)
    130             unlink(filename + "c")
    131             unlink(filename + "o")
    132             unload(TESTFN)
    133             rmtree('__pycache__')
    134 
    135     def test_error_from_string(self):
    136         # See http://bugs.python.org/issue6289
    137         input = "# coding: ascii\n\N{SNOWMAN}".encode('utf-8')
    138         with self.assertRaises(SyntaxError) as c:
    139             compile(input, "<string>", "exec")
    140         expected = "'ascii' codec can't decode byte 0xe2 in position 16: " \
    141                    "ordinal not in range(128)"
    142         self.assertTrue(c.exception.args[0].startswith(expected),
    143                         msg=c.exception.args[0])
    144 
    145 
    146 class AbstractSourceEncodingTest:
    147 
    148     def test_default_coding(self):
    149         src = (b'print(ascii("\xc3\xa4"))\n')
    150         self.check_script_output(src, br"'\xe4'")
    151 
    152     def test_first_coding_line(self):
    153         src = (b'#coding:iso8859-15\n'
    154                b'print(ascii("\xc3\xa4"))\n')
    155         self.check_script_output(src, br"'\xc3\u20ac'")
    156 
    157     def test_second_coding_line(self):
    158         src = (b'#\n'
    159                b'#coding:iso8859-15\n'
    160                b'print(ascii("\xc3\xa4"))\n')
    161         self.check_script_output(src, br"'\xc3\u20ac'")
    162 
    163     def test_third_coding_line(self):
    164         # Only first two lines are tested for a magic comment.
    165         src = (b'#\n'
    166                b'#\n'
    167                b'#coding:iso8859-15\n'
    168                b'print(ascii("\xc3\xa4"))\n')
    169         self.check_script_output(src, br"'\xe4'")
    170 
    171     def test_double_coding_line(self):
    172         # If the first line matches the second line is ignored.
    173         src = (b'#coding:iso8859-15\n'
    174                b'#coding:latin1\n'
    175                b'print(ascii("\xc3\xa4"))\n')
    176         self.check_script_output(src, br"'\xc3\u20ac'")
    177 
    178     def test_double_coding_same_line(self):
    179         src = (b'#coding:iso8859-15 coding:latin1\n'
    180                b'print(ascii("\xc3\xa4"))\n')
    181         self.check_script_output(src, br"'\xc3\u20ac'")
    182 
    183     def test_first_non_utf8_coding_line(self):
    184         src = (b'#coding:iso-8859-15 \xa4\n'
    185                b'print(ascii("\xc3\xa4"))\n')
    186         self.check_script_output(src, br"'\xc3\u20ac'")
    187 
    188     def test_second_non_utf8_coding_line(self):
    189         src = (b'\n'
    190                b'#coding:iso-8859-15 \xa4\n'
    191                b'print(ascii("\xc3\xa4"))\n')
    192         self.check_script_output(src, br"'\xc3\u20ac'")
    193 
    194     def test_utf8_bom(self):
    195         src = (b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n')
    196         self.check_script_output(src, br"'\xe4'")
    197 
    198     def test_utf8_bom_and_utf8_coding_line(self):
    199         src = (b'\xef\xbb\xbf#coding:utf-8\n'
    200                b'print(ascii("\xc3\xa4"))\n')
    201         self.check_script_output(src, br"'\xe4'")
    202 
    203 
    204 class BytesSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
    205 
    206     def check_script_output(self, src, expected):
    207         with captured_stdout() as stdout:
    208             exec(src)
    209         out = stdout.getvalue().encode('latin1')
    210         self.assertEqual(out.rstrip(), expected)
    211 
    212 
    213 class FileSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
    214 
    215     def check_script_output(self, src, expected):
    216         with tempfile.TemporaryDirectory() as tmpd:
    217             fn = os.path.join(tmpd, 'test.py')
    218             with open(fn, 'wb') as fp:
    219                 fp.write(src)
    220             res = script_helper.assert_python_ok(fn)
    221         self.assertEqual(res.out.rstrip(), expected)
    222 
    223 
    224 if __name__ == "__main__":
    225     unittest.main()
    226