Home | History | Annotate | Download | only in test
      1 #
      2 # test_codecencodings_jp.py
      3 #   Codec encoding tests for Japanese encodings.
      4 #
      5 
      6 from test import multibytecodec_support
      7 import unittest
      8 
      9 class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
     10     encoding = 'cp932'
     11     tstring = multibytecodec_support.load_teststring('shift_jis')
     12     codectests = (
     13         # invalid bytes
     14         (b"abc\x81\x00\x81\x00\x82\x84", "strict",  None),
     15         (b"abc\xf8", "strict",  None),
     16         (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
     17         (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
     18         (b"abc\x81\x00\x82\x84", "ignore",  "abc\x00\uff44"),
     19         (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
     20         (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
     21         (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
     22         # sjis vs cp932
     23         (b"\\\x7e", "replace", "\\\x7e"),
     24         (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
     25     )
     26 
     27 euc_commontests = (
     28     # invalid bytes
     29     (b"abc\x80\x80\xc1\xc4", "strict",  None),
     30     (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
     31     (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
     32     (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
     33     (b"abc\xc8", "strict",  None),
     34     (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
     35     (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
     36     (b"\xc1\x64", "strict", None),
     37     (b"\xa1\xc0", "strict", "\uff3c"),
     38     (b"\xa1\xc0\\", "strict", "\uff3c\\"),
     39     (b"\x8eXY", "replace", "\ufffdXY"),
     40 )
     41 
     42 class Test_EUC_JIS_2004(multibytecodec_support.TestBase,
     43                         unittest.TestCase):
     44     encoding = 'euc_jis_2004'
     45     tstring = multibytecodec_support.load_teststring('euc_jisx0213')
     46     codectests = euc_commontests
     47     xmlcharnametest = (
     48         "\xab\u211c\xbb = \u2329\u1234\u232a",
     49         b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩"
     50     )
     51 
     52 class Test_EUC_JISX0213(multibytecodec_support.TestBase,
     53                         unittest.TestCase):
     54     encoding = 'euc_jisx0213'
     55     tstring = multibytecodec_support.load_teststring('euc_jisx0213')
     56     codectests = euc_commontests
     57     xmlcharnametest = (
     58         "\xab\u211c\xbb = \u2329\u1234\u232a",
     59         b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩"
     60     )
     61 
     62 class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
     63                          unittest.TestCase):
     64     encoding = 'euc_jp'
     65     tstring = multibytecodec_support.load_teststring('euc_jp')
     66     codectests = euc_commontests + (
     67         ("\xa5", "strict", b"\x5c"),
     68         ("\u203e", "strict", b"\x7e"),
     69     )
     70 
     71 shiftjis_commonenctests = (
     72     (b"abc\x80\x80\x82\x84", "strict",  None),
     73     (b"abc\xf8", "strict",  None),
     74     (b"abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
     75 )
     76 
     77 class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
     78     encoding = 'shift_jis'
     79     tstring = multibytecodec_support.load_teststring('shift_jis')
     80     codectests = shiftjis_commonenctests + (
     81         (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
     82         (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
     83 
     84         (b"\\\x7e", "strict", "\\\x7e"),
     85         (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
     86         (b"abc\x81\x39", "replace",  "abc\ufffd9"),
     87         (b"abc\xEA\xFC", "replace",  "abc\ufffd\ufffd"),
     88         (b"abc\xFF\x58", "replace",  "abc\ufffdX"),
     89     )
     90 
     91 class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
     92     encoding = 'shift_jis_2004'
     93     tstring = multibytecodec_support.load_teststring('shift_jis')
     94     codectests = shiftjis_commonenctests + (
     95         (b"\\\x7e", "strict", "\xa5\u203e"),
     96         (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
     97         (b"abc\xEA\xFC", "strict",  "abc\u64bf"),
     98         (b"\x81\x39xy", "replace",  "\ufffd9xy"),
     99         (b"\xFF\x58xy", "replace",  "\ufffdXxy"),
    100         (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
    101         (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
    102         (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
    103     )
    104     xmlcharnametest = (
    105         "\xab\u211c\xbb = \u2329\u1234\u232a",
    106         b"\x85Gℜ\x85Q = ⟨ሴ⟩"
    107     )
    108 
    109 class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    110     encoding = 'shift_jisx0213'
    111     tstring = multibytecodec_support.load_teststring('shift_jisx0213')
    112     codectests = shiftjis_commonenctests + (
    113         (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
    114         (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
    115 
    116         # sjis vs cp932
    117         (b"\\\x7e", "replace", "\xa5\u203e"),
    118         (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    119     )
    120     xmlcharnametest = (
    121         "\xab\u211c\xbb = \u2329\u1234\u232a",
    122         b"\x85Gℜ\x85Q = ⟨ሴ⟩"
    123     )
    124 
    125 if __name__ == "__main__":
    126     unittest.main()
    127