1 # 2 # test_codecencodings_jp.py 3 # Codec encoding tests for Japanese encodings. 4 # 5 6 from test import multibytecodec_support 7 import unittest 8 9 class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase): 10 encoding = 'cp932' 11 tstring = multibytecodec_support.load_teststring('shift_jis') 12 codectests = ( 13 # invalid bytes 14 (b"abc\x81\x00\x81\x00\x82\x84", "strict", None), 15 (b"abc\xf8", "strict", None), 16 (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"), 17 (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"), 18 (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"), 19 (b"ab\xEBxy", "replace", "ab\uFFFDxy"), 20 (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"), 21 (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'), 22 # sjis vs cp932 23 (b"\\\x7e", "replace", "\\\x7e"), 24 (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"), 25 ) 26 27 euc_commontests = ( 28 # invalid bytes 29 (b"abc\x80\x80\xc1\xc4", "strict", None), 30 (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"), 31 (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"), 32 (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), 33 (b"abc\xc8", "strict", None), 34 (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"), 35 (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"), 36 (b"\xc1\x64", "strict", None), 37 (b"\xa1\xc0", "strict", "\uff3c"), 38 (b"\xa1\xc0\\", "strict", "\uff3c\\"), 39 (b"\x8eXY", "replace", "\ufffdXY"), 40 ) 41 42 class Test_EUC_JIS_2004(multibytecodec_support.TestBase, 43 unittest.TestCase): 44 encoding = 'euc_jis_2004' 45 tstring = multibytecodec_support.load_teststring('euc_jisx0213') 46 codectests = euc_commontests 47 xmlcharnametest = ( 48 "\xab\u211c\xbb = \u2329\u1234\u232a", 49 b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩" 50 ) 51 52 class Test_EUC_JISX0213(multibytecodec_support.TestBase, 53 unittest.TestCase): 54 encoding = 'euc_jisx0213' 55 tstring = multibytecodec_support.load_teststring('euc_jisx0213') 56 codectests = euc_commontests 57 xmlcharnametest = ( 58 "\xab\u211c\xbb = \u2329\u1234\u232a", 59 b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩" 60 ) 61 62 class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase, 63 unittest.TestCase): 64 encoding = 'euc_jp' 65 tstring = multibytecodec_support.load_teststring('euc_jp') 66 codectests = euc_commontests + ( 67 ("\xa5", "strict", b"\x5c"), 68 ("\u203e", "strict", b"\x7e"), 69 ) 70 71 shiftjis_commonenctests = ( 72 (b"abc\x80\x80\x82\x84", "strict", None), 73 (b"abc\xf8", "strict", None), 74 (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"), 75 ) 76 77 class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase): 78 encoding = 'shift_jis' 79 tstring = multibytecodec_support.load_teststring('shift_jis') 80 codectests = shiftjis_commonenctests + ( 81 (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"), 82 (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"), 83 84 (b"\\\x7e", "strict", "\\\x7e"), 85 (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"), 86 (b"abc\x81\x39", "replace", "abc\ufffd9"), 87 (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"), 88 (b"abc\xFF\x58", "replace", "abc\ufffdX"), 89 ) 90 91 class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase): 92 encoding = 'shift_jis_2004' 93 tstring = multibytecodec_support.load_teststring('shift_jis') 94 codectests = shiftjis_commonenctests + ( 95 (b"\\\x7e", "strict", "\xa5\u203e"), 96 (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"), 97 (b"abc\xEA\xFC", "strict", "abc\u64bf"), 98 (b"\x81\x39xy", "replace", "\ufffd9xy"), 99 (b"\xFF\x58xy", "replace", "\ufffdXxy"), 100 (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"), 101 (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"), 102 (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'), 103 ) 104 xmlcharnametest = ( 105 "\xab\u211c\xbb = \u2329\u1234\u232a", 106 b"\x85Gℜ\x85Q = ⟨ሴ⟩" 107 ) 108 109 class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase): 110 encoding = 'shift_jisx0213' 111 tstring = multibytecodec_support.load_teststring('shift_jisx0213') 112 codectests = shiftjis_commonenctests + ( 113 (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"), 114 (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"), 115 116 # sjis vs cp932 117 (b"\\\x7e", "replace", "\xa5\u203e"), 118 (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"), 119 ) 120 xmlcharnametest = ( 121 "\xab\u211c\xbb = \u2329\u1234\u232a", 122 b"\x85Gℜ\x85Q = ⟨ሴ⟩" 123 ) 124 125 if __name__ == "__main__": 126 unittest.main() 127