#
# test_codecencodings_jp.py
#   Codec encoding tests for Japanese encodings.
#
# Every test class below mixes test_multibytecodec_support.TestBase into
# unittest.TestCase and is configured purely through class attributes:
#
#   encoding        - name of the codec under test
#   tstring         - sample data returned by load_teststring()
#   codectests      - tuples of (input, error handler name, expected result).
#                     Where the expected result is None, the case presumably
#                     has to raise under that handler; the exact handling
#                     lives in TestBase, which is not part of this file
#                     (TODO confirm there).
#   xmlcharnametest - pair of (unicode input, expected byte string) in which
#                     some characters appear as ASCII "&name;" / "&#NNNN;"
#                     references.
#
# NOTE: this file carries no coding declaration, so every literal must stay
# pure ASCII.  Use escapes and character references, never raw non-ASCII
# characters.
#

from test import test_support
from test import test_multibytecodec_support
import unittest


class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    # cp932 is exercised with the shift_jis sample text.
    encoding = 'cp932'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )


class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )
    # The expected bytes spell U+211C, U+2329, U+1234 and U+232A as the ASCII
    # references &real;, &lang;, &#4660; and &rang; (0x1234 == 4660).
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )


# Cases shared with Test_EUC_JP_COMPAT.
eucjp_commontests = (
    ("abc\x80\x80\xc1\xc4", "strict", None),
    ("abc\xc8", "strict", None),
    ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
    ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
    ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
    ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
    ("\xc1\x64", "strict", None),
)


class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        # The next two entries go the other way round: unicode input,
        # byte-string expectation.
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )


# Cases shared with Test_SJIS_COMPAT.
shiftjis_commonenctests = (
    ("abc\x80\x80\x82\x84", "strict", None),
    ("abc\xf8", "strict", None),
    ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
    ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
    ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
)


class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        ("\\\x7e", "strict", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
    )


class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )
    # The expected bytes spell U+211C, U+2329, U+1234 and U+232A as the ASCII
    # references &real;, &lang;, &#4660; and &rang; (0x1234 == 4660).
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )


def test_main():
    # Run every TestCase defined in this module.
    test_support.run_unittest(__name__)


if __name__ == "__main__":
    test_main()