#!/usr/bin/env python
#
# test_codecencodings_jp.py
#   Codec encoding tests for Japanese encodings.
#
# Each test class below only supplies data; the test methods themselves come
# from test_multibytecodec_support.TestBase.
#
# NOTE(review): the tuples in ``codectests`` look like
# (input, error handler name, expected result), with ``None`` as the expected
# result wherever malformed input is paired with "strict" -- presumably
# meaning "must raise".  Byte-string inputs appear to be decode cases and
# unicode inputs encode cases; confirm against test_multibytecodec_support.
#

from test import test_support
from test import test_multibytecodec_support
import unittest


class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'cp932'
    # The sample text is loaded under the 'shift_jis' name, not 'cp932'.
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )


class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )
    # (unicode input, expected encoded bytes).  The second, fifth, sixth and
    # seventh input characters are expected to come out as XML references
    # (&real;, &lang;, &#4660; -- 0x1234 is 4660 -- and &rang;).  The
    # references must stay spelled out in ASCII: literal non-ASCII characters
    # in this byte string would not match, and are a syntax error in a
    # Python 2 source file that has no coding declaration.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )


# Cases reused by Test_EUC_JP_COMPAT below.  They repeat the "invalid bytes"
# cases of Test_EUC_JISX0213, minus its final ("\xa1\xc0", ...) entry.
eucjp_commontests = (
    ("abc\x80\x80\xc1\xc4", "strict", None),
    ("abc\xc8", "strict", None),
    ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
    ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
    ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
    ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
    ("\xc1\x64", "strict", None),
)


class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')
    # Shared cases plus euc_jp-specific ones; the last two entries take
    # unicode input (U+00A5, U+203E) and expect the single bytes 0x5c / 0x7e.
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )


# Cases reused by Test_SJIS_COMPAT below.
shiftjis_commonenctests = (
    ("abc\x80\x80\x82\x84", "strict", None),
    ("abc\xf8", "strict", None),
    ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
    ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
    ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
)


class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase,
                       unittest.TestCase):
    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    # Same two inputs as the "sjis vs cp932" cases in Test_CP932, but with
    # different expected code points for the second one
    # (U+2016/U+2212 here versus U+2225/U+FF0D there).
    codectests = shiftjis_commonenctests + (
        ("\\\x7e", "strict", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
    )


class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )
    # (unicode input, expected encoded bytes); as in Test_EUC_JISX0213, the
    # XML references in the expected bytes must remain plain ASCII text.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )


def test_main():
    # Run every test case defined in this module.
    test_support.run_unittest(__name__)


if __name__ == "__main__":
    test_main()