Home | History | Annotate | Download | only in test
      1 #
      2 # test_codecencodings_cn.py
      3 #   Codec encoding tests for PRC encodings.
      4 #
      5 
      6 from test import test_support
      7 from test import test_multibytecodec_support
      8 import unittest
      9 
     10 class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
     11     encoding = 'gb2312'
     12     tstring = test_multibytecodec_support.load_teststring('gb2312')
     13     codectests = (
     14         # invalid bytes
     15         ("abc\x81\x81\xc1\xc4", "strict",  None),
     16         ("abc\xc8", "strict",  None),
     17         ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
     18         ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
     19         ("abc\x81\x81\xc1\xc4", "ignore",  u"abc\u804a"),
     20         ("\xc1\x64", "strict", None),
     21     )
     22 
     23 class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
     24     encoding = 'gbk'
     25     tstring = test_multibytecodec_support.load_teststring('gbk')
     26     codectests = (
     27         # invalid bytes
     28         ("abc\x80\x80\xc1\xc4", "strict",  None),
     29         ("abc\xc8", "strict",  None),
     30         ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
     31         ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
     32         ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u804a"),
     33         ("\x83\x34\x83\x31", "strict", None),
     34         (u"\u30fb", "strict", None),
     35     )
     36 
     37 class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
     38     encoding = 'gb18030'
     39     tstring = test_multibytecodec_support.load_teststring('gb18030')
     40     codectests = (
     41         # invalid bytes
     42         ("abc\x80\x80\xc1\xc4", "strict",  None),
     43         ("abc\xc8", "strict",  None),
     44         ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
     45         ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
     46         ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u804a"),
     47         ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
     48         (u"\u30fb", "strict", "\x819\xa79"),
     49     )
     50     has_iso10646 = True
     51 
     52 class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
     53     encoding = 'hz'
     54     tstring = test_multibytecodec_support.load_teststring('hz')
     55     codectests = (
     56         # test '~\n' (3 lines)
     57         (b'This sentence is in ASCII.\n'
     58          b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
     59          b'~{NpJ)l6HK!#~}Bye.\n',
     60          'strict',
     61          u'This sentence is in ASCII.\n'
     62          u'The next sentence is in GB.'
     63          u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
     64          u'Bye.\n'),
     65         # test '~\n' (4 lines)
     66         (b'This sentence is in ASCII.\n'
     67          b'The next sentence is in GB.~\n'
     68          b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
     69          b'Bye.\n',
     70          'strict',
     71          u'This sentence is in ASCII.\n'
     72          u'The next sentence is in GB.'
     73          u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
     74          u'Bye.\n'),
     75         # invalid bytes
     76         (b'ab~cd', 'replace', u'ab\uFFFDd'),
     77         (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
     78         (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
     79     )
     80 
     81 def test_main():
     82     test_support.run_unittest(__name__)
     83 
# Script entry point: call test_main() only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    test_main()
     86