1 # Copyright (C) 2002-2006 Python Software Foundation 2 # Contact: email-sig (at] python.org 3 # email package unit tests for (optional) Asian codecs 4 5 import unittest 6 7 from test.test_email import TestEmailBase 8 from email.charset import Charset 9 from email.header import Header, decode_header 10 from email.message import Message 11 12 # We're compatible with Python 2.3, but it doesn't have the built-in Asian 13 # codecs, so we have to skip all these tests. 14 try: 15 str(b'foo', 'euc-jp') 16 except LookupError: 17 raise unittest.SkipTest 18 19 20 21 class TestEmailAsianCodecs(TestEmailBase): 22 def test_japanese_codecs(self): 23 eq = self.ndiffAssertEqual 24 jcode = "euc-jp" 25 gcode = "iso-8859-1" 26 j = Charset(jcode) 27 g = Charset(gcode) 28 h = Header("Hello World!") 29 jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc' 30 b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode) 31 ghello = str(b'Gr\xfc\xdf Gott!', gcode) 32 h.append(jhello, j) 33 h.append(ghello, g) 34 # BAW: This used to -- and maybe should -- fold the two iso-8859-1 35 # chunks into a single encoded word. However it doesn't violate the 36 # standard to have them as two encoded chunks and maybe it's 37 # reasonable <wink> for each .append() call to result in a separate 38 # encoded word. 39 eq(h.encode(), """\ 40 Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?= 41 =?iso-8859-1?q?Gr=FC=DF_Gott!?=""") 42 eq(decode_header(h.encode()), 43 [(b'Hello World! ', None), 44 (b'\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'), 45 (b'Gr\xfc\xdf Gott!', gcode)]) 46 subject_bytes = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5' 47 b'\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2' 48 b'\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3' 49 b'\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9') 50 subject = str(subject_bytes, jcode) 51 h = Header(subject, j, header_name="Subject") 52 # test a very long header 53 enc = h.encode() 54 # TK: splitting point may differ by codec design and/or Header encoding 55 eq(enc , """\ 56 =?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?= 57 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""") 58 # TK: full decode comparison 59 eq(str(h).encode(jcode), subject_bytes) 60 61 def test_payload_encoding_utf8(self): 62 jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc' 63 b'\xa5\xeb\xa5\xc9\xa1\xaa', 'euc-jp') 64 msg = Message() 65 msg.set_payload(jhello, 'utf-8') 66 ustr = msg.get_payload(decode=True).decode(msg.get_content_charset()) 67 self.assertEqual(jhello, ustr) 68 69 def test_payload_encoding(self): 70 jcode = 'euc-jp' 71 jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc' 72 b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode) 73 msg = Message() 74 msg.set_payload(jhello, jcode) 75 ustr = msg.get_payload(decode=True).decode(msg.get_content_charset()) 76 self.assertEqual(jhello, ustr) 77 78 79 80 if __name__ == '__main__': 81 unittest.main() 82