Home | History | Annotate | Download | only in test_email
      1 # Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
      3 # email package unit tests
      4 
      5 import re
      6 import time
      7 import base64
      8 import unittest
      9 import textwrap
     10 
     11 from io import StringIO, BytesIO
     12 from itertools import chain
     13 from random import choice
     14 from socket import getfqdn
     15 try:
     16     from threading import Thread
     17 except ImportError:
     18     from dummy_threading import Thread
     19 
     20 import email
     21 import email.policy
     22 
     23 from email.charset import Charset
     24 from email.header import Header, decode_header, make_header
     25 from email.parser import Parser, HeaderParser
     26 from email.generator import Generator, DecodedGenerator, BytesGenerator
     27 from email.message import Message
     28 from email.mime.application import MIMEApplication
     29 from email.mime.audio import MIMEAudio
     30 from email.mime.text import MIMEText
     31 from email.mime.image import MIMEImage
     32 from email.mime.base import MIMEBase
     33 from email.mime.message import MIMEMessage
     34 from email.mime.multipart import MIMEMultipart
     35 from email.mime.nonmultipart import MIMENonMultipart
     36 from email import utils
     37 from email import errors
     38 from email import encoders
     39 from email import iterators
     40 from email import base64mime
     41 from email import quoprimime
     42 
     43 from test.support import unlink, start_threads
     44 from test.test_email import openfile, TestEmailBase
     45 
     46 # These imports are documented to work, but we are testing them using a
     47 # different path, so we import them here just to make sure they are importable.
     48 from email.parser import FeedParser, BytesFeedParser
     49 
     50 NL = '\n'
     51 EMPTYSTRING = ''
     52 SPACE = ' '
     53 
     54 
     55 # Test various aspects of the Message class's API
     56 class TestMessageAPI(TestEmailBase):
     57     def test_get_all(self):
     58         eq = self.assertEqual
     59         msg = self._msgobj('msg_20.txt')
     60         eq(msg.get_all('cc'), ['ccc (at] zzz.org', 'ddd (at] zzz.org', 'eee (at] zzz.org'])
     61         eq(msg.get_all('xx', 'n/a'), 'n/a')
     62 
     63     def test_getset_charset(self):
     64         eq = self.assertEqual
     65         msg = Message()
     66         eq(msg.get_charset(), None)
     67         charset = Charset('iso-8859-1')
     68         msg.set_charset(charset)
     69         eq(msg['mime-version'], '1.0')
     70         eq(msg.get_content_type(), 'text/plain')
     71         eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
     72         eq(msg.get_param('charset'), 'iso-8859-1')
     73         eq(msg['content-transfer-encoding'], 'quoted-printable')
     74         eq(msg.get_charset().input_charset, 'iso-8859-1')
     75         # Remove the charset
     76         msg.set_charset(None)
     77         eq(msg.get_charset(), None)
     78         eq(msg['content-type'], 'text/plain')
     79         # Try adding a charset when there's already MIME headers present
     80         msg = Message()
     81         msg['MIME-Version'] = '2.0'
     82         msg['Content-Type'] = 'text/x-weird'
     83         msg['Content-Transfer-Encoding'] = 'quinted-puntable'
     84         msg.set_charset(charset)
     85         eq(msg['mime-version'], '2.0')
     86         eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
     87         eq(msg['content-transfer-encoding'], 'quinted-puntable')
     88 
     89     def test_set_charset_from_string(self):
     90         eq = self.assertEqual
     91         msg = Message()
     92         msg.set_charset('us-ascii')
     93         eq(msg.get_charset().input_charset, 'us-ascii')
     94         eq(msg['content-type'], 'text/plain; charset="us-ascii"')
     95 
     96     def test_set_payload_with_charset(self):
     97         msg = Message()
     98         charset = Charset('iso-8859-1')
     99         msg.set_payload('This is a string payload', charset)
    100         self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
    101 
    102     def test_set_payload_with_8bit_data_and_charset(self):
    103         data = b'\xd0\x90\xd0\x91\xd0\x92'
    104         charset = Charset('utf-8')
    105         msg = Message()
    106         msg.set_payload(data, charset)
    107         self.assertEqual(msg['content-transfer-encoding'], 'base64')
    108         self.assertEqual(msg.get_payload(decode=True), data)
    109         self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
    110 
    111     def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
    112         data = b'\xd0\x90\xd0\x91\xd0\x92'
    113         charset = Charset('utf-8')
    114         charset.body_encoding = None # Disable base64 encoding
    115         msg = Message()
    116         msg.set_payload(data.decode('utf-8'), charset)
    117         self.assertEqual(msg['content-transfer-encoding'], '8bit')
    118         self.assertEqual(msg.get_payload(decode=True), data)
    119 
    120     def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
    121         data = b'\xd0\x90\xd0\x91\xd0\x92'
    122         charset = Charset('utf-8')
    123         charset.body_encoding = None # Disable base64 encoding
    124         msg = Message()
    125         msg.set_payload(data, charset)
    126         self.assertEqual(msg['content-transfer-encoding'], '8bit')
    127         self.assertEqual(msg.get_payload(decode=True), data)
    128 
    129     def test_set_payload_to_list(self):
    130         msg = Message()
    131         msg.set_payload([])
    132         self.assertEqual(msg.get_payload(), [])
    133 
    134     def test_attach_when_payload_is_string(self):
    135         msg = Message()
    136         msg['Content-Type'] = 'multipart/mixed'
    137         msg.set_payload('string payload')
    138         sub_msg = MIMEMessage(Message())
    139         self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
    140                                msg.attach, sub_msg)
    141 
    def test_get_charsets(self):
        """get_charsets() lists one entry per part, using the failobj if given."""
        # (fixture, positional args for get_charsets, expected result)
        scenarios = (
            ('msg_08.txt', (),
             [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r']),
            ('msg_09.txt', ('dingbat',),
             ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat', 'koi8-r']),
            ('msg_12.txt', (),
             [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
              'iso-8859-3', 'us-ascii', 'koi8-r']),
        )
        for fixture, args, expected in scenarios:
            found = self._msgobj(fixture).get_charsets(*args)
            self.assertEqual(found, expected)
    158 
    def test_get_filename(self):
        """get_filename() reports the attachment names of fixture subparts."""
        parts = self._msgobj('msg_04.txt').get_payload()
        self.assertEqual([part.get_filename() for part in parts],
                         ['msg.txt', 'msg.txt'])

        second_part = self._msgobj('msg_07.txt').get_payload(1)
        self.assertEqual(second_part.get_filename(), 'dingusfish.gif')
    169 
    def test_get_filename_with_name_parameter(self):
        """Every subpart of msg_44.txt is expected to report 'msg.txt'."""
        parts = self._msgobj('msg_44.txt').get_payload()
        names = [part.get_filename() for part in parts]
        self.assertEqual(names, ['msg.txt', 'msg.txt'])
    176 
    def test_get_boundary(self):
        """get_boundary() returns the boundary string without quotes."""
        multipart = self._msgobj('msg_07.txt')
        # No quotes!
        self.assertEqual(multipart.get_boundary(), 'BOUNDARY')
    182 
    def test_set_boundary(self):
        """set_boundary() keeps header order and needs a Content-Type header."""
        check = self.assertEqual

        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        plain = self._msgobj('msg_01.txt')
        plain.set_boundary('BOUNDARY')
        field, content_type = plain.items()[4]
        check(field.lower(), 'content-type')
        check(content_type,
              'text/plain; charset="us-ascii"; boundary="BOUNDARY"')

        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        mixed = self._msgobj('msg_04.txt')
        mixed.set_boundary('BOUNDARY')
        field, content_type = mixed.items()[4]
        check(field.lower(), 'content-type')
        check(content_type, 'multipart/mixed; boundary="BOUNDARY"')

        # And this one has no Content-Type: header at all.
        headerless = self._msgobj('msg_03.txt')
        with self.assertRaises(errors.HeaderParseError):
            headerless.set_boundary('BOUNDARY')
    204 
    205     def test_make_boundary(self):
    206         msg = MIMEMultipart('form-data')
    207         # Note that when the boundary gets created is an implementation
    208         # detail and might change.
    209         self.assertEqual(msg.items()[0][1], 'multipart/form-data')
    210         # Trigger creation of boundary
    211         msg.as_string()
    212         self.assertEqual(msg.items()[0][1][:33],
    213                         'multipart/form-data; boundary="==')
    214         # XXX: there ought to be tests of the uniqueness of the boundary, too.
    215 
    def test_message_rfc822_only(self):
        """msg_46.txt round-trips through HeaderParser and Generator."""
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as source:
            original_text = source.read()
        parsed = HeaderParser().parsestr(original_text)
        sink = StringIO()
        Generator(sink, True, 0).flatten(parsed, False)
        self.assertEqual(sink.getvalue(), original_text)
    227 
    def test_byte_message_rfc822_only(self):
        """msg_46.txt round-trips through the bytes header parser/generator."""
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt') as source:
            original_bytes = source.read().encode('ascii')
        parsed = email.parser.BytesHeaderParser().parsebytes(original_bytes)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(parsed)
        self.assertEqual(sink.getvalue(), original_bytes)
    238 
    def test_get_decoded_payload(self):
        """get_payload(decode=True) decodes each subpart of msg_10.txt."""
        outer = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertEqual(outer.get_payload(decode=True), None)
        expected_by_part = (
            # Subpart 1 is 7bit encoded
            b'This is a 7bit encoded message.\n',
            # Subpart 2 is quopri
            b'\xa1This is a Quoted Printable encoded message!\n',
            # Subpart 3 is base64
            b'This is a Base64 encoded message.',
            # Subpart 4 is base64 with a trailing newline, which
            # used to be stripped (issue 7143).
            b'This is a Base64 encoded message.\n',
            # Subpart 5 has no Content-Transfer-Encoding: header.
            b'This has no Content-Transfer-Encoding: header.\n',
        )
        for index, expected in enumerate(expected_by_part):
            decoded = outer.get_payload(index).get_payload(decode=True)
            self.assertEqual(decoded, expected)
    260 
    261     def test_get_decoded_uu_payload(self):
    262         eq = self.assertEqual
    263         msg = Message()
    264         msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
    265         for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
    266             msg['content-transfer-encoding'] = cte
    267             eq(msg.get_payload(decode=True), b'hello world')
    268         # Now try some bogus data
    269         msg.set_payload('foo')
    270         eq(msg.get_payload(decode=True), b'foo')
    271 
    272     def test_get_payload_n_raises_on_non_multipart(self):
    273         msg = Message()
    274         self.assertRaises(TypeError, msg.get_payload, 1)
    275 
    def test_decoded_generator(self):
        """DecodedGenerator output for msg_07.txt matches msg_17.txt."""
        parsed = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as source:
            expected = source.read()
        sink = StringIO()
        DecodedGenerator(sink).flatten(parsed)
        self.assertEqual(sink.getvalue(), expected)
    285 
    286     def test__contains__(self):
    287         msg = Message()
    288         msg['From'] = 'Me'
    289         msg['to'] = 'You'
    290         # Check for case insensitivity
    291         self.assertIn('from', msg)
    292         self.assertIn('From', msg)
    293         self.assertIn('FROM', msg)
    294         self.assertIn('to', msg)
    295         self.assertIn('To', msg)
    296         self.assertIn('TO', msg)
    297 
    def test_as_string(self):
        """str(msg) reproduces the fixture; unixfrom=True adds a From line."""
        parsed = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as source:
            expected = source.read()
        self.assertEqual(expected, str(parsed))
        with_envelope = parsed.as_string(unixfrom=True)
        envelope, *body_lines = with_envelope.split('\n')
        self.assertTrue(envelope.startswith('From '))
        self.assertEqual(expected, NL.join(body_lines))
    307 
    def test_as_string_policy(self):
        """as_string(policy=...) matches a Generator using the same policy."""
        parsed = self._msgobj('msg_01.txt')
        crlf_policy = parsed.policy.clone(linesep='\r\n')
        via_method = parsed.as_string(policy=crlf_policy)
        sink = StringIO()
        Generator(sink, policy=crlf_policy).flatten(parsed)
        self.assertEqual(via_method, sink.getvalue())
    316 
    def test_as_bytes(self):
        """bytes(msg) reproduces the fixture; unixfrom=True adds a From line."""
        parsed = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as source:
            expected = source.read().encode('ascii')
        self.assertEqual(expected, bytes(parsed))
        with_envelope = parsed.as_bytes(unixfrom=True)
        envelope, *body_lines = with_envelope.split(b'\n')
        self.assertTrue(envelope.startswith(b'From '))
        self.assertEqual(expected, b'\n'.join(body_lines))
    326 
    def test_as_bytes_policy(self):
        """as_bytes(policy=...) matches a BytesGenerator using that policy."""
        parsed = self._msgobj('msg_01.txt')
        crlf_policy = parsed.policy.clone(linesep='\r\n')
        via_method = parsed.as_bytes(policy=crlf_policy)
        sink = BytesIO()
        BytesGenerator(sink, policy=crlf_policy).flatten(parsed)
        self.assertEqual(via_method, sink.getvalue())
    335 
    336     # test_headerregistry.TestContentTypeHeader.bad_params
    337     def test_bad_param(self):
    338         msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
    339         self.assertEqual(msg.get_param('baz'), '')
    340 
    341     def test_missing_filename(self):
    342         msg = email.message_from_string("From: foo\n")
    343         self.assertEqual(msg.get_filename(), None)
    344 
    345     def test_bogus_filename(self):
    346         msg = email.message_from_string(
    347         "Content-Disposition: blarg; filename\n")
    348         self.assertEqual(msg.get_filename(), '')
    349 
    350     def test_missing_boundary(self):
    351         msg = email.message_from_string("From: foo\n")
    352         self.assertEqual(msg.get_boundary(), None)
    353 
    354     def test_get_params(self):
    355         eq = self.assertEqual
    356         msg = email.message_from_string(
    357             'X-Header: foo=one; bar=two; baz=three\n')
    358         eq(msg.get_params(header='x-header'),
    359            [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
    360         msg = email.message_from_string(
    361             'X-Header: foo; bar=one; baz=two\n')
    362         eq(msg.get_params(header='x-header'),
    363            [('foo', ''), ('bar', 'one'), ('baz', 'two')])
    364         eq(msg.get_params(), None)
    365         msg = email.message_from_string(
    366             'X-Header: foo; bar="one"; baz=two\n')
    367         eq(msg.get_params(header='x-header'),
    368            [('foo', ''), ('bar', 'one'), ('baz', 'two')])
    369 
    370     # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    371     def test_get_param_liberal(self):
    372         msg = Message()
    373         msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
    374         self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
    375 
    376     def test_get_param(self):
    377         eq = self.assertEqual
    378         msg = email.message_from_string(
    379             "X-Header: foo=one; bar=two; baz=three\n")
    380         eq(msg.get_param('bar', header='x-header'), 'two')
    381         eq(msg.get_param('quuz', header='x-header'), None)
    382         eq(msg.get_param('quuz'), None)
    383         msg = email.message_from_string(
    384             'X-Header: foo; bar="one"; baz=two\n')
    385         eq(msg.get_param('foo', header='x-header'), '')
    386         eq(msg.get_param('bar', header='x-header'), 'one')
    387         eq(msg.get_param('baz', header='x-header'), 'two')
    388         # XXX: We are not RFC-2045 compliant!  We cannot parse:
    389         # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
    390         # msg.get_param("weird")
    391         # yet.
    392 
    393     # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        """The second part of msg_22.txt is expected to be named wibble.JPG."""
        second_part = self._msgobj('msg_22.txt').get_payload(1)
        self.assertEqual(second_part.get_param('name'), 'wibble.JPG')
    397 
    398     # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    399     def test_get_param_with_semis_in_quotes(self):
    400         msg = email.message_from_string(
    401             'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
    402         self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
    403         self.assertEqual(msg.get_param('name', unquote=False),
    404                          '"Jim&amp;&amp;Jill"')
    405 
    406     # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    407     def test_get_param_with_quotes(self):
    408         msg = email.message_from_string(
    409             'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
    410         self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
    411         msg = email.message_from_string(
    412             "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
    413         self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
    414 
    415     def test_field_containment(self):
    416         msg = email.message_from_string('Header: exists')
    417         self.assertIn('header', msg)
    418         self.assertIn('Header', msg)
    419         self.assertIn('HEADER', msg)
    420         self.assertNotIn('headerx', msg)
    421 
    422     def test_set_param(self):
    423         eq = self.assertEqual
    424         msg = Message()
    425         msg.set_param('charset', 'iso-2022-jp')
    426         eq(msg.get_param('charset'), 'iso-2022-jp')
    427         msg.set_param('importance', 'high value')
    428         eq(msg.get_param('importance'), 'high value')
    429         eq(msg.get_param('importance', unquote=False), '"high value"')
    430         eq(msg.get_params(), [('text/plain', ''),
    431                               ('charset', 'iso-2022-jp'),
    432                               ('importance', 'high value')])
    433         eq(msg.get_params(unquote=False), [('text/plain', ''),
    434                                        ('charset', '"iso-2022-jp"'),
    435                                        ('importance', '"high value"')])
    436         msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
    437         eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
    438 
    def test_del_param(self):
        """del_param() removes a parameter; set_param() re-adds it last."""
        check = self.assertEqual
        report = self._msgobj('msg_05.txt')
        type_pair = ('multipart/report', '')
        boundary_pair = ('boundary', 'D1690A7AC1.996856090/mail.example.com')

        check(report.get_params(),
              [type_pair, ('report-type', 'delivery-status'), boundary_pair])

        saved = report.get_param("report-type")
        report.del_param("report-type")
        check(report.get_params(), [type_pair, boundary_pair])

        report.set_param("report-type", saved)
        check(report.get_params(),
              [type_pair, boundary_pair, ('report-type', saved)])
    455 
    456     def test_del_param_on_other_header(self):
    457         msg = Message()
    458         msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
    459         msg.del_param('filename', 'content-disposition')
    460         self.assertEqual(msg['content-disposition'], 'attachment')
    461 
    462     def test_del_param_on_nonexistent_header(self):
    463         msg = Message()
    464         # Deleting param on empty msg should not raise exception.
    465         msg.del_param('filename', 'content-disposition')
    466 
    467     def test_del_nonexistent_param(self):
    468         msg = Message()
    469         msg.add_header('Content-Type', 'text/plain', charset='utf-8')
    470         existing_header = msg['Content-Type']
    471         msg.del_param('foobar', header='Content-Type')
    472         self.assertEqual(msg['Content-Type'], existing_header)
    473 
    474     def test_set_type(self):
    475         eq = self.assertEqual
    476         msg = Message()
    477         self.assertRaises(ValueError, msg.set_type, 'text')
    478         msg.set_type('text/plain')
    479         eq(msg['content-type'], 'text/plain')
    480         msg.set_param('charset', 'us-ascii')
    481         eq(msg['content-type'], 'text/plain; charset="us-ascii"')
    482         msg.set_type('text/html')
    483         eq(msg['content-type'], 'text/html; charset="us-ascii"')
    484 
    485     def test_set_type_on_other_header(self):
    486         msg = Message()
    487         msg['X-Content-Type'] = 'text/plain'
    488         msg.set_type('application/octet-stream', 'X-Content-Type')
    489         self.assertEqual(msg['x-content-type'], 'application/octet-stream')
    490 
    491     def test_get_content_type_missing(self):
    492         msg = Message()
    493         self.assertEqual(msg.get_content_type(), 'text/plain')
    494 
    495     def test_get_content_type_missing_with_default_type(self):
    496         msg = Message()
    497         msg.set_default_type('message/rfc822')
    498         self.assertEqual(msg.get_content_type(), 'message/rfc822')
    499 
    def test_get_content_type_from_message_implicit(self):
        """First subpart of msg_30.txt is expected to be message/rfc822."""
        first_part = self._msgobj('msg_30.txt').get_payload(0)
        self.assertEqual(first_part.get_content_type(), 'message/rfc822')
    504 
    def test_get_content_type_from_message_explicit(self):
        """First subpart of msg_28.txt is expected to be message/rfc822."""
        first_part = self._msgobj('msg_28.txt').get_payload(0)
        self.assertEqual(first_part.get_content_type(), 'message/rfc822')
    509 
    def test_get_content_type_from_message_text_plain_implicit(self):
        """msg_03.txt is expected to report text/plain."""
        parsed = self._msgobj('msg_03.txt')
        content_type = parsed.get_content_type()
        self.assertEqual(content_type, 'text/plain')
    513 
    def test_get_content_type_from_message_text_plain_explicit(self):
        """msg_01.txt is expected to report text/plain."""
        parsed = self._msgobj('msg_01.txt')
        content_type = parsed.get_content_type()
        self.assertEqual(content_type, 'text/plain')
    517 
    518     def test_get_content_maintype_missing(self):
    519         msg = Message()
    520         self.assertEqual(msg.get_content_maintype(), 'text')
    521 
    522     def test_get_content_maintype_missing_with_default_type(self):
    523         msg = Message()
    524         msg.set_default_type('message/rfc822')
    525         self.assertEqual(msg.get_content_maintype(), 'message')
    526 
    def test_get_content_maintype_from_message_implicit(self):
        """First subpart of msg_30.txt is expected to have main type 'message'."""
        first_part = self._msgobj('msg_30.txt').get_payload(0)
        self.assertEqual(first_part.get_content_maintype(), 'message')
    530 
    def test_get_content_maintype_from_message_explicit(self):
        """First subpart of msg_28.txt is expected to have main type 'message'."""
        first_part = self._msgobj('msg_28.txt').get_payload(0)
        self.assertEqual(first_part.get_content_maintype(), 'message')
    534 
    def test_get_content_maintype_from_message_text_plain_implicit(self):
        """msg_03.txt is expected to have main type 'text'."""
        parsed = self._msgobj('msg_03.txt')
        maintype = parsed.get_content_maintype()
        self.assertEqual(maintype, 'text')
    538 
    def test_get_content_maintype_from_message_text_plain_explicit(self):
        """msg_01.txt is expected to have main type 'text'."""
        parsed = self._msgobj('msg_01.txt')
        maintype = parsed.get_content_maintype()
        self.assertEqual(maintype, 'text')
    542 
    543     def test_get_content_subtype_missing(self):
    544         msg = Message()
    545         self.assertEqual(msg.get_content_subtype(), 'plain')
    546 
    547     def test_get_content_subtype_missing_with_default_type(self):
    548         msg = Message()
    549         msg.set_default_type('message/rfc822')
    550         self.assertEqual(msg.get_content_subtype(), 'rfc822')
    551 
    def test_get_content_subtype_from_message_implicit(self):
        """First subpart of msg_30.txt is expected to have subtype 'rfc822'."""
        first_part = self._msgobj('msg_30.txt').get_payload(0)
        self.assertEqual(first_part.get_content_subtype(), 'rfc822')
    555 
    def test_get_content_subtype_from_message_explicit(self):
        """First subpart of msg_28.txt is expected to have subtype 'rfc822'."""
        first_part = self._msgobj('msg_28.txt').get_payload(0)
        self.assertEqual(first_part.get_content_subtype(), 'rfc822')
    559 
    def test_get_content_subtype_from_message_text_plain_implicit(self):
        """msg_03.txt is expected to have subtype 'plain'."""
        parsed = self._msgobj('msg_03.txt')
        subtype = parsed.get_content_subtype()
        self.assertEqual(subtype, 'plain')
    563 
    def test_get_content_subtype_from_message_text_plain_explicit(self):
        """msg_01.txt is expected to have subtype 'plain'."""
        parsed = self._msgobj('msg_01.txt')
        subtype = parsed.get_content_subtype()
        self.assertEqual(subtype, 'plain')
    567 
    568     def test_get_content_maintype_error(self):
    569         msg = Message()
    570         msg['Content-Type'] = 'no-slash-in-this-string'
    571         self.assertEqual(msg.get_content_maintype(), 'text')
    572 
    573     def test_get_content_subtype_error(self):
    574         msg = Message()
    575         msg['Content-Type'] = 'no-slash-in-this-string'
    576         self.assertEqual(msg.get_content_subtype(), 'plain')
    577 
    578     def test_replace_header(self):
    579         eq = self.assertEqual
    580         msg = Message()
    581         msg.add_header('First', 'One')
    582         msg.add_header('Second', 'Two')
    583         msg.add_header('Third', 'Three')
    584         eq(msg.keys(), ['First', 'Second', 'Third'])
    585         eq(msg.values(), ['One', 'Two', 'Three'])
    586         msg.replace_header('Second', 'Twenty')
    587         eq(msg.keys(), ['First', 'Second', 'Third'])
    588         eq(msg.values(), ['One', 'Twenty', 'Three'])
    589         msg.add_header('First', 'Eleven')
    590         msg.replace_header('First', 'One Hundred')
    591         eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
    592         eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
    593         self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
    594 
    595     def test_get_content_disposition(self):
    596         msg = Message()
    597         self.assertIsNone(msg.get_content_disposition())
    598         msg.add_header('Content-Disposition', 'attachment',
    599                        filename='random.avi')
    600         self.assertEqual(msg.get_content_disposition(), 'attachment')
    601         msg.replace_header('Content-Disposition', 'inline')
    602         self.assertEqual(msg.get_content_disposition(), 'inline')
    603         msg.replace_header('Content-Disposition', 'InlinE')
    604         self.assertEqual(msg.get_content_disposition(), 'inline')
    605 
    606     # test_defect_handling:test_invalid_chars_in_base64_payload
    607     def test_broken_base64_payload(self):
    608         x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
    609         msg = Message()
    610         msg['content-type'] = 'audio/x-midi'
    611         msg['content-transfer-encoding'] = 'base64'
    612         msg.set_payload(x)
    613         self.assertEqual(msg.get_payload(decode=True),
    614                          (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
    615                           b'\xa1\x00p\xf6\xbf\xe9\x0f'))
    616         self.assertIsInstance(msg.defects[0],
    617                               errors.InvalidBase64CharactersDefect)
    618 
    619     def test_broken_unicode_payload(self):
    620         # This test improves coverage but is not a compliance test.
    621         # The behavior in this situation is currently undefined by the API.
    622         x = 'this is a br\xf6ken thing to do'
    623         msg = Message()
    624         msg['content-type'] = 'text/plain'
    625         msg['content-transfer-encoding'] = '8bit'
    626         msg.set_payload(x)
    627         self.assertEqual(msg.get_payload(decode=True),
    628                          bytes(x, 'raw-unicode-escape'))
    629 
    630     def test_questionable_bytes_payload(self):
    631         # This test improves coverage but is not a compliance test,
    632         # since it involves poking inside the black box.
    633         x = 'this is a qustionable thing to do'.encode('utf-8')
    634         msg = Message()
    635         msg['content-type'] = 'text/plain; charset="utf-8"'
    636         msg['content-transfer-encoding'] = '8bit'
    637         msg._payload = x
    638         self.assertEqual(msg.get_payload(decode=True), x)
    639 
    640     # Issue 1078919
    641     def test_ascii_add_header(self):
    642         msg = Message()
    643         msg.add_header('Content-Disposition', 'attachment',
    644                        filename='bud.gif')
    645         self.assertEqual('attachment; filename="bud.gif"',
    646             msg['Content-Disposition'])
    647 
    648     def test_noascii_add_header(self):
    649         msg = Message()
    650         msg.add_header('Content-Disposition', 'attachment',
    651             filename="Fuballer.ppt")
    652         self.assertEqual(
    653             'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
    654             msg['Content-Disposition'])
    655 
    656     def test_nonascii_add_header_via_triple(self):
    657         msg = Message()
    658         msg.add_header('Content-Disposition', 'attachment',
    659             filename=('iso-8859-1', '', 'Fuballer.ppt'))
    660         self.assertEqual(
    661             'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
    662             msg['Content-Disposition'])
    663 
    664     def test_ascii_add_header_with_tspecial(self):
    665         msg = Message()
    666         msg.add_header('Content-Disposition', 'attachment',
    667             filename="windows [filename].ppt")
    668         self.assertEqual(
    669             'attachment; filename="windows [filename].ppt"',
    670             msg['Content-Disposition'])
    671 
    672     def test_nonascii_add_header_with_tspecial(self):
    673         msg = Message()
    674         msg.add_header('Content-Disposition', 'attachment',
    675             filename="Fuballer [filename].ppt")
    676         self.assertEqual(
    677             "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
    678             msg['Content-Disposition'])
    679 
    680     def test_binary_quopri_payload(self):
    681         for charset in ('latin-1', 'ascii'):
    682             msg = Message()
    683             msg['content-type'] = 'text/plain; charset=%s' % charset
    684             msg['content-transfer-encoding'] = 'quoted-printable'
    685             msg.set_payload(b'foo=e6=96=87bar')
    686             self.assertEqual(
    687                 msg.get_payload(decode=True),
    688                 b'foo\xe6\x96\x87bar',
    689                 'get_payload returns wrong result with charset %s.' % charset)
    690 
    691     def test_binary_base64_payload(self):
    692         for charset in ('latin-1', 'ascii'):
    693             msg = Message()
    694             msg['content-type'] = 'text/plain; charset=%s' % charset
    695             msg['content-transfer-encoding'] = 'base64'
    696             msg.set_payload(b'Zm9v5paHYmFy')
    697             self.assertEqual(
    698                 msg.get_payload(decode=True),
    699                 b'foo\xe6\x96\x87bar',
    700                 'get_payload returns wrong result with charset %s.' % charset)
    701 
    702     def test_binary_uuencode_payload(self):
    703         for charset in ('latin-1', 'ascii'):
    704             for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
    705                 msg = Message()
    706                 msg['content-type'] = 'text/plain; charset=%s' % charset
    707                 msg['content-transfer-encoding'] = encoding
    708                 msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
    709                 self.assertEqual(
    710                     msg.get_payload(decode=True),
    711                     b'foo\xe6\x96\x87bar',
    712                     str(('get_payload returns wrong result ',
    713                          'with charset {0} and encoding {1}.')).\
    714                         format(charset, encoding))
    715 
    716     def test_add_header_with_name_only_param(self):
    717         msg = Message()
    718         msg.add_header('Content-Disposition', 'inline', foo_bar=None)
    719         self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
    720 
    721     def test_add_header_with_no_value(self):
    722         msg = Message()
    723         msg.add_header('X-Status', None)
    724         self.assertEqual('', msg['X-Status'])
    725 
    726     # Issue 5871: reject an attempt to embed a header inside a header value
    727     # (header injection attack).
    728     def test_embedded_header_via_Header_rejected(self):
    729         msg = Message()
    730         msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
    731         self.assertRaises(errors.HeaderParseError, msg.as_string)
    732 
    733     def test_embedded_header_via_string_rejected(self):
    734         msg = Message()
    735         msg['Dummy'] = 'dummy\nX-Injected-Header: test'
    736         self.assertRaises(errors.HeaderParseError, msg.as_string)
    737 
    738     def test_unicode_header_defaults_to_utf8_encoding(self):
    739         # Issue 14291
    740         m = MIMEText('abc\n')
    741         m['Subject'] = ' test'
    742         self.assertEqual(str(m),textwrap.dedent("""\
    743             Content-Type: text/plain; charset="us-ascii"
    744             MIME-Version: 1.0
    745             Content-Transfer-Encoding: 7bit
    746             Subject: =?utf-8?q?=C3=89_test?=
    747 
    748             abc
    749             """))
    750 
    751     def test_unicode_body_defaults_to_utf8_encoding(self):
    752         # Issue 14291
    753         m = MIMEText(' testabc\n')
    754         self.assertEqual(str(m),textwrap.dedent("""\
    755             Content-Type: text/plain; charset="utf-8"
    756             MIME-Version: 1.0
    757             Content-Transfer-Encoding: base64
    758 
    759             w4kgdGVzdGFiYwo=
    760             """))
    761 
    762 
    763 # Test the email.encoders module
    764 class TestEncoders(unittest.TestCase):
    765 
    766     def test_EncodersEncode_base64(self):
    767         with openfile('PyBanner048.gif', 'rb') as fp:
    768             bindata = fp.read()
    769         mimed = email.mime.image.MIMEImage(bindata)
    770         base64ed = mimed.get_payload()
    771         # the transfer-encoded body lines should all be <=76 characters
    772         lines = base64ed.split('\n')
    773         self.assertLessEqual(max([ len(x) for x in lines ]), 76)
    774 
    775     def test_encode_empty_payload(self):
    776         eq = self.assertEqual
    777         msg = Message()
    778         msg.set_charset('us-ascii')
    779         eq(msg['content-transfer-encoding'], '7bit')
    780 
    781     def test_default_cte(self):
    782         eq = self.assertEqual
    783         # 7bit data and the default us-ascii _charset
    784         msg = MIMEText('hello world')
    785         eq(msg['content-transfer-encoding'], '7bit')
    786         # Similar, but with 8bit data
    787         msg = MIMEText('hello \xf8 world')
    788         eq(msg['content-transfer-encoding'], 'base64')
    789         # And now with a different charset
    790         msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
    791         eq(msg['content-transfer-encoding'], 'quoted-printable')
    792 
    793     def test_encode7or8bit(self):
    794         # Make sure a charset whose input character set is 8bit but
    795         # whose output character set is 7bit gets a transfer-encoding
    796         # of 7bit.
    797         eq = self.assertEqual
    798         msg = MIMEText('\n', _charset='euc-jp')
    799         eq(msg['content-transfer-encoding'], '7bit')
    800         eq(msg.as_string(), textwrap.dedent("""\
    801             MIME-Version: 1.0
    802             Content-Type: text/plain; charset="iso-2022-jp"
    803             Content-Transfer-Encoding: 7bit
    804 
    805             \x1b$BJ8\x1b(B
    806             """))
    807 
    808     def test_qp_encode_latin1(self):
    809         msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
    810         self.assertEqual(str(msg), textwrap.dedent("""\
    811             MIME-Version: 1.0
    812             Content-Type: text/text; charset="iso-8859-1"
    813             Content-Transfer-Encoding: quoted-printable
    814 
    815             =E1=F6
    816             """))
    817 
    818     def test_qp_encode_non_latin1(self):
    819         # Issue 16948
    820         msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
    821         self.assertEqual(str(msg), textwrap.dedent("""\
    822             MIME-Version: 1.0
    823             Content-Type: text/text; charset="iso-8859-2"
    824             Content-Transfer-Encoding: quoted-printable
    825 
    826             =BF
    827             """))
    828 
    829 
    830 # Test long header wrapping
    831 class TestLongHeaders(TestEmailBase):
    832 
    833     maxDiff = None
    834 
    835     def test_split_long_continuation(self):
    836         eq = self.ndiffAssertEqual
    837         msg = email.message_from_string("""\
    838 Subject: bug demonstration
    839 \t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
    840 \tmore text
    841 
    842 test
    843 """)
    844         sfp = StringIO()
    845         g = Generator(sfp)
    846         g.flatten(msg)
    847         eq(sfp.getvalue(), """\
    848 Subject: bug demonstration
    849 \t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
    850 \tmore text
    851 
    852 test
    853 """)
    854 
    855     def test_another_long_almost_unsplittable_header(self):
    856         eq = self.ndiffAssertEqual
    857         hstr = """\
    858 bug demonstration
    859 \t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
    860 \tmore text"""
    861         h = Header(hstr, continuation_ws='\t')
    862         eq(h.encode(), """\
    863 bug demonstration
    864 \t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
    865 \tmore text""")
    866         h = Header(hstr.replace('\t', ' '))
    867         eq(h.encode(), """\
    868 bug demonstration
    869  12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
    870  more text""")
    871 
    872     def test_long_nonstring(self):
    873         eq = self.ndiffAssertEqual
    874         g = Charset("iso-8859-1")
    875         cz = Charset("iso-8859-2")
    876         utf8 = Charset("utf-8")
    877         g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
    878                   b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
    879                   b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
    880                   b'bef\xf6rdert. ')
    881         cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
    882                    b'd\xf9vtipu.. ')
    883         utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
    884                      '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
    885                      '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
    886                      '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
    887                      '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
    888                      'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
    889                      'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
    890                      '\u3044\u307e\u3059\u3002')
    891         h = Header(g_head, g, header_name='Subject')
    892         h.append(cz_head, cz)
    893         h.append(utf8_head, utf8)
    894         msg = Message()
    895         msg['Subject'] = h
    896         sfp = StringIO()
    897         g = Generator(sfp)
    898         g.flatten(msg)
    899         eq(sfp.getvalue(), """\
    900 Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
    901  =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
    902  =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
    903  =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
    904  =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
    905  =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
    906  =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
    907  =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
    908  =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
    909  =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
    910  =?utf-8?b?44CC?=
    911 
    912 """)
    913         eq(h.encode(maxlinelen=76), """\
    914 =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
    915  =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
    916  =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
    917  =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
    918  =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
    919  =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
    920  =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
    921  =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
    922  =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
    923  =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
    924  =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
    925 
    926     def test_long_header_encode(self):
    927         eq = self.ndiffAssertEqual
    928         h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
    929                    'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
    930                    header_name='X-Foobar-Spoink-Defrobnit')
    931         eq(h.encode(), '''\
    932 wasnipoop; giraffes="very-long-necked-animals";
    933  spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
    934 
    935     def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
    936         eq = self.ndiffAssertEqual
    937         h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
    938                    'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
    939                    header_name='X-Foobar-Spoink-Defrobnit',
    940                    continuation_ws='\t')
    941         eq(h.encode(), '''\
    942 wasnipoop; giraffes="very-long-necked-animals";
    943  spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
    944 
    945     def test_long_header_encode_with_tab_continuation(self):
    946         eq = self.ndiffAssertEqual
    947         h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
    948                    'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
    949                    header_name='X-Foobar-Spoink-Defrobnit',
    950                    continuation_ws='\t')
    951         eq(h.encode(), '''\
    952 wasnipoop; giraffes="very-long-necked-animals";
    953 \tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
    954 
    955     def test_header_encode_with_different_output_charset(self):
    956         h = Header('', 'euc-jp')
    957         self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
    958 
    959     def test_long_header_encode_with_different_output_charset(self):
    960         h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
    961             b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
    962             b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
    963             b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
    964         res = """\
    965 =?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
    966  =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
    967         self.assertEqual(h.encode(), res)
    968 
    969     def test_header_splitter(self):
    970         eq = self.ndiffAssertEqual
    971         msg = MIMEText('')
    972         # It'd be great if we could use add_header() here, but that doesn't
    973         # guarantee an order of the parameters.
    974         msg['X-Foobar-Spoink-Defrobnit'] = (
    975             'wasnipoop; giraffes="very-long-necked-animals"; '
    976             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    977         sfp = StringIO()
    978         g = Generator(sfp)
    979         g.flatten(msg)
    980         eq(sfp.getvalue(), '''\
    981 Content-Type: text/plain; charset="us-ascii"
    982 MIME-Version: 1.0
    983 Content-Transfer-Encoding: 7bit
    984 X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
    985  spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
    986 
    987 ''')
    988 
    989     def test_no_semis_header_splitter(self):
    990         eq = self.ndiffAssertEqual
    991         msg = Message()
    992         msg['From'] = 'test (at] dom.ain'
    993         msg['References'] = SPACE.join('<%d (at] dom.ain>' % i for i in range(10))
    994         msg.set_payload('Test')
    995         sfp = StringIO()
    996         g = Generator(sfp)
    997         g.flatten(msg)
    998         eq(sfp.getvalue(), """\
    999 From: test (at] dom.ain
   1000 References: <0 (at] dom.ain> <1 (at] dom.ain> <2 (at] dom.ain> <3 (at] dom.ain> <4 (at] dom.ain>
   1001  <5 (at] dom.ain> <6 (at] dom.ain> <7 (at] dom.ain> <8 (at] dom.ain> <9 (at] dom.ain>
   1002 
   1003 Test""")
   1004 
   1005     def test_last_split_chunk_does_not_fit(self):
   1006         eq = self.ndiffAssertEqual
   1007         h = Header('Subject: the first part of this is short, but_the_second'
   1008             '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
   1009             '_all_by_itself')
   1010         eq(h.encode(), """\
   1011 Subject: the first part of this is short,
   1012  but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
   1013 
   1014     def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
   1015         eq = self.ndiffAssertEqual
   1016         h = Header(', but_the_second'
   1017             '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
   1018             '_all_by_itself')
   1019         eq(h.encode(), """\
   1020 ,
   1021  but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
   1022 
   1023     def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
   1024         eq = self.ndiffAssertEqual
   1025         h = Header(', , but_the_second'
   1026             '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
   1027             '_all_by_itself')
   1028         eq(h.encode(), """\
   1029 , ,
   1030  but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
   1031 
   1032     def test_trailing_splitable_on_overlong_unsplitable(self):
   1033         eq = self.ndiffAssertEqual
   1034         h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
   1035             'be_on_a_line_all_by_itself;')
   1036         eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
   1037             "be_on_a_line_all_by_itself;")
   1038 
   1039     def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
   1040         eq = self.ndiffAssertEqual
   1041         h = Header('; '
   1042             'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
   1043             'be_on_a_line_all_by_itself; ')
   1044         eq(h.encode(), """\
   1045 ;
   1046  this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
   1047 
   1048     def test_long_header_with_multiple_sequential_split_chars(self):
   1049         eq = self.ndiffAssertEqual
   1050         h = Header('This is a long line that has two whitespaces  in a row.  '
   1051             'This used to cause truncation of the header when folded')
   1052         eq(h.encode(), """\
   1053 This is a long line that has two whitespaces  in a row.  This used to cause
   1054  truncation of the header when folded""")
   1055 
   1056     def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
   1057         eq = self.ndiffAssertEqual
   1058         h = Header('thisverylongheaderhas;semicolons;and,commas,but'
   1059             'they;arenotlegal;fold,points')
   1060         eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
   1061                         "arenotlegal;fold,points")
   1062 
   1063     def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
   1064         eq = self.ndiffAssertEqual
   1065         h = Header('this is a  test where we need to have more than one line '
   1066             'before; our final line that is just too big to fit;; '
   1067             'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
   1068             'be_on_a_line_all_by_itself;')
   1069         eq(h.encode(), """\
   1070 this is a  test where we need to have more than one line before;
   1071  our final line that is just too big to fit;;
   1072  this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
   1073 
   1074     def test_overlong_last_part_followed_by_split_point(self):
   1075         eq = self.ndiffAssertEqual
   1076         h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
   1077             'be_on_a_line_all_by_itself ')
   1078         eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
   1079                         "should_be_on_a_line_all_by_itself ")
   1080 
   1081     def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
   1082         eq = self.ndiffAssertEqual
   1083         h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
   1084             'before_our_final_line_; ; '
   1085             'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
   1086             'be_on_a_line_all_by_itself; ')
   1087         eq(h.encode(), """\
   1088 this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
   1089  ;
   1090  this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
   1091 
   1092     def test_multiline_with_overlong_last_part_followed_by_split_point(self):
   1093         eq = self.ndiffAssertEqual
   1094         h = Header('this is a test where we need to have more than one line '
   1095             'before our final line; ; '
   1096             'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
   1097             'be_on_a_line_all_by_itself; ')
   1098         eq(h.encode(), """\
   1099 this is a test where we need to have more than one line before our final line;
   1100  ;
   1101  this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
   1102 
   1103     def test_long_header_with_whitespace_runs(self):
   1104         eq = self.ndiffAssertEqual
   1105         msg = Message()
   1106         msg['From'] = 'test (at] dom.ain'
   1107         msg['References'] = SPACE.join(['<foo (at] dom.ain>  '] * 10)
   1108         msg.set_payload('Test')
   1109         sfp = StringIO()
   1110         g = Generator(sfp)
   1111         g.flatten(msg)
   1112         eq(sfp.getvalue(), """\
   1113 From: test (at] dom.ain
   1114 References: <foo (at] dom.ain>   <foo (at] dom.ain>   <foo (at] dom.ain>   <foo (at] dom.ain>
   1115    <foo (at] dom.ain>   <foo (at] dom.ain>   <foo (at] dom.ain>   <foo (at] dom.ain>
   1116    <foo (at] dom.ain>   <foo (at] dom.ain>\x20\x20
   1117 
   1118 Test""")
   1119 
   1120     def test_long_run_with_semi_header_splitter(self):
   1121         eq = self.ndiffAssertEqual
   1122         msg = Message()
   1123         msg['From'] = 'test (at] dom.ain'
   1124         msg['References'] = SPACE.join(['<foo (at] dom.ain>'] * 10) + '; abc'
   1125         msg.set_payload('Test')
   1126         sfp = StringIO()
   1127         g = Generator(sfp)
   1128         g.flatten(msg)
   1129         eq(sfp.getvalue(), """\
   1130 From: test (at] dom.ain
   1131 References: <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain>
   1132  <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain>
   1133  <foo (at] dom.ain>; abc
   1134 
   1135 Test""")
   1136 
   1137     def test_splitter_split_on_punctuation_only_if_fws(self):
   1138         eq = self.ndiffAssertEqual
   1139         msg = Message()
   1140         msg['From'] = 'test (at] dom.ain'
   1141         msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
   1142             'they;arenotlegal;fold,points')
   1143         msg.set_payload('Test')
   1144         sfp = StringIO()
   1145         g = Generator(sfp)
   1146         g.flatten(msg)
   1147         # XXX the space after the header should not be there.
   1148         eq(sfp.getvalue(), """\
   1149 From: test (at] dom.ain
   1150 References:\x20
   1151  thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
   1152 
   1153 Test""")
   1154 
   1155     def test_no_split_long_header(self):
   1156         eq = self.ndiffAssertEqual
   1157         hstr = 'References: ' + 'x' * 80
   1158         h = Header(hstr)
   1159         # These come on two lines because Headers are really field value
   1160         # classes and don't really know about their field names.
   1161         eq(h.encode(), """\
   1162 References:
   1163  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
   1164         h = Header('x' * 80)
   1165         eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
   1166 
   1167     def test_splitting_multiple_long_lines(self):
   1168         eq = self.ndiffAssertEqual
   1169         hstr = """\
   1170 from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin (at] babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
   1171 \tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin (at] babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
   1172 \tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin (at] babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
   1173 """
   1174         h = Header(hstr, continuation_ws='\t')
   1175         eq(h.encode(), """\
   1176 from babylon.socal-raves.org (localhost [127.0.0.1]);
   1177  by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
   1178  for <mailman-admin (at] babylon.socal-raves.org>;
   1179  Sat, 2 Feb 2002 17:00:06 -0800 (PST)
   1180 \tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
   1181  by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
   1182  for <mailman-admin (at] babylon.socal-raves.org>;
   1183  Sat, 2 Feb 2002 17:00:06 -0800 (PST)
   1184 \tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
   1185  by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
   1186  for <mailman-admin (at] babylon.socal-raves.org>;
   1187  Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
   1188 
   1189     def test_splitting_first_line_only_is_long(self):
   1190         eq = self.ndiffAssertEqual
   1191         hstr = """\
   1192 from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
   1193 \tby kronos.mems-exchange.org with esmtp (Exim 4.05)
   1194 \tid 17k4h5-00034i-00
   1195 \tfor test (at] mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
   1196         h = Header(hstr, maxlinelen=78, header_name='Received',
   1197                    continuation_ws='\t')
   1198         eq(h.encode(), """\
   1199 from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
   1200  helo=cthulhu.gerg.ca)
   1201 \tby kronos.mems-exchange.org with esmtp (Exim 4.05)
   1202 \tid 17k4h5-00034i-00
   1203 \tfor test (at] mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
   1204 
   1205     def test_long_8bit_header(self):
   1206         eq = self.ndiffAssertEqual
   1207         msg = Message()
   1208         h = Header('Britische Regierung gibt', 'iso-8859-1',
   1209                     header_name='Subject')
   1210         h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
   1211         eq(h.encode(maxlinelen=76), """\
   1212 =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
   1213  =?iso-8859-1?q?hore-Windkraftprojekte?=""")
   1214         msg['Subject'] = h
   1215         eq(msg.as_string(maxheaderlen=76), """\
   1216 Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
   1217  =?iso-8859-1?q?hore-Windkraftprojekte?=
   1218 
   1219 """)
   1220         eq(msg.as_string(maxheaderlen=0), """\
   1221 Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
   1222 
   1223 """)
   1224 
   1225     def test_long_8bit_header_no_charset(self):
   1226         eq = self.ndiffAssertEqual
   1227         msg = Message()
   1228         header_string = ('Britische Regierung gibt gr\xfcnes Licht '
   1229                          'f\xfcr Offshore-Windkraftprojekte '
   1230                          '<a-very-long-address (at] example.com>')
   1231         msg['Reply-To'] = header_string
   1232         eq(msg.as_string(maxheaderlen=78), """\
   1233 Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
   1234  =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
   1235 
   1236 """)
   1237         msg = Message()
   1238         msg['Reply-To'] = Header(header_string,
   1239                                  header_name='Reply-To')
   1240         eq(msg.as_string(maxheaderlen=78), """\
   1241 Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
   1242  =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
   1243 
   1244 """)
   1245 
   1246     def test_long_to_header(self):
   1247         eq = self.ndiffAssertEqual
   1248         to = ('"Someone Test #A" <someone (at] eecs.umich.edu>,'
   1249               '<someone (at] eecs.umich.edu>, '
   1250               '"Someone Test #B" <someone (at] umich.edu>, '
   1251               '"Someone Test #C" <someone (at] eecs.umich.edu>, '
   1252               '"Someone Test #D" <someone (at] eecs.umich.edu>')
   1253         msg = Message()
   1254         msg['To'] = to
   1255         eq(msg.as_string(maxheaderlen=78), '''\
   1256 To: "Someone Test #A" <someone (at] eecs.umich.edu>,<someone (at] eecs.umich.edu>,
   1257  "Someone Test #B" <someone (at] umich.edu>,
   1258  "Someone Test #C" <someone (at] eecs.umich.edu>,
   1259  "Someone Test #D" <someone (at] eecs.umich.edu>
   1260 
   1261 ''')
   1262 
   1263     def test_long_line_after_append(self):
   1264         eq = self.ndiffAssertEqual
   1265         s = 'This is an example of string which has almost the limit of header length.'
   1266         h = Header(s)
   1267         h.append('Add another line.')
   1268         eq(h.encode(maxlinelen=76), """\
   1269 This is an example of string which has almost the limit of header length.
   1270  Add another line.""")
   1271 
   1272     def test_shorter_line_with_append(self):
   1273         eq = self.ndiffAssertEqual
   1274         s = 'This is a shorter line.'
   1275         h = Header(s)
   1276         h.append('Add another sentence. (Surprise?)')
   1277         eq(h.encode(),
   1278            'This is a shorter line. Add another sentence. (Surprise?)')
   1279 
   1280     def test_long_field_name(self):
   1281         eq = self.ndiffAssertEqual
   1282         fn = 'X-Very-Very-Very-Long-Header-Name'
   1283         gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
   1284               'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
   1285               'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
   1286               'bef\xf6rdert. ')
   1287         h = Header(gs, 'iso-8859-1', header_name=fn)
   1288         # BAW: this seems broken because the first line is too long
   1289         eq(h.encode(maxlinelen=76), """\
   1290 =?iso-8859-1?q?Die_Mieter_treten_hier_e?=
   1291  =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
   1292  =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
   1293  =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
   1294 
   1295     def test_long_received_header(self):
   1296         h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
   1297              'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
   1298              'Wed, 05 Mar 2003 18:10:18 -0700')
   1299         msg = Message()
   1300         msg['Received-1'] = Header(h, continuation_ws='\t')
   1301         msg['Received-2'] = h
   1302         # This should be splitting on spaces not semicolons.
   1303         self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
   1304 Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
   1305  hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
   1306  Wed, 05 Mar 2003 18:10:18 -0700
   1307 Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
   1308  hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
   1309  Wed, 05 Mar 2003 18:10:18 -0700
   1310 
   1311 """)
   1312 
   1313     def test_string_headerinst_eq(self):
   1314         h = ('<15975.17901.207240.414604 (at] sgigritzmann1.mathematik.'
   1315              'tu-muenchen.de> (David Bremner\'s message of '
   1316              '"Thu, 6 Mar 2003 13:58:21 +0100")')
   1317         msg = Message()
   1318         msg['Received-1'] = Header(h, header_name='Received-1',
   1319                                    continuation_ws='\t')
   1320         msg['Received-2'] = h
   1321         # XXX The space after the ':' should not be there.
   1322         self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
   1323 Received-1:\x20
   1324  <15975.17901.207240.414604 (at] sgigritzmann1.mathematik.tu-muenchen.de> (David
   1325  Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
   1326 Received-2:\x20
   1327  <15975.17901.207240.414604 (at] sgigritzmann1.mathematik.tu-muenchen.de> (David
   1328  Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
   1329 
   1330 """)
   1331 
   1332     def test_long_unbreakable_lines_with_continuation(self):
   1333         eq = self.ndiffAssertEqual
   1334         msg = Message()
   1335         t = """\
   1336 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
   1337  locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
   1338         msg['Face-1'] = t
   1339         msg['Face-2'] = Header(t, header_name='Face-2')
   1340         msg['Face-3'] = ' ' + t
   1341         # XXX This splitting is all wrong.  It the first value line should be
   1342         # snug against the field name or the space after the header not there.
   1343         eq(msg.as_string(maxheaderlen=78), """\
   1344 Face-1:\x20
   1345  iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
   1346  locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
   1347 Face-2:\x20
   1348  iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
   1349  locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
   1350 Face-3:\x20
   1351  iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
   1352  locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
   1353 
   1354 """)
   1355 
   1356     def test_another_long_multiline_header(self):
   1357         eq = self.ndiffAssertEqual
   1358         m = ('Received: from siimage.com '
   1359              '([172.25.1.3]) by zima.siliconimage.com with '
   1360              'Microsoft SMTPSVC(5.0.2195.4905); '
   1361              'Wed, 16 Oct 2002 07:41:11 -0700')
   1362         msg = email.message_from_string(m)
   1363         eq(msg.as_string(maxheaderlen=78), '''\
   1364 Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
   1365  Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
   1366 
   1367 ''')
   1368 
   1369     def test_long_lines_with_different_header(self):
   1370         eq = self.ndiffAssertEqual
   1371         h = ('List-Unsubscribe: '
   1372              '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
   1373              '        <mailto:spamassassin-talk-request (at] lists.sourceforge.net'
   1374              '?subject=unsubscribe>')
   1375         msg = Message()
   1376         msg['List'] = h
   1377         msg['List'] = Header(h, header_name='List')
   1378         eq(msg.as_string(maxheaderlen=78), """\
   1379 List: List-Unsubscribe:
   1380  <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
   1381         <mailto:spamassassin-talk-request (at] lists.sourceforge.net?subject=unsubscribe>
   1382 List: List-Unsubscribe:
   1383  <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
   1384         <mailto:spamassassin-talk-request (at] lists.sourceforge.net?subject=unsubscribe>
   1385 
   1386 """)
   1387 
   1388     def test_long_rfc2047_header_with_embedded_fws(self):
   1389         h = Header(textwrap.dedent("""\
   1390             We're going to pretend this header is in a non-ascii character set
   1391             \tto see if line wrapping with encoded words and embedded
   1392                folding white space works"""),
   1393                    charset='utf-8',
   1394                    header_name='Test')
   1395         self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
   1396             =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
   1397              =?utf-8?q?cter_set?=
   1398              =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
   1399              =?utf-8?q?_folding_white_space_works?=""")+'\n')
   1400 
   1401 
   1402 
   1403 # Test mangling of "From " lines in the body of a message
   1404 class TestFromMangling(unittest.TestCase):
   1405     def setUp(self):
   1406         self.msg = Message()
   1407         self.msg['From'] = 'aaa (at] bbb.org'
   1408         self.msg.set_payload("""\
   1409 From the desk of A.A.A.:
   1410 Blah blah blah
   1411 """)
   1412 
   1413     def test_mangled_from(self):
   1414         s = StringIO()
   1415         g = Generator(s, mangle_from_=True)
   1416         g.flatten(self.msg)
   1417         self.assertEqual(s.getvalue(), """\
   1418 From: aaa (at] bbb.org
   1419 
   1420 >From the desk of A.A.A.:
   1421 Blah blah blah
   1422 """)
   1423 
   1424     def test_dont_mangle_from(self):
   1425         s = StringIO()
   1426         g = Generator(s, mangle_from_=False)
   1427         g.flatten(self.msg)
   1428         self.assertEqual(s.getvalue(), """\
   1429 From: aaa (at] bbb.org
   1430 
   1431 From the desk of A.A.A.:
   1432 Blah blah blah
   1433 """)
   1434 
   1435     def test_mangle_from_in_preamble_and_epilog(self):
   1436         s = StringIO()
   1437         g = Generator(s, mangle_from_=True)
   1438         msg = email.message_from_string(textwrap.dedent("""\
   1439             From: foo (at] bar.com
   1440             Mime-Version: 1.0
   1441             Content-Type: multipart/mixed; boundary=XXX
   1442 
   1443             From somewhere unknown
   1444 
   1445             --XXX
   1446             Content-Type: text/plain
   1447 
   1448             foo
   1449 
   1450             --XXX--
   1451 
   1452             From somewhere unknowable
   1453             """))
   1454         g.flatten(msg)
   1455         self.assertEqual(len([1 for x in s.getvalue().split('\n')
   1456                                   if x.startswith('>From ')]), 2)
   1457 
   1458     def test_mangled_from_with_bad_bytes(self):
   1459         source = textwrap.dedent("""\
   1460             Content-Type: text/plain; charset="utf-8"
   1461             MIME-Version: 1.0
   1462             Content-Transfer-Encoding: 8bit
   1463             From: aaa (at] bbb.org
   1464 
   1465         """).encode('utf-8')
   1466         msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
   1467         b = BytesIO()
   1468         g = BytesGenerator(b, mangle_from_=True)
   1469         g.flatten(msg)
   1470         self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n')
   1471 
   1472 
   1473 # Test the basic MIMEAudio class
   1474 class TestMIMEAudio(unittest.TestCase):
   1475     def setUp(self):
   1476         with openfile('audiotest.au', 'rb') as fp:
   1477             self._audiodata = fp.read()
   1478         self._au = MIMEAudio(self._audiodata)
   1479 
   1480     def test_guess_minor_type(self):
   1481         self.assertEqual(self._au.get_content_type(), 'audio/basic')
   1482 
   1483     def test_encoding(self):
   1484         payload = self._au.get_payload()
   1485         self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
   1486                 self._audiodata)
   1487 
   1488     def test_checkSetMinor(self):
   1489         au = MIMEAudio(self._audiodata, 'fish')
   1490         self.assertEqual(au.get_content_type(), 'audio/fish')
   1491 
   1492     def test_add_header(self):
   1493         eq = self.assertEqual
   1494         self._au.add_header('Content-Disposition', 'attachment',
   1495                             filename='audiotest.au')
   1496         eq(self._au['content-disposition'],
   1497            'attachment; filename="audiotest.au"')
   1498         eq(self._au.get_params(header='content-disposition'),
   1499            [('attachment', ''), ('filename', 'audiotest.au')])
   1500         eq(self._au.get_param('filename', header='content-disposition'),
   1501            'audiotest.au')
   1502         missing = []
   1503         eq(self._au.get_param('attachment', header='content-disposition'), '')
   1504         self.assertIs(self._au.get_param('foo', failobj=missing,
   1505                                          header='content-disposition'), missing)
   1506         # Try some missing stuff
   1507         self.assertIs(self._au.get_param('foobar', missing), missing)
   1508         self.assertIs(self._au.get_param('attachment', missing,
   1509                                          header='foobar'), missing)
   1510 
   1511 
   1512 
   1513 # Test the basic MIMEImage class
   1514 class TestMIMEImage(unittest.TestCase):
   1515     def setUp(self):
   1516         with openfile('PyBanner048.gif', 'rb') as fp:
   1517             self._imgdata = fp.read()
   1518         self._im = MIMEImage(self._imgdata)
   1519 
   1520     def test_guess_minor_type(self):
   1521         self.assertEqual(self._im.get_content_type(), 'image/gif')
   1522 
   1523     def test_encoding(self):
   1524         payload = self._im.get_payload()
   1525         self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
   1526                 self._imgdata)
   1527 
   1528     def test_checkSetMinor(self):
   1529         im = MIMEImage(self._imgdata, 'fish')
   1530         self.assertEqual(im.get_content_type(), 'image/fish')
   1531 
   1532     def test_add_header(self):
   1533         eq = self.assertEqual
   1534         self._im.add_header('Content-Disposition', 'attachment',
   1535                             filename='dingusfish.gif')
   1536         eq(self._im['content-disposition'],
   1537            'attachment; filename="dingusfish.gif"')
   1538         eq(self._im.get_params(header='content-disposition'),
   1539            [('attachment', ''), ('filename', 'dingusfish.gif')])
   1540         eq(self._im.get_param('filename', header='content-disposition'),
   1541            'dingusfish.gif')
   1542         missing = []
   1543         eq(self._im.get_param('attachment', header='content-disposition'), '')
   1544         self.assertIs(self._im.get_param('foo', failobj=missing,
   1545                                          header='content-disposition'), missing)
   1546         # Try some missing stuff
   1547         self.assertIs(self._im.get_param('foobar', missing), missing)
   1548         self.assertIs(self._im.get_param('attachment', missing,
   1549                                          header='foobar'), missing)
   1550 
   1551 
   1552 
   1553 # Test the basic MIMEApplication class
   1554 class TestMIMEApplication(unittest.TestCase):
   1555     def test_headers(self):
   1556         eq = self.assertEqual
   1557         msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
   1558         eq(msg.get_content_type(), 'application/octet-stream')
   1559         eq(msg['content-transfer-encoding'], 'base64')
   1560 
   1561     def test_body(self):
   1562         eq = self.assertEqual
   1563         bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
   1564         msg = MIMEApplication(bytesdata)
   1565         # whitespace in the cte encoded block is RFC-irrelevant.
   1566         eq(msg.get_payload().strip(), '+vv8/f7/')
   1567         eq(msg.get_payload(decode=True), bytesdata)
   1568 
   1569     def test_binary_body_with_encode_7or8bit(self):
   1570         # Issue 17171.
   1571         bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
   1572         msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
   1573         # Treated as a string, this will be invalid code points.
   1574         self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
   1575         self.assertEqual(msg.get_payload(decode=True), bytesdata)
   1576         self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
   1577         s = BytesIO()
   1578         g = BytesGenerator(s)
   1579         g.flatten(msg)
   1580         wireform = s.getvalue()
   1581         msg2 = email.message_from_bytes(wireform)
   1582         self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
   1583         self.assertEqual(msg2.get_payload(decode=True), bytesdata)
   1584         self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')
   1585 
   1586     def test_binary_body_with_encode_noop(self):
   1587         # Issue 16564: This does not produce an RFC valid message, since to be
   1588         # valid it should have a CTE of binary.  But the below works in
   1589         # Python2, and is documented as working this way.
   1590         bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
   1591         msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
   1592         # Treated as a string, this will be invalid code points.
   1593         self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
   1594         self.assertEqual(msg.get_payload(decode=True), bytesdata)
   1595         s = BytesIO()
   1596         g = BytesGenerator(s)
   1597         g.flatten(msg)
   1598         wireform = s.getvalue()
   1599         msg2 = email.message_from_bytes(wireform)
   1600         self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
   1601         self.assertEqual(msg2.get_payload(decode=True), bytesdata)
   1602 
   1603     def test_binary_body_with_unicode_linend_encode_noop(self):
   1604         # Issue 19003: This is a variation on #16564.
   1605         bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
   1606         msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
   1607         self.assertEqual(msg.get_payload(decode=True), bytesdata)
   1608         s = BytesIO()
   1609         g = BytesGenerator(s)
   1610         g.flatten(msg)
   1611         wireform = s.getvalue()
   1612         msg2 = email.message_from_bytes(wireform)
   1613         self.assertEqual(msg2.get_payload(decode=True), bytesdata)
   1614 
   1615     def test_binary_body_with_encode_quopri(self):
   1616         # Issue 14360.
   1617         bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
   1618         msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
   1619         self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
   1620         self.assertEqual(msg.get_payload(decode=True), bytesdata)
   1621         self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
   1622         s = BytesIO()
   1623         g = BytesGenerator(s)
   1624         g.flatten(msg)
   1625         wireform = s.getvalue()
   1626         msg2 = email.message_from_bytes(wireform)
   1627         self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
   1628         self.assertEqual(msg2.get_payload(decode=True), bytesdata)
   1629         self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')
   1630 
   1631     def test_binary_body_with_encode_base64(self):
   1632         bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
   1633         msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
   1634         self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
   1635         self.assertEqual(msg.get_payload(decode=True), bytesdata)
   1636         s = BytesIO()
   1637         g = BytesGenerator(s)
   1638         g.flatten(msg)
   1639         wireform = s.getvalue()
   1640         msg2 = email.message_from_bytes(wireform)
   1641         self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
   1642         self.assertEqual(msg2.get_payload(decode=True), bytesdata)
   1643 
   1644 
   1645 # Test the basic MIMEText class
   1646 class TestMIMEText(unittest.TestCase):
   1647     def setUp(self):
   1648         self._msg = MIMEText('hello there')
   1649 
   1650     def test_types(self):
   1651         eq = self.assertEqual
   1652         eq(self._msg.get_content_type(), 'text/plain')
   1653         eq(self._msg.get_param('charset'), 'us-ascii')
   1654         missing = []
   1655         self.assertIs(self._msg.get_param('foobar', missing), missing)
   1656         self.assertIs(self._msg.get_param('charset', missing, header='foobar'),
   1657                       missing)
   1658 
   1659     def test_payload(self):
   1660         self.assertEqual(self._msg.get_payload(), 'hello there')
   1661         self.assertFalse(self._msg.is_multipart())
   1662 
   1663     def test_charset(self):
   1664         eq = self.assertEqual
   1665         msg = MIMEText('hello there', _charset='us-ascii')
   1666         eq(msg.get_charset().input_charset, 'us-ascii')
   1667         eq(msg['content-type'], 'text/plain; charset="us-ascii"')
   1668         # Also accept a Charset instance
   1669         charset = Charset('utf-8')
   1670         charset.body_encoding = None
   1671         msg = MIMEText('hello there', _charset=charset)
   1672         eq(msg.get_charset().input_charset, 'utf-8')
   1673         eq(msg['content-type'], 'text/plain; charset="utf-8"')
   1674         eq(msg.get_payload(), 'hello there')
   1675 
   1676     def test_7bit_input(self):
   1677         eq = self.assertEqual
   1678         msg = MIMEText('hello there', _charset='us-ascii')
   1679         eq(msg.get_charset().input_charset, 'us-ascii')
   1680         eq(msg['content-type'], 'text/plain; charset="us-ascii"')
   1681 
   1682     def test_7bit_input_no_charset(self):
   1683         eq = self.assertEqual
   1684         msg = MIMEText('hello there')
   1685         eq(msg.get_charset(), 'us-ascii')
   1686         eq(msg['content-type'], 'text/plain; charset="us-ascii"')
   1687         self.assertIn('hello there', msg.as_string())
   1688 
   1689     def test_utf8_input(self):
   1690         teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
   1691         eq = self.assertEqual
   1692         msg = MIMEText(teststr, _charset='utf-8')
   1693         eq(msg.get_charset().output_charset, 'utf-8')
   1694         eq(msg['content-type'], 'text/plain; charset="utf-8"')
   1695         eq(msg.get_payload(decode=True), teststr.encode('utf-8'))
   1696 
   1697     @unittest.skip("can't fix because of backward compat in email5, "
   1698         "will fix in email6")
   1699     def test_utf8_input_no_charset(self):
   1700         teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
   1701         self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
   1702 
   1703 
   1704 
   1705 # Test complicated multipart/* messages
   1706 class TestMultipart(TestEmailBase):
   1707     def setUp(self):
   1708         with openfile('PyBanner048.gif', 'rb') as fp:
   1709             data = fp.read()
   1710         container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
   1711         image = MIMEImage(data, name='dingusfish.gif')
   1712         image.add_header('content-disposition', 'attachment',
   1713                          filename='dingusfish.gif')
   1714         intro = MIMEText('''\
   1715 Hi there,
   1716 
   1717 This is the dingus fish.
   1718 ''')
   1719         container.attach(intro)
   1720         container.attach(image)
   1721         container['From'] = 'Barry <barry (at] digicool.com>'
   1722         container['To'] = 'Dingus Lovers <cravindogs (at] cravindogs.com>'
   1723         container['Subject'] = 'Here is your dingus fish'
   1724 
   1725         now = 987809702.54848599
   1726         timetuple = time.localtime(now)
   1727         if timetuple[-1] == 0:
   1728             tzsecs = time.timezone
   1729         else:
   1730             tzsecs = time.altzone
   1731         if tzsecs > 0:
   1732             sign = '-'
   1733         else:
   1734             sign = '+'
   1735         tzoffset = ' %s%04d' % (sign, tzsecs / 36)
   1736         container['Date'] = time.strftime(
   1737             '%a, %d %b %Y %H:%M:%S',
   1738             time.localtime(now)) + tzoffset
   1739         self._msg = container
   1740         self._im = image
   1741         self._txt = intro
   1742 
   1743     def test_hierarchy(self):
   1744         # convenience
   1745         eq = self.assertEqual
   1746         raises = self.assertRaises
   1747         # tests
   1748         m = self._msg
   1749         self.assertTrue(m.is_multipart())
   1750         eq(m.get_content_type(), 'multipart/mixed')
   1751         eq(len(m.get_payload()), 2)
   1752         raises(IndexError, m.get_payload, 2)
   1753         m0 = m.get_payload(0)
   1754         m1 = m.get_payload(1)
   1755         self.assertIs(m0, self._txt)
   1756         self.assertIs(m1, self._im)
   1757         eq(m.get_payload(), [m0, m1])
   1758         self.assertFalse(m0.is_multipart())
   1759         self.assertFalse(m1.is_multipart())
   1760 
   1761     def test_empty_multipart_idempotent(self):
   1762         text = """\
   1763 Content-Type: multipart/mixed; boundary="BOUNDARY"
   1764 MIME-Version: 1.0
   1765 Subject: A subject
   1766 To: aperson (at] dom.ain
   1767 From: bperson (at] dom.ain
   1768 
   1769 
   1770 --BOUNDARY
   1771 
   1772 
   1773 --BOUNDARY--
   1774 """
   1775         msg = Parser().parsestr(text)
   1776         self.ndiffAssertEqual(text, msg.as_string())
   1777 
   1778     def test_no_parts_in_a_multipart_with_none_epilogue(self):
   1779         outer = MIMEBase('multipart', 'mixed')
   1780         outer['Subject'] = 'A subject'
   1781         outer['To'] = 'aperson (at] dom.ain'
   1782         outer['From'] = 'bperson (at] dom.ain'
   1783         outer.set_boundary('BOUNDARY')
   1784         self.ndiffAssertEqual(outer.as_string(), '''\
   1785 Content-Type: multipart/mixed; boundary="BOUNDARY"
   1786 MIME-Version: 1.0
   1787 Subject: A subject
   1788 To: aperson (at] dom.ain
   1789 From: bperson (at] dom.ain
   1790 
   1791 --BOUNDARY
   1792 
   1793 --BOUNDARY--
   1794 ''')
   1795 
   1796     def test_no_parts_in_a_multipart_with_empty_epilogue(self):
   1797         outer = MIMEBase('multipart', 'mixed')
   1798         outer['Subject'] = 'A subject'
   1799         outer['To'] = 'aperson (at] dom.ain'
   1800         outer['From'] = 'bperson (at] dom.ain'
   1801         outer.preamble = ''
   1802         outer.epilogue = ''
   1803         outer.set_boundary('BOUNDARY')
   1804         self.ndiffAssertEqual(outer.as_string(), '''\
   1805 Content-Type: multipart/mixed; boundary="BOUNDARY"
   1806 MIME-Version: 1.0
   1807 Subject: A subject
   1808 To: aperson (at] dom.ain
   1809 From: bperson (at] dom.ain
   1810 
   1811 
   1812 --BOUNDARY
   1813 
   1814 --BOUNDARY--
   1815 ''')
   1816 
   1817     def test_one_part_in_a_multipart(self):
   1818         eq = self.ndiffAssertEqual
   1819         outer = MIMEBase('multipart', 'mixed')
   1820         outer['Subject'] = 'A subject'
   1821         outer['To'] = 'aperson (at] dom.ain'
   1822         outer['From'] = 'bperson (at] dom.ain'
   1823         outer.set_boundary('BOUNDARY')
   1824         msg = MIMEText('hello world')
   1825         outer.attach(msg)
   1826         eq(outer.as_string(), '''\
   1827 Content-Type: multipart/mixed; boundary="BOUNDARY"
   1828 MIME-Version: 1.0
   1829 Subject: A subject
   1830 To: aperson (at] dom.ain
   1831 From: bperson (at] dom.ain
   1832 
   1833 --BOUNDARY
   1834 Content-Type: text/plain; charset="us-ascii"
   1835 MIME-Version: 1.0
   1836 Content-Transfer-Encoding: 7bit
   1837 
   1838 hello world
   1839 --BOUNDARY--
   1840 ''')
   1841 
   1842     def test_seq_parts_in_a_multipart_with_empty_preamble(self):
   1843         eq = self.ndiffAssertEqual
   1844         outer = MIMEBase('multipart', 'mixed')
   1845         outer['Subject'] = 'A subject'
   1846         outer['To'] = 'aperson (at] dom.ain'
   1847         outer['From'] = 'bperson (at] dom.ain'
   1848         outer.preamble = ''
   1849         msg = MIMEText('hello world')
   1850         outer.attach(msg)
   1851         outer.set_boundary('BOUNDARY')
   1852         eq(outer.as_string(), '''\
   1853 Content-Type: multipart/mixed; boundary="BOUNDARY"
   1854 MIME-Version: 1.0
   1855 Subject: A subject
   1856 To: aperson (at] dom.ain
   1857 From: bperson (at] dom.ain
   1858 
   1859 
   1860 --BOUNDARY
   1861 Content-Type: text/plain; charset="us-ascii"
   1862 MIME-Version: 1.0
   1863 Content-Transfer-Encoding: 7bit
   1864 
   1865 hello world
   1866 --BOUNDARY--
   1867 ''')
   1868 
   1869 
   1870     def test_seq_parts_in_a_multipart_with_none_preamble(self):
   1871         eq = self.ndiffAssertEqual
   1872         outer = MIMEBase('multipart', 'mixed')
   1873         outer['Subject'] = 'A subject'
   1874         outer['To'] = 'aperson (at] dom.ain'
   1875         outer['From'] = 'bperson (at] dom.ain'
   1876         outer.preamble = None
   1877         msg = MIMEText('hello world')
   1878         outer.attach(msg)
   1879         outer.set_boundary('BOUNDARY')
   1880         eq(outer.as_string(), '''\
   1881 Content-Type: multipart/mixed; boundary="BOUNDARY"
   1882 MIME-Version: 1.0
   1883 Subject: A subject
   1884 To: aperson (at] dom.ain
   1885 From: bperson (at] dom.ain
   1886 
   1887 --BOUNDARY
   1888 Content-Type: text/plain; charset="us-ascii"
   1889 MIME-Version: 1.0
   1890 Content-Transfer-Encoding: 7bit
   1891 
   1892 hello world
   1893 --BOUNDARY--
   1894 ''')
   1895 
   1896 
   1897     def test_seq_parts_in_a_multipart_with_none_epilogue(self):
   1898         eq = self.ndiffAssertEqual
   1899         outer = MIMEBase('multipart', 'mixed')
   1900         outer['Subject'] = 'A subject'
   1901         outer['To'] = 'aperson (at] dom.ain'
   1902         outer['From'] = 'bperson (at] dom.ain'
   1903         outer.epilogue = None
   1904         msg = MIMEText('hello world')
   1905         outer.attach(msg)
   1906         outer.set_boundary('BOUNDARY')
   1907         eq(outer.as_string(), '''\
   1908 Content-Type: multipart/mixed; boundary="BOUNDARY"
   1909 MIME-Version: 1.0
   1910 Subject: A subject
   1911 To: aperson (at] dom.ain
   1912 From: bperson (at] dom.ain
   1913 
   1914 --BOUNDARY
   1915 Content-Type: text/plain; charset="us-ascii"
   1916 MIME-Version: 1.0
   1917 Content-Transfer-Encoding: 7bit
   1918 
   1919 hello world
   1920 --BOUNDARY--
   1921 ''')
   1922 
   1923 
   1924     def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
   1925         eq = self.ndiffAssertEqual
   1926         outer = MIMEBase('multipart', 'mixed')
   1927         outer['Subject'] = 'A subject'
   1928         outer['To'] = 'aperson (at] dom.ain'
   1929         outer['From'] = 'bperson (at] dom.ain'
   1930         outer.epilogue = ''
   1931         msg = MIMEText('hello world')
   1932         outer.attach(msg)
   1933         outer.set_boundary('BOUNDARY')
   1934         eq(outer.as_string(), '''\
   1935 Content-Type: multipart/mixed; boundary="BOUNDARY"
   1936 MIME-Version: 1.0
   1937 Subject: A subject
   1938 To: aperson (at] dom.ain
   1939 From: bperson (at] dom.ain
   1940 
   1941 --BOUNDARY
   1942 Content-Type: text/plain; charset="us-ascii"
   1943 MIME-Version: 1.0
   1944 Content-Transfer-Encoding: 7bit
   1945 
   1946 hello world
   1947 --BOUNDARY--
   1948 ''')
   1949 
   1950 
   1951     def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
   1952         eq = self.ndiffAssertEqual
   1953         outer = MIMEBase('multipart', 'mixed')
   1954         outer['Subject'] = 'A subject'
   1955         outer['To'] = 'aperson (at] dom.ain'
   1956         outer['From'] = 'bperson (at] dom.ain'
   1957         outer.epilogue = '\n'
   1958         msg = MIMEText('hello world')
   1959         outer.attach(msg)
   1960         outer.set_boundary('BOUNDARY')
   1961         eq(outer.as_string(), '''\
   1962 Content-Type: multipart/mixed; boundary="BOUNDARY"
   1963 MIME-Version: 1.0
   1964 Subject: A subject
   1965 To: aperson (at] dom.ain
   1966 From: bperson (at] dom.ain
   1967 
   1968 --BOUNDARY
   1969 Content-Type: text/plain; charset="us-ascii"
   1970 MIME-Version: 1.0
   1971 Content-Transfer-Encoding: 7bit
   1972 
   1973 hello world
   1974 --BOUNDARY--
   1975 
   1976 ''')
   1977 
   1978     def test_message_external_body(self):
   1979         eq = self.assertEqual
   1980         msg = self._msgobj('msg_36.txt')
   1981         eq(len(msg.get_payload()), 2)
   1982         msg1 = msg.get_payload(1)
   1983         eq(msg1.get_content_type(), 'multipart/alternative')
   1984         eq(len(msg1.get_payload()), 2)
   1985         for subpart in msg1.get_payload():
   1986             eq(subpart.get_content_type(), 'message/external-body')
   1987             eq(len(subpart.get_payload()), 1)
   1988             subsubpart = subpart.get_payload(0)
   1989             eq(subsubpart.get_content_type(), 'text/plain')
   1990 
   1991     def test_double_boundary(self):
   1992         # msg_37.txt is a multipart that contains two dash-boundary's in a
   1993         # row.  Our interpretation of RFC 2046 calls for ignoring the second
   1994         # and subsequent boundaries.
   1995         msg = self._msgobj('msg_37.txt')
   1996         self.assertEqual(len(msg.get_payload()), 3)
   1997 
   1998     def test_nested_inner_contains_outer_boundary(self):
   1999         eq = self.ndiffAssertEqual
   2000         # msg_38.txt has an inner part that contains outer boundaries.  My
   2001         # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
   2002         # these are illegal and should be interpreted as unterminated inner
   2003         # parts.
   2004         msg = self._msgobj('msg_38.txt')
   2005         sfp = StringIO()
   2006         iterators._structure(msg, sfp)
   2007         eq(sfp.getvalue(), """\
   2008 multipart/mixed
   2009     multipart/mixed
   2010         multipart/alternative
   2011             text/plain
   2012         text/plain
   2013     text/plain
   2014     text/plain
   2015 """)
   2016 
   2017     def test_nested_with_same_boundary(self):
   2018         eq = self.ndiffAssertEqual
   2019         # msg 39.txt is similarly evil in that it's got inner parts that use
   2020         # the same boundary as outer parts.  Again, I believe the way this is
   2021         # parsed is closest to the spirit of RFC 2046
   2022         msg = self._msgobj('msg_39.txt')
   2023         sfp = StringIO()
   2024         iterators._structure(msg, sfp)
   2025         eq(sfp.getvalue(), """\
   2026 multipart/mixed
   2027     multipart/mixed
   2028         multipart/alternative
   2029         application/octet-stream
   2030         application/octet-stream
   2031     text/plain
   2032 """)
   2033 
   2034     def test_boundary_in_non_multipart(self):
   2035         msg = self._msgobj('msg_40.txt')
   2036         self.assertEqual(msg.as_string(), '''\
   2037 MIME-Version: 1.0
   2038 Content-Type: text/html; boundary="--961284236552522269"
   2039 
   2040 ----961284236552522269
   2041 Content-Type: text/html;
   2042 Content-Transfer-Encoding: 7Bit
   2043 
   2044 <html></html>
   2045 
   2046 ----961284236552522269--
   2047 ''')
   2048 
   2049     def test_boundary_with_leading_space(self):
   2050         eq = self.assertEqual
   2051         msg = email.message_from_string('''\
   2052 MIME-Version: 1.0
   2053 Content-Type: multipart/mixed; boundary="    XXXX"
   2054 
   2055 --    XXXX
   2056 Content-Type: text/plain
   2057 
   2058 
   2059 --    XXXX
   2060 Content-Type: text/plain
   2061 
   2062 --    XXXX--
   2063 ''')
   2064         self.assertTrue(msg.is_multipart())
   2065         eq(msg.get_boundary(), '    XXXX')
   2066         eq(len(msg.get_payload()), 2)
   2067 
   2068     def test_boundary_without_trailing_newline(self):
   2069         m = Parser().parsestr("""\
   2070 Content-Type: multipart/mixed; boundary="===============0012394164=="
   2071 MIME-Version: 1.0
   2072 
   2073 --===============0012394164==
   2074 Content-Type: image/file1.jpg
   2075 MIME-Version: 1.0
   2076 Content-Transfer-Encoding: base64
   2077 
   2078 YXNkZg==
   2079 --===============0012394164==--""")
   2080         self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
   2081 
   2082     def test_mimebase_default_policy(self):
   2083         m = MIMEBase('multipart', 'mixed')
   2084         self.assertIs(m.policy, email.policy.compat32)
   2085 
   2086     def test_mimebase_custom_policy(self):
   2087         m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
   2088         self.assertIs(m.policy, email.policy.default)
   2089 
   2090 # Test some badly formatted messages
   2091 class TestNonConformant(TestEmailBase):
   2092 
   2093     def test_parse_missing_minor_type(self):
   2094         eq = self.assertEqual
   2095         msg = self._msgobj('msg_14.txt')
   2096         eq(msg.get_content_type(), 'text/plain')
   2097         eq(msg.get_content_maintype(), 'text')
   2098         eq(msg.get_content_subtype(), 'plain')
   2099 
   2100     # test_defect_handling
   2101     def test_same_boundary_inner_outer(self):
   2102         msg = self._msgobj('msg_15.txt')
   2103         # XXX We can probably eventually do better
   2104         inner = msg.get_payload(0)
   2105         self.assertTrue(hasattr(inner, 'defects'))
   2106         self.assertEqual(len(inner.defects), 1)
   2107         self.assertIsInstance(inner.defects[0],
   2108                               errors.StartBoundaryNotFoundDefect)
   2109 
   2110     # test_defect_handling
   2111     def test_multipart_no_boundary(self):
   2112         msg = self._msgobj('msg_25.txt')
   2113         self.assertIsInstance(msg.get_payload(), str)
   2114         self.assertEqual(len(msg.defects), 2)
   2115         self.assertIsInstance(msg.defects[0],
   2116                               errors.NoBoundaryInMultipartDefect)
   2117         self.assertIsInstance(msg.defects[1],
   2118                               errors.MultipartInvariantViolationDefect)
   2119 
   2120     multipart_msg = textwrap.dedent("""\
   2121         Date: Wed, 14 Nov 2007 12:56:23 GMT
   2122         From: foo (at] bar.invalid
   2123         To: foo (at] bar.invalid
   2124         Subject: Content-Transfer-Encoding: base64 and multipart
   2125         MIME-Version: 1.0
   2126         Content-Type: multipart/mixed;
   2127             boundary="===============3344438784458119861=="{}
   2128 
   2129         --===============3344438784458119861==
   2130         Content-Type: text/plain
   2131 
   2132         Test message
   2133 
   2134         --===============3344438784458119861==
   2135         Content-Type: application/octet-stream
   2136         Content-Transfer-Encoding: base64
   2137 
   2138         YWJj
   2139 
   2140         --===============3344438784458119861==--
   2141         """)
   2142 
   2143     # test_defect_handling
   2144     def test_multipart_invalid_cte(self):
   2145         msg = self._str_msg(
   2146             self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
   2147         self.assertEqual(len(msg.defects), 1)
   2148         self.assertIsInstance(msg.defects[0],
   2149             errors.InvalidMultipartContentTransferEncodingDefect)
   2150 
   2151     # test_defect_handling
   2152     def test_multipart_no_cte_no_defect(self):
   2153         msg = self._str_msg(self.multipart_msg.format(''))
   2154         self.assertEqual(len(msg.defects), 0)
   2155 
   2156     # test_defect_handling
   2157     def test_multipart_valid_cte_no_defect(self):
   2158         for cte in ('7bit', '8bit', 'BINary'):
   2159             msg = self._str_msg(
   2160                 self.multipart_msg.format(
   2161                     "\nContent-Transfer-Encoding: {}".format(cte)))
   2162             self.assertEqual(len(msg.defects), 0)
   2163 
   2164     # test_headerregistry.TestContentTyopeHeader invalid_1 and invalid_2.
   2165     def test_invalid_content_type(self):
   2166         eq = self.assertEqual
   2167         neq = self.ndiffAssertEqual
   2168         msg = Message()
   2169         # RFC 2045, $5.2 says invalid yields text/plain
   2170         msg['Content-Type'] = 'text'
   2171         eq(msg.get_content_maintype(), 'text')
   2172         eq(msg.get_content_subtype(), 'plain')
   2173         eq(msg.get_content_type(), 'text/plain')
   2174         # Clear the old value and try something /really/ invalid
   2175         del msg['content-type']
   2176         msg['Content-Type'] = 'foo'
   2177         eq(msg.get_content_maintype(), 'text')
   2178         eq(msg.get_content_subtype(), 'plain')
   2179         eq(msg.get_content_type(), 'text/plain')
   2180         # Still, make sure that the message is idempotently generated
   2181         s = StringIO()
   2182         g = Generator(s)
   2183         g.flatten(msg)
   2184         neq(s.getvalue(), 'Content-Type: foo\n\n')
   2185 
   2186     def test_no_start_boundary(self):
   2187         eq = self.ndiffAssertEqual
   2188         msg = self._msgobj('msg_31.txt')
   2189         eq(msg.get_payload(), """\
   2190 --BOUNDARY
   2191 Content-Type: text/plain
   2192 
   2193 message 1
   2194 
   2195 --BOUNDARY
   2196 Content-Type: text/plain
   2197 
   2198 message 2
   2199 
   2200 --BOUNDARY--
   2201 """)
   2202 
   2203     def test_no_separating_blank_line(self):
   2204         eq = self.ndiffAssertEqual
   2205         msg = self._msgobj('msg_35.txt')
   2206         eq(msg.as_string(), """\
   2207 From: aperson (at] dom.ain
   2208 To: bperson (at] dom.ain
   2209 Subject: here's something interesting
   2210 
   2211 counter to RFC 2822, there's no separating newline here
   2212 """)
   2213 
   2214     # test_defect_handling
   2215     def test_lying_multipart(self):
   2216         msg = self._msgobj('msg_41.txt')
   2217         self.assertTrue(hasattr(msg, 'defects'))
   2218         self.assertEqual(len(msg.defects), 2)
   2219         self.assertIsInstance(msg.defects[0],
   2220                               errors.NoBoundaryInMultipartDefect)
   2221         self.assertIsInstance(msg.defects[1],
   2222                               errors.MultipartInvariantViolationDefect)
   2223 
   2224     # test_defect_handling
   2225     def test_missing_start_boundary(self):
   2226         outer = self._msgobj('msg_42.txt')
   2227         # The message structure is:
   2228         #
   2229         # multipart/mixed
   2230         #    text/plain
   2231         #    message/rfc822
   2232         #        multipart/mixed [*]
   2233         #
   2234         # [*] This message is missing its start boundary
   2235         bad = outer.get_payload(1).get_payload(0)
   2236         self.assertEqual(len(bad.defects), 1)
   2237         self.assertIsInstance(bad.defects[0],
   2238                               errors.StartBoundaryNotFoundDefect)
   2239 
   2240     # test_defect_handling
   2241     def test_first_line_is_continuation_header(self):
   2242         eq = self.assertEqual
   2243         m = ' Line 1\nSubject: test\n\nbody'
   2244         msg = email.message_from_string(m)
   2245         eq(msg.keys(), ['Subject'])
   2246         eq(msg.get_payload(), 'body')
   2247         eq(len(msg.defects), 1)
   2248         self.assertDefectsEqual(msg.defects,
   2249                                  [errors.FirstHeaderLineIsContinuationDefect])
   2250         eq(msg.defects[0].line, ' Line 1\n')
   2251 
   2252     # test_defect_handling
   2253     def test_missing_header_body_separator(self):
   2254         # Our heuristic if we see a line that doesn't look like a header (no
   2255         # leading whitespace but no ':') is to assume that the blank line that
   2256         # separates the header from the body is missing, and to stop parsing
   2257         # headers and start parsing the body.
   2258         msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
   2259         self.assertEqual(msg.keys(), ['Subject'])
   2260         self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
   2261         self.assertDefectsEqual(msg.defects,
   2262                                 [errors.MissingHeaderBodySeparatorDefect])
   2263 
   2264 
   2265 # Test RFC 2047 header encoding and decoding
   2266 class TestRFC2047(TestEmailBase):
   2267     def test_rfc2047_multiline(self):
   2268         eq = self.assertEqual
   2269         s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
   2270  foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
   2271         dh = decode_header(s)
   2272         eq(dh, [
   2273             (b'Re: ', None),
   2274             (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
   2275             (b' baz foo bar ', None),
   2276             (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
   2277         header = make_header(dh)
   2278         eq(str(header),
   2279            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
   2280         self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
   2281 Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
   2282  =?mac-iceland?q?=9Arg=8Cs?=""")
   2283 
   2284     def test_whitespace_keeper_unicode(self):
   2285         eq = self.assertEqual
   2286         s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard (at] dom.ain>'
   2287         dh = decode_header(s)
   2288         eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
   2289                 (b' Pirard <pirard (at] dom.ain>', None)])
   2290         header = str(make_header(dh))
   2291         eq(header, 'Andr\xe9 Pirard <pirard (at] dom.ain>')
   2292 
   2293     def test_whitespace_keeper_unicode_2(self):
   2294         eq = self.assertEqual
   2295         s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
   2296         dh = decode_header(s)
   2297         eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
   2298                 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
   2299         hu = str(make_header(dh))
   2300         eq(hu, 'The quick brown fox jumped over the lazy dog')
   2301 
   2302     def test_rfc2047_missing_whitespace(self):
   2303         s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
   2304         dh = decode_header(s)
   2305         self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
   2306                               (b'rg', None), (b'\xe5', 'iso-8859-1'),
   2307                               (b'sbord', None)])
   2308 
   2309     def test_rfc2047_with_whitespace(self):
   2310         s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
   2311         dh = decode_header(s)
   2312         self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
   2313                               (b' rg ', None), (b'\xe5', 'iso-8859-1'),
   2314                               (b' sbord', None)])
   2315 
   2316     def test_rfc2047_B_bad_padding(self):
   2317         s = '=?iso-8859-1?B?%s?='
   2318         data = [                                # only test complete bytes
   2319             ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
   2320             ('dmk=', b'vi'), ('dmk', b'vi')
   2321           ]
   2322         for q, a in data:
   2323             dh = decode_header(s % q)
   2324             self.assertEqual(dh, [(a, 'iso-8859-1')])
   2325 
   2326     def test_rfc2047_Q_invalid_digits(self):
   2327         # issue 10004.
   2328         s = '=?iso-8859-1?Q?andr=e9=zz?='
   2329         self.assertEqual(decode_header(s),
   2330                         [(b'andr\xe9=zz', 'iso-8859-1')])
   2331 
   2332     def test_rfc2047_rfc2047_1(self):
   2333         # 1st testcase at end of rfc2047
   2334         s = '(=?ISO-8859-1?Q?a?=)'
   2335         self.assertEqual(decode_header(s),
   2336             [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])
   2337 
   2338     def test_rfc2047_rfc2047_2(self):
   2339         # 2nd testcase at end of rfc2047
   2340         s = '(=?ISO-8859-1?Q?a?= b)'
   2341         self.assertEqual(decode_header(s),
   2342             [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])
   2343 
   2344     def test_rfc2047_rfc2047_3(self):
   2345         # 3rd testcase at end of rfc2047
   2346         s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
   2347         self.assertEqual(decode_header(s),
   2348             [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
   2349 
   2350     def test_rfc2047_rfc2047_4(self):
   2351         # 4th testcase at end of rfc2047
   2352         s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
   2353         self.assertEqual(decode_header(s),
   2354             [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
   2355 
   2356     def test_rfc2047_rfc2047_5a(self):
   2357         # 5th testcase at end of rfc2047 newline is \r\n
   2358         s = '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)'
   2359         self.assertEqual(decode_header(s),
   2360             [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
   2361 
   2362     def test_rfc2047_rfc2047_5b(self):
   2363         # 5th testcase at end of rfc2047 newline is \n
   2364         s = '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)'
   2365         self.assertEqual(decode_header(s),
   2366             [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
   2367 
   2368     def test_rfc2047_rfc2047_6(self):
   2369         # 6th testcase at end of rfc2047
   2370         s = '(=?ISO-8859-1?Q?a_b?=)'
   2371         self.assertEqual(decode_header(s),
   2372             [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])
   2373 
   2374     def test_rfc2047_rfc2047_7(self):
   2375         # 7th testcase at end of rfc2047
   2376         s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
   2377         self.assertEqual(decode_header(s),
   2378             [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
   2379              (b')', None)])
   2380         self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
   2381         self.assertEqual(str(make_header(decode_header(s))), '(a b)')
   2382 
   2383     def test_multiline_header(self):
   2384         s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller (at] xxx.com>'
   2385         self.assertEqual(decode_header(s),
   2386             [(b'"M\xfcller T"', 'windows-1252'),
   2387              (b'<T.Mueller (at] xxx.com>', None)])
   2388         self.assertEqual(make_header(decode_header(s)).encode(),
   2389                          ''.join(s.splitlines()))
   2390         self.assertEqual(str(make_header(decode_header(s))),
   2391                          '"Mller T" <T.Mueller (at] xxx.com>')
   2392 
   2393 
   2394 # Test the MIMEMessage class
   2395 class TestMIMEMessage(TestEmailBase):
   2396     def setUp(self):
   2397         with openfile('msg_11.txt') as fp:
   2398             self._text = fp.read()
   2399 
   2400     def test_type_error(self):
   2401         self.assertRaises(TypeError, MIMEMessage, 'a plain string')
   2402 
   2403     def test_valid_argument(self):
   2404         eq = self.assertEqual
   2405         subject = 'A sub-message'
   2406         m = Message()
   2407         m['Subject'] = subject
   2408         r = MIMEMessage(m)
   2409         eq(r.get_content_type(), 'message/rfc822')
   2410         payload = r.get_payload()
   2411         self.assertIsInstance(payload, list)
   2412         eq(len(payload), 1)
   2413         subpart = payload[0]
   2414         self.assertIs(subpart, m)
   2415         eq(subpart['subject'], subject)
   2416 
   2417     def test_bad_multipart(self):
   2418         msg1 = Message()
   2419         msg1['Subject'] = 'subpart 1'
   2420         msg2 = Message()
   2421         msg2['Subject'] = 'subpart 2'
   2422         r = MIMEMessage(msg1)
   2423         self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
   2424 
   2425     def test_generate(self):
   2426         # First craft the message to be encapsulated
   2427         m = Message()
   2428         m['Subject'] = 'An enclosed message'
   2429         m.set_payload('Here is the body of the message.\n')
   2430         r = MIMEMessage(m)
   2431         r['Subject'] = 'The enclosing message'
   2432         s = StringIO()
   2433         g = Generator(s)
   2434         g.flatten(r)
   2435         self.assertEqual(s.getvalue(), """\
   2436 Content-Type: message/rfc822
   2437 MIME-Version: 1.0
   2438 Subject: The enclosing message
   2439 
   2440 Subject: An enclosed message
   2441 
   2442 Here is the body of the message.
   2443 """)
   2444 
   2445     def test_parse_message_rfc822(self):
   2446         eq = self.assertEqual
   2447         msg = self._msgobj('msg_11.txt')
   2448         eq(msg.get_content_type(), 'message/rfc822')
   2449         payload = msg.get_payload()
   2450         self.assertIsInstance(payload, list)
   2451         eq(len(payload), 1)
   2452         submsg = payload[0]
   2453         self.assertIsInstance(submsg, Message)
   2454         eq(submsg['subject'], 'An enclosed message')
   2455         eq(submsg.get_payload(), 'Here is the body of the message.\n')
   2456 
   2457     def test_dsn(self):
   2458         eq = self.assertEqual
   2459         # msg 16 is a Delivery Status Notification, see RFC 1894
   2460         msg = self._msgobj('msg_16.txt')
   2461         eq(msg.get_content_type(), 'multipart/report')
   2462         self.assertTrue(msg.is_multipart())
   2463         eq(len(msg.get_payload()), 3)
   2464         # Subpart 1 is a text/plain, human readable section
   2465         subpart = msg.get_payload(0)
   2466         eq(subpart.get_content_type(), 'text/plain')
   2467         eq(subpart.get_payload(), """\
   2468 This report relates to a message you sent with the following header fields:
   2469 
   2470   Message-id: <002001c144a6$8752e060$56104586 (at] oxy.edu>
   2471   Date: Sun, 23 Sep 2001 20:10:55 -0700
   2472   From: "Ian T. Henry" <henryi (at] oxy.edu>
   2473   To: SoCal Raves <scr (at] socal-raves.org>
   2474   Subject: [scr] yeah for Ians!!
   2475 
   2476 Your message cannot be delivered to the following recipients:
   2477 
   2478   Recipient address: jangel1 (at] cougar.noc.ucla.edu
   2479   Reason: recipient reached disk quota
   2480 
   2481 """)
   2482         # Subpart 2 contains the machine parsable DSN information.  It
   2483         # consists of two blocks of headers, represented by two nested Message
   2484         # objects.
   2485         subpart = msg.get_payload(1)
   2486         eq(subpart.get_content_type(), 'message/delivery-status')
   2487         eq(len(subpart.get_payload()), 2)
   2488         # message/delivery-status should treat each block as a bunch of
   2489         # headers, i.e. a bunch of Message objects.
   2490         dsn1 = subpart.get_payload(0)
   2491         self.assertIsInstance(dsn1, Message)
   2492         eq(dsn1['original-envelope-id'], '0GK500B4HD0888 (at] cougar.noc.ucla.edu')
   2493         eq(dsn1.get_param('dns', header='reporting-mta'), '')
   2494         # Try a missing one <wink>
   2495         eq(dsn1.get_param('nsd', header='reporting-mta'), None)
   2496         dsn2 = subpart.get_payload(1)
   2497         self.assertIsInstance(dsn2, Message)
   2498         eq(dsn2['action'], 'failed')
   2499         eq(dsn2.get_params(header='original-recipient'),
   2500            [('rfc822', ''), ('jangel1 (at] cougar.noc.ucla.edu', '')])
   2501         eq(dsn2.get_param('rfc822', header='final-recipient'), '')
   2502         # Subpart 3 is the original message
   2503         subpart = msg.get_payload(2)
   2504         eq(subpart.get_content_type(), 'message/rfc822')
   2505         payload = subpart.get_payload()
   2506         self.assertIsInstance(payload, list)
   2507         eq(len(payload), 1)
   2508         subsubpart = payload[0]
   2509         self.assertIsInstance(subsubpart, Message)
   2510         eq(subsubpart.get_content_type(), 'text/plain')
   2511         eq(subsubpart['message-id'],
   2512            '<002001c144a6$8752e060$56104586 (at] oxy.edu>')
   2513 
   2514     def test_epilogue(self):
   2515         eq = self.ndiffAssertEqual
   2516         with openfile('msg_21.txt') as fp:
   2517             text = fp.read()
   2518         msg = Message()
   2519         msg['From'] = 'aperson (at] dom.ain'
   2520         msg['To'] = 'bperson (at] dom.ain'
   2521         msg['Subject'] = 'Test'
   2522         msg.preamble = 'MIME message'
   2523         msg.epilogue = 'End of MIME message\n'
   2524         msg1 = MIMEText('One')
   2525         msg2 = MIMEText('Two')
   2526         msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
   2527         msg.attach(msg1)
   2528         msg.attach(msg2)
   2529         sfp = StringIO()
   2530         g = Generator(sfp)
   2531         g.flatten(msg)
   2532         eq(sfp.getvalue(), text)
   2533 
   2534     def test_no_nl_preamble(self):
   2535         eq = self.ndiffAssertEqual
   2536         msg = Message()
   2537         msg['From'] = 'aperson (at] dom.ain'
   2538         msg['To'] = 'bperson (at] dom.ain'
   2539         msg['Subject'] = 'Test'
   2540         msg.preamble = 'MIME message'
   2541         msg.epilogue = ''
   2542         msg1 = MIMEText('One')
   2543         msg2 = MIMEText('Two')
   2544         msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
   2545         msg.attach(msg1)
   2546         msg.attach(msg2)
   2547         eq(msg.as_string(), """\
   2548 From: aperson (at] dom.ain
   2549 To: bperson (at] dom.ain
   2550 Subject: Test
   2551 Content-Type: multipart/mixed; boundary="BOUNDARY"
   2552 
   2553 MIME message
   2554 --BOUNDARY
   2555 Content-Type: text/plain; charset="us-ascii"
   2556 MIME-Version: 1.0
   2557 Content-Transfer-Encoding: 7bit
   2558 
   2559 One
   2560 --BOUNDARY
   2561 Content-Type: text/plain; charset="us-ascii"
   2562 MIME-Version: 1.0
   2563 Content-Transfer-Encoding: 7bit
   2564 
   2565 Two
   2566 --BOUNDARY--
   2567 """)
   2568 
   2569     def test_default_type(self):
   2570         eq = self.assertEqual
   2571         with openfile('msg_30.txt') as fp:
   2572             msg = email.message_from_file(fp)
   2573         container1 = msg.get_payload(0)
   2574         eq(container1.get_default_type(), 'message/rfc822')
   2575         eq(container1.get_content_type(), 'message/rfc822')
   2576         container2 = msg.get_payload(1)
   2577         eq(container2.get_default_type(), 'message/rfc822')
   2578         eq(container2.get_content_type(), 'message/rfc822')
   2579         container1a = container1.get_payload(0)
   2580         eq(container1a.get_default_type(), 'text/plain')
   2581         eq(container1a.get_content_type(), 'text/plain')
   2582         container2a = container2.get_payload(0)
   2583         eq(container2a.get_default_type(), 'text/plain')
   2584         eq(container2a.get_content_type(), 'text/plain')
   2585 
   2586     def test_default_type_with_explicit_container_type(self):
   2587         eq = self.assertEqual
   2588         with openfile('msg_28.txt') as fp:
   2589             msg = email.message_from_file(fp)
   2590         container1 = msg.get_payload(0)
   2591         eq(container1.get_default_type(), 'message/rfc822')
   2592         eq(container1.get_content_type(), 'message/rfc822')
   2593         container2 = msg.get_payload(1)
   2594         eq(container2.get_default_type(), 'message/rfc822')
   2595         eq(container2.get_content_type(), 'message/rfc822')
   2596         container1a = container1.get_payload(0)
   2597         eq(container1a.get_default_type(), 'text/plain')
   2598         eq(container1a.get_content_type(), 'text/plain')
   2599         container2a = container2.get_payload(0)
   2600         eq(container2a.get_default_type(), 'text/plain')
   2601         eq(container2a.get_content_type(), 'text/plain')
   2602 
   2603     def test_default_type_non_parsed(self):
   2604         eq = self.assertEqual
   2605         neq = self.ndiffAssertEqual
   2606         # Set up container
   2607         container = MIMEMultipart('digest', 'BOUNDARY')
   2608         container.epilogue = ''
   2609         # Set up subparts
   2610         subpart1a = MIMEText('message 1\n')
   2611         subpart2a = MIMEText('message 2\n')
   2612         subpart1 = MIMEMessage(subpart1a)
   2613         subpart2 = MIMEMessage(subpart2a)
   2614         container.attach(subpart1)
   2615         container.attach(subpart2)
   2616         eq(subpart1.get_content_type(), 'message/rfc822')
   2617         eq(subpart1.get_default_type(), 'message/rfc822')
   2618         eq(subpart2.get_content_type(), 'message/rfc822')
   2619         eq(subpart2.get_default_type(), 'message/rfc822')
   2620         neq(container.as_string(0), '''\
   2621 Content-Type: multipart/digest; boundary="BOUNDARY"
   2622 MIME-Version: 1.0
   2623 
   2624 --BOUNDARY
   2625 Content-Type: message/rfc822
   2626 MIME-Version: 1.0
   2627 
   2628 Content-Type: text/plain; charset="us-ascii"
   2629 MIME-Version: 1.0
   2630 Content-Transfer-Encoding: 7bit
   2631 
   2632 message 1
   2633 
   2634 --BOUNDARY
   2635 Content-Type: message/rfc822
   2636 MIME-Version: 1.0
   2637 
   2638 Content-Type: text/plain; charset="us-ascii"
   2639 MIME-Version: 1.0
   2640 Content-Transfer-Encoding: 7bit
   2641 
   2642 message 2
   2643 
   2644 --BOUNDARY--
   2645 ''')
   2646         del subpart1['content-type']
   2647         del subpart1['mime-version']
   2648         del subpart2['content-type']
   2649         del subpart2['mime-version']
   2650         eq(subpart1.get_content_type(), 'message/rfc822')
   2651         eq(subpart1.get_default_type(), 'message/rfc822')
   2652         eq(subpart2.get_content_type(), 'message/rfc822')
   2653         eq(subpart2.get_default_type(), 'message/rfc822')
   2654         neq(container.as_string(0), '''\
   2655 Content-Type: multipart/digest; boundary="BOUNDARY"
   2656 MIME-Version: 1.0
   2657 
   2658 --BOUNDARY
   2659 
   2660 Content-Type: text/plain; charset="us-ascii"
   2661 MIME-Version: 1.0
   2662 Content-Transfer-Encoding: 7bit
   2663 
   2664 message 1
   2665 
   2666 --BOUNDARY
   2667 
   2668 Content-Type: text/plain; charset="us-ascii"
   2669 MIME-Version: 1.0
   2670 Content-Transfer-Encoding: 7bit
   2671 
   2672 message 2
   2673 
   2674 --BOUNDARY--
   2675 ''')
   2676 
   2677     def test_mime_attachments_in_constructor(self):
   2678         eq = self.assertEqual
   2679         text1 = MIMEText('')
   2680         text2 = MIMEText('')
   2681         msg = MIMEMultipart(_subparts=(text1, text2))
   2682         eq(len(msg.get_payload()), 2)
   2683         eq(msg.get_payload(0), text1)
   2684         eq(msg.get_payload(1), text2)
   2685 
   2686     def test_default_multipart_constructor(self):
   2687         msg = MIMEMultipart()
   2688         self.assertTrue(msg.is_multipart())
   2689 
   2690     def test_multipart_default_policy(self):
   2691         msg = MIMEMultipart()
   2692         msg['To'] = 'a (at] b.com'
   2693         msg['To'] = 'c (at] d.com'
   2694         self.assertEqual(msg.get_all('to'), ['a (at] b.com', 'c (at] d.com'])
   2695 
   2696     def test_multipart_custom_policy(self):
   2697         msg = MIMEMultipart(policy=email.policy.default)
   2698         msg['To'] = 'a (at] b.com'
   2699         with self.assertRaises(ValueError) as cm:
   2700             msg['To'] = 'c (at] d.com'
   2701         self.assertEqual(str(cm.exception),
   2702                          'There may be at most 1 To headers in a message')
   2703 
   2704 # A general test of parser->model->generator idempotency.  IOW, read a message
   2705 # in, parse it into a message object tree, then without touching the tree,
   2706 # regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
   2709 class TestIdempotent(TestEmailBase):
   2710 
   2711     linesep = '\n'
   2712 
   2713     def _msgobj(self, filename):
   2714         with openfile(filename) as fp:
   2715             data = fp.read()
   2716         msg = email.message_from_string(data)
   2717         return msg, data
   2718 
   2719     def _idempotent(self, msg, text, unixfrom=False):
   2720         eq = self.ndiffAssertEqual
   2721         s = StringIO()
   2722         g = Generator(s, maxheaderlen=0)
   2723         g.flatten(msg, unixfrom=unixfrom)
   2724         eq(text, s.getvalue())
   2725 
   2726     def test_parse_text_message(self):
   2727         eq = self.assertEqual
   2728         msg, text = self._msgobj('msg_01.txt')
   2729         eq(msg.get_content_type(), 'text/plain')
   2730         eq(msg.get_content_maintype(), 'text')
   2731         eq(msg.get_content_subtype(), 'plain')
   2732         eq(msg.get_params()[1], ('charset', 'us-ascii'))
   2733         eq(msg.get_param('charset'), 'us-ascii')
   2734         eq(msg.preamble, None)
   2735         eq(msg.epilogue, None)
   2736         self._idempotent(msg, text)
   2737 
   2738     def test_parse_untyped_message(self):
   2739         eq = self.assertEqual
   2740         msg, text = self._msgobj('msg_03.txt')
   2741         eq(msg.get_content_type(), 'text/plain')
   2742         eq(msg.get_params(), None)
   2743         eq(msg.get_param('charset'), None)
   2744         self._idempotent(msg, text)
   2745 
   2746     def test_simple_multipart(self):
   2747         msg, text = self._msgobj('msg_04.txt')
   2748         self._idempotent(msg, text)
   2749 
   2750     def test_MIME_digest(self):
   2751         msg, text = self._msgobj('msg_02.txt')
   2752         self._idempotent(msg, text)
   2753 
   2754     def test_long_header(self):
   2755         msg, text = self._msgobj('msg_27.txt')
   2756         self._idempotent(msg, text)
   2757 
   2758     def test_MIME_digest_with_part_headers(self):
   2759         msg, text = self._msgobj('msg_28.txt')
   2760         self._idempotent(msg, text)
   2761 
   2762     def test_mixed_with_image(self):
   2763         msg, text = self._msgobj('msg_06.txt')
   2764         self._idempotent(msg, text)
   2765 
   2766     def test_multipart_report(self):
   2767         msg, text = self._msgobj('msg_05.txt')
   2768         self._idempotent(msg, text)
   2769 
   2770     def test_dsn(self):
   2771         msg, text = self._msgobj('msg_16.txt')
   2772         self._idempotent(msg, text)
   2773 
   2774     def test_preamble_epilogue(self):
   2775         msg, text = self._msgobj('msg_21.txt')
   2776         self._idempotent(msg, text)
   2777 
   2778     def test_multipart_one_part(self):
   2779         msg, text = self._msgobj('msg_23.txt')
   2780         self._idempotent(msg, text)
   2781 
   2782     def test_multipart_no_parts(self):
   2783         msg, text = self._msgobj('msg_24.txt')
   2784         self._idempotent(msg, text)
   2785 
   2786     def test_no_start_boundary(self):
   2787         msg, text = self._msgobj('msg_31.txt')
   2788         self._idempotent(msg, text)
   2789 
   2790     def test_rfc2231_charset(self):
   2791         msg, text = self._msgobj('msg_32.txt')
   2792         self._idempotent(msg, text)
   2793 
   2794     def test_more_rfc2231_parameters(self):
   2795         msg, text = self._msgobj('msg_33.txt')
   2796         self._idempotent(msg, text)
   2797 
   2798     def test_text_plain_in_a_multipart_digest(self):
   2799         msg, text = self._msgobj('msg_34.txt')
   2800         self._idempotent(msg, text)
   2801 
   2802     def test_nested_multipart_mixeds(self):
   2803         msg, text = self._msgobj('msg_12a.txt')
   2804         self._idempotent(msg, text)
   2805 
   2806     def test_message_external_body_idempotent(self):
   2807         msg, text = self._msgobj('msg_36.txt')
   2808         self._idempotent(msg, text)
   2809 
   2810     def test_message_delivery_status(self):
   2811         msg, text = self._msgobj('msg_43.txt')
   2812         self._idempotent(msg, text, unixfrom=True)
   2813 
   2814     def test_message_signed_idempotent(self):
   2815         msg, text = self._msgobj('msg_45.txt')
   2816         self._idempotent(msg, text)
   2817 
   2818     def test_content_type(self):
   2819         eq = self.assertEqual
   2820         # Get a message object and reset the seek pointer for other tests
   2821         msg, text = self._msgobj('msg_05.txt')
   2822         eq(msg.get_content_type(), 'multipart/report')
   2823         # Test the Content-Type: parameters
   2824         params = {}
   2825         for pk, pv in msg.get_params():
   2826             params[pk] = pv
   2827         eq(params['report-type'], 'delivery-status')
   2828         eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
   2829         eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
   2830         eq(msg.epilogue, self.linesep)
   2831         eq(len(msg.get_payload()), 3)
   2832         # Make sure the subparts are what we expect
   2833         msg1 = msg.get_payload(0)
   2834         eq(msg1.get_content_type(), 'text/plain')
   2835         eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
   2836         msg2 = msg.get_payload(1)
   2837         eq(msg2.get_content_type(), 'text/plain')
   2838         eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
   2839         msg3 = msg.get_payload(2)
   2840         eq(msg3.get_content_type(), 'message/rfc822')
   2841         self.assertIsInstance(msg3, Message)
   2842         payload = msg3.get_payload()
   2843         self.assertIsInstance(payload, list)
   2844         eq(len(payload), 1)
   2845         msg4 = payload[0]
   2846         self.assertIsInstance(msg4, Message)
   2847         eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)
   2848 
   2849     def test_parser(self):
   2850         eq = self.assertEqual
   2851         msg, text = self._msgobj('msg_06.txt')
   2852         # Check some of the outer headers
   2853         eq(msg.get_content_type(), 'message/rfc822')
   2854         # Make sure the payload is a list of exactly one sub-Message, and that
   2855         # that submessage has a type of text/plain
   2856         payload = msg.get_payload()
   2857         self.assertIsInstance(payload, list)
   2858         eq(len(payload), 1)
   2859         msg1 = payload[0]
   2860         self.assertIsInstance(msg1, Message)
   2861         eq(msg1.get_content_type(), 'text/plain')
   2862         self.assertIsInstance(msg1.get_payload(), str)
   2863         eq(msg1.get_payload(), self.linesep)
   2864 
   2865 
   2866 
   2867 # Test various other bits of the package's functionality
   2868 class TestMiscellaneous(TestEmailBase):
   2869     def test_message_from_string(self):
   2870         with openfile('msg_01.txt') as fp:
   2871             text = fp.read()
   2872         msg = email.message_from_string(text)
   2873         s = StringIO()
   2874         # Don't wrap/continue long headers since we're trying to test
   2875         # idempotency.
   2876         g = Generator(s, maxheaderlen=0)
   2877         g.flatten(msg)
   2878         self.assertEqual(text, s.getvalue())
   2879 
   2880     def test_message_from_file(self):
   2881         with openfile('msg_01.txt') as fp:
   2882             text = fp.read()
   2883             fp.seek(0)
   2884             msg = email.message_from_file(fp)
   2885             s = StringIO()
   2886             # Don't wrap/continue long headers since we're trying to test
   2887             # idempotency.
   2888             g = Generator(s, maxheaderlen=0)
   2889             g.flatten(msg)
   2890             self.assertEqual(text, s.getvalue())
   2891 
   2892     def test_message_from_string_with_class(self):
   2893         with openfile('msg_01.txt') as fp:
   2894             text = fp.read()
   2895 
   2896         # Create a subclass
   2897         class MyMessage(Message):
   2898             pass
   2899 
   2900         msg = email.message_from_string(text, MyMessage)
   2901         self.assertIsInstance(msg, MyMessage)
   2902         # Try something more complicated
   2903         with openfile('msg_02.txt') as fp:
   2904             text = fp.read()
   2905         msg = email.message_from_string(text, MyMessage)
   2906         for subpart in msg.walk():
   2907             self.assertIsInstance(subpart, MyMessage)
   2908 
   2909     def test_message_from_file_with_class(self):
   2910         # Create a subclass
   2911         class MyMessage(Message):
   2912             pass
   2913 
   2914         with openfile('msg_01.txt') as fp:
   2915             msg = email.message_from_file(fp, MyMessage)
   2916         self.assertIsInstance(msg, MyMessage)
   2917         # Try something more complicated
   2918         with openfile('msg_02.txt') as fp:
   2919             msg = email.message_from_file(fp, MyMessage)
   2920         for subpart in msg.walk():
   2921             self.assertIsInstance(subpart, MyMessage)
   2922 
   2923     def test_custom_message_does_not_require_arguments(self):
   2924         class MyMessage(Message):
   2925             def __init__(self):
   2926                 super().__init__()
   2927         msg = self._str_msg("Subject: test\n\ntest", MyMessage)
   2928         self.assertIsInstance(msg, MyMessage)
   2929 
   2930     def test__all__(self):
   2931         module = __import__('email')
   2932         self.assertEqual(sorted(module.__all__), [
   2933             'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
   2934             'generator', 'header', 'iterators', 'message',
   2935             'message_from_binary_file', 'message_from_bytes',
   2936             'message_from_file', 'message_from_string', 'mime', 'parser',
   2937             'quoprimime', 'utils',
   2938             ])
   2939 
   2940     def test_formatdate(self):
   2941         now = time.time()
   2942         self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
   2943                          time.gmtime(now)[:6])
   2944 
   2945     def test_formatdate_localtime(self):
   2946         now = time.time()
   2947         self.assertEqual(
   2948             utils.parsedate(utils.formatdate(now, localtime=True))[:6],
   2949             time.localtime(now)[:6])
   2950 
   2951     def test_formatdate_usegmt(self):
   2952         now = time.time()
   2953         self.assertEqual(
   2954             utils.formatdate(now, localtime=False),
   2955             time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
   2956         self.assertEqual(
   2957             utils.formatdate(now, localtime=False, usegmt=True),
   2958             time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
   2959 
   2960     # parsedate and parsedate_tz will become deprecated interfaces someday
   2961     def test_parsedate_returns_None_for_invalid_strings(self):
   2962         self.assertIsNone(utils.parsedate(''))
   2963         self.assertIsNone(utils.parsedate_tz(''))
   2964         self.assertIsNone(utils.parsedate('0'))
   2965         self.assertIsNone(utils.parsedate_tz('0'))
   2966         self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
   2967         self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
   2968         # Not a part of the spec but, but this has historically worked:
   2969         self.assertIsNone(utils.parsedate(None))
   2970         self.assertIsNone(utils.parsedate_tz(None))
   2971 
   2972     def test_parsedate_compact(self):
   2973         # The FWS after the comma is optional
   2974         self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
   2975                          utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
   2976 
   2977     def test_parsedate_no_dayofweek(self):
   2978         eq = self.assertEqual
   2979         eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
   2980            (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
   2981 
   2982     def test_parsedate_compact_no_dayofweek(self):
   2983         eq = self.assertEqual
   2984         eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
   2985            (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
   2986 
   2987     def test_parsedate_no_space_before_positive_offset(self):
   2988         self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
   2989            (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
   2990 
   2991     def test_parsedate_no_space_before_negative_offset(self):
   2992         # Issue 1155362: we already handled '+' for this case.
   2993         self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
   2994            (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
   2995 
   2996 
   2997     def test_parsedate_accepts_time_with_dots(self):
   2998         eq = self.assertEqual
   2999         eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
   3000            (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
   3001         eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
   3002            (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
   3003 
   3004     def test_parsedate_acceptable_to_time_functions(self):
   3005         eq = self.assertEqual
   3006         timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
   3007         t = int(time.mktime(timetup))
   3008         eq(time.localtime(t)[:6], timetup[:6])
   3009         eq(int(time.strftime('%Y', timetup)), 2003)
   3010         timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
   3011         t = int(time.mktime(timetup[:9]))
   3012         eq(time.localtime(t)[:6], timetup[:6])
   3013         eq(int(time.strftime('%Y', timetup[:9])), 2003)
   3014 
   3015     def test_mktime_tz(self):
   3016         self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
   3017                                           -1, -1, -1, 0)), 0)
   3018         self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
   3019                                           -1, -1, -1, 1234)), -1234)
   3020 
   3021     def test_parsedate_y2k(self):
   3022         """Test for parsing a date with a two-digit year.
   3023 
   3024         Parsing a date with a two-digit year should return the correct
   3025         four-digit year. RFC822 allows two-digit years, but RFC2822 (which
   3026         obsoletes RFC822) requires four-digit years.
   3027 
   3028         """
   3029         self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
   3030                          utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
   3031         self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
   3032                          utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
   3033 
   3034     def test_parseaddr_empty(self):
   3035         self.assertEqual(utils.parseaddr('<>'), ('', ''))
   3036         self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
   3037 
   3038     def test_noquote_dump(self):
   3039         self.assertEqual(
   3040             utils.formataddr(('A Silly Person', 'person (at] dom.ain')),
   3041             'A Silly Person <person (at] dom.ain>')
   3042 
   3043     def test_escape_dump(self):
   3044         self.assertEqual(
   3045             utils.formataddr(('A (Very) Silly Person', 'person (at] dom.ain')),
   3046             r'"A (Very) Silly Person" <person (at] dom.ain>')
   3047         self.assertEqual(
   3048             utils.parseaddr(r'"A \(Very\) Silly Person" <person (at] dom.ain>'),
   3049             ('A (Very) Silly Person', 'person (at] dom.ain'))
   3050         a = r'A \(Special\) Person'
   3051         b = 'person (at] dom.ain'
   3052         self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
   3053 
   3054     def test_escape_backslashes(self):
   3055         self.assertEqual(
   3056             utils.formataddr((r'Arthur \Backslash\ Foobar', 'person (at] dom.ain')),
   3057             r'"Arthur \\Backslash\\ Foobar" <person (at] dom.ain>')
   3058         a = r'Arthur \Backslash\ Foobar'
   3059         b = 'person (at] dom.ain'
   3060         self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
   3061 
   3062     def test_quotes_unicode_names(self):
   3063         # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
   3064         name = "H\u00e4ns W\u00fcrst"
   3065         addr = 'person (at] dom.ain'
   3066         utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person (at] dom.ain>"
   3067         latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person (at] dom.ain>"
   3068         self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
   3069         self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
   3070             latin1_quopri)
   3071 
   3072     def test_accepts_any_charset_like_object(self):
   3073         # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
   3074         name = "H\u00e4ns W\u00fcrst"
   3075         addr = 'person (at] dom.ain'
   3076         utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person (at] dom.ain>"
   3077         foobar = "FOOBAR"
   3078         class CharsetMock:
   3079             def header_encode(self, string):
   3080                 return foobar
   3081         mock = CharsetMock()
   3082         mock_expected = "%s <%s>" % (foobar, addr)
   3083         self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
   3084         self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
   3085             utf8_base64)
   3086 
   3087     def test_invalid_charset_like_object_raises_error(self):
   3088         # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
   3089         name = "H\u00e4ns W\u00fcrst"
   3090         addr = 'person (at] dom.ain'
   3091         # An object without a header_encode method:
   3092         bad_charset = object()
   3093         self.assertRaises(AttributeError, utils.formataddr, (name, addr),
   3094             bad_charset)
   3095 
   3096     def test_unicode_address_raises_error(self):
   3097         # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
   3098         addr = 'pers\u00f6n (at] dom.in'
   3099         self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
   3100         self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
   3101 
   3102     def test_name_with_dot(self):
   3103         x = 'John X. Doe <jxd (at] example.com>'
   3104         y = '"John X. Doe" <jxd (at] example.com>'
   3105         a, b = ('John X. Doe', 'jxd (at] example.com')
   3106         self.assertEqual(utils.parseaddr(x), (a, b))
   3107         self.assertEqual(utils.parseaddr(y), (a, b))
   3108         # formataddr() quotes the name if there's a dot in it
   3109         self.assertEqual(utils.formataddr((a, b)), y)
   3110 
   3111     def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
   3112         # issue 10005.  Note that in the third test the second pair of
   3113         # backslashes is not actually a quoted pair because it is not inside a
   3114         # comment or quoted string: the address being parsed has a quoted
   3115         # string containing a quoted backslash, followed by 'example' and two
   3116         # backslashes, followed by another quoted string containing a space and
   3117         # the word 'example'.  parseaddr copies those two backslashes
   3118         # literally.  Per rfc5322 this is not technically correct since a \ may
   3119         # not appear in an address outside of a quoted string.  It is probably
   3120         # a sensible Postel interpretation, though.
   3121         eq = self.assertEqual
   3122         eq(utils.parseaddr('""example" example"@example.com'),
   3123           ('', '""example" example"@example.com'))
   3124         eq(utils.parseaddr('"\\"example\\" example"@example.com'),
   3125           ('', '"\\"example\\" example"@example.com'))
   3126         eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
   3127           ('', '"\\\\"example\\\\" example"@example.com'))
   3128 
   3129     def test_parseaddr_preserves_spaces_in_local_part(self):
   3130         # issue 9286.  A normal RFC5322 local part should not contain any
   3131         # folding white space, but legacy local parts can (they are a sequence
   3132         # of atoms, not dotatoms).  On the other hand we strip whitespace from
   3133         # before the @ and around dots, on the assumption that the whitespace
   3134         # around the punctuation is a mistake in what would otherwise be
   3135         # an RFC5322 local part.  Leading whitespace is, usual, stripped as well.
   3136         self.assertEqual(('', "merwok wok (at] xample.com"),
   3137             utils.parseaddr("merwok wok (at] xample.com"))
   3138         self.assertEqual(('', "merwok  wok (at] xample.com"),
   3139             utils.parseaddr("merwok  wok (at] xample.com"))
   3140         self.assertEqual(('', "merwok  wok (at] xample.com"),
   3141             utils.parseaddr(" merwok  wok  @xample.com"))
   3142         self.assertEqual(('', 'merwok"wok"  wok (at] xample.com'),
   3143             utils.parseaddr('merwok"wok"  wok (at] xample.com'))
   3144         self.assertEqual(('', 'merwok.wok.wok (at] xample.com'),
   3145             utils.parseaddr('merwok. wok .  wok (at] xample.com'))
   3146 
   3147     def test_formataddr_does_not_quote_parens_in_quoted_string(self):
   3148         addr = ("'foo (at] example.com' (foo (at] example.com)",
   3149                 'foo (at] example.com')
   3150         addrstr = ('"\'foo (at] example.com\' '
   3151                             '(foo (at] example.com)" <foo (at] example.com>')
   3152         self.assertEqual(utils.parseaddr(addrstr), addr)
   3153         self.assertEqual(utils.formataddr(addr), addrstr)
   3154 
   3155 
   3156     def test_multiline_from_comment(self):
   3157         x = """\
   3158 Foo
   3159 \tBar <foo (at] example.com>"""
   3160         self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo (at] example.com'))
   3161 
   3162     def test_quote_dump(self):
   3163         self.assertEqual(
   3164             utils.formataddr(('A Silly; Person', 'person (at] dom.ain')),
   3165             r'"A Silly; Person" <person (at] dom.ain>')
   3166 
   3167     def test_charset_richcomparisons(self):
   3168         eq = self.assertEqual
   3169         ne = self.assertNotEqual
   3170         cset1 = Charset()
   3171         cset2 = Charset()
   3172         eq(cset1, 'us-ascii')
   3173         eq(cset1, 'US-ASCII')
   3174         eq(cset1, 'Us-AsCiI')
   3175         eq('us-ascii', cset1)
   3176         eq('US-ASCII', cset1)
   3177         eq('Us-AsCiI', cset1)
   3178         ne(cset1, 'usascii')
   3179         ne(cset1, 'USASCII')
   3180         ne(cset1, 'UsAsCiI')
   3181         ne('usascii', cset1)
   3182         ne('USASCII', cset1)
   3183         ne('UsAsCiI', cset1)
   3184         eq(cset1, cset2)
   3185         eq(cset2, cset1)
   3186 
   3187     def test_getaddresses(self):
   3188         eq = self.assertEqual
   3189         eq(utils.getaddresses(['aperson (at] dom.ain (Al Person)',
   3190                                'Bud Person <bperson (at] dom.ain>']),
   3191            [('Al Person', 'aperson (at] dom.ain'),
   3192             ('Bud Person', 'bperson (at] dom.ain')])
   3193 
   3194     def test_getaddresses_nasty(self):
   3195         eq = self.assertEqual
   3196         eq(utils.getaddresses(['foo: ;']), [('', '')])
   3197         eq(utils.getaddresses(
   3198            ['[]*-- =~$']),
   3199            [('', ''), ('', ''), ('', '*--')])
   3200         eq(utils.getaddresses(
   3201            ['foo: ;', '"Jason R. Mastaler" <jason (at] dom.ain>']),
   3202            [('', ''), ('Jason R. Mastaler', 'jason (at] dom.ain')])
   3203 
   3204     def test_getaddresses_embedded_comment(self):
   3205         """Test proper handling of a nested comment"""
   3206         eq = self.assertEqual
   3207         addrs = utils.getaddresses(['User ((nested comment)) <foo (at] bar.com>'])
   3208         eq(addrs[0][1], 'foo (at] bar.com')
   3209 
   3210     def test_make_msgid_collisions(self):
   3211         # Test make_msgid uniqueness, even with multiple threads
   3212         class MsgidsThread(Thread):
   3213             def run(self):
   3214                 # generate msgids for 3 seconds
   3215                 self.msgids = []
   3216                 append = self.msgids.append
   3217                 make_msgid = utils.make_msgid
   3218                 clock = time.monotonic
   3219                 tfin = clock() + 3.0
   3220                 while clock() < tfin:
   3221                     append(make_msgid(domain='testdomain-string'))
   3222 
   3223         threads = [MsgidsThread() for i in range(5)]
   3224         with start_threads(threads):
   3225             pass
   3226         all_ids = sum([t.msgids for t in threads], [])
   3227         self.assertEqual(len(set(all_ids)), len(all_ids))
   3228 
   3229     def test_utils_quote_unquote(self):
   3230         eq = self.assertEqual
   3231         msg = Message()
   3232         msg.add_header('content-disposition', 'attachment',
   3233                        filename='foo\\wacky"name')
   3234         eq(msg.get_filename(), 'foo\\wacky"name')
   3235 
   3236     def test_get_body_encoding_with_bogus_charset(self):
   3237         charset = Charset('not a charset')
   3238         self.assertEqual(charset.get_body_encoding(), 'base64')
   3239 
   3240     def test_get_body_encoding_with_uppercase_charset(self):
   3241         eq = self.assertEqual
   3242         msg = Message()
   3243         msg['Content-Type'] = 'text/plain; charset=UTF-8'
   3244         eq(msg['content-type'], 'text/plain; charset=UTF-8')
   3245         charsets = msg.get_charsets()
   3246         eq(len(charsets), 1)
   3247         eq(charsets[0], 'utf-8')
   3248         charset = Charset(charsets[0])
   3249         eq(charset.get_body_encoding(), 'base64')
   3250         msg.set_payload(b'hello world', charset=charset)
   3251         eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
   3252         eq(msg.get_payload(decode=True), b'hello world')
   3253         eq(msg['content-transfer-encoding'], 'base64')
   3254         # Try another one
   3255         msg = Message()
   3256         msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
   3257         charsets = msg.get_charsets()
   3258         eq(len(charsets), 1)
   3259         eq(charsets[0], 'us-ascii')
   3260         charset = Charset(charsets[0])
   3261         eq(charset.get_body_encoding(), encoders.encode_7or8bit)
   3262         msg.set_payload('hello world', charset=charset)
   3263         eq(msg.get_payload(), 'hello world')
   3264         eq(msg['content-transfer-encoding'], '7bit')
   3265 
   3266     def test_charsets_case_insensitive(self):
   3267         lc = Charset('us-ascii')
   3268         uc = Charset('US-ASCII')
   3269         self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
   3270 
   3271     def test_partial_falls_inside_message_delivery_status(self):
   3272         eq = self.ndiffAssertEqual
   3273         # The Parser interface provides chunks of data to FeedParser in 8192
   3274         # byte gulps.  SF bug #1076485 found one of those chunks inside
   3275         # message/delivery-status header block, which triggered an
   3276         # unreadline() of NeedMoreData.
   3277         msg = self._msgobj('msg_43.txt')
   3278         sfp = StringIO()
   3279         iterators._structure(msg, sfp)
   3280         eq(sfp.getvalue(), """\
   3281 multipart/report
   3282     text/plain
   3283     message/delivery-status
   3284         text/plain
   3285         text/plain
   3286         text/plain
   3287         text/plain
   3288         text/plain
   3289         text/plain
   3290         text/plain
   3291         text/plain
   3292         text/plain
   3293         text/plain
   3294         text/plain
   3295         text/plain
   3296         text/plain
   3297         text/plain
   3298         text/plain
   3299         text/plain
   3300         text/plain
   3301         text/plain
   3302         text/plain
   3303         text/plain
   3304         text/plain
   3305         text/plain
   3306         text/plain
   3307         text/plain
   3308         text/plain
   3309         text/plain
   3310     text/rfc822-headers
   3311 """)
   3312 
   3313     def test_make_msgid_domain(self):
   3314         self.assertEqual(
   3315             email.utils.make_msgid(domain='testdomain-string')[-19:],
   3316             '@testdomain-string>')
   3317 
   3318     def test_make_msgid_idstring(self):
   3319         self.assertEqual(
   3320             email.utils.make_msgid(idstring='test-idstring',
   3321                 domain='testdomain-string')[-33:],
   3322             '.test-idstring@testdomain-string>')
   3323 
   3324     def test_make_msgid_default_domain(self):
   3325         self.assertTrue(
   3326             email.utils.make_msgid().endswith(
   3327                 '@' + getfqdn() + '>'))
   3328 
   3329     def test_Generator_linend(self):
   3330         # Issue 14645.
   3331         with openfile('msg_26.txt', newline='\n') as f:
   3332             msgtxt = f.read()
   3333         msgtxt_nl = msgtxt.replace('\r\n', '\n')
   3334         msg = email.message_from_string(msgtxt)
   3335         s = StringIO()
   3336         g = email.generator.Generator(s)
   3337         g.flatten(msg)
   3338         self.assertEqual(s.getvalue(), msgtxt_nl)
   3339 
   3340     def test_BytesGenerator_linend(self):
   3341         # Issue 14645.
   3342         with openfile('msg_26.txt', newline='\n') as f:
   3343             msgtxt = f.read()
   3344         msgtxt_nl = msgtxt.replace('\r\n', '\n')
   3345         msg = email.message_from_string(msgtxt_nl)
   3346         s = BytesIO()
   3347         g = email.generator.BytesGenerator(s)
   3348         g.flatten(msg, linesep='\r\n')
   3349         self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
   3350 
   3351     def test_BytesGenerator_linend_with_non_ascii(self):
   3352         # Issue 14645.
   3353         with openfile('msg_26.txt', 'rb') as f:
   3354             msgtxt = f.read()
   3355         msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
   3356         msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
   3357         msg = email.message_from_bytes(msgtxt_nl)
   3358         s = BytesIO()
   3359         g = email.generator.BytesGenerator(s)
   3360         g.flatten(msg, linesep='\r\n')
   3361         self.assertEqual(s.getvalue(), msgtxt)
   3362 
   3363     def test_mime_classes_policy_argument(self):
   3364         with openfile('audiotest.au', 'rb') as fp:
   3365             audiodata = fp.read()
   3366         with openfile('PyBanner048.gif', 'rb') as fp:
   3367             bindata = fp.read()
   3368         classes = [
   3369             (MIMEApplication, ('',)),
   3370             (MIMEAudio, (audiodata,)),
   3371             (MIMEImage, (bindata,)),
   3372             (MIMEMessage, (Message(),)),
   3373             (MIMENonMultipart, ('multipart', 'mixed')),
   3374             (MIMEText, ('',)),
   3375         ]
   3376         for cls, constructor in classes:
   3377             with self.subTest(cls=cls.__name__, policy='compat32'):
   3378                 m = cls(*constructor)
   3379                 self.assertIs(m.policy, email.policy.compat32)
   3380             with self.subTest(cls=cls.__name__, policy='default'):
   3381                 m = cls(*constructor, policy=email.policy.default)
   3382                 self.assertIs(m.policy, email.policy.default)
   3383 
   3384 
   3385 # Test the iterator/generators
   3386 class TestIterators(TestEmailBase):
   3387     def test_body_line_iterator(self):
   3388         eq = self.assertEqual
   3389         neq = self.ndiffAssertEqual
   3390         # First a simple non-multipart message
   3391         msg = self._msgobj('msg_01.txt')
   3392         it = iterators.body_line_iterator(msg)
   3393         lines = list(it)
   3394         eq(len(lines), 6)
   3395         neq(EMPTYSTRING.join(lines), msg.get_payload())
   3396         # Now a more complicated multipart
   3397         msg = self._msgobj('msg_02.txt')
   3398         it = iterators.body_line_iterator(msg)
   3399         lines = list(it)
   3400         eq(len(lines), 43)
   3401         with openfile('msg_19.txt') as fp:
   3402             neq(EMPTYSTRING.join(lines), fp.read())
   3403 
   3404     def test_typed_subpart_iterator(self):
   3405         eq = self.assertEqual
   3406         msg = self._msgobj('msg_04.txt')
   3407         it = iterators.typed_subpart_iterator(msg, 'text')
   3408         lines = []
   3409         subparts = 0
   3410         for subpart in it:
   3411             subparts += 1
   3412             lines.append(subpart.get_payload())
   3413         eq(subparts, 2)
   3414         eq(EMPTYSTRING.join(lines), """\
   3415 a simple kind of mirror
   3416 to reflect upon our own
   3417 a simple kind of mirror
   3418 to reflect upon our own
   3419 """)
   3420 
   3421     def test_typed_subpart_iterator_default_type(self):
   3422         eq = self.assertEqual
   3423         msg = self._msgobj('msg_03.txt')
   3424         it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
   3425         lines = []
   3426         subparts = 0
   3427         for subpart in it:
   3428             subparts += 1
   3429             lines.append(subpart.get_payload())
   3430         eq(subparts, 1)
   3431         eq(EMPTYSTRING.join(lines), """\
   3432 
   3433 Hi,
   3434 
   3435 Do you like this message?
   3436 
   3437 -Me
   3438 """)
   3439 
   3440     def test_pushCR_LF(self):
   3441         '''FeedParser BufferedSubFile.push() assumed it received complete
   3442            line endings.  A CR ending one push() followed by a LF starting
   3443            the next push() added an empty line.
   3444         '''
   3445         imt = [
   3446             ("a\r \n",  2),
   3447             ("b",       0),
   3448             ("c\n",     1),
   3449             ("",        0),
   3450             ("d\r\n",   1),
   3451             ("e\r",     0),
   3452             ("\nf",     1),
   3453             ("\r\n",    1),
   3454           ]
   3455         from email.feedparser import BufferedSubFile, NeedMoreData
   3456         bsf = BufferedSubFile()
   3457         om = []
   3458         nt = 0
   3459         for il, n in imt:
   3460             bsf.push(il)
   3461             nt += n
   3462             n1 = 0
   3463             for ol in iter(bsf.readline, NeedMoreData):
   3464                 om.append(ol)
   3465                 n1 += 1
   3466             self.assertEqual(n, n1)
   3467         self.assertEqual(len(om), nt)
   3468         self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
   3469 
   3470     def test_push_random(self):
   3471         from email.feedparser import BufferedSubFile, NeedMoreData
   3472 
   3473         n = 10000
   3474         chunksize = 5
   3475         chars = 'abcd \t\r\n'
   3476 
   3477         s = ''.join(choice(chars) for i in range(n)) + '\n'
   3478         target = s.splitlines(True)
   3479 
   3480         bsf = BufferedSubFile()
   3481         lines = []
   3482         for i in range(0, len(s), chunksize):
   3483             chunk = s[i:i+chunksize]
   3484             bsf.push(chunk)
   3485             lines.extend(iter(bsf.readline, NeedMoreData))
   3486         self.assertEqual(lines, target)
   3487 
   3488 
   3489 class TestFeedParsers(TestEmailBase):
   3490 
   3491     def parse(self, chunks):
   3492         feedparser = FeedParser()
   3493         for chunk in chunks:
   3494             feedparser.feed(chunk)
   3495         return feedparser.close()
   3496 
   3497     def test_empty_header_name_handled(self):
   3498         # Issue 19996
   3499         msg = self.parse("First: val\n: bad\nSecond: val")
   3500         self.assertEqual(msg['First'], 'val')
   3501         self.assertEqual(msg['Second'], 'val')
   3502 
   3503     def test_newlines(self):
   3504         m = self.parse(['a:\nb:\rc:\r\nd:\n'])
   3505         self.assertEqual(m.keys(), ['a', 'b', 'c', 'd'])
   3506         m = self.parse(['a:\nb:\rc:\r\nd:'])
   3507         self.assertEqual(m.keys(), ['a', 'b', 'c', 'd'])
   3508         m = self.parse(['a:\rb', 'c:\n'])
   3509         self.assertEqual(m.keys(), ['a', 'bc'])
   3510         m = self.parse(['a:\r', 'b:\n'])
   3511         self.assertEqual(m.keys(), ['a', 'b'])
   3512         m = self.parse(['a:\r', '\nb:\n'])
   3513         self.assertEqual(m.keys(), ['a', 'b'])
   3514 
   3515         # Only CR and LF should break header fields
   3516         m = self.parse(['a:\x85b:\u2028c:\n'])
   3517         self.assertEqual(m.items(), [('a', '\x85b:\u2028c:')])
   3518         m = self.parse(['a:\r', 'b:\x85', 'c:\n'])
   3519         self.assertEqual(m.items(), [('a', ''), ('b', '\x85c:')])
   3520 
   3521     def test_long_lines(self):
   3522         # Expected peak memory use on 32-bit platform: 6*N*M bytes.
   3523         M, N = 1000, 20000
   3524         m = self.parse(['a:b\n\n'] + ['x'*M] * N)
   3525         self.assertEqual(m.items(), [('a', 'b')])
   3526         self.assertEqual(m.get_payload(), 'x'*M*N)
   3527         m = self.parse(['a:b\r\r'] + ['x'*M] * N)
   3528         self.assertEqual(m.items(), [('a', 'b')])
   3529         self.assertEqual(m.get_payload(), 'x'*M*N)
   3530         m = self.parse(['a:b\r\r'] + ['x'*M+'\x85'] * N)
   3531         self.assertEqual(m.items(), [('a', 'b')])
   3532         self.assertEqual(m.get_payload(), ('x'*M+'\x85')*N)
   3533         m = self.parse(['a:\r', 'b: '] + ['x'*M] * N)
   3534         self.assertEqual(m.items(), [('a', ''), ('b', 'x'*M*N)])
   3535 
   3536 
   3537 class TestParsers(TestEmailBase):
   3538 
   3539     def test_header_parser(self):
   3540         eq = self.assertEqual
   3541         # Parse only the headers of a complex multipart MIME document
   3542         with openfile('msg_02.txt') as fp:
   3543             msg = HeaderParser().parse(fp)
   3544         eq(msg['from'], 'ppp-request (at] zzz.org')
   3545         eq(msg['to'], 'ppp (at] zzz.org')
   3546         eq(msg.get_content_type(), 'multipart/mixed')
   3547         self.assertFalse(msg.is_multipart())
   3548         self.assertIsInstance(msg.get_payload(), str)
   3549 
   3550     def test_bytes_header_parser(self):
   3551         eq = self.assertEqual
   3552         # Parse only the headers of a complex multipart MIME document
   3553         with openfile('msg_02.txt', 'rb') as fp:
   3554             msg = email.parser.BytesHeaderParser().parse(fp)
   3555         eq(msg['from'], 'ppp-request (at] zzz.org')
   3556         eq(msg['to'], 'ppp (at] zzz.org')
   3557         eq(msg.get_content_type(), 'multipart/mixed')
   3558         self.assertFalse(msg.is_multipart())
   3559         self.assertIsInstance(msg.get_payload(), str)
   3560         self.assertIsInstance(msg.get_payload(decode=True), bytes)
   3561 
   3562     def test_bytes_parser_does_not_close_file(self):
   3563         with openfile('msg_02.txt', 'rb') as fp:
   3564             email.parser.BytesParser().parse(fp)
   3565             self.assertFalse(fp.closed)
   3566 
   3567     def test_bytes_parser_on_exception_does_not_close_file(self):
   3568         with openfile('msg_15.txt', 'rb') as fp:
   3569             bytesParser = email.parser.BytesParser
   3570             self.assertRaises(email.errors.StartBoundaryNotFoundDefect,
   3571                               bytesParser(policy=email.policy.strict).parse,
   3572                               fp)
   3573             self.assertFalse(fp.closed)
   3574 
   3575     def test_parser_does_not_close_file(self):
   3576         with openfile('msg_02.txt', 'r') as fp:
   3577             email.parser.Parser().parse(fp)
   3578             self.assertFalse(fp.closed)
   3579 
   3580     def test_parser_on_exception_does_not_close_file(self):
   3581         with openfile('msg_15.txt', 'r') as fp:
   3582             parser = email.parser.Parser
   3583             self.assertRaises(email.errors.StartBoundaryNotFoundDefect,
   3584                               parser(policy=email.policy.strict).parse, fp)
   3585             self.assertFalse(fp.closed)
   3586 
   3587     def test_whitespace_continuation(self):
   3588         eq = self.assertEqual
   3589         # This message contains a line after the Subject: header that has only
   3590         # whitespace, but it is not empty!
   3591         msg = email.message_from_string("""\
   3592 From: aperson (at] dom.ain
   3593 To: bperson (at] dom.ain
   3594 Subject: the next line has a space on it
   3595 \x20
   3596 Date: Mon, 8 Apr 2002 15:09:19 -0400
   3597 Message-ID: spam
   3598 
   3599 Here's the message body
   3600 """)
   3601         eq(msg['subject'], 'the next line has a space on it\n ')
   3602         eq(msg['message-id'], 'spam')
   3603         eq(msg.get_payload(), "Here's the message body\n")
   3604 
   3605     def test_whitespace_continuation_last_header(self):
   3606         eq = self.assertEqual
   3607         # Like the previous test, but the subject line is the last
   3608         # header.
   3609         msg = email.message_from_string("""\
   3610 From: aperson (at] dom.ain
   3611 To: bperson (at] dom.ain
   3612 Date: Mon, 8 Apr 2002 15:09:19 -0400
   3613 Message-ID: spam
   3614 Subject: the next line has a space on it
   3615 \x20
   3616 
   3617 Here's the message body
   3618 """)
   3619         eq(msg['subject'], 'the next line has a space on it\n ')
   3620         eq(msg['message-id'], 'spam')
   3621         eq(msg.get_payload(), "Here's the message body\n")
   3622 
   3623     def test_crlf_separation(self):
   3624         eq = self.assertEqual
   3625         with openfile('msg_26.txt', newline='\n') as fp:
   3626             msg = Parser().parse(fp)
   3627         eq(len(msg.get_payload()), 2)
   3628         part1 = msg.get_payload(0)
   3629         eq(part1.get_content_type(), 'text/plain')
   3630         eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
   3631         part2 = msg.get_payload(1)
   3632         eq(part2.get_content_type(), 'application/riscos')
   3633 
   3634     def test_crlf_flatten(self):
   3635         # Using newline='\n' preserves the crlfs in this input file.
   3636         with openfile('msg_26.txt', newline='\n') as fp:
   3637             text = fp.read()
   3638         msg = email.message_from_string(text)
   3639         s = StringIO()
   3640         g = Generator(s)
   3641         g.flatten(msg, linesep='\r\n')
   3642         self.assertEqual(s.getvalue(), text)
   3643 
   3644     maxDiff = None
   3645 
   3646     def test_multipart_digest_with_extra_mime_headers(self):
   3647         eq = self.assertEqual
   3648         neq = self.ndiffAssertEqual
   3649         with openfile('msg_28.txt') as fp:
   3650             msg = email.message_from_file(fp)
   3651         # Structure is:
   3652         # multipart/digest
   3653         #   message/rfc822
   3654         #     text/plain
   3655         #   message/rfc822
   3656         #     text/plain
   3657         eq(msg.is_multipart(), 1)
   3658         eq(len(msg.get_payload()), 2)
   3659         part1 = msg.get_payload(0)
   3660         eq(part1.get_content_type(), 'message/rfc822')
   3661         eq(part1.is_multipart(), 1)
   3662         eq(len(part1.get_payload()), 1)
   3663         part1a = part1.get_payload(0)
   3664         eq(part1a.is_multipart(), 0)
   3665         eq(part1a.get_content_type(), 'text/plain')
   3666         neq(part1a.get_payload(), 'message 1\n')
   3667         # next message/rfc822
   3668         part2 = msg.get_payload(1)
   3669         eq(part2.get_content_type(), 'message/rfc822')
   3670         eq(part2.is_multipart(), 1)
   3671         eq(len(part2.get_payload()), 1)
   3672         part2a = part2.get_payload(0)
   3673         eq(part2a.is_multipart(), 0)
   3674         eq(part2a.get_content_type(), 'text/plain')
   3675         neq(part2a.get_payload(), 'message 2\n')
   3676 
   3677     def test_three_lines(self):
   3678         # A bug report by Andrew McNamara
   3679         lines = ['From: Andrew Person <aperson (at] dom.ain',
   3680                  'Subject: Test',
   3681                  'Date: Tue, 20 Aug 2002 16:43:45 +1000']
   3682         msg = email.message_from_string(NL.join(lines))
   3683         self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
   3684 
   3685     def test_strip_line_feed_and_carriage_return_in_headers(self):
   3686         eq = self.assertEqual
   3687         # For [ 1002475 ] email message parser doesn't handle \r\n correctly
   3688         value1 = 'text'
   3689         value2 = 'more text'
   3690         m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
   3691             value1, value2)
   3692         msg = email.message_from_string(m)
   3693         eq(msg.get('Header'), value1)
   3694         eq(msg.get('Next-Header'), value2)
   3695 
   3696     def test_rfc2822_header_syntax(self):
   3697         eq = self.assertEqual
   3698         m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
   3699         msg = email.message_from_string(m)
   3700         eq(len(msg), 3)
   3701         eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
   3702         eq(msg.get_payload(), 'body')
   3703 
   3704     def test_rfc2822_space_not_allowed_in_header(self):
   3705         eq = self.assertEqual
   3706         m = '>From foo (at] example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
   3707         msg = email.message_from_string(m)
   3708         eq(len(msg.keys()), 0)
   3709 
   3710     def test_rfc2822_one_character_header(self):
   3711         eq = self.assertEqual
   3712         m = 'A: first header\nB: second header\nCC: third header\n\nbody'
   3713         msg = email.message_from_string(m)
   3714         headers = msg.keys()
   3715         headers.sort()
   3716         eq(headers, ['A', 'B', 'CC'])
   3717         eq(msg.get_payload(), 'body')
   3718 
   3719     def test_CRLFLF_at_end_of_part(self):
   3720         # issue 5610: feedparser should not eat two chars from body part ending
   3721         # with "\r\n\n".
   3722         m = (
   3723             "From: foo (at] bar.com\n"
   3724             "To: baz\n"
   3725             "Mime-Version: 1.0\n"
   3726             "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
   3727             "\n"
   3728             "--BOUNDARY\n"
   3729             "Content-Type: text/plain\n"
   3730             "\n"
   3731             "body ending with CRLF newline\r\n"
   3732             "\n"
   3733             "--BOUNDARY--\n"
   3734           )
   3735         msg = email.message_from_string(m)
   3736         self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
   3737 
   3738 
   3739 class Test8BitBytesHandling(TestEmailBase):
   3740     # In Python3 all input is string, but that doesn't work if the actual input
   3741     # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
   3742     # decode byte streams using the surrogateescape error handler, and
   3743     # reconvert to binary at appropriate places if we detect surrogates.  This
   3744     # doesn't allow us to transform headers with 8bit bytes (they get munged),
   3745     # but it does allow us to parse and preserve them, and to decode body
   3746     # parts that use an 8bit CTE.
   3747 
   3748     bodytest_msg = textwrap.dedent("""\
   3749         From: foo (at] bar.com
   3750         To: baz
   3751         Mime-Version: 1.0
   3752         Content-Type: text/plain; charset={charset}
   3753         Content-Transfer-Encoding: {cte}
   3754 
   3755         {bodyline}
   3756         """)
   3757 
   3758     def test_known_8bit_CTE(self):
   3759         m = self.bodytest_msg.format(charset='utf-8',
   3760                                      cte='8bit',
   3761                                      bodyline='pstal').encode('utf-8')
   3762         msg = email.message_from_bytes(m)
   3763         self.assertEqual(msg.get_payload(), "pstal\n")
   3764         self.assertEqual(msg.get_payload(decode=True),
   3765                          "pstal\n".encode('utf-8'))
   3766 
   3767     def test_unknown_8bit_CTE(self):
   3768         m = self.bodytest_msg.format(charset='notavalidcharset',
   3769                                      cte='8bit',
   3770                                      bodyline='pstal').encode('utf-8')
   3771         msg = email.message_from_bytes(m)
   3772         self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
   3773         self.assertEqual(msg.get_payload(decode=True),
   3774                          "pstal\n".encode('utf-8'))
   3775 
   3776     def test_8bit_in_quopri_body(self):
   3777         # This is non-RFC compliant data...without 'decode' the library code
   3778         # decodes the body using the charset from the headers, and because the
   3779         # source byte really is utf-8 this works.  This is likely to fail
   3780         # against real dirty data (ie: produce mojibake), but the data is
   3781         # invalid anyway so it is as good a guess as any.  But this means that
   3782         # this test just confirms the current behavior; that behavior is not
   3783         # necessarily the best possible behavior.  With 'decode' it is
   3784         # returning the raw bytes, so that test should be of correct behavior,
   3785         # or at least produce the same result that email4 did.
   3786         m = self.bodytest_msg.format(charset='utf-8',
   3787                                      cte='quoted-printable',
   3788                                      bodyline='p=C3=B6stl').encode('utf-8')
   3789         msg = email.message_from_bytes(m)
   3790         self.assertEqual(msg.get_payload(), 'p=C3=B6stl\n')
   3791         self.assertEqual(msg.get_payload(decode=True),
   3792                          'pstl\n'.encode('utf-8'))
   3793 
   3794     def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
   3795         # This is similar to the previous test, but proves that if the 8bit
   3796         # byte is undecodeable in the specified charset, it gets replaced
   3797         # by the unicode 'unknown' character.  Again, this may or may not
   3798         # be the ideal behavior.  Note that if decode=False none of the
   3799         # decoders will get involved, so this is the only test we need
   3800         # for this behavior.
   3801         m = self.bodytest_msg.format(charset='ascii',
   3802                                      cte='quoted-printable',
   3803                                      bodyline='p=C3=B6stl').encode('utf-8')
   3804         msg = email.message_from_bytes(m)
   3805         self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
   3806         self.assertEqual(msg.get_payload(decode=True),
   3807                         'pstl\n'.encode('utf-8'))
   3808 
   3809     # test_defect_handling:test_invalid_chars_in_base64_payload
   3810     def test_8bit_in_base64_body(self):
   3811         # If we get 8bit bytes in a base64 body, we can just ignore them
   3812         # as being outside the base64 alphabet and decode anyway.  But
   3813         # we register a defect.
   3814         m = self.bodytest_msg.format(charset='utf-8',
   3815                                      cte='base64',
   3816                                      bodyline='cMO2c3RhbA=').encode('utf-8')
   3817         msg = email.message_from_bytes(m)
   3818         self.assertEqual(msg.get_payload(decode=True),
   3819                          'pstal'.encode('utf-8'))
   3820         self.assertIsInstance(msg.defects[0],
   3821                               errors.InvalidBase64CharactersDefect)
   3822 
   3823     def test_8bit_in_uuencode_body(self):
   3824         # Sticking an 8bit byte in a uuencode block makes it undecodable by
   3825         # normal means, so the block is returned undecoded, but as bytes.
   3826         m = self.bodytest_msg.format(charset='utf-8',
   3827                                      cte='uuencode',
   3828                                      bodyline='<,.V<W1A;  ').encode('utf-8')
   3829         msg = email.message_from_bytes(m)
   3830         self.assertEqual(msg.get_payload(decode=True),
   3831                          '<,.V<W1A;  \n'.encode('utf-8'))
   3832 
   3833 
   3834     headertest_headers = (
   3835         ('From: foo (at] bar.com', ('From', 'foo (at] bar.com')),
   3836         ('To: bz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
   3837         ('Subject: Maintenant je vous prsente mon collgue, le pouf clbre\n'
   3838             '\tJean de Baddie',
   3839             ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
   3840                 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
   3841                 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
   3842         ('From: gst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
   3843         )
   3844     headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
   3845         '\nYes, they are flying.\n').encode('utf-8')
   3846 
   3847     def test_get_8bit_header(self):
   3848         msg = email.message_from_bytes(self.headertest_msg)
   3849         self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
   3850         self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
   3851 
   3852     def test_print_8bit_headers(self):
   3853         msg = email.message_from_bytes(self.headertest_msg)
   3854         self.assertEqual(str(msg),
   3855                          textwrap.dedent("""\
   3856                             From: {}
   3857                             To: {}
   3858                             Subject: {}
   3859                             From: {}
   3860 
   3861                             Yes, they are flying.
   3862                             """).format(*[expected[1] for (_, expected) in
   3863                                         self.headertest_headers]))
   3864 
   3865     def test_values_with_8bit_headers(self):
   3866         msg = email.message_from_bytes(self.headertest_msg)
   3867         self.assertListEqual([str(x) for x in msg.values()],
   3868                               ['foo (at] bar.com',
   3869                                'b\uFFFD\uFFFDz',
   3870                                'Maintenant je vous pr\uFFFD\uFFFDsente mon '
   3871                                    'coll\uFFFD\uFFFDgue, le pouf '
   3872                                    'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
   3873                                    '\tJean de Baddie',
   3874                                "g\uFFFD\uFFFDst"])
   3875 
   3876     def test_items_with_8bit_headers(self):
   3877         msg = email.message_from_bytes(self.headertest_msg)
   3878         self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
   3879                               [('From', 'foo (at] bar.com'),
   3880                                ('To', 'b\uFFFD\uFFFDz'),
   3881                                ('Subject', 'Maintenant je vous '
   3882                                   'pr\uFFFD\uFFFDsente '
   3883                                   'mon coll\uFFFD\uFFFDgue, le pouf '
   3884                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
   3885                                   '\tJean de Baddie'),
   3886                                ('From', 'g\uFFFD\uFFFDst')])
   3887 
   3888     def test_get_all_with_8bit_headers(self):
   3889         msg = email.message_from_bytes(self.headertest_msg)
   3890         self.assertListEqual([str(x) for x in msg.get_all('from')],
   3891                               ['foo (at] bar.com',
   3892                                'g\uFFFD\uFFFDst'])
   3893 
   3894     def test_get_content_type_with_8bit(self):
   3895         msg = email.message_from_bytes(textwrap.dedent("""\
   3896             Content-Type: text/pl\xA7in; charset=utf-8
   3897             """).encode('latin-1'))
   3898         self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
   3899         self.assertEqual(msg.get_content_maintype(), "text")
   3900         self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
   3901 
   3902     # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
   3903     def test_get_params_with_8bit(self):
   3904         msg = email.message_from_bytes(
   3905             'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
   3906         self.assertEqual(msg.get_params(header='x-header'),
   3907            [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
   3908         self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
   3909         # XXX: someday you might be able to get 'b\xa7r', for now you can't.
   3910         self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
   3911 
   3912     # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
   3913     def test_get_rfc2231_params_with_8bit(self):
   3914         msg = email.message_from_bytes(textwrap.dedent("""\
   3915             Content-Type: text/plain; charset=us-ascii;
   3916              title*=us-ascii'en'This%20is%20not%20f\xa7n"""
   3917              ).encode('latin-1'))
   3918         self.assertEqual(msg.get_param('title'),
   3919             ('us-ascii', 'en', 'This is not f\uFFFDn'))
   3920 
   3921     def test_set_rfc2231_params_with_8bit(self):
   3922         msg = email.message_from_bytes(textwrap.dedent("""\
   3923             Content-Type: text/plain; charset=us-ascii;
   3924              title*=us-ascii'en'This%20is%20not%20f\xa7n"""
   3925              ).encode('latin-1'))
   3926         msg.set_param('title', 'test')
   3927         self.assertEqual(msg.get_param('title'), 'test')
   3928 
   3929     def test_del_rfc2231_params_with_8bit(self):
   3930         msg = email.message_from_bytes(textwrap.dedent("""\
   3931             Content-Type: text/plain; charset=us-ascii;
   3932              title*=us-ascii'en'This%20is%20not%20f\xa7n"""
   3933              ).encode('latin-1'))
   3934         msg.del_param('title')
   3935         self.assertEqual(msg.get_param('title'), None)
   3936         self.assertEqual(msg.get_content_maintype(), 'text')
   3937 
   3938     def test_get_payload_with_8bit_cte_header(self):
   3939         msg = email.message_from_bytes(textwrap.dedent("""\
   3940             Content-Transfer-Encoding: b\xa7se64
   3941             Content-Type: text/plain; charset=latin-1
   3942 
   3943             payload
   3944             """).encode('latin-1'))
   3945         self.assertEqual(msg.get_payload(), 'payload\n')
   3946         self.assertEqual(msg.get_payload(decode=True), b'payload\n')
   3947 
   3948     non_latin_bin_msg = textwrap.dedent("""\
   3949         From: foo (at] bar.com
   3950         To: bz
   3951         Subject: Maintenant je vous prsente mon collgue, le pouf clbre
   3952         \tJean de Baddie
   3953         Mime-Version: 1.0
   3954         Content-Type: text/plain; charset="utf-8"
   3955         Content-Transfer-Encoding: 8bit
   3956 
   3957         ,  .
   3958         """).encode('utf-8')
   3959 
   3960     def test_bytes_generator(self):
   3961         msg = email.message_from_bytes(self.non_latin_bin_msg)
   3962         out = BytesIO()
   3963         email.generator.BytesGenerator(out).flatten(msg)
   3964         self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
   3965 
   3966     def test_bytes_generator_handles_None_body(self):
   3967         #Issue 11019
   3968         msg = email.message.Message()
   3969         out = BytesIO()
   3970         email.generator.BytesGenerator(out).flatten(msg)
   3971         self.assertEqual(out.getvalue(), b"\n")
   3972 
   3973     non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
   3974         From: foo (at] bar.com
   3975         To: =?unknown-8bit?q?b=C3=A1z?=
   3976         Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
   3977          =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
   3978          =?unknown-8bit?q?_Jean_de_Baddie?=
   3979         Mime-Version: 1.0
   3980         Content-Type: text/plain; charset="utf-8"
   3981         Content-Transfer-Encoding: base64
   3982 
   3983         0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
   3984         """)
   3985 
   3986     def test_generator_handles_8bit(self):
   3987         msg = email.message_from_bytes(self.non_latin_bin_msg)
   3988         out = StringIO()
   3989         email.generator.Generator(out).flatten(msg)
   3990         self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
   3991 
   3992     def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
   3993         msg = email.message_from_bytes(self.non_latin_bin_msg)
   3994         out = BytesIO()
   3995         BytesGenerator(out).flatten(msg)
   3996         orig_value = out.getvalue()
   3997         Generator(StringIO()).flatten(msg) # Should not mutate msg!
   3998         out = BytesIO()
   3999         BytesGenerator(out).flatten(msg)
   4000         self.assertEqual(out.getvalue(), orig_value)
   4001 
   4002     def test_bytes_generator_with_unix_from(self):
   4003         # The unixfrom contains a current date, so we can't check it
   4004         # literally.  Just make sure the first word is 'From' and the
   4005         # rest of the message matches the input.
   4006         msg = email.message_from_bytes(self.non_latin_bin_msg)
   4007         out = BytesIO()
   4008         email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
   4009         lines = out.getvalue().split(b'\n')
   4010         self.assertEqual(lines[0].split()[0], b'From')
   4011         self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
   4012 
   4013     non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
   4014     non_latin_bin_msg_as7bit[2:4] = [
   4015         'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
   4016          'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
   4017     non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
   4018 
   4019     def test_message_from_binary_file(self):
   4020         fn = 'test.msg'
   4021         self.addCleanup(unlink, fn)
   4022         with open(fn, 'wb') as testfile:
   4023             testfile.write(self.non_latin_bin_msg)
   4024         with open(fn, 'rb') as testfile:
   4025             m = email.parser.BytesParser().parse(testfile)
   4026         self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
   4027 
   4028     latin_bin_msg = textwrap.dedent("""\
   4029         From: foo (at] bar.com
   4030         To: Dinsdale
   4031         Subject: Nudge nudge, wink, wink
   4032         Mime-Version: 1.0
   4033         Content-Type: text/plain; charset="latin-1"
   4034         Content-Transfer-Encoding: 8bit
   4035 
   4036         oh l l, know what I mean, know what I mean?
   4037         """).encode('latin-1')
   4038 
   4039     latin_bin_msg_as7bit = textwrap.dedent("""\
   4040         From: foo (at] bar.com
   4041         To: Dinsdale
   4042         Subject: Nudge nudge, wink, wink
   4043         Mime-Version: 1.0
   4044         Content-Type: text/plain; charset="iso-8859-1"
   4045         Content-Transfer-Encoding: quoted-printable
   4046 
   4047         oh l=E0 l=E0, know what I mean, know what I mean?
   4048         """)
   4049 
   4050     def test_string_generator_reencodes_to_quopri_when_appropriate(self):
   4051         m = email.message_from_bytes(self.latin_bin_msg)
   4052         self.assertEqual(str(m), self.latin_bin_msg_as7bit)
   4053 
   4054     def test_decoded_generator_emits_unicode_body(self):
   4055         m = email.message_from_bytes(self.latin_bin_msg)
   4056         out = StringIO()
   4057         email.generator.DecodedGenerator(out).flatten(m)
   4058         #DecodedHeader output contains an extra blank line compared
   4059         #to the input message.  RDM: not sure if this is a bug or not,
   4060         #but it is not specific to the 8bit->7bit conversion.
   4061         self.assertEqual(out.getvalue(),
   4062             self.latin_bin_msg.decode('latin-1')+'\n')
   4063 
   4064     def test_bytes_feedparser(self):
   4065         bfp = email.feedparser.BytesFeedParser()
   4066         for i in range(0, len(self.latin_bin_msg), 10):
   4067             bfp.feed(self.latin_bin_msg[i:i+10])
   4068         m = bfp.close()
   4069         self.assertEqual(str(m), self.latin_bin_msg_as7bit)
   4070 
   4071     def test_crlf_flatten(self):
   4072         with openfile('msg_26.txt', 'rb') as fp:
   4073             text = fp.read()
   4074         msg = email.message_from_bytes(text)
   4075         s = BytesIO()
   4076         g = email.generator.BytesGenerator(s)
   4077         g.flatten(msg, linesep='\r\n')
   4078         self.assertEqual(s.getvalue(), text)
   4079 
   4080     def test_8bit_multipart(self):
   4081         # Issue 11605
   4082         source = textwrap.dedent("""\
   4083             Date: Fri, 18 Mar 2011 17:15:43 +0100
   4084             To: foo (at] example.com
   4085             From: foodwatch-Newsletter <bar (at] example.com>
   4086             Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
   4087             Message-ID: <76a486bee62b0d200f33dc2ca08220ad (at] localhost.localdomain>
   4088             MIME-Version: 1.0
   4089             Content-Type: multipart/alternative;
   4090                     boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
   4091 
   4092             --b1_76a486bee62b0d200f33dc2ca08220ad
   4093             Content-Type: text/plain; charset="utf-8"
   4094             Content-Transfer-Encoding: 8bit
   4095 
   4096             Guten Tag, ,
   4097 
   4098             mit groer Betroffenheit verfolgen auch wir im foodwatch-Team die
   4099             Nachrichten aus Japan.
   4100 
   4101 
   4102             --b1_76a486bee62b0d200f33dc2ca08220ad
   4103             Content-Type: text/html; charset="utf-8"
   4104             Content-Transfer-Encoding: 8bit
   4105 
   4106             <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
   4107                 "http://www.w3.org/TR/html4/loose.dtd">
   4108             <html lang="de">
   4109             <head>
   4110                     <title>foodwatch - Newsletter</title>
   4111             </head>
   4112             <body>
   4113               <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
   4114                  die Nachrichten aus Japan.</p>
   4115             </body>
   4116             </html>
   4117             --b1_76a486bee62b0d200f33dc2ca08220ad--
   4118 
   4119             """).encode('utf-8')
   4120         msg = email.message_from_bytes(source)
   4121         s = BytesIO()
   4122         g = email.generator.BytesGenerator(s)
   4123         g.flatten(msg)
   4124         self.assertEqual(s.getvalue(), source)
   4125 
   4126     def test_bytes_generator_b_encoding_linesep(self):
   4127         # Issue 14062: b encoding was tacking on an extra \n.
   4128         m = Message()
   4129         # This has enough non-ascii that it should always end up b encoded.
   4130         m['Subject'] = Header('luouk k')
   4131         s = BytesIO()
   4132         g = email.generator.BytesGenerator(s)
   4133         g.flatten(m, linesep='\r\n')
   4134         self.assertEqual(
   4135             s.getvalue(),
   4136             b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
   4137 
   4138     def test_generator_b_encoding_linesep(self):
   4139         # Since this broke in ByteGenerator, test Generator for completeness.
   4140         m = Message()
   4141         # This has enough non-ascii that it should always end up b encoded.
   4142         m['Subject'] = Header('luouk k')
   4143         s = StringIO()
   4144         g = email.generator.Generator(s)
   4145         g.flatten(m, linesep='\r\n')
   4146         self.assertEqual(
   4147             s.getvalue(),
   4148             'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
   4149 
    # unittest setting: None removes the length limit on failure diffs.
    maxDiff = None
   4151 
   4152 
   4153 class BaseTestBytesGeneratorIdempotent:
   4154 
   4155     maxDiff = None
   4156 
   4157     def _msgobj(self, filename):
   4158         with openfile(filename, 'rb') as fp:
   4159             data = fp.read()
   4160         data = self.normalize_linesep_regex.sub(self.blinesep, data)
   4161         msg = email.message_from_bytes(data)
   4162         return msg, data
   4163 
   4164     def _idempotent(self, msg, data, unixfrom=False):
   4165         b = BytesIO()
   4166         g = email.generator.BytesGenerator(b, maxheaderlen=0)
   4167         g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
   4168         self.assertEqual(data, b.getvalue())
   4169 
   4170 
   4171 class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
   4172                                     TestIdempotent):
   4173     linesep = '\n'
   4174     blinesep = b'\n'
   4175     normalize_linesep_regex = re.compile(br'\r\n')
   4176 
   4177 
   4178 class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
   4179                                        TestIdempotent):
   4180     linesep = '\r\n'
   4181     blinesep = b'\r\n'
   4182     normalize_linesep_regex = re.compile(br'(?<!\r)\n')
   4183 
   4184 
   4185 class TestBase64(unittest.TestCase):
   4186     def test_len(self):
   4187         eq = self.assertEqual
   4188         eq(base64mime.header_length('hello'),
   4189            len(base64mime.body_encode(b'hello', eol='')))
   4190         for size in range(15):
   4191             if   size == 0 : bsize = 0
   4192             elif size <= 3 : bsize = 4
   4193             elif size <= 6 : bsize = 8
   4194             elif size <= 9 : bsize = 12
   4195             elif size <= 12: bsize = 16
   4196             else           : bsize = 20
   4197             eq(base64mime.header_length('x' * size), bsize)
   4198 
   4199     def test_decode(self):
   4200         eq = self.assertEqual
   4201         eq(base64mime.decode(''), b'')
   4202         eq(base64mime.decode('aGVsbG8='), b'hello')
   4203 
   4204     def test_encode(self):
   4205         eq = self.assertEqual
   4206         eq(base64mime.body_encode(b''), b'')
   4207         eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
   4208         # Test the binary flag
   4209         eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
   4210         # Test the maxlinelen arg
   4211         eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
   4212 eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
   4213 eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
   4214 eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
   4215 eHh4eCB4eHh4IA==
   4216 """)
   4217         # Test the eol argument
   4218         eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
   4219            """\
   4220 eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
   4221 eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
   4222 eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
   4223 eHh4eCB4eHh4IA==\r
   4224 """)
   4225 
   4226     def test_header_encode(self):
   4227         eq = self.assertEqual
   4228         he = base64mime.header_encode
   4229         eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
   4230         eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
   4231         eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
   4232         # Test the charset option
   4233         eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
   4234         eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
   4235 
   4236 
   4237 
   4238 class TestQuopri(unittest.TestCase):
   4239     def setUp(self):
   4240         # Set of characters (as byte integers) that don't need to be encoded
   4241         # in headers.
   4242         self.hlit = list(chain(
   4243             range(ord('a'), ord('z') + 1),
   4244             range(ord('A'), ord('Z') + 1),
   4245             range(ord('0'), ord('9') + 1),
   4246             (c for c in b'!*+-/')))
   4247         # Set of characters (as byte integers) that do need to be encoded in
   4248         # headers.
   4249         self.hnon = [c for c in range(256) if c not in self.hlit]
   4250         assert len(self.hlit) + len(self.hnon) == 256
   4251         # Set of characters (as byte integers) that don't need to be encoded
   4252         # in bodies.
   4253         self.blit = list(range(ord(' '), ord('~') + 1))
   4254         self.blit.append(ord('\t'))
   4255         self.blit.remove(ord('='))
   4256         # Set of characters (as byte integers) that do need to be encoded in
   4257         # bodies.
   4258         self.bnon = [c for c in range(256) if c not in self.blit]
   4259         assert len(self.blit) + len(self.bnon) == 256
   4260 
   4261     def test_quopri_header_check(self):
   4262         for c in self.hlit:
   4263             self.assertFalse(quoprimime.header_check(c),
   4264                         'Should not be header quopri encoded: %s' % chr(c))
   4265         for c in self.hnon:
   4266             self.assertTrue(quoprimime.header_check(c),
   4267                             'Should be header quopri encoded: %s' % chr(c))
   4268 
   4269     def test_quopri_body_check(self):
   4270         for c in self.blit:
   4271             self.assertFalse(quoprimime.body_check(c),
   4272                         'Should not be body quopri encoded: %s' % chr(c))
   4273         for c in self.bnon:
   4274             self.assertTrue(quoprimime.body_check(c),
   4275                             'Should be body quopri encoded: %s' % chr(c))
   4276 
   4277     def test_header_quopri_len(self):
   4278         eq = self.assertEqual
   4279         eq(quoprimime.header_length(b'hello'), 5)
   4280         # RFC 2047 chrome is not included in header_length().
   4281         eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
   4282            quoprimime.header_length(b'hello') +
   4283            # =?xxx?q?...?= means 10 extra characters
   4284            10)
   4285         eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
   4286         # RFC 2047 chrome is not included in header_length().
   4287         eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
   4288            quoprimime.header_length(b'h@e@l@l@o@') +
   4289            # =?xxx?q?...?= means 10 extra characters
   4290            10)
   4291         for c in self.hlit:
   4292             eq(quoprimime.header_length(bytes([c])), 1,
   4293                'expected length 1 for %r' % chr(c))
   4294         for c in self.hnon:
   4295             # Space is special; it's encoded to _
   4296             if c == ord(' '):
   4297                 continue
   4298             eq(quoprimime.header_length(bytes([c])), 3,
   4299                'expected length 3 for %r' % chr(c))
   4300         eq(quoprimime.header_length(b' '), 1)
   4301 
   4302     def test_body_quopri_len(self):
   4303         eq = self.assertEqual
   4304         for c in self.blit:
   4305             eq(quoprimime.body_length(bytes([c])), 1)
   4306         for c in self.bnon:
   4307             eq(quoprimime.body_length(bytes([c])), 3)
   4308 
   4309     def test_quote_unquote_idempotent(self):
   4310         for x in range(256):
   4311             c = chr(x)
   4312             self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
   4313 
   4314     def _test_header_encode(self, header, expected_encoded_header, charset=None):
   4315         if charset is None:
   4316             encoded_header = quoprimime.header_encode(header)
   4317         else:
   4318             encoded_header = quoprimime.header_encode(header, charset)
   4319         self.assertEqual(encoded_header, expected_encoded_header)
   4320 
   4321     def test_header_encode_null(self):
   4322         self._test_header_encode(b'', '')
   4323 
   4324     def test_header_encode_one_word(self):
   4325         self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
   4326 
   4327     def test_header_encode_two_lines(self):
   4328         self._test_header_encode(b'hello\nworld',
   4329                                 '=?iso-8859-1?q?hello=0Aworld?=')
   4330 
   4331     def test_header_encode_non_ascii(self):
   4332         self._test_header_encode(b'hello\xc7there',
   4333                                 '=?iso-8859-1?q?hello=C7there?=')
   4334 
   4335     def test_header_encode_alt_charset(self):
   4336         self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
   4337                 charset='iso-8859-2')
   4338 
   4339     def _test_header_decode(self, encoded_header, expected_decoded_header):
   4340         decoded_header = quoprimime.header_decode(encoded_header)
   4341         self.assertEqual(decoded_header, expected_decoded_header)
   4342 
   4343     def test_header_decode_null(self):
   4344         self._test_header_decode('', '')
   4345 
   4346     def test_header_decode_one_word(self):
   4347         self._test_header_decode('hello', 'hello')
   4348 
   4349     def test_header_decode_two_lines(self):
   4350         self._test_header_decode('hello=0Aworld', 'hello\nworld')
   4351 
   4352     def test_header_decode_non_ascii(self):
   4353         self._test_header_decode('hello=C7there', 'hello\xc7there')
   4354 
   4355     def test_header_decode_re_bug_18380(self):
   4356         # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
   4357         self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)
   4358 
   4359     def _test_decode(self, encoded, expected_decoded, eol=None):
   4360         if eol is None:
   4361             decoded = quoprimime.decode(encoded)
   4362         else:
   4363             decoded = quoprimime.decode(encoded, eol=eol)
   4364         self.assertEqual(decoded, expected_decoded)
   4365 
   4366     def test_decode_null_word(self):
   4367         self._test_decode('', '')
   4368 
   4369     def test_decode_null_line_null_word(self):
   4370         self._test_decode('\r\n', '\n')
   4371 
   4372     def test_decode_one_word(self):
   4373         self._test_decode('hello', 'hello')
   4374 
   4375     def test_decode_one_word_eol(self):
   4376         self._test_decode('hello', 'hello', eol='X')
   4377 
   4378     def test_decode_one_line(self):
   4379         self._test_decode('hello\r\n', 'hello\n')
   4380 
   4381     def test_decode_one_line_lf(self):
   4382         self._test_decode('hello\n', 'hello\n')
   4383 
   4384     def test_decode_one_line_cr(self):
   4385         self._test_decode('hello\r', 'hello\n')
   4386 
   4387     def test_decode_one_line_nl(self):
   4388         self._test_decode('hello\n', 'helloX', eol='X')
   4389 
   4390     def test_decode_one_line_crnl(self):
   4391         self._test_decode('hello\r\n', 'helloX', eol='X')
   4392 
   4393     def test_decode_one_line_one_word(self):
   4394         self._test_decode('hello\r\nworld', 'hello\nworld')
   4395 
   4396     def test_decode_one_line_one_word_eol(self):
   4397         self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
   4398 
   4399     def test_decode_two_lines(self):
   4400         self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
   4401 
   4402     def test_decode_two_lines_eol(self):
   4403         self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
   4404 
   4405     def test_decode_one_long_line(self):
   4406         self._test_decode('Spam' * 250, 'Spam' * 250)
   4407 
   4408     def test_decode_one_space(self):
   4409         self._test_decode(' ', '')
   4410 
   4411     def test_decode_multiple_spaces(self):
   4412         self._test_decode(' ' * 5, '')
   4413 
   4414     def test_decode_one_line_trailing_spaces(self):
   4415         self._test_decode('hello    \r\n', 'hello\n')
   4416 
   4417     def test_decode_two_lines_trailing_spaces(self):
   4418         self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')
   4419 
   4420     def test_decode_quoted_word(self):
   4421         self._test_decode('=22quoted=20words=22', '"quoted words"')
   4422 
   4423     def test_decode_uppercase_quoting(self):
   4424         self._test_decode('ab=CD=EF', 'ab\xcd\xef')
   4425 
   4426     def test_decode_lowercase_quoting(self):
   4427         self._test_decode('ab=cd=ef', 'ab\xcd\xef')
   4428 
   4429     def test_decode_soft_line_break(self):
   4430         self._test_decode('soft line=\r\nbreak', 'soft linebreak')
   4431 
   4432     def test_decode_false_quoting(self):
   4433         self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
   4434 
   4435     def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
   4436         kwargs = {}
   4437         if maxlinelen is None:
   4438             # Use body_encode's default.
   4439             maxlinelen = 76
   4440         else:
   4441             kwargs['maxlinelen'] = maxlinelen
   4442         if eol is None:
   4443             # Use body_encode's default.
   4444             eol = '\n'
   4445         else:
   4446             kwargs['eol'] = eol
   4447         encoded_body = quoprimime.body_encode(body, **kwargs)
   4448         self.assertEqual(encoded_body, expected_encoded_body)
   4449         if eol == '\n' or eol == '\r\n':
   4450             # We know how to split the result back into lines, so maxlinelen
   4451             # can be checked.
   4452             for line in encoded_body.splitlines():
   4453                 self.assertLessEqual(len(line), maxlinelen)
   4454 
   4455     def test_encode_null(self):
   4456         self._test_encode('', '')
   4457 
   4458     def test_encode_null_lines(self):
   4459         self._test_encode('\n\n', '\n\n')
   4460 
   4461     def test_encode_one_line(self):
   4462         self._test_encode('hello\n', 'hello\n')
   4463 
   4464     def test_encode_one_line_crlf(self):
   4465         self._test_encode('hello\r\n', 'hello\n')
   4466 
   4467     def test_encode_one_line_eol(self):
   4468         self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
   4469 
   4470     def test_encode_one_line_eol_after_non_ascii(self):
   4471         # issue 20206; see changeset 0cf700464177 for why the encode/decode.
   4472         self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
   4473                           'hello=CF=85\r\n', eol='\r\n')
   4474 
   4475     def test_encode_one_space(self):
   4476         self._test_encode(' ', '=20')
   4477 
   4478     def test_encode_one_line_one_space(self):
   4479         self._test_encode(' \n', '=20\n')
   4480 
   4481 # XXX: body_encode() expect strings, but uses ord(char) from these strings
   4482 # to index into a 256-entry list.  For code points above 255, this will fail.
   4483 # Should there be a check for 8-bit only ord() values in body, or at least
   4484 # a comment about the expected input?
   4485 
   4486     def test_encode_two_lines_one_space(self):
   4487         self._test_encode(' \n \n', '=20\n=20\n')
   4488 
   4489     def test_encode_one_word_trailing_spaces(self):
   4490         self._test_encode('hello   ', 'hello  =20')
   4491 
   4492     def test_encode_one_line_trailing_spaces(self):
   4493         self._test_encode('hello   \n', 'hello  =20\n')
   4494 
   4495     def test_encode_one_word_trailing_tab(self):
   4496         self._test_encode('hello  \t', 'hello  =09')
   4497 
   4498     def test_encode_one_line_trailing_tab(self):
   4499         self._test_encode('hello  \t\n', 'hello  =09\n')
   4500 
   4501     def test_encode_trailing_space_before_maxlinelen(self):
   4502         self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
   4503 
   4504     def test_encode_trailing_space_at_maxlinelen(self):
   4505         self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
   4506 
   4507     def test_encode_trailing_space_beyond_maxlinelen(self):
   4508         self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
   4509 
   4510     def test_encode_whitespace_lines(self):
   4511         self._test_encode(' \n' * 5, '=20\n' * 5)
   4512 
   4513     def test_encode_quoted_equals(self):
   4514         self._test_encode('a = b', 'a =3D b')
   4515 
   4516     def test_encode_one_long_string(self):
   4517         self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
   4518 
   4519     def test_encode_one_long_line(self):
   4520         self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
   4521 
   4522     def test_encode_one_very_long_line(self):
   4523         self._test_encode('x' * 200 + '\n',
   4524                 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
   4525 
   4526     def test_encode_shortest_maxlinelen(self):
   4527         self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
   4528 
   4529     def test_encode_maxlinelen_too_small(self):
   4530         self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
   4531 
   4532     def test_encode(self):
   4533         eq = self.assertEqual
   4534         eq(quoprimime.body_encode(''), '')
   4535         eq(quoprimime.body_encode('hello'), 'hello')
   4536         # Test the binary flag
   4537         eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
   4538         # Test the maxlinelen arg
   4539         eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
   4540 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
   4541  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
   4542 x xxxx xxxx xxxx xxxx=20""")
   4543         # Test the eol argument
   4544         eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
   4545            """\
   4546 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
   4547  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
   4548 x xxxx xxxx xxxx xxxx=20""")
   4549         eq(quoprimime.body_encode("""\
   4550 one line
   4551 
   4552 two line"""), """\
   4553 one line
   4554 
   4555 two line""")
   4556 
   4557 
   4558 
   4559 # Test the Charset class
   4560 class TestCharset(unittest.TestCase):
   4561     def tearDown(self):
   4562         from email import charset as CharsetModule
   4563         try:
   4564             del CharsetModule.CHARSETS['fake']
   4565         except KeyError:
   4566             pass
   4567 
   4568     def test_codec_encodeable(self):
   4569         eq = self.assertEqual
   4570         # Make sure us-ascii = no Unicode conversion
   4571         c = Charset('us-ascii')
   4572         eq(c.header_encode('Hello World!'), 'Hello World!')
   4573         # Test 8-bit idempotency with us-ascii
   4574         s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
   4575         self.assertRaises(UnicodeError, c.header_encode, s)
   4576         c = Charset('utf-8')
   4577         eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
   4578 
   4579     def test_body_encode(self):
   4580         eq = self.assertEqual
   4581         # Try a charset with QP body encoding
   4582         c = Charset('iso-8859-1')
   4583         eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
   4584         # Try a charset with Base64 body encoding
   4585         c = Charset('utf-8')
   4586         eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
   4587         # Try a charset with None body encoding
   4588         c = Charset('us-ascii')
   4589         eq('hello world', c.body_encode('hello world'))
   4590         # Try the convert argument, where input codec != output codec
   4591         c = Charset('euc-jp')
   4592         # With apologies to Tokio Kikuchi ;)
   4593         # XXX FIXME
   4594 ##         try:
   4595 ##             eq('\x1b$B5FCO;~IW\x1b(B',
   4596 ##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
   4597 ##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
   4598 ##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
   4599 ##         except LookupError:
   4600 ##             # We probably don't have the Japanese codecs installed
   4601 ##             pass
   4602         # Testing SF bug #625509, which we have to fake, since there are no
   4603         # built-in encodings where the header encoding is QP but the body
   4604         # encoding is not.
   4605         from email import charset as CharsetModule
   4606         CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
   4607         c = Charset('fake')
   4608         eq('hello world', c.body_encode('hello world'))
   4609 
   4610     def test_unicode_charset_name(self):
   4611         charset = Charset('us-ascii')
   4612         self.assertEqual(str(charset), 'us-ascii')
   4613         self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
   4614 
   4615 
   4616 
   4617 # Test multilingual MIME headers.
   4618 class TestHeader(TestEmailBase):
   4619     def test_simple(self):
   4620         eq = self.ndiffAssertEqual
   4621         h = Header('Hello World!')
   4622         eq(h.encode(), 'Hello World!')
   4623         h.append(' Goodbye World!')
   4624         eq(h.encode(), 'Hello World!  Goodbye World!')
   4625 
   4626     def test_simple_surprise(self):
   4627         eq = self.ndiffAssertEqual
   4628         h = Header('Hello World!')
   4629         eq(h.encode(), 'Hello World!')
   4630         h.append('Goodbye World!')
   4631         eq(h.encode(), 'Hello World! Goodbye World!')
   4632 
   4633     def test_header_needs_no_decoding(self):
   4634         h = 'no decoding needed'
   4635         self.assertEqual(decode_header(h), [(h, None)])
   4636 
   4637     def test_long(self):
   4638         h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
   4639                    maxlinelen=76)
   4640         for l in h.encode(splitchars=' ').split('\n '):
   4641             self.assertLessEqual(len(l), 76)
   4642 
    def test_multilingual(self):
        # Build one header from three chunks in three different charsets,
        # check its folded encoded form, then check that decode_header() and
        # make_header() recover the original chunks and header.
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        # The first two chunks are raw bytes in their respective charsets;
        # the third is a str.
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        # Expected encoding: q-encoded words for the two iso-8859 chunks and
        # b-encoded words for the utf-8 chunk.
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back exactly three (bytes, charset-name) pairs,
        # one per appended chunk.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str(h) is compared with the whole text, written here as its utf-8
        # bytes.
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)
   4705 
   4706     def test_empty_header_encode(self):
   4707         h = Header()
   4708         self.assertEqual(h.encode(), '')
   4709 
   4710     def test_header_ctor_default_args(self):
   4711         eq = self.ndiffAssertEqual
   4712         h = Header()
   4713         eq(h, '')
   4714         h.append('foo', Charset('iso-8859-1'))
   4715         eq(h, 'foo')
   4716 
   4717     def test_explicit_maxlinelen(self):
   4718         eq = self.ndiffAssertEqual
   4719         hstr = ('A very long line that must get split to something other '
   4720                 'than at the 76th character boundary to test the non-default '
   4721                 'behavior')
   4722         h = Header(hstr)
   4723         eq(h.encode(), '''\
   4724 A very long line that must get split to something other than at the 76th
   4725  character boundary to test the non-default behavior''')
   4726         eq(str(h), hstr)
   4727         h = Header(hstr, header_name='Subject')
   4728         eq(h.encode(), '''\
   4729 A very long line that must get split to something other than at the
   4730  76th character boundary to test the non-default behavior''')
   4731         eq(str(h), hstr)
   4732         h = Header(hstr, maxlinelen=1024, header_name='Subject')
   4733         eq(h.encode(), hstr)
   4734         eq(str(h), hstr)
   4735 
   4736     def test_quopri_splittable(self):
   4737         eq = self.ndiffAssertEqual
   4738         h = Header(charset='iso-8859-1', maxlinelen=20)
   4739         x = 'xxxx ' * 20
   4740         h.append(x)
   4741         s = h.encode()
   4742         eq(s, """\
   4743 =?iso-8859-1?q?xxx?=
   4744  =?iso-8859-1?q?x_?=
   4745  =?iso-8859-1?q?xx?=
   4746  =?iso-8859-1?q?xx?=
   4747  =?iso-8859-1?q?_x?=
   4748  =?iso-8859-1?q?xx?=
   4749  =?iso-8859-1?q?x_?=
   4750  =?iso-8859-1?q?xx?=
   4751  =?iso-8859-1?q?xx?=
   4752  =?iso-8859-1?q?_x?=
   4753  =?iso-8859-1?q?xx?=
   4754  =?iso-8859-1?q?x_?=
   4755  =?iso-8859-1?q?xx?=
   4756  =?iso-8859-1?q?xx?=
   4757  =?iso-8859-1?q?_x?=
   4758  =?iso-8859-1?q?xx?=
   4759  =?iso-8859-1?q?x_?=
   4760  =?iso-8859-1?q?xx?=
   4761  =?iso-8859-1?q?xx?=
   4762  =?iso-8859-1?q?_x?=
   4763  =?iso-8859-1?q?xx?=
   4764  =?iso-8859-1?q?x_?=
   4765  =?iso-8859-1?q?xx?=
   4766  =?iso-8859-1?q?xx?=
   4767  =?iso-8859-1?q?_x?=
   4768  =?iso-8859-1?q?xx?=
   4769  =?iso-8859-1?q?x_?=
   4770  =?iso-8859-1?q?xx?=
   4771  =?iso-8859-1?q?xx?=
   4772  =?iso-8859-1?q?_x?=
   4773  =?iso-8859-1?q?xx?=
   4774  =?iso-8859-1?q?x_?=
   4775  =?iso-8859-1?q?xx?=
   4776  =?iso-8859-1?q?xx?=
   4777  =?iso-8859-1?q?_x?=
   4778  =?iso-8859-1?q?xx?=
   4779  =?iso-8859-1?q?x_?=
   4780  =?iso-8859-1?q?xx?=
   4781  =?iso-8859-1?q?xx?=
   4782  =?iso-8859-1?q?_x?=
   4783  =?iso-8859-1?q?xx?=
   4784  =?iso-8859-1?q?x_?=
   4785  =?iso-8859-1?q?xx?=
   4786  =?iso-8859-1?q?xx?=
   4787  =?iso-8859-1?q?_x?=
   4788  =?iso-8859-1?q?xx?=
   4789  =?iso-8859-1?q?x_?=
   4790  =?iso-8859-1?q?xx?=
   4791  =?iso-8859-1?q?xx?=
   4792  =?iso-8859-1?q?_?=""")
   4793         eq(x, str(make_header(decode_header(s))))
   4794         h = Header(charset='iso-8859-1', maxlinelen=40)
   4795         h.append('xxxx ' * 20)
   4796         s = h.encode()
   4797         eq(s, """\
   4798 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
   4799  =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
   4800  =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
   4801  =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
   4802  =?iso-8859-1?q?_xxxx_xxxx_?=""")
   4803         eq(x, str(make_header(decode_header(s))))
   4804 
   4805     def test_base64_splittable(self):
   4806         eq = self.ndiffAssertEqual
   4807         h = Header(charset='koi8-r', maxlinelen=20)
   4808         x = 'xxxx ' * 20
   4809         h.append(x)
   4810         s = h.encode()
   4811         eq(s, """\
   4812 =?koi8-r?b?eHh4?=
   4813  =?koi8-r?b?eCB4?=
   4814  =?koi8-r?b?eHh4?=
   4815  =?koi8-r?b?IHh4?=
   4816  =?koi8-r?b?eHgg?=
   4817  =?koi8-r?b?eHh4?=
   4818  =?koi8-r?b?eCB4?=
   4819  =?koi8-r?b?eHh4?=
   4820  =?koi8-r?b?IHh4?=
   4821  =?koi8-r?b?eHgg?=
   4822  =?koi8-r?b?eHh4?=
   4823  =?koi8-r?b?eCB4?=
   4824  =?koi8-r?b?eHh4?=
   4825  =?koi8-r?b?IHh4?=
   4826  =?koi8-r?b?eHgg?=
   4827  =?koi8-r?b?eHh4?=
   4828  =?koi8-r?b?eCB4?=
   4829  =?koi8-r?b?eHh4?=
   4830  =?koi8-r?b?IHh4?=
   4831  =?koi8-r?b?eHgg?=
   4832  =?koi8-r?b?eHh4?=
   4833  =?koi8-r?b?eCB4?=
   4834  =?koi8-r?b?eHh4?=
   4835  =?koi8-r?b?IHh4?=
   4836  =?koi8-r?b?eHgg?=
   4837  =?koi8-r?b?eHh4?=
   4838  =?koi8-r?b?eCB4?=
   4839  =?koi8-r?b?eHh4?=
   4840  =?koi8-r?b?IHh4?=
   4841  =?koi8-r?b?eHgg?=
   4842  =?koi8-r?b?eHh4?=
   4843  =?koi8-r?b?eCB4?=
   4844  =?koi8-r?b?eHh4?=
   4845  =?koi8-r?b?IA==?=""")
   4846         eq(x, str(make_header(decode_header(s))))
   4847         h = Header(charset='koi8-r', maxlinelen=40)
   4848         h.append(x)
   4849         s = h.encode()
   4850         eq(s, """\
   4851 =?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
   4852  =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
   4853  =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
   4854  =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
   4855  =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
   4856  =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
   4857         eq(x, str(make_header(decode_header(s))))
   4858 
   4859     def test_us_ascii_header(self):
   4860         eq = self.assertEqual
   4861         s = 'hello'
   4862         x = decode_header(s)
   4863         eq(x, [('hello', None)])
   4864         h = make_header(x)
   4865         eq(s, h.encode())
   4866 
   4867     def test_string_charset(self):
   4868         eq = self.assertEqual
   4869         h = Header()
   4870         h.append('hello', 'iso-8859-1')
   4871         eq(h, 'hello')
   4872 
   4873 ##    def test_unicode_error(self):
   4874 ##        raises = self.assertRaises
   4875 ##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
   4876 ##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
   4877 ##        h = Header()
   4878 ##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
   4879 ##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
   4880 ##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
   4881 
   4882     def test_utf8_shortest(self):
   4883         eq = self.assertEqual
   4884         h = Header('p\xf6stal', 'utf-8')
   4885         eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
   4886         h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
   4887         eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
   4888 
   4889     def test_bad_8bit_header(self):
   4890         raises = self.assertRaises
   4891         eq = self.assertEqual
   4892         x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
   4893         raises(UnicodeError, Header, x)
   4894         h = Header()
   4895         raises(UnicodeError, h.append, x)
   4896         e = x.decode('utf-8', 'replace')
   4897         eq(str(Header(x, errors='replace')), e)
   4898         h.append(x, errors='replace')
   4899         eq(str(h), e)
   4900 
   4901     def test_escaped_8bit_header(self):
   4902         x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
   4903         e = x.decode('ascii', 'surrogateescape')
   4904         h = Header(e, charset=email.charset.UNKNOWN8BIT)
   4905         self.assertEqual(str(h),
   4906                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
   4907         self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
   4908 
   4909     def test_header_handles_binary_unknown8bit(self):
   4910         x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
   4911         h = Header(x, charset=email.charset.UNKNOWN8BIT)
   4912         self.assertEqual(str(h),
   4913                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
   4914         self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
   4915 
   4916     def test_make_header_handles_binary_unknown8bit(self):
   4917         x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
   4918         h = Header(x, charset=email.charset.UNKNOWN8BIT)
   4919         h2 = email.header.make_header(email.header.decode_header(h))
   4920         self.assertEqual(str(h2),
   4921                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
   4922         self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
   4923 
   4924     def test_modify_returned_list_does_not_change_header(self):
   4925         h = Header('test')
   4926         chunks = email.header.decode_header(h)
   4927         chunks.append(('ascii', 'test2'))
   4928         self.assertEqual(str(h), 'test')
   4929 
   4930     def test_encoded_adjacent_nonencoded(self):
   4931         eq = self.assertEqual
   4932         h = Header()
   4933         h.append('hello', 'iso-8859-1')
   4934         h.append('world')
   4935         s = h.encode()
   4936         eq(s, '=?iso-8859-1?q?hello?= world')
   4937         h = make_header(decode_header(s))
   4938         eq(h.encode(), s)
   4939 
   4940     def test_whitespace_keeper(self):
   4941         eq = self.assertEqual
   4942         s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
   4943         parts = decode_header(s)
   4944         eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
   4945         hdr = make_header(parts)
   4946         eq(hdr.encode(),
   4947            'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
   4948 
   4949     def test_broken_base64_header(self):
   4950         raises = self.assertRaises
   4951         s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
   4952         raises(errors.HeaderParseError, decode_header, s)
   4953 
   4954     def test_shift_jis_charset(self):
   4955         h = Header('', charset='shift_jis')
   4956         self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
   4957 
   4958     def test_flatten_header_with_no_value(self):
   4959         # Issue 11401 (regression from email 4.x)  Note that the space after
   4960         # the header doesn't reflect the input, but this is also the way
   4961         # email 4.x behaved.  At some point it would be nice to fix that.
   4962         msg = email.message_from_string("EmptyHeader:")
   4963         self.assertEqual(str(msg), "EmptyHeader: \n\n")
   4964 
   4965     def test_encode_preserves_leading_ws_on_value(self):
   4966         msg = Message()
   4967         msg['SomeHeader'] = '   value with leading ws'
   4968         self.assertEqual(str(msg), "SomeHeader:    value with leading ws\n\n")
   4969 
   4970 
   4971 
   4972 # Test RFC 2231 header parameters (en/de)coding
   4973 class TestRFC2231(TestEmailBase):
   4974 
   4975     # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
   4976     # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
   4977     def test_get_param(self):
   4978         eq = self.assertEqual
   4979         msg = self._msgobj('msg_29.txt')
   4980         eq(msg.get_param('title'),
   4981            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
   4982         eq(msg.get_param('title', unquote=False),
   4983            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
   4984 
   4985     def test_set_param(self):
   4986         eq = self.ndiffAssertEqual
   4987         msg = Message()
   4988         msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
   4989                       charset='us-ascii')
   4990         eq(msg.get_param('title'),
   4991            ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
   4992         msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
   4993                       charset='us-ascii', language='en')
   4994         eq(msg.get_param('title'),
   4995            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
   4996         msg = self._msgobj('msg_01.txt')
   4997         msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
   4998                       charset='us-ascii', language='en')
   4999         eq(msg.as_string(maxheaderlen=78), """\
   5000 Return-Path: <bbb (at] zzz.org>
   5001 Delivered-To: bbb (at] zzz.org
   5002 Received: by mail.zzz.org (Postfix, from userid 889)
   5003 \tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
   5004 MIME-Version: 1.0
   5005 Content-Transfer-Encoding: 7bit
   5006 Message-ID: <15090.61304.110929.45684 (at] aaa.zzz.org>
   5007 From: bbb (at] ddd.com (John X. Doe)
   5008 To: bbb (at] zzz.org
   5009 Subject: This is a test message
   5010 Date: Fri, 4 May 2001 14:05:44 -0400
   5011 Content-Type: text/plain; charset=us-ascii;
   5012  title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
   5013 
   5014 
   5015 Hi,
   5016 
   5017 Do you like this message?
   5018 
   5019 -Me
   5020 """)
   5021 
   5022     def test_set_param_requote(self):
   5023         msg = Message()
   5024         msg.set_param('title', 'foo')
   5025         self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
   5026         msg.set_param('title', 'bar', requote=False)
   5027         self.assertEqual(msg['content-type'], 'text/plain; title=bar')
   5028         # tspecial is still quoted.
   5029         msg.set_param('title', "(bar)bell", requote=False)
   5030         self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
   5031 
   5032     def test_del_param(self):
   5033         eq = self.ndiffAssertEqual
   5034         msg = self._msgobj('msg_01.txt')
   5035         msg.set_param('foo', 'bar', charset='us-ascii', language='en')
   5036         msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
   5037             charset='us-ascii', language='en')
   5038         msg.del_param('foo', header='Content-Type')
   5039         eq(msg.as_string(maxheaderlen=78), """\
   5040 Return-Path: <bbb (at] zzz.org>
   5041 Delivered-To: bbb (at] zzz.org
   5042 Received: by mail.zzz.org (Postfix, from userid 889)
   5043 \tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
   5044 MIME-Version: 1.0
   5045 Content-Transfer-Encoding: 7bit
   5046 Message-ID: <15090.61304.110929.45684 (at] aaa.zzz.org>
   5047 From: bbb (at] ddd.com (John X. Doe)
   5048 To: bbb (at] zzz.org
   5049 Subject: This is a test message
   5050 Date: Fri, 4 May 2001 14:05:44 -0400
   5051 Content-Type: text/plain; charset="us-ascii";
   5052  title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
   5053 
   5054 
   5055 Hi,
   5056 
   5057 Do you like this message?
   5058 
   5059 -Me
   5060 """)
   5061 
   5062     # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
   5063     # I changed the charset name, though, because the one in the file isn't
   5064     # a legal charset name.  Should add a test for an illegal charset.
   5065     def test_rfc2231_get_content_charset(self):
   5066         eq = self.assertEqual
   5067         msg = self._msgobj('msg_32.txt')
   5068         eq(msg.get_content_charset(), 'us-ascii')
   5069 
   5070     # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
   5071     def test_rfc2231_parse_rfc_quoting(self):
   5072         m = textwrap.dedent('''\
   5073             Content-Disposition: inline;
   5074             \tfilename*0*=''This%20is%20even%20more%20;
   5075             \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
   5076             \tfilename*2="is it not.pdf"
   5077 
   5078             ''')
   5079         msg = email.message_from_string(m)
   5080         self.assertEqual(msg.get_filename(),
   5081                          'This is even more ***fun*** is it not.pdf')
   5082         self.assertEqual(m, msg.as_string())
   5083 
   5084     # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
   5085     def test_rfc2231_parse_extra_quoting(self):
   5086         m = textwrap.dedent('''\
   5087             Content-Disposition: inline;
   5088             \tfilename*0*="''This%20is%20even%20more%20";
   5089             \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
   5090             \tfilename*2="is it not.pdf"
   5091 
   5092             ''')
   5093         msg = email.message_from_string(m)
   5094         self.assertEqual(msg.get_filename(),
   5095                          'This is even more ***fun*** is it not.pdf')
   5096         self.assertEqual(m, msg.as_string())
   5097 
   5098     # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
   5099     # but new test uses *0* because otherwise lang/charset is not valid.
   5100     # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
   5101     def test_rfc2231_no_language_or_charset(self):
   5102         m = '''\
   5103 Content-Transfer-Encoding: 8bit
   5104 Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
   5105 Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
   5106 
   5107 '''
   5108         msg = email.message_from_string(m)
   5109         param = msg.get_param('NAME')
   5110         self.assertNotIsInstance(param, tuple)
   5111         self.assertEqual(
   5112             param,
   5113             'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
   5114 
   5115     # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
   5116     def test_rfc2231_no_language_or_charset_in_filename(self):
   5117         m = '''\
   5118 Content-Disposition: inline;
   5119 \tfilename*0*="''This%20is%20even%20more%20";
   5120 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
   5121 \tfilename*2="is it not.pdf"
   5122 
   5123 '''
   5124         msg = email.message_from_string(m)
   5125         self.assertEqual(msg.get_filename(),
   5126                          'This is even more ***fun*** is it not.pdf')
   5127 
   5128     # Duplicate of previous test?
   5129     def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
   5130         m = '''\
   5131 Content-Disposition: inline;
   5132 \tfilename*0*="''This%20is%20even%20more%20";
   5133 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
   5134 \tfilename*2="is it not.pdf"
   5135 
   5136 '''
   5137         msg = email.message_from_string(m)
   5138         self.assertEqual(msg.get_filename(),
   5139                          'This is even more ***fun*** is it not.pdf')
   5140 
   5141     # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
   5142     # but the test below is wrong (the first part should be decoded).
   5143     def test_rfc2231_partly_encoded(self):
   5144         m = '''\
   5145 Content-Disposition: inline;
   5146 \tfilename*0="''This%20is%20even%20more%20";
   5147 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
   5148 \tfilename*2="is it not.pdf"
   5149 
   5150 '''
   5151         msg = email.message_from_string(m)
   5152         self.assertEqual(
   5153             msg.get_filename(),
   5154             'This%20is%20even%20more%20***fun*** is it not.pdf')
   5155 
   5156     def test_rfc2231_partly_nonencoded(self):
   5157         m = '''\
   5158 Content-Disposition: inline;
   5159 \tfilename*0="This%20is%20even%20more%20";
   5160 \tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
   5161 \tfilename*2="is it not.pdf"
   5162 
   5163 '''
   5164         msg = email.message_from_string(m)
   5165         self.assertEqual(
   5166             msg.get_filename(),
   5167             'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
   5168 
   5169     def test_rfc2231_no_language_or_charset_in_boundary(self):
   5170         m = '''\
   5171 Content-Type: multipart/alternative;
   5172 \tboundary*0*="''This%20is%20even%20more%20";
   5173 \tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
   5174 \tboundary*2="is it not.pdf"
   5175 
   5176 '''
   5177         msg = email.message_from_string(m)
   5178         self.assertEqual(msg.get_boundary(),
   5179                          'This is even more ***fun*** is it not.pdf')
   5180 
   5181     def test_rfc2231_no_language_or_charset_in_charset(self):
   5182         # This is a nonsensical charset value, but tests the code anyway
   5183         m = '''\
   5184 Content-Type: text/plain;
   5185 \tcharset*0*="This%20is%20even%20more%20";
   5186 \tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
   5187 \tcharset*2="is it not.pdf"
   5188 
   5189 '''
   5190         msg = email.message_from_string(m)
   5191         self.assertEqual(msg.get_content_charset(),
   5192                          'this is even more ***fun*** is it not.pdf')
   5193 
   5194     # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
   5195     def test_rfc2231_bad_encoding_in_filename(self):
   5196         m = '''\
   5197 Content-Disposition: inline;
   5198 \tfilename*0*="bogus'xx'This%20is%20even%20more%20";
   5199 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
   5200 \tfilename*2="is it not.pdf"
   5201 
   5202 '''
   5203         msg = email.message_from_string(m)
   5204         self.assertEqual(msg.get_filename(),
   5205                          'This is even more ***fun*** is it not.pdf')
   5206 
   5207     def test_rfc2231_bad_encoding_in_charset(self):
   5208         m = """\
   5209 Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
   5210 
   5211 """
   5212         msg = email.message_from_string(m)
   5213         # This should return None because non-ascii characters in the charset
   5214         # are not allowed.
   5215         self.assertEqual(msg.get_content_charset(), None)
   5216 
   5217     def test_rfc2231_bad_character_in_charset(self):
   5218         m = """\
   5219 Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
   5220 
   5221 """
   5222         msg = email.message_from_string(m)
   5223         # This should return None because non-ascii characters in the charset
   5224         # are not allowed.
   5225         self.assertEqual(msg.get_content_charset(), None)
   5226 
   5227     def test_rfc2231_bad_character_in_filename(self):
   5228         m = '''\
   5229 Content-Disposition: inline;
   5230 \tfilename*0*="ascii'xx'This%20is%20even%20more%20";
   5231 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
   5232 \tfilename*2*="is it not.pdf%E2"
   5233 
   5234 '''
   5235         msg = email.message_from_string(m)
   5236         self.assertEqual(msg.get_filename(),
   5237                          'This is even more ***fun*** is it not.pdf\ufffd')
   5238 
   5239     def test_rfc2231_unknown_encoding(self):
   5240         m = """\
   5241 Content-Transfer-Encoding: 8bit
   5242 Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
   5243 
   5244 """
   5245         msg = email.message_from_string(m)
   5246         self.assertEqual(msg.get_filename(), 'myfile.txt')
   5247 
   5248     def test_rfc2231_single_tick_in_filename_extended(self):
   5249         eq = self.assertEqual
   5250         m = """\
   5251 Content-Type: application/x-foo;
   5252 \tname*0*=\"Frank's\"; name*1*=\" Document\"
   5253 
   5254 """
   5255         msg = email.message_from_string(m)
   5256         charset, language, s = msg.get_param('name')
   5257         eq(charset, None)
   5258         eq(language, None)
   5259         eq(s, "Frank's Document")
   5260 
   5261     # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
   5262     def test_rfc2231_single_tick_in_filename(self):
   5263         m = """\
   5264 Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
   5265 
   5266 """
   5267         msg = email.message_from_string(m)
   5268         param = msg.get_param('name')
   5269         self.assertNotIsInstance(param, tuple)
   5270         self.assertEqual(param, "Frank's Document")
   5271 
   5272     def test_rfc2231_missing_tick(self):
   5273         m = '''\
   5274 Content-Disposition: inline;
   5275 \tfilename*0*="'This%20is%20broken";
   5276 '''
   5277         msg = email.message_from_string(m)
   5278         self.assertEqual(
   5279             msg.get_filename(),
   5280             "'This is broken")
   5281 
   5282     def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
   5283         m = '''\
   5284 Content-Disposition: inline;
   5285 \tfilename*0*="'This%20is%E2broken";
   5286 '''
   5287         msg = email.message_from_string(m)
   5288         self.assertEqual(
   5289             msg.get_filename(),
   5290             "'This is\ufffdbroken")
   5291 
   5292     # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
   5293     def test_rfc2231_tick_attack_extended(self):
   5294         eq = self.assertEqual
   5295         m = """\
   5296 Content-Type: application/x-foo;
   5297 \tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
   5298 
   5299 """
   5300         msg = email.message_from_string(m)
   5301         charset, language, s = msg.get_param('name')
   5302         eq(charset, 'us-ascii')
   5303         eq(language, 'en-us')
   5304         eq(s, "Frank's Document")
   5305 
   5306     # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
   5307     def test_rfc2231_tick_attack(self):
   5308         m = """\
   5309 Content-Type: application/x-foo;
   5310 \tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
   5311 
   5312 """
   5313         msg = email.message_from_string(m)
   5314         param = msg.get_param('name')
   5315         self.assertNotIsInstance(param, tuple)
   5316         self.assertEqual(param, "us-ascii'en-us'Frank's Document")
   5317 
   5318     # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
   5319     def test_rfc2231_no_extended_values(self):
   5320         eq = self.assertEqual
   5321         m = """\
   5322 Content-Type: application/x-foo; name=\"Frank's Document\"
   5323 
   5324 """
   5325         msg = email.message_from_string(m)
   5326         eq(msg.get_param('name'), "Frank's Document")
   5327 
   5328     # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
   5329     def test_rfc2231_encoded_then_unencoded_segments(self):
   5330         eq = self.assertEqual
   5331         m = """\
   5332 Content-Type: application/x-foo;
   5333 \tname*0*=\"us-ascii'en-us'My\";
   5334 \tname*1=\" Document\";
   5335 \tname*2*=\" For You\"
   5336 
   5337 """
   5338         msg = email.message_from_string(m)
   5339         charset, language, s = msg.get_param('name')
   5340         eq(charset, 'us-ascii')
   5341         eq(language, 'en-us')
   5342         eq(s, 'My Document For You')
   5343 
   5344     # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
   5345     # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
   5346     def test_rfc2231_unencoded_then_encoded_segments(self):
   5347         eq = self.assertEqual
   5348         m = """\
   5349 Content-Type: application/x-foo;
   5350 \tname*0=\"us-ascii'en-us'My\";
   5351 \tname*1*=\" Document\";
   5352 \tname*2*=\" For You\"
   5353 
   5354 """
   5355         msg = email.message_from_string(m)
   5356         charset, language, s = msg.get_param('name')
   5357         eq(charset, 'us-ascii')
   5358         eq(language, 'en-us')
   5359         eq(s, 'My Document For You')
   5360 
   5361 
   5362 
   5363 # Tests to ensure that signed parts of an email are completely preserved, as
   5364 # required by RFC1847 section 2.1.  Note that these are incomplete, because the
   5365 # email package does not currently always preserve the body.  See issue 1670765.
   5366 class TestSigned(TestEmailBase):
   5367 
   5368     def _msg_and_obj(self, filename):
   5369         with openfile(filename) as fp:
   5370             original = fp.read()
   5371             msg = email.message_from_string(original)
   5372         return original, msg
   5373 
   5374     def _signed_parts_eq(self, original, result):
   5375         # Extract the first mime part of each message
   5376         import re
   5377         repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
   5378         inpart = repart.search(original).group(2)
   5379         outpart = repart.search(result).group(2)
   5380         self.assertEqual(outpart, inpart)
   5381 
   5382     def test_long_headers_as_string(self):
   5383         original, msg = self._msg_and_obj('msg_45.txt')
   5384         result = msg.as_string()
   5385         self._signed_parts_eq(original, result)
   5386 
   5387     def test_long_headers_as_string_maxheaderlen(self):
   5388         original, msg = self._msg_and_obj('msg_45.txt')
   5389         result = msg.as_string(maxheaderlen=60)
   5390         self._signed_parts_eq(original, result)
   5391 
   5392     def test_long_headers_flatten(self):
   5393         original, msg = self._msg_and_obj('msg_45.txt')
   5394         fp = StringIO()
   5395         Generator(fp).flatten(msg)
   5396         result = fp.getvalue()
   5397         self._signed_parts_eq(original, result)
   5398 
   5399 
   5400 
# When this file is executed as a script, hand control to unittest's
# command-line runner.
if __name__ == '__main__':
    unittest.main()
   5403