1 # Copyright (C) 2001-2010 Python Software Foundation 2 # Contact: email-sig (at] python.org 3 # email package unit tests 4 5 import re 6 import time 7 import base64 8 import unittest 9 import textwrap 10 11 from io import StringIO, BytesIO 12 from itertools import chain 13 from random import choice 14 from socket import getfqdn 15 try: 16 from threading import Thread 17 except ImportError: 18 from dummy_threading import Thread 19 20 import email 21 import email.policy 22 23 from email.charset import Charset 24 from email.header import Header, decode_header, make_header 25 from email.parser import Parser, HeaderParser 26 from email.generator import Generator, DecodedGenerator, BytesGenerator 27 from email.message import Message 28 from email.mime.application import MIMEApplication 29 from email.mime.audio import MIMEAudio 30 from email.mime.text import MIMEText 31 from email.mime.image import MIMEImage 32 from email.mime.base import MIMEBase 33 from email.mime.message import MIMEMessage 34 from email.mime.multipart import MIMEMultipart 35 from email.mime.nonmultipart import MIMENonMultipart 36 from email import utils 37 from email import errors 38 from email import encoders 39 from email import iterators 40 from email import base64mime 41 from email import quoprimime 42 43 from test.support import unlink, start_threads 44 from test.test_email import openfile, TestEmailBase 45 46 # These imports are documented to work, but we are testing them using a 47 # different path, so we import them here just to make sure they are importable. 
from email.parser import FeedParser, BytesFeedParser

NL = '\n'
EMPTYSTRING = ''
SPACE = ' '


# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        # get_all() returns every value of a header, or the supplied
        # fallback when the header is absent.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        # The pre-existing headers must be kept; only the charset
        # parameter is added.
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # A plain charset name is accepted in place of a Charset instance.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        # Bytes payload with a utf-8 Charset: the body is expected to be
        # base64 encoded and to round-trip through decode=True.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], 'base64')
        self.assertEqual(msg.get_payload(decode=True), data)
        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data.decode('utf-8'), charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_to_list(self):
        msg = Message()
        msg.set_payload([])
        self.assertEqual(msg.get_payload(), [])

    def test_attach_when_payload_is_string(self):
        # attach() must refuse to add a part when the payload is a string.
        msg = Message()
        msg['Content-Type'] = 'multipart/mixed'
        msg.set_payload('string payload')
        sub_msg = MIMEMessage(Message())
        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
                               msg.attach, sub_msg)

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # An explicit failobj replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        # Every accepted spelling of the uuencode CTE must decode the body.
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        # With unixfrom=True an extra leading "From " line is emitted; the
        # remainder must still equal the source text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        # as_string(policy=...) must match a Generator built with that policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_as_bytes(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        # as_bytes(policy=...) must match a BytesGenerator built with that
        # policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup is None.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        # The quoted value contains semicolons, which must not be treated
        # as parameter separators.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter appends it after the remaining ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a "/" is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value without a "/" falls back to the default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type value without a "/" falls back to the default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_get_content_disposition(self):
        msg = Message()
        self.assertIsNone(msg.get_content_disposition())
        msg.add_header('Content-Disposition', 'attachment',
                       filename='random.avi')
        self.assertEqual(msg.get_content_disposition(), 'attachment')
        msg.replace_header('Content-Disposition', 'inline')
        self.assertEqual(msg.get_content_disposition(), 'inline')
        # The returned disposition is lower-cased.
        msg.replace_header('Content-Disposition', 'InlinE')
        self.assertEqual(msg.get_content_disposition(), 'inline')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a qustionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        # '\xdf' is U+00DF (sharp s); the expected value below carries it
        # percent-encoded as utf-8 (%C3%9F).
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fu\xdfballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        # A (charset, language, value) triple selects the encoding charset;
        # U+00DF is %DF in iso-8859-1.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fu\xdfballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fu\xdfballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        # Decoding must yield the raw bytes whatever charset is declared.
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # Build the failure message as one string; it was
                # previously the repr of a tuple of two fragments.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.').format(
                         charset, encoding))

    def test_add_header_with_name_only_param(self):
        # A parameter whose value is None is emitted as a bare name, with
        # the underscore in the keyword turned into a dash.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embedded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embedded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        # '\xc9' is U+00C9; the expected Subject encodes it as =C3=89.
        m['Subject'] = '\xc9 test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        # The expected base64 body decodes to the utf-8 bytes of this text.
        m = MIMEText('\xc9 testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))


# Test the email.encoders module
class TestEncoders(unittest.TestCase):

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = email.mime.image.MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], 'base64')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        # U+6587; the expected body below is its iso-2022-jp escape sequence.
        msg = MIMEText('\u6587\n', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
        eq(msg.as_string(), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/plain; charset="iso-2022-jp"
            Content-Transfer-Encoding: 7bit

            \x1b$BJ8\x1b(B
            """))

    def test_qp_encode_latin1(self):
        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """))

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """))


# Test long header wrapping
class TestLongHeaders(TestEmailBase):

    maxDiff = None

    def test_split_long_continuation(self):
        # A continuation line with no place to fold must pass through the
        # generator unchanged.
        eq = self.ndiffAssertEqual
        msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")

855 def test_another_long_almost_unsplittable_header(self): 856 eq = self.ndiffAssertEqual 857 hstr = """\ 858 bug demonstration 859 \t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 860 \tmore text""" 861 h = Header(hstr, continuation_ws='\t') 862 eq(h.encode(), """\ 863 bug demonstration 864 \t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 865 \tmore text""") 866 h = Header(hstr.replace('\t', ' ')) 867 eq(h.encode(), """\ 868 bug demonstration 869 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 870 more text""") 871 872 def test_long_nonstring(self): 873 eq = self.ndiffAssertEqual 874 g = Charset("iso-8859-1") 875 cz = Charset("iso-8859-2") 876 utf8 = Charset("utf-8") 877 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband ' 878 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 879 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 880 b'bef\xf6rdert. ') 881 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 882 b'd\xf9vtipu.. ') 883 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 884 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 885 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 886 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 887 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 888 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 889 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 890 '\u3044\u307e\u3059\u3002') 891 h = Header(g_head, g, header_name='Subject') 892 h.append(cz_head, cz) 893 h.append(utf8_head, utf8) 894 msg = Message() 895 msg['Subject'] = h 896 sfp = StringIO() 897 g = Generator(sfp) 898 g.flatten(msg) 899 eq(sfp.getvalue(), """\ 900 Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?= 901 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?= 902 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?= 903 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?= 904 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 905 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= 906 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?= 907 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?= 908 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?= 909 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?= 910 =?utf-8?b?44CC?= 911 912 """) 913 eq(h.encode(maxlinelen=76), """\ 914 =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?= 915 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?= 916 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?= 917 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?= 918 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= 919 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?= 920 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?= 921 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?= 922 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?= 923 
=?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?= 924 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") 925 926 def test_long_header_encode(self): 927 eq = self.ndiffAssertEqual 928 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 929 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 930 header_name='X-Foobar-Spoink-Defrobnit') 931 eq(h.encode(), '''\ 932 wasnipoop; giraffes="very-long-necked-animals"; 933 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 934 935 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self): 936 eq = self.ndiffAssertEqual 937 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 938 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 939 header_name='X-Foobar-Spoink-Defrobnit', 940 continuation_ws='\t') 941 eq(h.encode(), '''\ 942 wasnipoop; giraffes="very-long-necked-animals"; 943 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 944 945 def test_long_header_encode_with_tab_continuation(self): 946 eq = self.ndiffAssertEqual 947 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t' 948 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 949 header_name='X-Foobar-Spoink-Defrobnit', 950 continuation_ws='\t') 951 eq(h.encode(), '''\ 952 wasnipoop; giraffes="very-long-necked-animals"; 953 \tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 954 955 def test_header_encode_with_different_output_charset(self): 956 h = Header('', 'euc-jp') 957 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=") 958 959 def test_long_header_encode_with_different_output_charset(self): 960 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4' 961 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4' 962 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4' 963 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp') 964 res = """\ 965 
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?= 966 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=""" 967 self.assertEqual(h.encode(), res) 968 969 def test_header_splitter(self): 970 eq = self.ndiffAssertEqual 971 msg = MIMEText('') 972 # It'd be great if we could use add_header() here, but that doesn't 973 # guarantee an order of the parameters. 974 msg['X-Foobar-Spoink-Defrobnit'] = ( 975 'wasnipoop; giraffes="very-long-necked-animals"; ' 976 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') 977 sfp = StringIO() 978 g = Generator(sfp) 979 g.flatten(msg) 980 eq(sfp.getvalue(), '''\ 981 Content-Type: text/plain; charset="us-ascii" 982 MIME-Version: 1.0 983 Content-Transfer-Encoding: 7bit 984 X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; 985 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" 986 987 ''') 988 989 def test_no_semis_header_splitter(self): 990 eq = self.ndiffAssertEqual 991 msg = Message() 992 msg['From'] = 'test (at] dom.ain' 993 msg['References'] = SPACE.join('<%d (at] dom.ain>' % i for i in range(10)) 994 msg.set_payload('Test') 995 sfp = StringIO() 996 g = Generator(sfp) 997 g.flatten(msg) 998 eq(sfp.getvalue(), """\ 999 From: test (at] dom.ain 1000 References: <0 (at] dom.ain> <1 (at] dom.ain> <2 (at] dom.ain> <3 (at] dom.ain> <4 (at] dom.ain> 1001 <5 (at] dom.ain> <6 (at] dom.ain> <7 (at] dom.ain> <8 (at] dom.ain> <9 (at] dom.ain> 1002 1003 Test""") 1004 1005 def test_last_split_chunk_does_not_fit(self): 1006 eq = self.ndiffAssertEqual 1007 h = Header('Subject: the first part of this is short, but_the_second' 1008 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1009 '_all_by_itself') 1010 eq(h.encode(), """\ 1011 Subject: the first part of this is short, 1012 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1013 1014 def test_splittable_leading_char_followed_by_overlong_unsplitable(self): 1015 eq = 
self.ndiffAssertEqual 1016 h = Header(', but_the_second' 1017 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1018 '_all_by_itself') 1019 eq(h.encode(), """\ 1020 , 1021 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1022 1023 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self): 1024 eq = self.ndiffAssertEqual 1025 h = Header(', , but_the_second' 1026 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1027 '_all_by_itself') 1028 eq(h.encode(), """\ 1029 , , 1030 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1031 1032 def test_trailing_splitable_on_overlong_unsplitable(self): 1033 eq = self.ndiffAssertEqual 1034 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1035 'be_on_a_line_all_by_itself;') 1036 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" 1037 "be_on_a_line_all_by_itself;") 1038 1039 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self): 1040 eq = self.ndiffAssertEqual 1041 h = Header('; ' 1042 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1043 'be_on_a_line_all_by_itself; ') 1044 eq(h.encode(), """\ 1045 ; 1046 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1047 1048 def test_long_header_with_multiple_sequential_split_chars(self): 1049 eq = self.ndiffAssertEqual 1050 h = Header('This is a long line that has two whitespaces in a row. ' 1051 'This used to cause truncation of the header when folded') 1052 eq(h.encode(), """\ 1053 This is a long line that has two whitespaces in a row. 
This used to cause 1054 truncation of the header when folded""") 1055 1056 def test_splitter_split_on_punctuation_only_if_fws_with_header(self): 1057 eq = self.ndiffAssertEqual 1058 h = Header('thisverylongheaderhas;semicolons;and,commas,but' 1059 'they;arenotlegal;fold,points') 1060 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;" 1061 "arenotlegal;fold,points") 1062 1063 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): 1064 eq = self.ndiffAssertEqual 1065 h = Header('this is a test where we need to have more than one line ' 1066 'before; our final line that is just too big to fit;; ' 1067 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1068 'be_on_a_line_all_by_itself;') 1069 eq(h.encode(), """\ 1070 this is a test where we need to have more than one line before; 1071 our final line that is just too big to fit;; 1072 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") 1073 1074 def test_overlong_last_part_followed_by_split_point(self): 1075 eq = self.ndiffAssertEqual 1076 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1077 'be_on_a_line_all_by_itself ') 1078 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_" 1079 "should_be_on_a_line_all_by_itself ") 1080 1081 def test_multiline_with_overlong_parts_separated_by_two_split_points(self): 1082 eq = self.ndiffAssertEqual 1083 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_' 1084 'before_our_final_line_; ; ' 1085 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1086 'be_on_a_line_all_by_itself; ') 1087 eq(h.encode(), """\ 1088 this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_; 1089 ; 1090 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1091 1092 def test_multiline_with_overlong_last_part_followed_by_split_point(self): 1093 eq = self.ndiffAssertEqual 1094 h = Header('this is a 
test where we need to have more than one line ' 1095 'before our final line; ; ' 1096 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1097 'be_on_a_line_all_by_itself; ') 1098 eq(h.encode(), """\ 1099 this is a test where we need to have more than one line before our final line; 1100 ; 1101 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1102 1103 def test_long_header_with_whitespace_runs(self): 1104 eq = self.ndiffAssertEqual 1105 msg = Message() 1106 msg['From'] = 'test (at] dom.ain' 1107 msg['References'] = SPACE.join(['<foo (at] dom.ain> '] * 10) 1108 msg.set_payload('Test') 1109 sfp = StringIO() 1110 g = Generator(sfp) 1111 g.flatten(msg) 1112 eq(sfp.getvalue(), """\ 1113 From: test (at] dom.ain 1114 References: <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> 1115 <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> 1116 <foo (at] dom.ain> <foo (at] dom.ain>\x20\x20 1117 1118 Test""") 1119 1120 def test_long_run_with_semi_header_splitter(self): 1121 eq = self.ndiffAssertEqual 1122 msg = Message() 1123 msg['From'] = 'test (at] dom.ain' 1124 msg['References'] = SPACE.join(['<foo (at] dom.ain>'] * 10) + '; abc' 1125 msg.set_payload('Test') 1126 sfp = StringIO() 1127 g = Generator(sfp) 1128 g.flatten(msg) 1129 eq(sfp.getvalue(), """\ 1130 From: test (at] dom.ain 1131 References: <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> 1132 <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> <foo (at] dom.ain> 1133 <foo (at] dom.ain>; abc 1134 1135 Test""") 1136 1137 def test_splitter_split_on_punctuation_only_if_fws(self): 1138 eq = self.ndiffAssertEqual 1139 msg = Message() 1140 msg['From'] = 'test (at] dom.ain' 1141 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' 1142 'they;arenotlegal;fold,points') 1143 msg.set_payload('Test') 1144 sfp = StringIO() 1145 g = Generator(sfp) 1146 g.flatten(msg) 
1147 # XXX the space after the header should not be there. 1148 eq(sfp.getvalue(), """\ 1149 From: test (at] dom.ain 1150 References:\x20 1151 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points 1152 1153 Test""") 1154 1155 def test_no_split_long_header(self): 1156 eq = self.ndiffAssertEqual 1157 hstr = 'References: ' + 'x' * 80 1158 h = Header(hstr) 1159 # These come on two lines because Headers are really field value 1160 # classes and don't really know about their field names. 1161 eq(h.encode(), """\ 1162 References: 1163 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") 1164 h = Header('x' * 80) 1165 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') 1166 1167 def test_splitting_multiple_long_lines(self): 1168 eq = self.ndiffAssertEqual 1169 hstr = """\ 1170 from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin (at] babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1171 \tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin (at] babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1172 \tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin (at] babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1173 """ 1174 h = Header(hstr, continuation_ws='\t') 1175 eq(h.encode(), """\ 1176 from babylon.socal-raves.org (localhost [127.0.0.1]); 1177 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1178 for <mailman-admin (at] babylon.socal-raves.org>; 1179 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1180 \tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1181 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1182 for <mailman-admin (at] babylon.socal-raves.org>; 1183 
Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1184 \tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1185 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1186 for <mailman-admin (at] babylon.socal-raves.org>; 1187 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""") 1188 1189 def test_splitting_first_line_only_is_long(self): 1190 eq = self.ndiffAssertEqual 1191 hstr = """\ 1192 from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) 1193 \tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1194 \tid 17k4h5-00034i-00 1195 \tfor test (at] mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" 1196 h = Header(hstr, maxlinelen=78, header_name='Received', 1197 continuation_ws='\t') 1198 eq(h.encode(), """\ 1199 from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] 1200 helo=cthulhu.gerg.ca) 1201 \tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1202 \tid 17k4h5-00034i-00 1203 \tfor test (at] mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") 1204 1205 def test_long_8bit_header(self): 1206 eq = self.ndiffAssertEqual 1207 msg = Message() 1208 h = Header('Britische Regierung gibt', 'iso-8859-1', 1209 header_name='Subject') 1210 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') 1211 eq(h.encode(maxlinelen=76), """\ 1212 =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1213 =?iso-8859-1?q?hore-Windkraftprojekte?=""") 1214 msg['Subject'] = h 1215 eq(msg.as_string(maxheaderlen=76), """\ 1216 Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1217 =?iso-8859-1?q?hore-Windkraftprojekte?= 1218 1219 """) 1220 eq(msg.as_string(maxheaderlen=0), """\ 1221 Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= 1222 1223 """) 1224 1225 def test_long_8bit_header_no_charset(self): 1226 eq = self.ndiffAssertEqual 1227 msg = Message() 1228 header_string = ('Britische Regierung gibt gr\xfcnes Licht ' 1229 'f\xfcr Offshore-Windkraftprojekte 
' 1230 '<a-very-long-address (at] example.com>') 1231 msg['Reply-To'] = header_string 1232 eq(msg.as_string(maxheaderlen=78), """\ 1233 Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1234 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1235 1236 """) 1237 msg = Message() 1238 msg['Reply-To'] = Header(header_string, 1239 header_name='Reply-To') 1240 eq(msg.as_string(maxheaderlen=78), """\ 1241 Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1242 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1243 1244 """) 1245 1246 def test_long_to_header(self): 1247 eq = self.ndiffAssertEqual 1248 to = ('"Someone Test #A" <someone (at] eecs.umich.edu>,' 1249 '<someone (at] eecs.umich.edu>, ' 1250 '"Someone Test #B" <someone (at] umich.edu>, ' 1251 '"Someone Test #C" <someone (at] eecs.umich.edu>, ' 1252 '"Someone Test #D" <someone (at] eecs.umich.edu>') 1253 msg = Message() 1254 msg['To'] = to 1255 eq(msg.as_string(maxheaderlen=78), '''\ 1256 To: "Someone Test #A" <someone (at] eecs.umich.edu>,<someone (at] eecs.umich.edu>, 1257 "Someone Test #B" <someone (at] umich.edu>, 1258 "Someone Test #C" <someone (at] eecs.umich.edu>, 1259 "Someone Test #D" <someone (at] eecs.umich.edu> 1260 1261 ''') 1262 1263 def test_long_line_after_append(self): 1264 eq = self.ndiffAssertEqual 1265 s = 'This is an example of string which has almost the limit of header length.' 1266 h = Header(s) 1267 h.append('Add another line.') 1268 eq(h.encode(maxlinelen=76), """\ 1269 This is an example of string which has almost the limit of header length. 1270 Add another line.""") 1271 1272 def test_shorter_line_with_append(self): 1273 eq = self.ndiffAssertEqual 1274 s = 'This is a shorter line.' 1275 h = Header(s) 1276 h.append('Add another sentence. (Surprise?)') 1277 eq(h.encode(), 1278 'This is a shorter line. Add another sentence. 
(Surprise?)') 1279 1280 def test_long_field_name(self): 1281 eq = self.ndiffAssertEqual 1282 fn = 'X-Very-Very-Very-Long-Header-Name' 1283 gs = ('Die Mieter treten hier ein werden mit einem Foerderband ' 1284 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 1285 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 1286 'bef\xf6rdert. ') 1287 h = Header(gs, 'iso-8859-1', header_name=fn) 1288 # BAW: this seems broken because the first line is too long 1289 eq(h.encode(maxlinelen=76), """\ 1290 =?iso-8859-1?q?Die_Mieter_treten_hier_e?= 1291 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?= 1292 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?= 1293 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") 1294 1295 def test_long_received_header(self): 1296 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ' 1297 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ' 1298 'Wed, 05 Mar 2003 18:10:18 -0700') 1299 msg = Message() 1300 msg['Received-1'] = Header(h, continuation_ws='\t') 1301 msg['Received-2'] = h 1302 # This should be splitting on spaces not semicolons. 1303 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1304 Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1305 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1306 Wed, 05 Mar 2003 18:10:18 -0700 1307 Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1308 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1309 Wed, 05 Mar 2003 18:10:18 -0700 1310 1311 """) 1312 1313 def test_string_headerinst_eq(self): 1314 h = ('<15975.17901.207240.414604 (at] sgigritzmann1.mathematik.' 1315 'tu-muenchen.de> (David Bremner\'s message of ' 1316 '"Thu, 6 Mar 2003 13:58:21 +0100")') 1317 msg = Message() 1318 msg['Received-1'] = Header(h, header_name='Received-1', 1319 continuation_ws='\t') 1320 msg['Received-2'] = h 1321 # XXX The space after the ':' should not be there. 
1322 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1323 Received-1:\x20 1324 <15975.17901.207240.414604 (at] sgigritzmann1.mathematik.tu-muenchen.de> (David 1325 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1326 Received-2:\x20 1327 <15975.17901.207240.414604 (at] sgigritzmann1.mathematik.tu-muenchen.de> (David 1328 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1329 1330 """) 1331 1332 def test_long_unbreakable_lines_with_continuation(self): 1333 eq = self.ndiffAssertEqual 1334 msg = Message() 1335 t = """\ 1336 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1337 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" 1338 msg['Face-1'] = t 1339 msg['Face-2'] = Header(t, header_name='Face-2') 1340 msg['Face-3'] = ' ' + t 1341 # XXX This splitting is all wrong. It the first value line should be 1342 # snug against the field name or the space after the header not there. 1343 eq(msg.as_string(maxheaderlen=78), """\ 1344 Face-1:\x20 1345 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1346 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1347 Face-2:\x20 1348 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1349 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1350 Face-3:\x20 1351 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1352 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1353 1354 """) 1355 1356 def test_another_long_multiline_header(self): 1357 eq = self.ndiffAssertEqual 1358 m = ('Received: from siimage.com ' 1359 '([172.25.1.3]) by zima.siliconimage.com with ' 1360 'Microsoft SMTPSVC(5.0.2195.4905); ' 1361 'Wed, 16 Oct 2002 07:41:11 -0700') 1362 msg = email.message_from_string(m) 1363 eq(msg.as_string(maxheaderlen=78), '''\ 1364 Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with 1365 Microsoft 
SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700 1366 1367 ''') 1368 1369 def test_long_lines_with_different_header(self): 1370 eq = self.ndiffAssertEqual 1371 h = ('List-Unsubscribe: ' 1372 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,' 1373 ' <mailto:spamassassin-talk-request (at] lists.sourceforge.net' 1374 '?subject=unsubscribe>') 1375 msg = Message() 1376 msg['List'] = h 1377 msg['List'] = Header(h, header_name='List') 1378 eq(msg.as_string(maxheaderlen=78), """\ 1379 List: List-Unsubscribe: 1380 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1381 <mailto:spamassassin-talk-request (at] lists.sourceforge.net?subject=unsubscribe> 1382 List: List-Unsubscribe: 1383 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1384 <mailto:spamassassin-talk-request (at] lists.sourceforge.net?subject=unsubscribe> 1385 1386 """) 1387 1388 def test_long_rfc2047_header_with_embedded_fws(self): 1389 h = Header(textwrap.dedent("""\ 1390 We're going to pretend this header is in a non-ascii character set 1391 \tto see if line wrapping with encoded words and embedded 1392 folding white space works"""), 1393 charset='utf-8', 1394 header_name='Test') 1395 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\ 1396 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?= 1397 =?utf-8?q?cter_set?= 1398 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?= 1399 =?utf-8?q?_folding_white_space_works?=""")+'\n') 1400 1401 1402 1403 # Test mangling of "From " lines in the body of a message 1404 class TestFromMangling(unittest.TestCase): 1405 def setUp(self): 1406 self.msg = Message() 1407 self.msg['From'] = 'aaa (at] bbb.org' 1408 self.msg.set_payload("""\ 1409 From the desk of A.A.A.: 1410 Blah blah blah 1411 """) 1412 1413 def test_mangled_from(self): 1414 s = StringIO() 1415 g = Generator(s, mangle_from_=True) 1416 g.flatten(self.msg) 1417 self.assertEqual(s.getvalue(), """\ 1418 From: aaa (at] bbb.org 
1419 1420 >From the desk of A.A.A.: 1421 Blah blah blah 1422 """) 1423 1424 def test_dont_mangle_from(self): 1425 s = StringIO() 1426 g = Generator(s, mangle_from_=False) 1427 g.flatten(self.msg) 1428 self.assertEqual(s.getvalue(), """\ 1429 From: aaa (at] bbb.org 1430 1431 From the desk of A.A.A.: 1432 Blah blah blah 1433 """) 1434 1435 def test_mangle_from_in_preamble_and_epilog(self): 1436 s = StringIO() 1437 g = Generator(s, mangle_from_=True) 1438 msg = email.message_from_string(textwrap.dedent("""\ 1439 From: foo (at] bar.com 1440 Mime-Version: 1.0 1441 Content-Type: multipart/mixed; boundary=XXX 1442 1443 From somewhere unknown 1444 1445 --XXX 1446 Content-Type: text/plain 1447 1448 foo 1449 1450 --XXX-- 1451 1452 From somewhere unknowable 1453 """)) 1454 g.flatten(msg) 1455 self.assertEqual(len([1 for x in s.getvalue().split('\n') 1456 if x.startswith('>From ')]), 2) 1457 1458 def test_mangled_from_with_bad_bytes(self): 1459 source = textwrap.dedent("""\ 1460 Content-Type: text/plain; charset="utf-8" 1461 MIME-Version: 1.0 1462 Content-Transfer-Encoding: 8bit 1463 From: aaa (at] bbb.org 1464 1465 """).encode('utf-8') 1466 msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n') 1467 b = BytesIO() 1468 g = BytesGenerator(b, mangle_from_=True) 1469 g.flatten(msg) 1470 self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n') 1471 1472 1473 # Test the basic MIMEAudio class 1474 class TestMIMEAudio(unittest.TestCase): 1475 def setUp(self): 1476 with openfile('audiotest.au', 'rb') as fp: 1477 self._audiodata = fp.read() 1478 self._au = MIMEAudio(self._audiodata) 1479 1480 def test_guess_minor_type(self): 1481 self.assertEqual(self._au.get_content_type(), 'audio/basic') 1482 1483 def test_encoding(self): 1484 payload = self._au.get_payload() 1485 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1486 self._audiodata) 1487 1488 def test_checkSetMinor(self): 1489 au = MIMEAudio(self._audiodata, 'fish') 1490 
self.assertEqual(au.get_content_type(), 'audio/fish') 1491 1492 def test_add_header(self): 1493 eq = self.assertEqual 1494 self._au.add_header('Content-Disposition', 'attachment', 1495 filename='audiotest.au') 1496 eq(self._au['content-disposition'], 1497 'attachment; filename="audiotest.au"') 1498 eq(self._au.get_params(header='content-disposition'), 1499 [('attachment', ''), ('filename', 'audiotest.au')]) 1500 eq(self._au.get_param('filename', header='content-disposition'), 1501 'audiotest.au') 1502 missing = [] 1503 eq(self._au.get_param('attachment', header='content-disposition'), '') 1504 self.assertIs(self._au.get_param('foo', failobj=missing, 1505 header='content-disposition'), missing) 1506 # Try some missing stuff 1507 self.assertIs(self._au.get_param('foobar', missing), missing) 1508 self.assertIs(self._au.get_param('attachment', missing, 1509 header='foobar'), missing) 1510 1511 1512 1513 # Test the basic MIMEImage class 1514 class TestMIMEImage(unittest.TestCase): 1515 def setUp(self): 1516 with openfile('PyBanner048.gif', 'rb') as fp: 1517 self._imgdata = fp.read() 1518 self._im = MIMEImage(self._imgdata) 1519 1520 def test_guess_minor_type(self): 1521 self.assertEqual(self._im.get_content_type(), 'image/gif') 1522 1523 def test_encoding(self): 1524 payload = self._im.get_payload() 1525 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1526 self._imgdata) 1527 1528 def test_checkSetMinor(self): 1529 im = MIMEImage(self._imgdata, 'fish') 1530 self.assertEqual(im.get_content_type(), 'image/fish') 1531 1532 def test_add_header(self): 1533 eq = self.assertEqual 1534 self._im.add_header('Content-Disposition', 'attachment', 1535 filename='dingusfish.gif') 1536 eq(self._im['content-disposition'], 1537 'attachment; filename="dingusfish.gif"') 1538 eq(self._im.get_params(header='content-disposition'), 1539 [('attachment', ''), ('filename', 'dingusfish.gif')]) 1540 eq(self._im.get_param('filename', header='content-disposition'), 1541 
'dingusfish.gif') 1542 missing = [] 1543 eq(self._im.get_param('attachment', header='content-disposition'), '') 1544 self.assertIs(self._im.get_param('foo', failobj=missing, 1545 header='content-disposition'), missing) 1546 # Try some missing stuff 1547 self.assertIs(self._im.get_param('foobar', missing), missing) 1548 self.assertIs(self._im.get_param('attachment', missing, 1549 header='foobar'), missing) 1550 1551 1552 1553 # Test the basic MIMEApplication class 1554 class TestMIMEApplication(unittest.TestCase): 1555 def test_headers(self): 1556 eq = self.assertEqual 1557 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff') 1558 eq(msg.get_content_type(), 'application/octet-stream') 1559 eq(msg['content-transfer-encoding'], 'base64') 1560 1561 def test_body(self): 1562 eq = self.assertEqual 1563 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1564 msg = MIMEApplication(bytesdata) 1565 # whitespace in the cte encoded block is RFC-irrelevant. 1566 eq(msg.get_payload().strip(), '+vv8/f7/') 1567 eq(msg.get_payload(decode=True), bytesdata) 1568 1569 def test_binary_body_with_encode_7or8bit(self): 1570 # Issue 17171. 1571 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1572 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit) 1573 # Treated as a string, this will be invalid code points. 
1574 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1575 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1576 self.assertEqual(msg['Content-Transfer-Encoding'], '8bit') 1577 s = BytesIO() 1578 g = BytesGenerator(s) 1579 g.flatten(msg) 1580 wireform = s.getvalue() 1581 msg2 = email.message_from_bytes(wireform) 1582 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1583 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1584 self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit') 1585 1586 def test_binary_body_with_encode_noop(self): 1587 # Issue 16564: This does not produce an RFC valid message, since to be 1588 # valid it should have a CTE of binary. But the below works in 1589 # Python2, and is documented as working this way. 1590 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1591 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop) 1592 # Treated as a string, this will be invalid code points. 1593 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1594 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1595 s = BytesIO() 1596 g = BytesGenerator(s) 1597 g.flatten(msg) 1598 wireform = s.getvalue() 1599 msg2 = email.message_from_bytes(wireform) 1600 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1601 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1602 1603 def test_binary_body_with_unicode_linend_encode_noop(self): 1604 # Issue 19003: This is a variation on #16564. 1605 bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff' 1606 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop) 1607 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1608 s = BytesIO() 1609 g = BytesGenerator(s) 1610 g.flatten(msg) 1611 wireform = s.getvalue() 1612 msg2 = email.message_from_bytes(wireform) 1613 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1614 1615 def test_binary_body_with_encode_quopri(self): 1616 # Issue 14360. 
1617 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff ' 1618 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri) 1619 self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20') 1620 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1621 self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable') 1622 s = BytesIO() 1623 g = BytesGenerator(s) 1624 g.flatten(msg) 1625 wireform = s.getvalue() 1626 msg2 = email.message_from_bytes(wireform) 1627 self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20') 1628 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1629 self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable') 1630 1631 def test_binary_body_with_encode_base64(self): 1632 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1633 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64) 1634 self.assertEqual(msg.get_payload(), '+vv8/f7/\n') 1635 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1636 s = BytesIO() 1637 g = BytesGenerator(s) 1638 g.flatten(msg) 1639 wireform = s.getvalue() 1640 msg2 = email.message_from_bytes(wireform) 1641 self.assertEqual(msg.get_payload(), '+vv8/f7/\n') 1642 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1643 1644 1645 # Test the basic MIMEText class 1646 class TestMIMEText(unittest.TestCase): 1647 def setUp(self): 1648 self._msg = MIMEText('hello there') 1649 1650 def test_types(self): 1651 eq = self.assertEqual 1652 eq(self._msg.get_content_type(), 'text/plain') 1653 eq(self._msg.get_param('charset'), 'us-ascii') 1654 missing = [] 1655 self.assertIs(self._msg.get_param('foobar', missing), missing) 1656 self.assertIs(self._msg.get_param('charset', missing, header='foobar'), 1657 missing) 1658 1659 def test_payload(self): 1660 self.assertEqual(self._msg.get_payload(), 'hello there') 1661 self.assertFalse(self._msg.is_multipart()) 1662 1663 def test_charset(self): 1664 eq = self.assertEqual 1665 msg = MIMEText('hello there', _charset='us-ascii') 1666 
eq(msg.get_charset().input_charset, 'us-ascii') 1667 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1668 # Also accept a Charset instance 1669 charset = Charset('utf-8') 1670 charset.body_encoding = None 1671 msg = MIMEText('hello there', _charset=charset) 1672 eq(msg.get_charset().input_charset, 'utf-8') 1673 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1674 eq(msg.get_payload(), 'hello there') 1675 1676 def test_7bit_input(self): 1677 eq = self.assertEqual 1678 msg = MIMEText('hello there', _charset='us-ascii') 1679 eq(msg.get_charset().input_charset, 'us-ascii') 1680 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1681 1682 def test_7bit_input_no_charset(self): 1683 eq = self.assertEqual 1684 msg = MIMEText('hello there') 1685 eq(msg.get_charset(), 'us-ascii') 1686 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1687 self.assertIn('hello there', msg.as_string()) 1688 1689 def test_utf8_input(self): 1690 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1691 eq = self.assertEqual 1692 msg = MIMEText(teststr, _charset='utf-8') 1693 eq(msg.get_charset().output_charset, 'utf-8') 1694 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1695 eq(msg.get_payload(decode=True), teststr.encode('utf-8')) 1696 1697 @unittest.skip("can't fix because of backward compat in email5, " 1698 "will fix in email6") 1699 def test_utf8_input_no_charset(self): 1700 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1701 self.assertRaises(UnicodeEncodeError, MIMEText, teststr) 1702 1703 1704 1705 # Test complicated multipart/* messages 1706 class TestMultipart(TestEmailBase): 1707 def setUp(self): 1708 with openfile('PyBanner048.gif', 'rb') as fp: 1709 data = fp.read() 1710 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY') 1711 image = MIMEImage(data, name='dingusfish.gif') 1712 image.add_header('content-disposition', 'attachment', 1713 filename='dingusfish.gif') 1714 intro = MIMEText('''\ 1715 Hi there, 1716 
1717 This is the dingus fish. 1718 ''') 1719 container.attach(intro) 1720 container.attach(image) 1721 container['From'] = 'Barry <barry (at] digicool.com>' 1722 container['To'] = 'Dingus Lovers <cravindogs (at] cravindogs.com>' 1723 container['Subject'] = 'Here is your dingus fish' 1724 1725 now = 987809702.54848599 1726 timetuple = time.localtime(now) 1727 if timetuple[-1] == 0: 1728 tzsecs = time.timezone 1729 else: 1730 tzsecs = time.altzone 1731 if tzsecs > 0: 1732 sign = '-' 1733 else: 1734 sign = '+' 1735 tzoffset = ' %s%04d' % (sign, tzsecs / 36) 1736 container['Date'] = time.strftime( 1737 '%a, %d %b %Y %H:%M:%S', 1738 time.localtime(now)) + tzoffset 1739 self._msg = container 1740 self._im = image 1741 self._txt = intro 1742 1743 def test_hierarchy(self): 1744 # convenience 1745 eq = self.assertEqual 1746 raises = self.assertRaises 1747 # tests 1748 m = self._msg 1749 self.assertTrue(m.is_multipart()) 1750 eq(m.get_content_type(), 'multipart/mixed') 1751 eq(len(m.get_payload()), 2) 1752 raises(IndexError, m.get_payload, 2) 1753 m0 = m.get_payload(0) 1754 m1 = m.get_payload(1) 1755 self.assertIs(m0, self._txt) 1756 self.assertIs(m1, self._im) 1757 eq(m.get_payload(), [m0, m1]) 1758 self.assertFalse(m0.is_multipart()) 1759 self.assertFalse(m1.is_multipart()) 1760 1761 def test_empty_multipart_idempotent(self): 1762 text = """\ 1763 Content-Type: multipart/mixed; boundary="BOUNDARY" 1764 MIME-Version: 1.0 1765 Subject: A subject 1766 To: aperson (at] dom.ain 1767 From: bperson (at] dom.ain 1768 1769 1770 --BOUNDARY 1771 1772 1773 --BOUNDARY-- 1774 """ 1775 msg = Parser().parsestr(text) 1776 self.ndiffAssertEqual(text, msg.as_string()) 1777 1778 def test_no_parts_in_a_multipart_with_none_epilogue(self): 1779 outer = MIMEBase('multipart', 'mixed') 1780 outer['Subject'] = 'A subject' 1781 outer['To'] = 'aperson (at] dom.ain' 1782 outer['From'] = 'bperson (at] dom.ain' 1783 outer.set_boundary('BOUNDARY') 1784 self.ndiffAssertEqual(outer.as_string(), '''\ 1785 
Content-Type: multipart/mixed; boundary="BOUNDARY" 1786 MIME-Version: 1.0 1787 Subject: A subject 1788 To: aperson (at] dom.ain 1789 From: bperson (at] dom.ain 1790 1791 --BOUNDARY 1792 1793 --BOUNDARY-- 1794 ''') 1795 1796 def test_no_parts_in_a_multipart_with_empty_epilogue(self): 1797 outer = MIMEBase('multipart', 'mixed') 1798 outer['Subject'] = 'A subject' 1799 outer['To'] = 'aperson (at] dom.ain' 1800 outer['From'] = 'bperson (at] dom.ain' 1801 outer.preamble = '' 1802 outer.epilogue = '' 1803 outer.set_boundary('BOUNDARY') 1804 self.ndiffAssertEqual(outer.as_string(), '''\ 1805 Content-Type: multipart/mixed; boundary="BOUNDARY" 1806 MIME-Version: 1.0 1807 Subject: A subject 1808 To: aperson (at] dom.ain 1809 From: bperson (at] dom.ain 1810 1811 1812 --BOUNDARY 1813 1814 --BOUNDARY-- 1815 ''') 1816 1817 def test_one_part_in_a_multipart(self): 1818 eq = self.ndiffAssertEqual 1819 outer = MIMEBase('multipart', 'mixed') 1820 outer['Subject'] = 'A subject' 1821 outer['To'] = 'aperson (at] dom.ain' 1822 outer['From'] = 'bperson (at] dom.ain' 1823 outer.set_boundary('BOUNDARY') 1824 msg = MIMEText('hello world') 1825 outer.attach(msg) 1826 eq(outer.as_string(), '''\ 1827 Content-Type: multipart/mixed; boundary="BOUNDARY" 1828 MIME-Version: 1.0 1829 Subject: A subject 1830 To: aperson (at] dom.ain 1831 From: bperson (at] dom.ain 1832 1833 --BOUNDARY 1834 Content-Type: text/plain; charset="us-ascii" 1835 MIME-Version: 1.0 1836 Content-Transfer-Encoding: 7bit 1837 1838 hello world 1839 --BOUNDARY-- 1840 ''') 1841 1842 def test_seq_parts_in_a_multipart_with_empty_preamble(self): 1843 eq = self.ndiffAssertEqual 1844 outer = MIMEBase('multipart', 'mixed') 1845 outer['Subject'] = 'A subject' 1846 outer['To'] = 'aperson (at] dom.ain' 1847 outer['From'] = 'bperson (at] dom.ain' 1848 outer.preamble = '' 1849 msg = MIMEText('hello world') 1850 outer.attach(msg) 1851 outer.set_boundary('BOUNDARY') 1852 eq(outer.as_string(), '''\ 1853 Content-Type: multipart/mixed; 
boundary="BOUNDARY" 1854 MIME-Version: 1.0 1855 Subject: A subject 1856 To: aperson (at] dom.ain 1857 From: bperson (at] dom.ain 1858 1859 1860 --BOUNDARY 1861 Content-Type: text/plain; charset="us-ascii" 1862 MIME-Version: 1.0 1863 Content-Transfer-Encoding: 7bit 1864 1865 hello world 1866 --BOUNDARY-- 1867 ''') 1868 1869 1870 def test_seq_parts_in_a_multipart_with_none_preamble(self): 1871 eq = self.ndiffAssertEqual 1872 outer = MIMEBase('multipart', 'mixed') 1873 outer['Subject'] = 'A subject' 1874 outer['To'] = 'aperson (at] dom.ain' 1875 outer['From'] = 'bperson (at] dom.ain' 1876 outer.preamble = None 1877 msg = MIMEText('hello world') 1878 outer.attach(msg) 1879 outer.set_boundary('BOUNDARY') 1880 eq(outer.as_string(), '''\ 1881 Content-Type: multipart/mixed; boundary="BOUNDARY" 1882 MIME-Version: 1.0 1883 Subject: A subject 1884 To: aperson (at] dom.ain 1885 From: bperson (at] dom.ain 1886 1887 --BOUNDARY 1888 Content-Type: text/plain; charset="us-ascii" 1889 MIME-Version: 1.0 1890 Content-Transfer-Encoding: 7bit 1891 1892 hello world 1893 --BOUNDARY-- 1894 ''') 1895 1896 1897 def test_seq_parts_in_a_multipart_with_none_epilogue(self): 1898 eq = self.ndiffAssertEqual 1899 outer = MIMEBase('multipart', 'mixed') 1900 outer['Subject'] = 'A subject' 1901 outer['To'] = 'aperson (at] dom.ain' 1902 outer['From'] = 'bperson (at] dom.ain' 1903 outer.epilogue = None 1904 msg = MIMEText('hello world') 1905 outer.attach(msg) 1906 outer.set_boundary('BOUNDARY') 1907 eq(outer.as_string(), '''\ 1908 Content-Type: multipart/mixed; boundary="BOUNDARY" 1909 MIME-Version: 1.0 1910 Subject: A subject 1911 To: aperson (at] dom.ain 1912 From: bperson (at] dom.ain 1913 1914 --BOUNDARY 1915 Content-Type: text/plain; charset="us-ascii" 1916 MIME-Version: 1.0 1917 Content-Transfer-Encoding: 7bit 1918 1919 hello world 1920 --BOUNDARY-- 1921 ''') 1922 1923 1924 def test_seq_parts_in_a_multipart_with_empty_epilogue(self): 1925 eq = self.ndiffAssertEqual 1926 outer = 
MIMEBase('multipart', 'mixed') 1927 outer['Subject'] = 'A subject' 1928 outer['To'] = 'aperson (at] dom.ain' 1929 outer['From'] = 'bperson (at] dom.ain' 1930 outer.epilogue = '' 1931 msg = MIMEText('hello world') 1932 outer.attach(msg) 1933 outer.set_boundary('BOUNDARY') 1934 eq(outer.as_string(), '''\ 1935 Content-Type: multipart/mixed; boundary="BOUNDARY" 1936 MIME-Version: 1.0 1937 Subject: A subject 1938 To: aperson (at] dom.ain 1939 From: bperson (at] dom.ain 1940 1941 --BOUNDARY 1942 Content-Type: text/plain; charset="us-ascii" 1943 MIME-Version: 1.0 1944 Content-Transfer-Encoding: 7bit 1945 1946 hello world 1947 --BOUNDARY-- 1948 ''') 1949 1950 1951 def test_seq_parts_in_a_multipart_with_nl_epilogue(self): 1952 eq = self.ndiffAssertEqual 1953 outer = MIMEBase('multipart', 'mixed') 1954 outer['Subject'] = 'A subject' 1955 outer['To'] = 'aperson (at] dom.ain' 1956 outer['From'] = 'bperson (at] dom.ain' 1957 outer.epilogue = '\n' 1958 msg = MIMEText('hello world') 1959 outer.attach(msg) 1960 outer.set_boundary('BOUNDARY') 1961 eq(outer.as_string(), '''\ 1962 Content-Type: multipart/mixed; boundary="BOUNDARY" 1963 MIME-Version: 1.0 1964 Subject: A subject 1965 To: aperson (at] dom.ain 1966 From: bperson (at] dom.ain 1967 1968 --BOUNDARY 1969 Content-Type: text/plain; charset="us-ascii" 1970 MIME-Version: 1.0 1971 Content-Transfer-Encoding: 7bit 1972 1973 hello world 1974 --BOUNDARY-- 1975 1976 ''') 1977 1978 def test_message_external_body(self): 1979 eq = self.assertEqual 1980 msg = self._msgobj('msg_36.txt') 1981 eq(len(msg.get_payload()), 2) 1982 msg1 = msg.get_payload(1) 1983 eq(msg1.get_content_type(), 'multipart/alternative') 1984 eq(len(msg1.get_payload()), 2) 1985 for subpart in msg1.get_payload(): 1986 eq(subpart.get_content_type(), 'message/external-body') 1987 eq(len(subpart.get_payload()), 1) 1988 subsubpart = subpart.get_payload(0) 1989 eq(subsubpart.get_content_type(), 'text/plain') 1990 1991 def test_double_boundary(self): 1992 # msg_37.txt is a 
multipart that contains two dash-boundary's in a 1993 # row. Our interpretation of RFC 2046 calls for ignoring the second 1994 # and subsequent boundaries. 1995 msg = self._msgobj('msg_37.txt') 1996 self.assertEqual(len(msg.get_payload()), 3) 1997 1998 def test_nested_inner_contains_outer_boundary(self): 1999 eq = self.ndiffAssertEqual 2000 # msg_38.txt has an inner part that contains outer boundaries. My 2001 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say 2002 # these are illegal and should be interpreted as unterminated inner 2003 # parts. 2004 msg = self._msgobj('msg_38.txt') 2005 sfp = StringIO() 2006 iterators._structure(msg, sfp) 2007 eq(sfp.getvalue(), """\ 2008 multipart/mixed 2009 multipart/mixed 2010 multipart/alternative 2011 text/plain 2012 text/plain 2013 text/plain 2014 text/plain 2015 """) 2016 2017 def test_nested_with_same_boundary(self): 2018 eq = self.ndiffAssertEqual 2019 # msg 39.txt is similarly evil in that it's got inner parts that use 2020 # the same boundary as outer parts. 
Again, I believe the way this is 2021 # parsed is closest to the spirit of RFC 2046 2022 msg = self._msgobj('msg_39.txt') 2023 sfp = StringIO() 2024 iterators._structure(msg, sfp) 2025 eq(sfp.getvalue(), """\ 2026 multipart/mixed 2027 multipart/mixed 2028 multipart/alternative 2029 application/octet-stream 2030 application/octet-stream 2031 text/plain 2032 """) 2033 2034 def test_boundary_in_non_multipart(self): 2035 msg = self._msgobj('msg_40.txt') 2036 self.assertEqual(msg.as_string(), '''\ 2037 MIME-Version: 1.0 2038 Content-Type: text/html; boundary="--961284236552522269" 2039 2040 ----961284236552522269 2041 Content-Type: text/html; 2042 Content-Transfer-Encoding: 7Bit 2043 2044 <html></html> 2045 2046 ----961284236552522269-- 2047 ''') 2048 2049 def test_boundary_with_leading_space(self): 2050 eq = self.assertEqual 2051 msg = email.message_from_string('''\ 2052 MIME-Version: 1.0 2053 Content-Type: multipart/mixed; boundary=" XXXX" 2054 2055 -- XXXX 2056 Content-Type: text/plain 2057 2058 2059 -- XXXX 2060 Content-Type: text/plain 2061 2062 -- XXXX-- 2063 ''') 2064 self.assertTrue(msg.is_multipart()) 2065 eq(msg.get_boundary(), ' XXXX') 2066 eq(len(msg.get_payload()), 2) 2067 2068 def test_boundary_without_trailing_newline(self): 2069 m = Parser().parsestr("""\ 2070 Content-Type: multipart/mixed; boundary="===============0012394164==" 2071 MIME-Version: 1.0 2072 2073 --===============0012394164== 2074 Content-Type: image/file1.jpg 2075 MIME-Version: 1.0 2076 Content-Transfer-Encoding: base64 2077 2078 YXNkZg== 2079 --===============0012394164==--""") 2080 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==') 2081 2082 def test_mimebase_default_policy(self): 2083 m = MIMEBase('multipart', 'mixed') 2084 self.assertIs(m.policy, email.policy.compat32) 2085 2086 def test_mimebase_custom_policy(self): 2087 m = MIMEBase('multipart', 'mixed', policy=email.policy.default) 2088 self.assertIs(m.policy, email.policy.default) 2089 2090 # Test some badly formatted 
messages 2091 class TestNonConformant(TestEmailBase): 2092 2093 def test_parse_missing_minor_type(self): 2094 eq = self.assertEqual 2095 msg = self._msgobj('msg_14.txt') 2096 eq(msg.get_content_type(), 'text/plain') 2097 eq(msg.get_content_maintype(), 'text') 2098 eq(msg.get_content_subtype(), 'plain') 2099 2100 # test_defect_handling 2101 def test_same_boundary_inner_outer(self): 2102 msg = self._msgobj('msg_15.txt') 2103 # XXX We can probably eventually do better 2104 inner = msg.get_payload(0) 2105 self.assertTrue(hasattr(inner, 'defects')) 2106 self.assertEqual(len(inner.defects), 1) 2107 self.assertIsInstance(inner.defects[0], 2108 errors.StartBoundaryNotFoundDefect) 2109 2110 # test_defect_handling 2111 def test_multipart_no_boundary(self): 2112 msg = self._msgobj('msg_25.txt') 2113 self.assertIsInstance(msg.get_payload(), str) 2114 self.assertEqual(len(msg.defects), 2) 2115 self.assertIsInstance(msg.defects[0], 2116 errors.NoBoundaryInMultipartDefect) 2117 self.assertIsInstance(msg.defects[1], 2118 errors.MultipartInvariantViolationDefect) 2119 2120 multipart_msg = textwrap.dedent("""\ 2121 Date: Wed, 14 Nov 2007 12:56:23 GMT 2122 From: foo (at] bar.invalid 2123 To: foo (at] bar.invalid 2124 Subject: Content-Transfer-Encoding: base64 and multipart 2125 MIME-Version: 1.0 2126 Content-Type: multipart/mixed; 2127 boundary="===============3344438784458119861=="{} 2128 2129 --===============3344438784458119861== 2130 Content-Type: text/plain 2131 2132 Test message 2133 2134 --===============3344438784458119861== 2135 Content-Type: application/octet-stream 2136 Content-Transfer-Encoding: base64 2137 2138 YWJj 2139 2140 --===============3344438784458119861==-- 2141 """) 2142 2143 # test_defect_handling 2144 def test_multipart_invalid_cte(self): 2145 msg = self._str_msg( 2146 self.multipart_msg.format("\nContent-Transfer-Encoding: base64")) 2147 self.assertEqual(len(msg.defects), 1) 2148 self.assertIsInstance(msg.defects[0], 2149 
errors.InvalidMultipartContentTransferEncodingDefect) 2150 2151 # test_defect_handling 2152 def test_multipart_no_cte_no_defect(self): 2153 msg = self._str_msg(self.multipart_msg.format('')) 2154 self.assertEqual(len(msg.defects), 0) 2155 2156 # test_defect_handling 2157 def test_multipart_valid_cte_no_defect(self): 2158 for cte in ('7bit', '8bit', 'BINary'): 2159 msg = self._str_msg( 2160 self.multipart_msg.format( 2161 "\nContent-Transfer-Encoding: {}".format(cte))) 2162 self.assertEqual(len(msg.defects), 0) 2163 2164 # test_headerregistry.TestContentTyopeHeader invalid_1 and invalid_2. 2165 def test_invalid_content_type(self): 2166 eq = self.assertEqual 2167 neq = self.ndiffAssertEqual 2168 msg = Message() 2169 # RFC 2045, $5.2 says invalid yields text/plain 2170 msg['Content-Type'] = 'text' 2171 eq(msg.get_content_maintype(), 'text') 2172 eq(msg.get_content_subtype(), 'plain') 2173 eq(msg.get_content_type(), 'text/plain') 2174 # Clear the old value and try something /really/ invalid 2175 del msg['content-type'] 2176 msg['Content-Type'] = 'foo' 2177 eq(msg.get_content_maintype(), 'text') 2178 eq(msg.get_content_subtype(), 'plain') 2179 eq(msg.get_content_type(), 'text/plain') 2180 # Still, make sure that the message is idempotently generated 2181 s = StringIO() 2182 g = Generator(s) 2183 g.flatten(msg) 2184 neq(s.getvalue(), 'Content-Type: foo\n\n') 2185 2186 def test_no_start_boundary(self): 2187 eq = self.ndiffAssertEqual 2188 msg = self._msgobj('msg_31.txt') 2189 eq(msg.get_payload(), """\ 2190 --BOUNDARY 2191 Content-Type: text/plain 2192 2193 message 1 2194 2195 --BOUNDARY 2196 Content-Type: text/plain 2197 2198 message 2 2199 2200 --BOUNDARY-- 2201 """) 2202 2203 def test_no_separating_blank_line(self): 2204 eq = self.ndiffAssertEqual 2205 msg = self._msgobj('msg_35.txt') 2206 eq(msg.as_string(), """\ 2207 From: aperson (at] dom.ain 2208 To: bperson (at] dom.ain 2209 Subject: here's something interesting 2210 2211 counter to RFC 2822, there's no separating 
newline here 2212 """) 2213 2214 # test_defect_handling 2215 def test_lying_multipart(self): 2216 msg = self._msgobj('msg_41.txt') 2217 self.assertTrue(hasattr(msg, 'defects')) 2218 self.assertEqual(len(msg.defects), 2) 2219 self.assertIsInstance(msg.defects[0], 2220 errors.NoBoundaryInMultipartDefect) 2221 self.assertIsInstance(msg.defects[1], 2222 errors.MultipartInvariantViolationDefect) 2223 2224 # test_defect_handling 2225 def test_missing_start_boundary(self): 2226 outer = self._msgobj('msg_42.txt') 2227 # The message structure is: 2228 # 2229 # multipart/mixed 2230 # text/plain 2231 # message/rfc822 2232 # multipart/mixed [*] 2233 # 2234 # [*] This message is missing its start boundary 2235 bad = outer.get_payload(1).get_payload(0) 2236 self.assertEqual(len(bad.defects), 1) 2237 self.assertIsInstance(bad.defects[0], 2238 errors.StartBoundaryNotFoundDefect) 2239 2240 # test_defect_handling 2241 def test_first_line_is_continuation_header(self): 2242 eq = self.assertEqual 2243 m = ' Line 1\nSubject: test\n\nbody' 2244 msg = email.message_from_string(m) 2245 eq(msg.keys(), ['Subject']) 2246 eq(msg.get_payload(), 'body') 2247 eq(len(msg.defects), 1) 2248 self.assertDefectsEqual(msg.defects, 2249 [errors.FirstHeaderLineIsContinuationDefect]) 2250 eq(msg.defects[0].line, ' Line 1\n') 2251 2252 # test_defect_handling 2253 def test_missing_header_body_separator(self): 2254 # Our heuristic if we see a line that doesn't look like a header (no 2255 # leading whitespace but no ':') is to assume that the blank line that 2256 # separates the header from the body is missing, and to stop parsing 2257 # headers and start parsing the body. 
2258 msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n') 2259 self.assertEqual(msg.keys(), ['Subject']) 2260 self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n') 2261 self.assertDefectsEqual(msg.defects, 2262 [errors.MissingHeaderBodySeparatorDefect]) 2263 2264 2265 # Test RFC 2047 header encoding and decoding 2266 class TestRFC2047(TestEmailBase): 2267 def test_rfc2047_multiline(self): 2268 eq = self.assertEqual 2269 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz 2270 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" 2271 dh = decode_header(s) 2272 eq(dh, [ 2273 (b'Re: ', None), 2274 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'), 2275 (b' baz foo bar ', None), 2276 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')]) 2277 header = make_header(dh) 2278 eq(str(header), 2279 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s') 2280 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\ 2281 Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= 2282 =?mac-iceland?q?=9Arg=8Cs?=""") 2283 2284 def test_whitespace_keeper_unicode(self): 2285 eq = self.assertEqual 2286 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard (at] dom.ain>' 2287 dh = decode_header(s) 2288 eq(dh, [(b'Andr\xe9', 'iso-8859-1'), 2289 (b' Pirard <pirard (at] dom.ain>', None)]) 2290 header = str(make_header(dh)) 2291 eq(header, 'Andr\xe9 Pirard <pirard (at] dom.ain>') 2292 2293 def test_whitespace_keeper_unicode_2(self): 2294 eq = self.assertEqual 2295 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' 2296 dh = decode_header(s) 2297 eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'), 2298 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')]) 2299 hu = str(make_header(dh)) 2300 eq(hu, 'The quick brown fox jumped over the lazy dog') 2301 2302 def test_rfc2047_missing_whitespace(self): 2303 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' 2304 dh = decode_header(s) 2305 self.assertEqual(dh, [(b'Sm', 
None), (b'\xf6', 'iso-8859-1'), 2306 (b'rg', None), (b'\xe5', 'iso-8859-1'), 2307 (b'sbord', None)]) 2308 2309 def test_rfc2047_with_whitespace(self): 2310 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' 2311 dh = decode_header(s) 2312 self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'), 2313 (b' rg ', None), (b'\xe5', 'iso-8859-1'), 2314 (b' sbord', None)]) 2315 2316 def test_rfc2047_B_bad_padding(self): 2317 s = '=?iso-8859-1?B?%s?=' 2318 data = [ # only test complete bytes 2319 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'), 2320 ('dmk=', b'vi'), ('dmk', b'vi') 2321 ] 2322 for q, a in data: 2323 dh = decode_header(s % q) 2324 self.assertEqual(dh, [(a, 'iso-8859-1')]) 2325 2326 def test_rfc2047_Q_invalid_digits(self): 2327 # issue 10004. 2328 s = '=?iso-8859-1?Q?andr=e9=zz?=' 2329 self.assertEqual(decode_header(s), 2330 [(b'andr\xe9=zz', 'iso-8859-1')]) 2331 2332 def test_rfc2047_rfc2047_1(self): 2333 # 1st testcase at end of rfc2047 2334 s = '(=?ISO-8859-1?Q?a?=)' 2335 self.assertEqual(decode_header(s), 2336 [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]) 2337 2338 def test_rfc2047_rfc2047_2(self): 2339 # 2nd testcase at end of rfc2047 2340 s = '(=?ISO-8859-1?Q?a?= b)' 2341 self.assertEqual(decode_header(s), 2342 [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]) 2343 2344 def test_rfc2047_rfc2047_3(self): 2345 # 3rd testcase at end of rfc2047 2346 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2347 self.assertEqual(decode_header(s), 2348 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2349 2350 def test_rfc2047_rfc2047_4(self): 2351 # 4th testcase at end of rfc2047 2352 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2353 self.assertEqual(decode_header(s), 2354 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2355 2356 def test_rfc2047_rfc2047_5a(self): 2357 # 5th testcase at end of rfc2047 newline is \r\n 2358 s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)' 2359 self.assertEqual(decode_header(s), 2360 [(b'(', None), (b'ab', 
'iso-8859-1'), (b')', None)]) 2361 2362 def test_rfc2047_rfc2047_5b(self): 2363 # 5th testcase at end of rfc2047 newline is \n 2364 s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)' 2365 self.assertEqual(decode_header(s), 2366 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2367 2368 def test_rfc2047_rfc2047_6(self): 2369 # 6th testcase at end of rfc2047 2370 s = '(=?ISO-8859-1?Q?a_b?=)' 2371 self.assertEqual(decode_header(s), 2372 [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]) 2373 2374 def test_rfc2047_rfc2047_7(self): 2375 # 7th testcase at end of rfc2047 2376 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)' 2377 self.assertEqual(decode_header(s), 2378 [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'), 2379 (b')', None)]) 2380 self.assertEqual(make_header(decode_header(s)).encode(), s.lower()) 2381 self.assertEqual(str(make_header(decode_header(s))), '(a b)') 2382 2383 def test_multiline_header(self): 2384 s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller (at] xxx.com>' 2385 self.assertEqual(decode_header(s), 2386 [(b'"M\xfcller T"', 'windows-1252'), 2387 (b'<T.Mueller (at] xxx.com>', None)]) 2388 self.assertEqual(make_header(decode_header(s)).encode(), 2389 ''.join(s.splitlines())) 2390 self.assertEqual(str(make_header(decode_header(s))), 2391 '"Mller T" <T.Mueller (at] xxx.com>') 2392 2393 2394 # Test the MIMEMessage class 2395 class TestMIMEMessage(TestEmailBase): 2396 def setUp(self): 2397 with openfile('msg_11.txt') as fp: 2398 self._text = fp.read() 2399 2400 def test_type_error(self): 2401 self.assertRaises(TypeError, MIMEMessage, 'a plain string') 2402 2403 def test_valid_argument(self): 2404 eq = self.assertEqual 2405 subject = 'A sub-message' 2406 m = Message() 2407 m['Subject'] = subject 2408 r = MIMEMessage(m) 2409 eq(r.get_content_type(), 'message/rfc822') 2410 payload = r.get_payload() 2411 self.assertIsInstance(payload, list) 2412 eq(len(payload), 1) 2413 subpart = payload[0] 2414 self.assertIs(subpart, m) 2415 
eq(subpart['subject'], subject) 2416 2417 def test_bad_multipart(self): 2418 msg1 = Message() 2419 msg1['Subject'] = 'subpart 1' 2420 msg2 = Message() 2421 msg2['Subject'] = 'subpart 2' 2422 r = MIMEMessage(msg1) 2423 self.assertRaises(errors.MultipartConversionError, r.attach, msg2) 2424 2425 def test_generate(self): 2426 # First craft the message to be encapsulated 2427 m = Message() 2428 m['Subject'] = 'An enclosed message' 2429 m.set_payload('Here is the body of the message.\n') 2430 r = MIMEMessage(m) 2431 r['Subject'] = 'The enclosing message' 2432 s = StringIO() 2433 g = Generator(s) 2434 g.flatten(r) 2435 self.assertEqual(s.getvalue(), """\ 2436 Content-Type: message/rfc822 2437 MIME-Version: 1.0 2438 Subject: The enclosing message 2439 2440 Subject: An enclosed message 2441 2442 Here is the body of the message. 2443 """) 2444 2445 def test_parse_message_rfc822(self): 2446 eq = self.assertEqual 2447 msg = self._msgobj('msg_11.txt') 2448 eq(msg.get_content_type(), 'message/rfc822') 2449 payload = msg.get_payload() 2450 self.assertIsInstance(payload, list) 2451 eq(len(payload), 1) 2452 submsg = payload[0] 2453 self.assertIsInstance(submsg, Message) 2454 eq(submsg['subject'], 'An enclosed message') 2455 eq(submsg.get_payload(), 'Here is the body of the message.\n') 2456 2457 def test_dsn(self): 2458 eq = self.assertEqual 2459 # msg 16 is a Delivery Status Notification, see RFC 1894 2460 msg = self._msgobj('msg_16.txt') 2461 eq(msg.get_content_type(), 'multipart/report') 2462 self.assertTrue(msg.is_multipart()) 2463 eq(len(msg.get_payload()), 3) 2464 # Subpart 1 is a text/plain, human readable section 2465 subpart = msg.get_payload(0) 2466 eq(subpart.get_content_type(), 'text/plain') 2467 eq(subpart.get_payload(), """\ 2468 This report relates to a message you sent with the following header fields: 2469 2470 Message-id: <002001c144a6$8752e060$56104586 (at] oxy.edu> 2471 Date: Sun, 23 Sep 2001 20:10:55 -0700 2472 From: "Ian T. 
Henry" <henryi (at] oxy.edu> 2473 To: SoCal Raves <scr (at] socal-raves.org> 2474 Subject: [scr] yeah for Ians!! 2475 2476 Your message cannot be delivered to the following recipients: 2477 2478 Recipient address: jangel1 (at] cougar.noc.ucla.edu 2479 Reason: recipient reached disk quota 2480 2481 """) 2482 # Subpart 2 contains the machine parsable DSN information. It 2483 # consists of two blocks of headers, represented by two nested Message 2484 # objects. 2485 subpart = msg.get_payload(1) 2486 eq(subpart.get_content_type(), 'message/delivery-status') 2487 eq(len(subpart.get_payload()), 2) 2488 # message/delivery-status should treat each block as a bunch of 2489 # headers, i.e. a bunch of Message objects. 2490 dsn1 = subpart.get_payload(0) 2491 self.assertIsInstance(dsn1, Message) 2492 eq(dsn1['original-envelope-id'], '0GK500B4HD0888 (at] cougar.noc.ucla.edu') 2493 eq(dsn1.get_param('dns', header='reporting-mta'), '') 2494 # Try a missing one <wink> 2495 eq(dsn1.get_param('nsd', header='reporting-mta'), None) 2496 dsn2 = subpart.get_payload(1) 2497 self.assertIsInstance(dsn2, Message) 2498 eq(dsn2['action'], 'failed') 2499 eq(dsn2.get_params(header='original-recipient'), 2500 [('rfc822', ''), ('jangel1 (at] cougar.noc.ucla.edu', '')]) 2501 eq(dsn2.get_param('rfc822', header='final-recipient'), '') 2502 # Subpart 3 is the original message 2503 subpart = msg.get_payload(2) 2504 eq(subpart.get_content_type(), 'message/rfc822') 2505 payload = subpart.get_payload() 2506 self.assertIsInstance(payload, list) 2507 eq(len(payload), 1) 2508 subsubpart = payload[0] 2509 self.assertIsInstance(subsubpart, Message) 2510 eq(subsubpart.get_content_type(), 'text/plain') 2511 eq(subsubpart['message-id'], 2512 '<002001c144a6$8752e060$56104586 (at] oxy.edu>') 2513 2514 def test_epilogue(self): 2515 eq = self.ndiffAssertEqual 2516 with openfile('msg_21.txt') as fp: 2517 text = fp.read() 2518 msg = Message() 2519 msg['From'] = 'aperson (at] dom.ain' 2520 msg['To'] = 'bperson (at] 
dom.ain' 2521 msg['Subject'] = 'Test' 2522 msg.preamble = 'MIME message' 2523 msg.epilogue = 'End of MIME message\n' 2524 msg1 = MIMEText('One') 2525 msg2 = MIMEText('Two') 2526 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') 2527 msg.attach(msg1) 2528 msg.attach(msg2) 2529 sfp = StringIO() 2530 g = Generator(sfp) 2531 g.flatten(msg) 2532 eq(sfp.getvalue(), text) 2533 2534 def test_no_nl_preamble(self): 2535 eq = self.ndiffAssertEqual 2536 msg = Message() 2537 msg['From'] = 'aperson (at] dom.ain' 2538 msg['To'] = 'bperson (at] dom.ain' 2539 msg['Subject'] = 'Test' 2540 msg.preamble = 'MIME message' 2541 msg.epilogue = '' 2542 msg1 = MIMEText('One') 2543 msg2 = MIMEText('Two') 2544 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') 2545 msg.attach(msg1) 2546 msg.attach(msg2) 2547 eq(msg.as_string(), """\ 2548 From: aperson (at] dom.ain 2549 To: bperson (at] dom.ain 2550 Subject: Test 2551 Content-Type: multipart/mixed; boundary="BOUNDARY" 2552 2553 MIME message 2554 --BOUNDARY 2555 Content-Type: text/plain; charset="us-ascii" 2556 MIME-Version: 1.0 2557 Content-Transfer-Encoding: 7bit 2558 2559 One 2560 --BOUNDARY 2561 Content-Type: text/plain; charset="us-ascii" 2562 MIME-Version: 1.0 2563 Content-Transfer-Encoding: 7bit 2564 2565 Two 2566 --BOUNDARY-- 2567 """) 2568 2569 def test_default_type(self): 2570 eq = self.assertEqual 2571 with openfile('msg_30.txt') as fp: 2572 msg = email.message_from_file(fp) 2573 container1 = msg.get_payload(0) 2574 eq(container1.get_default_type(), 'message/rfc822') 2575 eq(container1.get_content_type(), 'message/rfc822') 2576 container2 = msg.get_payload(1) 2577 eq(container2.get_default_type(), 'message/rfc822') 2578 eq(container2.get_content_type(), 'message/rfc822') 2579 container1a = container1.get_payload(0) 2580 eq(container1a.get_default_type(), 'text/plain') 2581 eq(container1a.get_content_type(), 'text/plain') 2582 container2a = container2.get_payload(0) 2583 
eq(container2a.get_default_type(), 'text/plain') 2584 eq(container2a.get_content_type(), 'text/plain') 2585 2586 def test_default_type_with_explicit_container_type(self): 2587 eq = self.assertEqual 2588 with openfile('msg_28.txt') as fp: 2589 msg = email.message_from_file(fp) 2590 container1 = msg.get_payload(0) 2591 eq(container1.get_default_type(), 'message/rfc822') 2592 eq(container1.get_content_type(), 'message/rfc822') 2593 container2 = msg.get_payload(1) 2594 eq(container2.get_default_type(), 'message/rfc822') 2595 eq(container2.get_content_type(), 'message/rfc822') 2596 container1a = container1.get_payload(0) 2597 eq(container1a.get_default_type(), 'text/plain') 2598 eq(container1a.get_content_type(), 'text/plain') 2599 container2a = container2.get_payload(0) 2600 eq(container2a.get_default_type(), 'text/plain') 2601 eq(container2a.get_content_type(), 'text/plain') 2602 2603 def test_default_type_non_parsed(self): 2604 eq = self.assertEqual 2605 neq = self.ndiffAssertEqual 2606 # Set up container 2607 container = MIMEMultipart('digest', 'BOUNDARY') 2608 container.epilogue = '' 2609 # Set up subparts 2610 subpart1a = MIMEText('message 1\n') 2611 subpart2a = MIMEText('message 2\n') 2612 subpart1 = MIMEMessage(subpart1a) 2613 subpart2 = MIMEMessage(subpart2a) 2614 container.attach(subpart1) 2615 container.attach(subpart2) 2616 eq(subpart1.get_content_type(), 'message/rfc822') 2617 eq(subpart1.get_default_type(), 'message/rfc822') 2618 eq(subpart2.get_content_type(), 'message/rfc822') 2619 eq(subpart2.get_default_type(), 'message/rfc822') 2620 neq(container.as_string(0), '''\ 2621 Content-Type: multipart/digest; boundary="BOUNDARY" 2622 MIME-Version: 1.0 2623 2624 --BOUNDARY 2625 Content-Type: message/rfc822 2626 MIME-Version: 1.0 2627 2628 Content-Type: text/plain; charset="us-ascii" 2629 MIME-Version: 1.0 2630 Content-Transfer-Encoding: 7bit 2631 2632 message 1 2633 2634 --BOUNDARY 2635 Content-Type: message/rfc822 2636 MIME-Version: 1.0 2637 2638 
Content-Type: text/plain; charset="us-ascii" 2639 MIME-Version: 1.0 2640 Content-Transfer-Encoding: 7bit 2641 2642 message 2 2643 2644 --BOUNDARY-- 2645 ''') 2646 del subpart1['content-type'] 2647 del subpart1['mime-version'] 2648 del subpart2['content-type'] 2649 del subpart2['mime-version'] 2650 eq(subpart1.get_content_type(), 'message/rfc822') 2651 eq(subpart1.get_default_type(), 'message/rfc822') 2652 eq(subpart2.get_content_type(), 'message/rfc822') 2653 eq(subpart2.get_default_type(), 'message/rfc822') 2654 neq(container.as_string(0), '''\ 2655 Content-Type: multipart/digest; boundary="BOUNDARY" 2656 MIME-Version: 1.0 2657 2658 --BOUNDARY 2659 2660 Content-Type: text/plain; charset="us-ascii" 2661 MIME-Version: 1.0 2662 Content-Transfer-Encoding: 7bit 2663 2664 message 1 2665 2666 --BOUNDARY 2667 2668 Content-Type: text/plain; charset="us-ascii" 2669 MIME-Version: 1.0 2670 Content-Transfer-Encoding: 7bit 2671 2672 message 2 2673 2674 --BOUNDARY-- 2675 ''') 2676 2677 def test_mime_attachments_in_constructor(self): 2678 eq = self.assertEqual 2679 text1 = MIMEText('') 2680 text2 = MIMEText('') 2681 msg = MIMEMultipart(_subparts=(text1, text2)) 2682 eq(len(msg.get_payload()), 2) 2683 eq(msg.get_payload(0), text1) 2684 eq(msg.get_payload(1), text2) 2685 2686 def test_default_multipart_constructor(self): 2687 msg = MIMEMultipart() 2688 self.assertTrue(msg.is_multipart()) 2689 2690 def test_multipart_default_policy(self): 2691 msg = MIMEMultipart() 2692 msg['To'] = 'a (at] b.com' 2693 msg['To'] = 'c (at] d.com' 2694 self.assertEqual(msg.get_all('to'), ['a (at] b.com', 'c (at] d.com']) 2695 2696 def test_multipart_custom_policy(self): 2697 msg = MIMEMultipart(policy=email.policy.default) 2698 msg['To'] = 'a (at] b.com' 2699 with self.assertRaises(ValueError) as cm: 2700 msg['To'] = 'c (at] d.com' 2701 self.assertEqual(str(cm.exception), 2702 'There may be at most 1 To headers in a message') 2703 2704 # A general test of parser->model->generator idempotency. 
IOW, read a message 2705 # in, parse it into a message object tree, then without touching the tree, 2706 # regenerate the plain text. The original text and the transformed text 2707 # should be identical. Note: that we ignore the Unix-From since that may 2708 # contain a changed date. 2709 class TestIdempotent(TestEmailBase): 2710 2711 linesep = '\n' 2712 2713 def _msgobj(self, filename): 2714 with openfile(filename) as fp: 2715 data = fp.read() 2716 msg = email.message_from_string(data) 2717 return msg, data 2718 2719 def _idempotent(self, msg, text, unixfrom=False): 2720 eq = self.ndiffAssertEqual 2721 s = StringIO() 2722 g = Generator(s, maxheaderlen=0) 2723 g.flatten(msg, unixfrom=unixfrom) 2724 eq(text, s.getvalue()) 2725 2726 def test_parse_text_message(self): 2727 eq = self.assertEqual 2728 msg, text = self._msgobj('msg_01.txt') 2729 eq(msg.get_content_type(), 'text/plain') 2730 eq(msg.get_content_maintype(), 'text') 2731 eq(msg.get_content_subtype(), 'plain') 2732 eq(msg.get_params()[1], ('charset', 'us-ascii')) 2733 eq(msg.get_param('charset'), 'us-ascii') 2734 eq(msg.preamble, None) 2735 eq(msg.epilogue, None) 2736 self._idempotent(msg, text) 2737 2738 def test_parse_untyped_message(self): 2739 eq = self.assertEqual 2740 msg, text = self._msgobj('msg_03.txt') 2741 eq(msg.get_content_type(), 'text/plain') 2742 eq(msg.get_params(), None) 2743 eq(msg.get_param('charset'), None) 2744 self._idempotent(msg, text) 2745 2746 def test_simple_multipart(self): 2747 msg, text = self._msgobj('msg_04.txt') 2748 self._idempotent(msg, text) 2749 2750 def test_MIME_digest(self): 2751 msg, text = self._msgobj('msg_02.txt') 2752 self._idempotent(msg, text) 2753 2754 def test_long_header(self): 2755 msg, text = self._msgobj('msg_27.txt') 2756 self._idempotent(msg, text) 2757 2758 def test_MIME_digest_with_part_headers(self): 2759 msg, text = self._msgobj('msg_28.txt') 2760 self._idempotent(msg, text) 2761 2762 def test_mixed_with_image(self): 2763 msg, text = 
self._msgobj('msg_06.txt') 2764 self._idempotent(msg, text) 2765 2766 def test_multipart_report(self): 2767 msg, text = self._msgobj('msg_05.txt') 2768 self._idempotent(msg, text) 2769 2770 def test_dsn(self): 2771 msg, text = self._msgobj('msg_16.txt') 2772 self._idempotent(msg, text) 2773 2774 def test_preamble_epilogue(self): 2775 msg, text = self._msgobj('msg_21.txt') 2776 self._idempotent(msg, text) 2777 2778 def test_multipart_one_part(self): 2779 msg, text = self._msgobj('msg_23.txt') 2780 self._idempotent(msg, text) 2781 2782 def test_multipart_no_parts(self): 2783 msg, text = self._msgobj('msg_24.txt') 2784 self._idempotent(msg, text) 2785 2786 def test_no_start_boundary(self): 2787 msg, text = self._msgobj('msg_31.txt') 2788 self._idempotent(msg, text) 2789 2790 def test_rfc2231_charset(self): 2791 msg, text = self._msgobj('msg_32.txt') 2792 self._idempotent(msg, text) 2793 2794 def test_more_rfc2231_parameters(self): 2795 msg, text = self._msgobj('msg_33.txt') 2796 self._idempotent(msg, text) 2797 2798 def test_text_plain_in_a_multipart_digest(self): 2799 msg, text = self._msgobj('msg_34.txt') 2800 self._idempotent(msg, text) 2801 2802 def test_nested_multipart_mixeds(self): 2803 msg, text = self._msgobj('msg_12a.txt') 2804 self._idempotent(msg, text) 2805 2806 def test_message_external_body_idempotent(self): 2807 msg, text = self._msgobj('msg_36.txt') 2808 self._idempotent(msg, text) 2809 2810 def test_message_delivery_status(self): 2811 msg, text = self._msgobj('msg_43.txt') 2812 self._idempotent(msg, text, unixfrom=True) 2813 2814 def test_message_signed_idempotent(self): 2815 msg, text = self._msgobj('msg_45.txt') 2816 self._idempotent(msg, text) 2817 2818 def test_content_type(self): 2819 eq = self.assertEqual 2820 # Get a message object and reset the seek pointer for other tests 2821 msg, text = self._msgobj('msg_05.txt') 2822 eq(msg.get_content_type(), 'multipart/report') 2823 # Test the Content-Type: parameters 2824 params = {} 2825 for pk, pv in 
msg.get_params(): 2826 params[pk] = pv 2827 eq(params['report-type'], 'delivery-status') 2828 eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com') 2829 eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep) 2830 eq(msg.epilogue, self.linesep) 2831 eq(len(msg.get_payload()), 3) 2832 # Make sure the subparts are what we expect 2833 msg1 = msg.get_payload(0) 2834 eq(msg1.get_content_type(), 'text/plain') 2835 eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep) 2836 msg2 = msg.get_payload(1) 2837 eq(msg2.get_content_type(), 'text/plain') 2838 eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep) 2839 msg3 = msg.get_payload(2) 2840 eq(msg3.get_content_type(), 'message/rfc822') 2841 self.assertIsInstance(msg3, Message) 2842 payload = msg3.get_payload() 2843 self.assertIsInstance(payload, list) 2844 eq(len(payload), 1) 2845 msg4 = payload[0] 2846 self.assertIsInstance(msg4, Message) 2847 eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep) 2848 2849 def test_parser(self): 2850 eq = self.assertEqual 2851 msg, text = self._msgobj('msg_06.txt') 2852 # Check some of the outer headers 2853 eq(msg.get_content_type(), 'message/rfc822') 2854 # Make sure the payload is a list of exactly one sub-Message, and that 2855 # that submessage has a type of text/plain 2856 payload = msg.get_payload() 2857 self.assertIsInstance(payload, list) 2858 eq(len(payload), 1) 2859 msg1 = payload[0] 2860 self.assertIsInstance(msg1, Message) 2861 eq(msg1.get_content_type(), 'text/plain') 2862 self.assertIsInstance(msg1.get_payload(), str) 2863 eq(msg1.get_payload(), self.linesep) 2864 2865 2866 2867 # Test various other bits of the package's functionality 2868 class TestMiscellaneous(TestEmailBase): 2869 def test_message_from_string(self): 2870 with openfile('msg_01.txt') as fp: 2871 text = fp.read() 2872 msg = email.message_from_string(text) 2873 s = StringIO() 2874 # Don't wrap/continue long headers since we're trying to test 2875 # idempotency. 
2876 g = Generator(s, maxheaderlen=0) 2877 g.flatten(msg) 2878 self.assertEqual(text, s.getvalue()) 2879 2880 def test_message_from_file(self): 2881 with openfile('msg_01.txt') as fp: 2882 text = fp.read() 2883 fp.seek(0) 2884 msg = email.message_from_file(fp) 2885 s = StringIO() 2886 # Don't wrap/continue long headers since we're trying to test 2887 # idempotency. 2888 g = Generator(s, maxheaderlen=0) 2889 g.flatten(msg) 2890 self.assertEqual(text, s.getvalue()) 2891 2892 def test_message_from_string_with_class(self): 2893 with openfile('msg_01.txt') as fp: 2894 text = fp.read() 2895 2896 # Create a subclass 2897 class MyMessage(Message): 2898 pass 2899 2900 msg = email.message_from_string(text, MyMessage) 2901 self.assertIsInstance(msg, MyMessage) 2902 # Try something more complicated 2903 with openfile('msg_02.txt') as fp: 2904 text = fp.read() 2905 msg = email.message_from_string(text, MyMessage) 2906 for subpart in msg.walk(): 2907 self.assertIsInstance(subpart, MyMessage) 2908 2909 def test_message_from_file_with_class(self): 2910 # Create a subclass 2911 class MyMessage(Message): 2912 pass 2913 2914 with openfile('msg_01.txt') as fp: 2915 msg = email.message_from_file(fp, MyMessage) 2916 self.assertIsInstance(msg, MyMessage) 2917 # Try something more complicated 2918 with openfile('msg_02.txt') as fp: 2919 msg = email.message_from_file(fp, MyMessage) 2920 for subpart in msg.walk(): 2921 self.assertIsInstance(subpart, MyMessage) 2922 2923 def test_custom_message_does_not_require_arguments(self): 2924 class MyMessage(Message): 2925 def __init__(self): 2926 super().__init__() 2927 msg = self._str_msg("Subject: test\n\ntest", MyMessage) 2928 self.assertIsInstance(msg, MyMessage) 2929 2930 def test__all__(self): 2931 module = __import__('email') 2932 self.assertEqual(sorted(module.__all__), [ 2933 'base64mime', 'charset', 'encoders', 'errors', 'feedparser', 2934 'generator', 'header', 'iterators', 'message', 2935 'message_from_binary_file', 'message_from_bytes', 
2936 'message_from_file', 'message_from_string', 'mime', 'parser', 2937 'quoprimime', 'utils', 2938 ]) 2939 2940 def test_formatdate(self): 2941 now = time.time() 2942 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6], 2943 time.gmtime(now)[:6]) 2944 2945 def test_formatdate_localtime(self): 2946 now = time.time() 2947 self.assertEqual( 2948 utils.parsedate(utils.formatdate(now, localtime=True))[:6], 2949 time.localtime(now)[:6]) 2950 2951 def test_formatdate_usegmt(self): 2952 now = time.time() 2953 self.assertEqual( 2954 utils.formatdate(now, localtime=False), 2955 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) 2956 self.assertEqual( 2957 utils.formatdate(now, localtime=False, usegmt=True), 2958 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) 2959 2960 # parsedate and parsedate_tz will become deprecated interfaces someday 2961 def test_parsedate_returns_None_for_invalid_strings(self): 2962 self.assertIsNone(utils.parsedate('')) 2963 self.assertIsNone(utils.parsedate_tz('')) 2964 self.assertIsNone(utils.parsedate('0')) 2965 self.assertIsNone(utils.parsedate_tz('0')) 2966 self.assertIsNone(utils.parsedate('A Complete Waste of Time')) 2967 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time')) 2968 # Not a part of the spec but, but this has historically worked: 2969 self.assertIsNone(utils.parsedate(None)) 2970 self.assertIsNone(utils.parsedate_tz(None)) 2971 2972 def test_parsedate_compact(self): 2973 # The FWS after the comma is optional 2974 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), 2975 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')) 2976 2977 def test_parsedate_no_dayofweek(self): 2978 eq = self.assertEqual 2979 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), 2980 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)) 2981 2982 def test_parsedate_compact_no_dayofweek(self): 2983 eq = self.assertEqual 2984 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), 2985 (2003, 2, 5, 13, 47, 26, 
0, 1, -1, -28800)) 2986 2987 def test_parsedate_no_space_before_positive_offset(self): 2988 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'), 2989 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 2990 2991 def test_parsedate_no_space_before_negative_offset(self): 2992 # Issue 1155362: we already handled '+' for this case. 2993 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'), 2994 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)) 2995 2996 2997 def test_parsedate_accepts_time_with_dots(self): 2998 eq = self.assertEqual 2999 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'), 3000 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3001 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'), 3002 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800)) 3003 3004 def test_parsedate_acceptable_to_time_functions(self): 3005 eq = self.assertEqual 3006 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') 3007 t = int(time.mktime(timetup)) 3008 eq(time.localtime(t)[:6], timetup[:6]) 3009 eq(int(time.strftime('%Y', timetup)), 2003) 3010 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') 3011 t = int(time.mktime(timetup[:9])) 3012 eq(time.localtime(t)[:6], timetup[:6]) 3013 eq(int(time.strftime('%Y', timetup[:9])), 2003) 3014 3015 def test_mktime_tz(self): 3016 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3017 -1, -1, -1, 0)), 0) 3018 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3019 -1, -1, -1, 1234)), -1234) 3020 3021 def test_parsedate_y2k(self): 3022 """Test for parsing a date with a two-digit year. 3023 3024 Parsing a date with a two-digit year should return the correct 3025 four-digit year. RFC822 allows two-digit years, but RFC2822 (which 3026 obsoletes RFC822) requires four-digit years. 
3027 3028 """ 3029 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), 3030 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) 3031 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), 3032 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) 3033 3034 def test_parseaddr_empty(self): 3035 self.assertEqual(utils.parseaddr('<>'), ('', '')) 3036 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') 3037 3038 def test_noquote_dump(self): 3039 self.assertEqual( 3040 utils.formataddr(('A Silly Person', 'person (at] dom.ain')), 3041 'A Silly Person <person (at] dom.ain>') 3042 3043 def test_escape_dump(self): 3044 self.assertEqual( 3045 utils.formataddr(('A (Very) Silly Person', 'person (at] dom.ain')), 3046 r'"A (Very) Silly Person" <person (at] dom.ain>') 3047 self.assertEqual( 3048 utils.parseaddr(r'"A \(Very\) Silly Person" <person (at] dom.ain>'), 3049 ('A (Very) Silly Person', 'person (at] dom.ain')) 3050 a = r'A \(Special\) Person' 3051 b = 'person (at] dom.ain' 3052 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3053 3054 def test_escape_backslashes(self): 3055 self.assertEqual( 3056 utils.formataddr((r'Arthur \Backslash\ Foobar', 'person (at] dom.ain')), 3057 r'"Arthur \\Backslash\\ Foobar" <person (at] dom.ain>') 3058 a = r'Arthur \Backslash\ Foobar' 3059 b = 'person (at] dom.ain' 3060 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3061 3062 def test_quotes_unicode_names(self): 3063 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3064 name = "H\u00e4ns W\u00fcrst" 3065 addr = 'person (at] dom.ain' 3066 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person (at] dom.ain>" 3067 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person (at] dom.ain>" 3068 self.assertEqual(utils.formataddr((name, addr)), utf8_base64) 3069 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'), 3070 latin1_quopri) 3071 3072 def test_accepts_any_charset_like_object(self): 3073 # issue 1690608. 
email.utils.formataddr() should be rfc2047 aware. 3074 name = "H\u00e4ns W\u00fcrst" 3075 addr = 'person (at] dom.ain' 3076 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person (at] dom.ain>" 3077 foobar = "FOOBAR" 3078 class CharsetMock: 3079 def header_encode(self, string): 3080 return foobar 3081 mock = CharsetMock() 3082 mock_expected = "%s <%s>" % (foobar, addr) 3083 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected) 3084 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')), 3085 utf8_base64) 3086 3087 def test_invalid_charset_like_object_raises_error(self): 3088 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3089 name = "H\u00e4ns W\u00fcrst" 3090 addr = 'person (at] dom.ain' 3091 # An object without a header_encode method: 3092 bad_charset = object() 3093 self.assertRaises(AttributeError, utils.formataddr, (name, addr), 3094 bad_charset) 3095 3096 def test_unicode_address_raises_error(self): 3097 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3098 addr = 'pers\u00f6n (at] dom.in' 3099 self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) 3100 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) 3101 3102 def test_name_with_dot(self): 3103 x = 'John X. Doe <jxd (at] example.com>' 3104 y = '"John X. Doe" <jxd (at] example.com>' 3105 a, b = ('John X. Doe', 'jxd (at] example.com') 3106 self.assertEqual(utils.parseaddr(x), (a, b)) 3107 self.assertEqual(utils.parseaddr(y), (a, b)) 3108 # formataddr() quotes the name if there's a dot in it 3109 self.assertEqual(utils.formataddr((a, b)), y) 3110 3111 def test_parseaddr_preserves_quoted_pairs_in_addresses(self): 3112 # issue 10005. 
Note that in the third test the second pair of 3113 # backslashes is not actually a quoted pair because it is not inside a 3114 # comment or quoted string: the address being parsed has a quoted 3115 # string containing a quoted backslash, followed by 'example' and two 3116 # backslashes, followed by another quoted string containing a space and 3117 # the word 'example'. parseaddr copies those two backslashes 3118 # literally. Per rfc5322 this is not technically correct since a \ may 3119 # not appear in an address outside of a quoted string. It is probably 3120 # a sensible Postel interpretation, though. 3121 eq = self.assertEqual 3122 eq(utils.parseaddr('""example" example"@example.com'), 3123 ('', '""example" example"@example.com')) 3124 eq(utils.parseaddr('"\\"example\\" example"@example.com'), 3125 ('', '"\\"example\\" example"@example.com')) 3126 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), 3127 ('', '"\\\\"example\\\\" example"@example.com')) 3128 3129 def test_parseaddr_preserves_spaces_in_local_part(self): 3130 # issue 9286. A normal RFC5322 local part should not contain any 3131 # folding white space, but legacy local parts can (they are a sequence 3132 # of atoms, not dotatoms). On the other hand we strip whitespace from 3133 # before the @ and around dots, on the assumption that the whitespace 3134 # around the punctuation is a mistake in what would otherwise be 3135 # an RFC5322 local part. Leading whitespace is, usual, stripped as well. 
3136 self.assertEqual(('', "merwok wok (at] xample.com"), 3137 utils.parseaddr("merwok wok (at] xample.com")) 3138 self.assertEqual(('', "merwok wok (at] xample.com"), 3139 utils.parseaddr("merwok wok (at] xample.com")) 3140 self.assertEqual(('', "merwok wok (at] xample.com"), 3141 utils.parseaddr(" merwok wok @xample.com")) 3142 self.assertEqual(('', 'merwok"wok" wok (at] xample.com'), 3143 utils.parseaddr('merwok"wok" wok (at] xample.com')) 3144 self.assertEqual(('', 'merwok.wok.wok (at] xample.com'), 3145 utils.parseaddr('merwok. wok . wok (at] xample.com')) 3146 3147 def test_formataddr_does_not_quote_parens_in_quoted_string(self): 3148 addr = ("'foo (at] example.com' (foo (at] example.com)", 3149 'foo (at] example.com') 3150 addrstr = ('"\'foo (at] example.com\' ' 3151 '(foo (at] example.com)" <foo (at] example.com>') 3152 self.assertEqual(utils.parseaddr(addrstr), addr) 3153 self.assertEqual(utils.formataddr(addr), addrstr) 3154 3155 3156 def test_multiline_from_comment(self): 3157 x = """\ 3158 Foo 3159 \tBar <foo (at] example.com>""" 3160 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo (at] example.com')) 3161 3162 def test_quote_dump(self): 3163 self.assertEqual( 3164 utils.formataddr(('A Silly; Person', 'person (at] dom.ain')), 3165 r'"A Silly; Person" <person (at] dom.ain>') 3166 3167 def test_charset_richcomparisons(self): 3168 eq = self.assertEqual 3169 ne = self.assertNotEqual 3170 cset1 = Charset() 3171 cset2 = Charset() 3172 eq(cset1, 'us-ascii') 3173 eq(cset1, 'US-ASCII') 3174 eq(cset1, 'Us-AsCiI') 3175 eq('us-ascii', cset1) 3176 eq('US-ASCII', cset1) 3177 eq('Us-AsCiI', cset1) 3178 ne(cset1, 'usascii') 3179 ne(cset1, 'USASCII') 3180 ne(cset1, 'UsAsCiI') 3181 ne('usascii', cset1) 3182 ne('USASCII', cset1) 3183 ne('UsAsCiI', cset1) 3184 eq(cset1, cset2) 3185 eq(cset2, cset1) 3186 3187 def test_getaddresses(self): 3188 eq = self.assertEqual 3189 eq(utils.getaddresses(['aperson (at] dom.ain (Al Person)', 3190 'Bud Person <bperson (at] 
dom.ain>']), 3191 [('Al Person', 'aperson (at] dom.ain'), 3192 ('Bud Person', 'bperson (at] dom.ain')]) 3193 3194 def test_getaddresses_nasty(self): 3195 eq = self.assertEqual 3196 eq(utils.getaddresses(['foo: ;']), [('', '')]) 3197 eq(utils.getaddresses( 3198 ['[]*-- =~$']), 3199 [('', ''), ('', ''), ('', '*--')]) 3200 eq(utils.getaddresses( 3201 ['foo: ;', '"Jason R. Mastaler" <jason (at] dom.ain>']), 3202 [('', ''), ('Jason R. Mastaler', 'jason (at] dom.ain')]) 3203 3204 def test_getaddresses_embedded_comment(self): 3205 """Test proper handling of a nested comment""" 3206 eq = self.assertEqual 3207 addrs = utils.getaddresses(['User ((nested comment)) <foo (at] bar.com>']) 3208 eq(addrs[0][1], 'foo (at] bar.com') 3209 3210 def test_make_msgid_collisions(self): 3211 # Test make_msgid uniqueness, even with multiple threads 3212 class MsgidsThread(Thread): 3213 def run(self): 3214 # generate msgids for 3 seconds 3215 self.msgids = [] 3216 append = self.msgids.append 3217 make_msgid = utils.make_msgid 3218 clock = time.monotonic 3219 tfin = clock() + 3.0 3220 while clock() < tfin: 3221 append(make_msgid(domain='testdomain-string')) 3222 3223 threads = [MsgidsThread() for i in range(5)] 3224 with start_threads(threads): 3225 pass 3226 all_ids = sum([t.msgids for t in threads], []) 3227 self.assertEqual(len(set(all_ids)), len(all_ids)) 3228 3229 def test_utils_quote_unquote(self): 3230 eq = self.assertEqual 3231 msg = Message() 3232 msg.add_header('content-disposition', 'attachment', 3233 filename='foo\\wacky"name') 3234 eq(msg.get_filename(), 'foo\\wacky"name') 3235 3236 def test_get_body_encoding_with_bogus_charset(self): 3237 charset = Charset('not a charset') 3238 self.assertEqual(charset.get_body_encoding(), 'base64') 3239 3240 def test_get_body_encoding_with_uppercase_charset(self): 3241 eq = self.assertEqual 3242 msg = Message() 3243 msg['Content-Type'] = 'text/plain; charset=UTF-8' 3244 eq(msg['content-type'], 'text/plain; charset=UTF-8') 3245 charsets = 
msg.get_charsets() 3246 eq(len(charsets), 1) 3247 eq(charsets[0], 'utf-8') 3248 charset = Charset(charsets[0]) 3249 eq(charset.get_body_encoding(), 'base64') 3250 msg.set_payload(b'hello world', charset=charset) 3251 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') 3252 eq(msg.get_payload(decode=True), b'hello world') 3253 eq(msg['content-transfer-encoding'], 'base64') 3254 # Try another one 3255 msg = Message() 3256 msg['Content-Type'] = 'text/plain; charset="US-ASCII"' 3257 charsets = msg.get_charsets() 3258 eq(len(charsets), 1) 3259 eq(charsets[0], 'us-ascii') 3260 charset = Charset(charsets[0]) 3261 eq(charset.get_body_encoding(), encoders.encode_7or8bit) 3262 msg.set_payload('hello world', charset=charset) 3263 eq(msg.get_payload(), 'hello world') 3264 eq(msg['content-transfer-encoding'], '7bit') 3265 3266 def test_charsets_case_insensitive(self): 3267 lc = Charset('us-ascii') 3268 uc = Charset('US-ASCII') 3269 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) 3270 3271 def test_partial_falls_inside_message_delivery_status(self): 3272 eq = self.ndiffAssertEqual 3273 # The Parser interface provides chunks of data to FeedParser in 8192 3274 # byte gulps. SF bug #1076485 found one of those chunks inside 3275 # message/delivery-status header block, which triggered an 3276 # unreadline() of NeedMoreData. 
3277 msg = self._msgobj('msg_43.txt') 3278 sfp = StringIO() 3279 iterators._structure(msg, sfp) 3280 eq(sfp.getvalue(), """\ 3281 multipart/report 3282 text/plain 3283 message/delivery-status 3284 text/plain 3285 text/plain 3286 text/plain 3287 text/plain 3288 text/plain 3289 text/plain 3290 text/plain 3291 text/plain 3292 text/plain 3293 text/plain 3294 text/plain 3295 text/plain 3296 text/plain 3297 text/plain 3298 text/plain 3299 text/plain 3300 text/plain 3301 text/plain 3302 text/plain 3303 text/plain 3304 text/plain 3305 text/plain 3306 text/plain 3307 text/plain 3308 text/plain 3309 text/plain 3310 text/rfc822-headers 3311 """) 3312 3313 def test_make_msgid_domain(self): 3314 self.assertEqual( 3315 email.utils.make_msgid(domain='testdomain-string')[-19:], 3316 '@testdomain-string>') 3317 3318 def test_make_msgid_idstring(self): 3319 self.assertEqual( 3320 email.utils.make_msgid(idstring='test-idstring', 3321 domain='testdomain-string')[-33:], 3322 '.test-idstring@testdomain-string>') 3323 3324 def test_make_msgid_default_domain(self): 3325 self.assertTrue( 3326 email.utils.make_msgid().endswith( 3327 '@' + getfqdn() + '>')) 3328 3329 def test_Generator_linend(self): 3330 # Issue 14645. 3331 with openfile('msg_26.txt', newline='\n') as f: 3332 msgtxt = f.read() 3333 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3334 msg = email.message_from_string(msgtxt) 3335 s = StringIO() 3336 g = email.generator.Generator(s) 3337 g.flatten(msg) 3338 self.assertEqual(s.getvalue(), msgtxt_nl) 3339 3340 def test_BytesGenerator_linend(self): 3341 # Issue 14645. 3342 with openfile('msg_26.txt', newline='\n') as f: 3343 msgtxt = f.read() 3344 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3345 msg = email.message_from_string(msgtxt_nl) 3346 s = BytesIO() 3347 g = email.generator.BytesGenerator(s) 3348 g.flatten(msg, linesep='\r\n') 3349 self.assertEqual(s.getvalue().decode('ascii'), msgtxt) 3350 3351 def test_BytesGenerator_linend_with_non_ascii(self): 3352 # Issue 14645. 
3353 with openfile('msg_26.txt', 'rb') as f: 3354 msgtxt = f.read() 3355 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6') 3356 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n') 3357 msg = email.message_from_bytes(msgtxt_nl) 3358 s = BytesIO() 3359 g = email.generator.BytesGenerator(s) 3360 g.flatten(msg, linesep='\r\n') 3361 self.assertEqual(s.getvalue(), msgtxt) 3362 3363 def test_mime_classes_policy_argument(self): 3364 with openfile('audiotest.au', 'rb') as fp: 3365 audiodata = fp.read() 3366 with openfile('PyBanner048.gif', 'rb') as fp: 3367 bindata = fp.read() 3368 classes = [ 3369 (MIMEApplication, ('',)), 3370 (MIMEAudio, (audiodata,)), 3371 (MIMEImage, (bindata,)), 3372 (MIMEMessage, (Message(),)), 3373 (MIMENonMultipart, ('multipart', 'mixed')), 3374 (MIMEText, ('',)), 3375 ] 3376 for cls, constructor in classes: 3377 with self.subTest(cls=cls.__name__, policy='compat32'): 3378 m = cls(*constructor) 3379 self.assertIs(m.policy, email.policy.compat32) 3380 with self.subTest(cls=cls.__name__, policy='default'): 3381 m = cls(*constructor, policy=email.policy.default) 3382 self.assertIs(m.policy, email.policy.default) 3383 3384 3385 # Test the iterator/generators 3386 class TestIterators(TestEmailBase): 3387 def test_body_line_iterator(self): 3388 eq = self.assertEqual 3389 neq = self.ndiffAssertEqual 3390 # First a simple non-multipart message 3391 msg = self._msgobj('msg_01.txt') 3392 it = iterators.body_line_iterator(msg) 3393 lines = list(it) 3394 eq(len(lines), 6) 3395 neq(EMPTYSTRING.join(lines), msg.get_payload()) 3396 # Now a more complicated multipart 3397 msg = self._msgobj('msg_02.txt') 3398 it = iterators.body_line_iterator(msg) 3399 lines = list(it) 3400 eq(len(lines), 43) 3401 with openfile('msg_19.txt') as fp: 3402 neq(EMPTYSTRING.join(lines), fp.read()) 3403 3404 def test_typed_subpart_iterator(self): 3405 eq = self.assertEqual 3406 msg = self._msgobj('msg_04.txt') 3407 it = iterators.typed_subpart_iterator(msg, 'text') 3408 lines = [] 3409 
subparts = 0 3410 for subpart in it: 3411 subparts += 1 3412 lines.append(subpart.get_payload()) 3413 eq(subparts, 2) 3414 eq(EMPTYSTRING.join(lines), """\ 3415 a simple kind of mirror 3416 to reflect upon our own 3417 a simple kind of mirror 3418 to reflect upon our own 3419 """) 3420 3421 def test_typed_subpart_iterator_default_type(self): 3422 eq = self.assertEqual 3423 msg = self._msgobj('msg_03.txt') 3424 it = iterators.typed_subpart_iterator(msg, 'text', 'plain') 3425 lines = [] 3426 subparts = 0 3427 for subpart in it: 3428 subparts += 1 3429 lines.append(subpart.get_payload()) 3430 eq(subparts, 1) 3431 eq(EMPTYSTRING.join(lines), """\ 3432 3433 Hi, 3434 3435 Do you like this message? 3436 3437 -Me 3438 """) 3439 3440 def test_pushCR_LF(self): 3441 '''FeedParser BufferedSubFile.push() assumed it received complete 3442 line endings. A CR ending one push() followed by a LF starting 3443 the next push() added an empty line. 3444 ''' 3445 imt = [ 3446 ("a\r \n", 2), 3447 ("b", 0), 3448 ("c\n", 1), 3449 ("", 0), 3450 ("d\r\n", 1), 3451 ("e\r", 0), 3452 ("\nf", 1), 3453 ("\r\n", 1), 3454 ] 3455 from email.feedparser import BufferedSubFile, NeedMoreData 3456 bsf = BufferedSubFile() 3457 om = [] 3458 nt = 0 3459 for il, n in imt: 3460 bsf.push(il) 3461 nt += n 3462 n1 = 0 3463 for ol in iter(bsf.readline, NeedMoreData): 3464 om.append(ol) 3465 n1 += 1 3466 self.assertEqual(n, n1) 3467 self.assertEqual(len(om), nt) 3468 self.assertEqual(''.join([il for il, n in imt]), ''.join(om)) 3469 3470 def test_push_random(self): 3471 from email.feedparser import BufferedSubFile, NeedMoreData 3472 3473 n = 10000 3474 chunksize = 5 3475 chars = 'abcd \t\r\n' 3476 3477 s = ''.join(choice(chars) for i in range(n)) + '\n' 3478 target = s.splitlines(True) 3479 3480 bsf = BufferedSubFile() 3481 lines = [] 3482 for i in range(0, len(s), chunksize): 3483 chunk = s[i:i+chunksize] 3484 bsf.push(chunk) 3485 lines.extend(iter(bsf.readline, NeedMoreData)) 3486 self.assertEqual(lines, target) 
3487 3488 3489 class TestFeedParsers(TestEmailBase): 3490 3491 def parse(self, chunks): 3492 feedparser = FeedParser() 3493 for chunk in chunks: 3494 feedparser.feed(chunk) 3495 return feedparser.close() 3496 3497 def test_empty_header_name_handled(self): 3498 # Issue 19996 3499 msg = self.parse("First: val\n: bad\nSecond: val") 3500 self.assertEqual(msg['First'], 'val') 3501 self.assertEqual(msg['Second'], 'val') 3502 3503 def test_newlines(self): 3504 m = self.parse(['a:\nb:\rc:\r\nd:\n']) 3505 self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) 3506 m = self.parse(['a:\nb:\rc:\r\nd:']) 3507 self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) 3508 m = self.parse(['a:\rb', 'c:\n']) 3509 self.assertEqual(m.keys(), ['a', 'bc']) 3510 m = self.parse(['a:\r', 'b:\n']) 3511 self.assertEqual(m.keys(), ['a', 'b']) 3512 m = self.parse(['a:\r', '\nb:\n']) 3513 self.assertEqual(m.keys(), ['a', 'b']) 3514 3515 # Only CR and LF should break header fields 3516 m = self.parse(['a:\x85b:\u2028c:\n']) 3517 self.assertEqual(m.items(), [('a', '\x85b:\u2028c:')]) 3518 m = self.parse(['a:\r', 'b:\x85', 'c:\n']) 3519 self.assertEqual(m.items(), [('a', ''), ('b', '\x85c:')]) 3520 3521 def test_long_lines(self): 3522 # Expected peak memory use on 32-bit platform: 6*N*M bytes. 
3523 M, N = 1000, 20000 3524 m = self.parse(['a:b\n\n'] + ['x'*M] * N) 3525 self.assertEqual(m.items(), [('a', 'b')]) 3526 self.assertEqual(m.get_payload(), 'x'*M*N) 3527 m = self.parse(['a:b\r\r'] + ['x'*M] * N) 3528 self.assertEqual(m.items(), [('a', 'b')]) 3529 self.assertEqual(m.get_payload(), 'x'*M*N) 3530 m = self.parse(['a:b\r\r'] + ['x'*M+'\x85'] * N) 3531 self.assertEqual(m.items(), [('a', 'b')]) 3532 self.assertEqual(m.get_payload(), ('x'*M+'\x85')*N) 3533 m = self.parse(['a:\r', 'b: '] + ['x'*M] * N) 3534 self.assertEqual(m.items(), [('a', ''), ('b', 'x'*M*N)]) 3535 3536 3537 class TestParsers(TestEmailBase): 3538 3539 def test_header_parser(self): 3540 eq = self.assertEqual 3541 # Parse only the headers of a complex multipart MIME document 3542 with openfile('msg_02.txt') as fp: 3543 msg = HeaderParser().parse(fp) 3544 eq(msg['from'], 'ppp-request (at] zzz.org') 3545 eq(msg['to'], 'ppp (at] zzz.org') 3546 eq(msg.get_content_type(), 'multipart/mixed') 3547 self.assertFalse(msg.is_multipart()) 3548 self.assertIsInstance(msg.get_payload(), str) 3549 3550 def test_bytes_header_parser(self): 3551 eq = self.assertEqual 3552 # Parse only the headers of a complex multipart MIME document 3553 with openfile('msg_02.txt', 'rb') as fp: 3554 msg = email.parser.BytesHeaderParser().parse(fp) 3555 eq(msg['from'], 'ppp-request (at] zzz.org') 3556 eq(msg['to'], 'ppp (at] zzz.org') 3557 eq(msg.get_content_type(), 'multipart/mixed') 3558 self.assertFalse(msg.is_multipart()) 3559 self.assertIsInstance(msg.get_payload(), str) 3560 self.assertIsInstance(msg.get_payload(decode=True), bytes) 3561 3562 def test_bytes_parser_does_not_close_file(self): 3563 with openfile('msg_02.txt', 'rb') as fp: 3564 email.parser.BytesParser().parse(fp) 3565 self.assertFalse(fp.closed) 3566 3567 def test_bytes_parser_on_exception_does_not_close_file(self): 3568 with openfile('msg_15.txt', 'rb') as fp: 3569 bytesParser = email.parser.BytesParser 3570 
self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3571 bytesParser(policy=email.policy.strict).parse, 3572 fp) 3573 self.assertFalse(fp.closed) 3574 3575 def test_parser_does_not_close_file(self): 3576 with openfile('msg_02.txt', 'r') as fp: 3577 email.parser.Parser().parse(fp) 3578 self.assertFalse(fp.closed) 3579 3580 def test_parser_on_exception_does_not_close_file(self): 3581 with openfile('msg_15.txt', 'r') as fp: 3582 parser = email.parser.Parser 3583 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3584 parser(policy=email.policy.strict).parse, fp) 3585 self.assertFalse(fp.closed) 3586 3587 def test_whitespace_continuation(self): 3588 eq = self.assertEqual 3589 # This message contains a line after the Subject: header that has only 3590 # whitespace, but it is not empty! 3591 msg = email.message_from_string("""\ 3592 From: aperson (at] dom.ain 3593 To: bperson (at] dom.ain 3594 Subject: the next line has a space on it 3595 \x20 3596 Date: Mon, 8 Apr 2002 15:09:19 -0400 3597 Message-ID: spam 3598 3599 Here's the message body 3600 """) 3601 eq(msg['subject'], 'the next line has a space on it\n ') 3602 eq(msg['message-id'], 'spam') 3603 eq(msg.get_payload(), "Here's the message body\n") 3604 3605 def test_whitespace_continuation_last_header(self): 3606 eq = self.assertEqual 3607 # Like the previous test, but the subject line is the last 3608 # header. 
def test_crlf_flatten(self):
    # Using newline='\n' preserves the crlfs in this input file.
    with openfile('msg_26.txt', newline='\n') as source:
        original = source.read()
    buffer = StringIO()
    Generator(buffer).flatten(email.message_from_string(original),
                              linesep='\r\n')
    # Writing the parsed message back with CRLF separators has to give
    # exactly the text that was read.
    self.assertEqual(buffer.getvalue(), original)
eq(part2.get_content_type(), 'message/rfc822') 3670 eq(part2.is_multipart(), 1) 3671 eq(len(part2.get_payload()), 1) 3672 part2a = part2.get_payload(0) 3673 eq(part2a.is_multipart(), 0) 3674 eq(part2a.get_content_type(), 'text/plain') 3675 neq(part2a.get_payload(), 'message 2\n') 3676 3677 def test_three_lines(self): 3678 # A bug report by Andrew McNamara 3679 lines = ['From: Andrew Person <aperson (at] dom.ain', 3680 'Subject: Test', 3681 'Date: Tue, 20 Aug 2002 16:43:45 +1000'] 3682 msg = email.message_from_string(NL.join(lines)) 3683 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000') 3684 3685 def test_strip_line_feed_and_carriage_return_in_headers(self): 3686 eq = self.assertEqual 3687 # For [ 1002475 ] email message parser doesn't handle \r\n correctly 3688 value1 = 'text' 3689 value2 = 'more text' 3690 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % ( 3691 value1, value2) 3692 msg = email.message_from_string(m) 3693 eq(msg.get('Header'), value1) 3694 eq(msg.get('Next-Header'), value2) 3695 3696 def test_rfc2822_header_syntax(self): 3697 eq = self.assertEqual 3698 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3699 msg = email.message_from_string(m) 3700 eq(len(msg), 3) 3701 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From']) 3702 eq(msg.get_payload(), 'body') 3703 3704 def test_rfc2822_space_not_allowed_in_header(self): 3705 eq = self.assertEqual 3706 m = '>From foo (at] example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3707 msg = email.message_from_string(m) 3708 eq(len(msg.keys()), 0) 3709 3710 def test_rfc2822_one_character_header(self): 3711 eq = self.assertEqual 3712 m = 'A: first header\nB: second header\nCC: third header\n\nbody' 3713 msg = email.message_from_string(m) 3714 headers = msg.keys() 3715 headers.sort() 3716 eq(headers, ['A', 'B', 'CC']) 3717 eq(msg.get_payload(), 'body') 3718 3719 def test_CRLFLF_at_end_of_part(self): 3720 # issue 5610: feedparser should not eat two chars from body part ending 
3721 # with "\r\n\n". 3722 m = ( 3723 "From: foo (at] bar.com\n" 3724 "To: baz\n" 3725 "Mime-Version: 1.0\n" 3726 "Content-Type: multipart/mixed; boundary=BOUNDARY\n" 3727 "\n" 3728 "--BOUNDARY\n" 3729 "Content-Type: text/plain\n" 3730 "\n" 3731 "body ending with CRLF newline\r\n" 3732 "\n" 3733 "--BOUNDARY--\n" 3734 ) 3735 msg = email.message_from_string(m) 3736 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) 3737 3738 3739 class Test8BitBytesHandling(TestEmailBase): 3740 # In Python3 all input is string, but that doesn't work if the actual input 3741 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we 3742 # decode byte streams using the surrogateescape error handler, and 3743 # reconvert to binary at appropriate places if we detect surrogates. This 3744 # doesn't allow us to transform headers with 8bit bytes (they get munged), 3745 # but it does allow us to parse and preserve them, and to decode body 3746 # parts that use an 8bit CTE. 3747 3748 bodytest_msg = textwrap.dedent("""\ 3749 From: foo (at] bar.com 3750 To: baz 3751 Mime-Version: 1.0 3752 Content-Type: text/plain; charset={charset} 3753 Content-Transfer-Encoding: {cte} 3754 3755 {bodyline} 3756 """) 3757 3758 def test_known_8bit_CTE(self): 3759 m = self.bodytest_msg.format(charset='utf-8', 3760 cte='8bit', 3761 bodyline='pstal').encode('utf-8') 3762 msg = email.message_from_bytes(m) 3763 self.assertEqual(msg.get_payload(), "pstal\n") 3764 self.assertEqual(msg.get_payload(decode=True), 3765 "pstal\n".encode('utf-8')) 3766 3767 def test_unknown_8bit_CTE(self): 3768 m = self.bodytest_msg.format(charset='notavalidcharset', 3769 cte='8bit', 3770 bodyline='pstal').encode('utf-8') 3771 msg = email.message_from_bytes(m) 3772 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n") 3773 self.assertEqual(msg.get_payload(decode=True), 3774 "pstal\n".encode('utf-8')) 3775 3776 def test_8bit_in_quopri_body(self): 3777 # This is non-RFC compliant data...without 'decode' the 
library code 3778 # decodes the body using the charset from the headers, and because the 3779 # source byte really is utf-8 this works. This is likely to fail 3780 # against real dirty data (ie: produce mojibake), but the data is 3781 # invalid anyway so it is as good a guess as any. But this means that 3782 # this test just confirms the current behavior; that behavior is not 3783 # necessarily the best possible behavior. With 'decode' it is 3784 # returning the raw bytes, so that test should be of correct behavior, 3785 # or at least produce the same result that email4 did. 3786 m = self.bodytest_msg.format(charset='utf-8', 3787 cte='quoted-printable', 3788 bodyline='p=C3=B6stl').encode('utf-8') 3789 msg = email.message_from_bytes(m) 3790 self.assertEqual(msg.get_payload(), 'p=C3=B6stl\n') 3791 self.assertEqual(msg.get_payload(decode=True), 3792 'pstl\n'.encode('utf-8')) 3793 3794 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self): 3795 # This is similar to the previous test, but proves that if the 8bit 3796 # byte is undecodeable in the specified charset, it gets replaced 3797 # by the unicode 'unknown' character. Again, this may or may not 3798 # be the ideal behavior. Note that if decode=False none of the 3799 # decoders will get involved, so this is the only test we need 3800 # for this behavior. 3801 m = self.bodytest_msg.format(charset='ascii', 3802 cte='quoted-printable', 3803 bodyline='p=C3=B6stl').encode('utf-8') 3804 msg = email.message_from_bytes(m) 3805 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n') 3806 self.assertEqual(msg.get_payload(decode=True), 3807 'pstl\n'.encode('utf-8')) 3808 3809 # test_defect_handling:test_invalid_chars_in_base64_payload 3810 def test_8bit_in_base64_body(self): 3811 # If we get 8bit bytes in a base64 body, we can just ignore them 3812 # as being outside the base64 alphabet and decode anyway. But 3813 # we register a defect. 
3814 m = self.bodytest_msg.format(charset='utf-8', 3815 cte='base64', 3816 bodyline='cMO2c3RhbA=').encode('utf-8') 3817 msg = email.message_from_bytes(m) 3818 self.assertEqual(msg.get_payload(decode=True), 3819 'pstal'.encode('utf-8')) 3820 self.assertIsInstance(msg.defects[0], 3821 errors.InvalidBase64CharactersDefect) 3822 3823 def test_8bit_in_uuencode_body(self): 3824 # Sticking an 8bit byte in a uuencode block makes it undecodable by 3825 # normal means, so the block is returned undecoded, but as bytes. 3826 m = self.bodytest_msg.format(charset='utf-8', 3827 cte='uuencode', 3828 bodyline='<,.V<W1A; ').encode('utf-8') 3829 msg = email.message_from_bytes(m) 3830 self.assertEqual(msg.get_payload(decode=True), 3831 '<,.V<W1A; \n'.encode('utf-8')) 3832 3833 3834 headertest_headers = ( 3835 ('From: foo (at] bar.com', ('From', 'foo (at] bar.com')), 3836 ('To: bz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')), 3837 ('Subject: Maintenant je vous prsente mon collgue, le pouf clbre\n' 3838 '\tJean de Baddie', 3839 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 3840 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n' 3841 ' =?unknown-8bit?q?_Jean_de_Baddie?=')), 3842 ('From: gst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')), 3843 ) 3844 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) + 3845 '\nYes, they are flying.\n').encode('utf-8') 3846 3847 def test_get_8bit_header(self): 3848 msg = email.message_from_bytes(self.headertest_msg) 3849 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz') 3850 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz') 3851 3852 def test_print_8bit_headers(self): 3853 msg = email.message_from_bytes(self.headertest_msg) 3854 self.assertEqual(str(msg), 3855 textwrap.dedent("""\ 3856 From: {} 3857 To: {} 3858 Subject: {} 3859 From: {} 3860 3861 Yes, they are flying. 
3862 """).format(*[expected[1] for (_, expected) in 3863 self.headertest_headers])) 3864 3865 def test_values_with_8bit_headers(self): 3866 msg = email.message_from_bytes(self.headertest_msg) 3867 self.assertListEqual([str(x) for x in msg.values()], 3868 ['foo (at] bar.com', 3869 'b\uFFFD\uFFFDz', 3870 'Maintenant je vous pr\uFFFD\uFFFDsente mon ' 3871 'coll\uFFFD\uFFFDgue, le pouf ' 3872 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3873 '\tJean de Baddie', 3874 "g\uFFFD\uFFFDst"]) 3875 3876 def test_items_with_8bit_headers(self): 3877 msg = email.message_from_bytes(self.headertest_msg) 3878 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()], 3879 [('From', 'foo (at] bar.com'), 3880 ('To', 'b\uFFFD\uFFFDz'), 3881 ('Subject', 'Maintenant je vous ' 3882 'pr\uFFFD\uFFFDsente ' 3883 'mon coll\uFFFD\uFFFDgue, le pouf ' 3884 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3885 '\tJean de Baddie'), 3886 ('From', 'g\uFFFD\uFFFDst')]) 3887 3888 def test_get_all_with_8bit_headers(self): 3889 msg = email.message_from_bytes(self.headertest_msg) 3890 self.assertListEqual([str(x) for x in msg.get_all('from')], 3891 ['foo (at] bar.com', 3892 'g\uFFFD\uFFFDst']) 3893 3894 def test_get_content_type_with_8bit(self): 3895 msg = email.message_from_bytes(textwrap.dedent("""\ 3896 Content-Type: text/pl\xA7in; charset=utf-8 3897 """).encode('latin-1')) 3898 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") 3899 self.assertEqual(msg.get_content_maintype(), "text") 3900 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") 3901 3902 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params 3903 def test_get_params_with_8bit(self): 3904 msg = email.message_from_bytes( 3905 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')) 3906 self.assertEqual(msg.get_params(header='x-header'), 3907 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')]) 3908 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne') 3909 # XXX: someday you might be able to get 
# An 8bit, utf-8 encoded message with non-ASCII text in both the headers
# and the body.  The non-ASCII characters are written as escapes so they
# cannot be lost in transit; they match the encoded words and the base64
# body of the 7bit rendering of this same message
# (non_latin_bin_msg_as7bit_wrapped).
non_latin_bin_msg = textwrap.dedent("""\
    From: foo (at] bar.com
    To: b\xe1z
    Subject: Maintenant je vous pr\xe9sente mon coll\xe8gue, le pouf c\xe9l\xe8bre
    \tJean de Baddie
    Mime-Version: 1.0
    Content-Type: text/plain; charset="utf-8"
    Content-Transfer-Encoding: 8bit

    \u0414\u0430, \u043e\u043d\u0438 \u043b\u0435\u0442\u044f\u0442.
    """).encode('utf-8')
# An 8bit latin-1 message.  The two non-ASCII characters in the body are
# written as \xe0 escapes; they are the bytes that appear as =E0 in the
# quoted-printable form below.
latin_bin_msg = textwrap.dedent("""\
    From: foo (at] bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="latin-1"
    Content-Transfer-Encoding: 8bit

    oh l\xe0 l\xe0, know what I mean, know what I mean?
    """).encode('latin-1')

# The same message as it is expected to look once rendered as 7bit text:
# charset spelled iso-8859-1 and the body quoted-printable encoded.
latin_bin_msg_as7bit = textwrap.dedent("""\
    From: foo (at] bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="iso-8859-1"
    Content-Transfer-Encoding: quoted-printable

    oh l=E0 l=E0, know what I mean, know what I mean?
    """)
4100 4101 4102 --b1_76a486bee62b0d200f33dc2ca08220ad 4103 Content-Type: text/html; charset="utf-8" 4104 Content-Transfer-Encoding: 8bit 4105 4106 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 4107 "http://www.w3.org/TR/html4/loose.dtd"> 4108 <html lang="de"> 4109 <head> 4110 <title>foodwatch - Newsletter</title> 4111 </head> 4112 <body> 4113 <p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team 4114 die Nachrichten aus Japan.</p> 4115 </body> 4116 </html> 4117 --b1_76a486bee62b0d200f33dc2ca08220ad-- 4118 4119 """).encode('utf-8') 4120 msg = email.message_from_bytes(source) 4121 s = BytesIO() 4122 g = email.generator.BytesGenerator(s) 4123 g.flatten(msg) 4124 self.assertEqual(s.getvalue(), source) 4125 4126 def test_bytes_generator_b_encoding_linesep(self): 4127 # Issue 14062: b encoding was tacking on an extra \n. 4128 m = Message() 4129 # This has enough non-ascii that it should always end up b encoded. 4130 m['Subject'] = Header('luouk k') 4131 s = BytesIO() 4132 g = email.generator.BytesGenerator(s) 4133 g.flatten(m, linesep='\r\n') 4134 self.assertEqual( 4135 s.getvalue(), 4136 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') 4137 4138 def test_generator_b_encoding_linesep(self): 4139 # Since this broke in ByteGenerator, test Generator for completeness. 4140 m = Message() 4141 # This has enough non-ascii that it should always end up b encoded. 
class TestBase64(unittest.TestCase):
    """Tests for the helper functions of email.base64mime."""

    def test_len(self):
        eq = self.assertEqual
        # header_length() has to agree with the size of a real encoding.
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Expected encoded length for inputs of 0..14 characters: every
        # started group of three characters takes four characters.
        expected_lengths = (
            [0] + [4] * 3 + [8] * 3 + [12] * 3 + [16] * 3 + [20] * 2)
        for size, bsize in enumerate(expected_lengths):
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        body_encode = base64mime.body_encode
        # Empty input gives an empty result.  Only emptiness is checked.
        # NOTE(review): whether the empty value is a str or the bytes that
        # were passed in appears to differ between stdlib releases --
        # confirm before pinning the type again.
        eq(len(body_encode(b'')), 0)
        eq(body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline inside the data is encoded like any other byte.
        eq(body_encode(b'hello\n'), 'aGVsbG8K\n')

        # 100 bytes of input wrapped at 40 output characters: three full
        # lines and a shorter last one.
        data = b'xxxx ' * 20
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        wrapped = [full_line, full_line, full_line, 'eHh4eCB4eHh4IA==']
        # Test the maxlinelen arg
        eq(body_encode(data, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(body_encode(data, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
4258 self.bnon = [c for c in range(256) if c not in self.blit] 4259 assert len(self.blit) + len(self.bnon) == 256 4260 4261 def test_quopri_header_check(self): 4262 for c in self.hlit: 4263 self.assertFalse(quoprimime.header_check(c), 4264 'Should not be header quopri encoded: %s' % chr(c)) 4265 for c in self.hnon: 4266 self.assertTrue(quoprimime.header_check(c), 4267 'Should be header quopri encoded: %s' % chr(c)) 4268 4269 def test_quopri_body_check(self): 4270 for c in self.blit: 4271 self.assertFalse(quoprimime.body_check(c), 4272 'Should not be body quopri encoded: %s' % chr(c)) 4273 for c in self.bnon: 4274 self.assertTrue(quoprimime.body_check(c), 4275 'Should be body quopri encoded: %s' % chr(c)) 4276 4277 def test_header_quopri_len(self): 4278 eq = self.assertEqual 4279 eq(quoprimime.header_length(b'hello'), 5) 4280 # RFC 2047 chrome is not included in header_length(). 4281 eq(len(quoprimime.header_encode(b'hello', charset='xxx')), 4282 quoprimime.header_length(b'hello') + 4283 # =?xxx?q?...?= means 10 extra characters 4284 10) 4285 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20) 4286 # RFC 2047 chrome is not included in header_length(). 
4287 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')), 4288 quoprimime.header_length(b'h@e@l@l@o@') + 4289 # =?xxx?q?...?= means 10 extra characters 4290 10) 4291 for c in self.hlit: 4292 eq(quoprimime.header_length(bytes([c])), 1, 4293 'expected length 1 for %r' % chr(c)) 4294 for c in self.hnon: 4295 # Space is special; it's encoded to _ 4296 if c == ord(' '): 4297 continue 4298 eq(quoprimime.header_length(bytes([c])), 3, 4299 'expected length 3 for %r' % chr(c)) 4300 eq(quoprimime.header_length(b' '), 1) 4301 4302 def test_body_quopri_len(self): 4303 eq = self.assertEqual 4304 for c in self.blit: 4305 eq(quoprimime.body_length(bytes([c])), 1) 4306 for c in self.bnon: 4307 eq(quoprimime.body_length(bytes([c])), 3) 4308 4309 def test_quote_unquote_idempotent(self): 4310 for x in range(256): 4311 c = chr(x) 4312 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c) 4313 4314 def _test_header_encode(self, header, expected_encoded_header, charset=None): 4315 if charset is None: 4316 encoded_header = quoprimime.header_encode(header) 4317 else: 4318 encoded_header = quoprimime.header_encode(header, charset) 4319 self.assertEqual(encoded_header, expected_encoded_header) 4320 4321 def test_header_encode_null(self): 4322 self._test_header_encode(b'', '') 4323 4324 def test_header_encode_one_word(self): 4325 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=') 4326 4327 def test_header_encode_two_lines(self): 4328 self._test_header_encode(b'hello\nworld', 4329 '=?iso-8859-1?q?hello=0Aworld?=') 4330 4331 def test_header_encode_non_ascii(self): 4332 self._test_header_encode(b'hello\xc7there', 4333 '=?iso-8859-1?q?hello=C7there?=') 4334 4335 def test_header_encode_alt_charset(self): 4336 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=', 4337 charset='iso-8859-2') 4338 4339 def _test_header_decode(self, encoded_header, expected_decoded_header): 4340 decoded_header = quoprimime.header_decode(encoded_header) 4341 
self.assertEqual(decoded_header, expected_decoded_header) 4342 4343 def test_header_decode_null(self): 4344 self._test_header_decode('', '') 4345 4346 def test_header_decode_one_word(self): 4347 self._test_header_decode('hello', 'hello') 4348 4349 def test_header_decode_two_lines(self): 4350 self._test_header_decode('hello=0Aworld', 'hello\nworld') 4351 4352 def test_header_decode_non_ascii(self): 4353 self._test_header_decode('hello=C7there', 'hello\xc7there') 4354 4355 def test_header_decode_re_bug_18380(self): 4356 # Issue 18380: Call re.sub with a positional argument for flags in the wrong position 4357 self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257) 4358 4359 def _test_decode(self, encoded, expected_decoded, eol=None): 4360 if eol is None: 4361 decoded = quoprimime.decode(encoded) 4362 else: 4363 decoded = quoprimime.decode(encoded, eol=eol) 4364 self.assertEqual(decoded, expected_decoded) 4365 4366 def test_decode_null_word(self): 4367 self._test_decode('', '') 4368 4369 def test_decode_null_line_null_word(self): 4370 self._test_decode('\r\n', '\n') 4371 4372 def test_decode_one_word(self): 4373 self._test_decode('hello', 'hello') 4374 4375 def test_decode_one_word_eol(self): 4376 self._test_decode('hello', 'hello', eol='X') 4377 4378 def test_decode_one_line(self): 4379 self._test_decode('hello\r\n', 'hello\n') 4380 4381 def test_decode_one_line_lf(self): 4382 self._test_decode('hello\n', 'hello\n') 4383 4384 def test_decode_one_line_cr(self): 4385 self._test_decode('hello\r', 'hello\n') 4386 4387 def test_decode_one_line_nl(self): 4388 self._test_decode('hello\n', 'helloX', eol='X') 4389 4390 def test_decode_one_line_crnl(self): 4391 self._test_decode('hello\r\n', 'helloX', eol='X') 4392 4393 def test_decode_one_line_one_word(self): 4394 self._test_decode('hello\r\nworld', 'hello\nworld') 4395 4396 def test_decode_one_line_one_word_eol(self): 4397 self._test_decode('hello\r\nworld', 'helloXworld', eol='X') 4398 4399 def 
test_decode_two_lines(self): 4400 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n') 4401 4402 def test_decode_two_lines_eol(self): 4403 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X') 4404 4405 def test_decode_one_long_line(self): 4406 self._test_decode('Spam' * 250, 'Spam' * 250) 4407 4408 def test_decode_one_space(self): 4409 self._test_decode(' ', '') 4410 4411 def test_decode_multiple_spaces(self): 4412 self._test_decode(' ' * 5, '') 4413 4414 def test_decode_one_line_trailing_spaces(self): 4415 self._test_decode('hello \r\n', 'hello\n') 4416 4417 def test_decode_two_lines_trailing_spaces(self): 4418 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n') 4419 4420 def test_decode_quoted_word(self): 4421 self._test_decode('=22quoted=20words=22', '"quoted words"') 4422 4423 def test_decode_uppercase_quoting(self): 4424 self._test_decode('ab=CD=EF', 'ab\xcd\xef') 4425 4426 def test_decode_lowercase_quoting(self): 4427 self._test_decode('ab=cd=ef', 'ab\xcd\xef') 4428 4429 def test_decode_soft_line_break(self): 4430 self._test_decode('soft line=\r\nbreak', 'soft linebreak') 4431 4432 def test_decode_false_quoting(self): 4433 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2') 4434 4435 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None): 4436 kwargs = {} 4437 if maxlinelen is None: 4438 # Use body_encode's default. 4439 maxlinelen = 76 4440 else: 4441 kwargs['maxlinelen'] = maxlinelen 4442 if eol is None: 4443 # Use body_encode's default. 4444 eol = '\n' 4445 else: 4446 kwargs['eol'] = eol 4447 encoded_body = quoprimime.body_encode(body, **kwargs) 4448 self.assertEqual(encoded_body, expected_encoded_body) 4449 if eol == '\n' or eol == '\r\n': 4450 # We know how to split the result back into lines, so maxlinelen 4451 # can be checked. 
4452 for line in encoded_body.splitlines(): 4453 self.assertLessEqual(len(line), maxlinelen) 4454 4455 def test_encode_null(self): 4456 self._test_encode('', '') 4457 4458 def test_encode_null_lines(self): 4459 self._test_encode('\n\n', '\n\n') 4460 4461 def test_encode_one_line(self): 4462 self._test_encode('hello\n', 'hello\n') 4463 4464 def test_encode_one_line_crlf(self): 4465 self._test_encode('hello\r\n', 'hello\n') 4466 4467 def test_encode_one_line_eol(self): 4468 self._test_encode('hello\n', 'hello\r\n', eol='\r\n') 4469 4470 def test_encode_one_line_eol_after_non_ascii(self): 4471 # issue 20206; see changeset 0cf700464177 for why the encode/decode. 4472 self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'), 4473 'hello=CF=85\r\n', eol='\r\n') 4474 4475 def test_encode_one_space(self): 4476 self._test_encode(' ', '=20') 4477 4478 def test_encode_one_line_one_space(self): 4479 self._test_encode(' \n', '=20\n') 4480 4481 # XXX: body_encode() expect strings, but uses ord(char) from these strings 4482 # to index into a 256-entry list. For code points above 255, this will fail. 4483 # Should there be a check for 8-bit only ord() values in body, or at least 4484 # a comment about the expected input? 
4485 4486 def test_encode_two_lines_one_space(self): 4487 self._test_encode(' \n \n', '=20\n=20\n') 4488 4489 def test_encode_one_word_trailing_spaces(self): 4490 self._test_encode('hello ', 'hello =20') 4491 4492 def test_encode_one_line_trailing_spaces(self): 4493 self._test_encode('hello \n', 'hello =20\n') 4494 4495 def test_encode_one_word_trailing_tab(self): 4496 self._test_encode('hello \t', 'hello =09') 4497 4498 def test_encode_one_line_trailing_tab(self): 4499 self._test_encode('hello \t\n', 'hello =09\n') 4500 4501 def test_encode_trailing_space_before_maxlinelen(self): 4502 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6) 4503 4504 def test_encode_trailing_space_at_maxlinelen(self): 4505 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5) 4506 4507 def test_encode_trailing_space_beyond_maxlinelen(self): 4508 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4) 4509 4510 def test_encode_whitespace_lines(self): 4511 self._test_encode(' \n' * 5, '=20\n' * 5) 4512 4513 def test_encode_quoted_equals(self): 4514 self._test_encode('a = b', 'a =3D b') 4515 4516 def test_encode_one_long_string(self): 4517 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25) 4518 4519 def test_encode_one_long_line(self): 4520 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n') 4521 4522 def test_encode_one_very_long_line(self): 4523 self._test_encode('x' * 200 + '\n', 4524 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n') 4525 4526 def test_encode_shortest_maxlinelen(self): 4527 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4) 4528 4529 def test_encode_maxlinelen_too_small(self): 4530 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3) 4531 4532 def test_encode(self): 4533 eq = self.assertEqual 4534 eq(quoprimime.body_encode(''), '') 4535 eq(quoprimime.body_encode('hello'), 'hello') 4536 # Test the binary flag 4537 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld') 4538 # Test 
the maxlinelen arg 4539 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\ 4540 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= 4541 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= 4542 x xxxx xxxx xxxx xxxx=20""") 4543 # Test the eol argument 4544 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4545 """\ 4546 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r 4547 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r 4548 x xxxx xxxx xxxx xxxx=20""") 4549 eq(quoprimime.body_encode("""\ 4550 one line 4551 4552 two line"""), """\ 4553 one line 4554 4555 two line""") 4556 4557 4558 4559 # Test the Charset class 4560 class TestCharset(unittest.TestCase): 4561 def tearDown(self): 4562 from email import charset as CharsetModule 4563 try: 4564 del CharsetModule.CHARSETS['fake'] 4565 except KeyError: 4566 pass 4567 4568 def test_codec_encodeable(self): 4569 eq = self.assertEqual 4570 # Make sure us-ascii = no Unicode conversion 4571 c = Charset('us-ascii') 4572 eq(c.header_encode('Hello World!'), 'Hello World!') 4573 # Test 8-bit idempotency with us-ascii 4574 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' 4575 self.assertRaises(UnicodeError, c.header_encode, s) 4576 c = Charset('utf-8') 4577 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=') 4578 4579 def test_body_encode(self): 4580 eq = self.assertEqual 4581 # Try a charset with QP body encoding 4582 c = Charset('iso-8859-1') 4583 eq('hello w=F6rld', c.body_encode('hello w\xf6rld')) 4584 # Try a charset with Base64 body encoding 4585 c = Charset('utf-8') 4586 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world')) 4587 # Try a charset with None body encoding 4588 c = Charset('us-ascii') 4589 eq('hello world', c.body_encode('hello world')) 4590 # Try the convert argument, where input codec != output codec 4591 c = Charset('euc-jp') 4592 # With apologies to Tokio Kikuchi ;) 4593 # XXX FIXME 4594 ## try: 4595 ## eq('\x1b$B5FCO;~IW\x1b(B', 4596 ## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7')) 4597 ## 
eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', 4598 ## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False)) 4599 ## except LookupError: 4600 ## # We probably don't have the Japanese codecs installed 4601 ## pass 4602 # Testing SF bug #625509, which we have to fake, since there are no 4603 # built-in encodings where the header encoding is QP but the body 4604 # encoding is not. 4605 from email import charset as CharsetModule 4606 CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8') 4607 c = Charset('fake') 4608 eq('hello world', c.body_encode('hello world')) 4609 4610 def test_unicode_charset_name(self): 4611 charset = Charset('us-ascii') 4612 self.assertEqual(str(charset), 'us-ascii') 4613 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii') 4614 4615 4616 4617 # Test multilingual MIME headers. 4618 class TestHeader(TestEmailBase): 4619 def test_simple(self): 4620 eq = self.ndiffAssertEqual 4621 h = Header('Hello World!') 4622 eq(h.encode(), 'Hello World!') 4623 h.append(' Goodbye World!') 4624 eq(h.encode(), 'Hello World! Goodbye World!') 4625 4626 def test_simple_surprise(self): 4627 eq = self.ndiffAssertEqual 4628 h = Header('Hello World!') 4629 eq(h.encode(), 'Hello World!') 4630 h.append('Goodbye World!') 4631 eq(h.encode(), 'Hello World! 
Goodbye World!') 4632 4633 def test_header_needs_no_decoding(self): 4634 h = 'no decoding needed' 4635 self.assertEqual(decode_header(h), [(h, None)]) 4636 4637 def test_long(self): 4638 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", 4639 maxlinelen=76) 4640 for l in h.encode(splitchars=' ').split('\n '): 4641 self.assertLessEqual(len(l), 76) 4642 4643 def test_multilingual(self): 4644 eq = self.ndiffAssertEqual 4645 g = Charset("iso-8859-1") 4646 cz = Charset("iso-8859-2") 4647 utf8 = Charset("utf-8") 4648 g_head = (b'Die Mieter treten hier ein werden mit einem ' 4649 b'Foerderband komfortabel den Korridor entlang, ' 4650 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, ' 4651 b'gegen die rotierenden Klingen bef\xf6rdert. ') 4652 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 4653 b'd\xf9vtipu.. ') 4654 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 4655 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 4656 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 4657 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 4658 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 4659 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 4660 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 4661 '\u3044\u307e\u3059\u3002') 4662 h = Header(g_head, g) 4663 h.append(cz_head, cz) 4664 h.append(utf8_head, utf8) 4665 enc = h.encode(maxlinelen=76) 4666 eq(enc, """\ 4667 =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?= 4668 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?= 4669 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?= 4670 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= 4671 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 4672 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= 4673 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= 4674 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= 4675 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?= 4676 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?= 4677 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""") 4678 decoded = decode_header(enc) 4679 eq(len(decoded), 3) 4680 eq(decoded[0], (g_head, 'iso-8859-1')) 4681 eq(decoded[1], (cz_head, 'iso-8859-2')) 4682 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8')) 4683 ustr = str(h) 4684 eq(ustr, 4685 (b'Die Mieter treten hier ein werden mit einem Foerderband ' 4686 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' 4687 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen ' 4688 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod ' 4689 b'tlakem jejich d\xc5\xafvtipu.. 
\xe6\xad\xa3\xe7\xa2\xba\xe3\x81' 4690 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3' 4691 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3' 4692 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' 4693 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e' 4694 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3' 4695 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82' 4696 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b' 4697 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git ' 4698 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt ' 4699 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81' 4700 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82' 4701 ).decode('utf-8')) 4702 # Test make_header() 4703 newh = make_header(decode_header(enc)) 4704 eq(newh, h) 4705 4706 def test_empty_header_encode(self): 4707 h = Header() 4708 self.assertEqual(h.encode(), '') 4709 4710 def test_header_ctor_default_args(self): 4711 eq = self.ndiffAssertEqual 4712 h = Header() 4713 eq(h, '') 4714 h.append('foo', Charset('iso-8859-1')) 4715 eq(h, 'foo') 4716 4717 def test_explicit_maxlinelen(self): 4718 eq = self.ndiffAssertEqual 4719 hstr = ('A very long line that must get split to something other ' 4720 'than at the 76th character boundary to test the non-default ' 4721 'behavior') 4722 h = Header(hstr) 4723 eq(h.encode(), '''\ 4724 A very long line that must get split to something other than at the 76th 4725 character boundary to test the non-default behavior''') 4726 eq(str(h), hstr) 4727 h = Header(hstr, header_name='Subject') 4728 eq(h.encode(), '''\ 4729 A very long line that must get split to something other than at the 4730 76th character boundary to test the non-default behavior''') 4731 eq(str(h), hstr) 4732 h = Header(hstr, maxlinelen=1024, header_name='Subject') 4733 eq(h.encode(), hstr) 4734 eq(str(h), hstr) 4735 
4736 def test_quopri_splittable(self): 4737 eq = self.ndiffAssertEqual 4738 h = Header(charset='iso-8859-1', maxlinelen=20) 4739 x = 'xxxx ' * 20 4740 h.append(x) 4741 s = h.encode() 4742 eq(s, """\ 4743 =?iso-8859-1?q?xxx?= 4744 =?iso-8859-1?q?x_?= 4745 =?iso-8859-1?q?xx?= 4746 =?iso-8859-1?q?xx?= 4747 =?iso-8859-1?q?_x?= 4748 =?iso-8859-1?q?xx?= 4749 =?iso-8859-1?q?x_?= 4750 =?iso-8859-1?q?xx?= 4751 =?iso-8859-1?q?xx?= 4752 =?iso-8859-1?q?_x?= 4753 =?iso-8859-1?q?xx?= 4754 =?iso-8859-1?q?x_?= 4755 =?iso-8859-1?q?xx?= 4756 =?iso-8859-1?q?xx?= 4757 =?iso-8859-1?q?_x?= 4758 =?iso-8859-1?q?xx?= 4759 =?iso-8859-1?q?x_?= 4760 =?iso-8859-1?q?xx?= 4761 =?iso-8859-1?q?xx?= 4762 =?iso-8859-1?q?_x?= 4763 =?iso-8859-1?q?xx?= 4764 =?iso-8859-1?q?x_?= 4765 =?iso-8859-1?q?xx?= 4766 =?iso-8859-1?q?xx?= 4767 =?iso-8859-1?q?_x?= 4768 =?iso-8859-1?q?xx?= 4769 =?iso-8859-1?q?x_?= 4770 =?iso-8859-1?q?xx?= 4771 =?iso-8859-1?q?xx?= 4772 =?iso-8859-1?q?_x?= 4773 =?iso-8859-1?q?xx?= 4774 =?iso-8859-1?q?x_?= 4775 =?iso-8859-1?q?xx?= 4776 =?iso-8859-1?q?xx?= 4777 =?iso-8859-1?q?_x?= 4778 =?iso-8859-1?q?xx?= 4779 =?iso-8859-1?q?x_?= 4780 =?iso-8859-1?q?xx?= 4781 =?iso-8859-1?q?xx?= 4782 =?iso-8859-1?q?_x?= 4783 =?iso-8859-1?q?xx?= 4784 =?iso-8859-1?q?x_?= 4785 =?iso-8859-1?q?xx?= 4786 =?iso-8859-1?q?xx?= 4787 =?iso-8859-1?q?_x?= 4788 =?iso-8859-1?q?xx?= 4789 =?iso-8859-1?q?x_?= 4790 =?iso-8859-1?q?xx?= 4791 =?iso-8859-1?q?xx?= 4792 =?iso-8859-1?q?_?=""") 4793 eq(x, str(make_header(decode_header(s)))) 4794 h = Header(charset='iso-8859-1', maxlinelen=40) 4795 h.append('xxxx ' * 20) 4796 s = h.encode() 4797 eq(s, """\ 4798 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?= 4799 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?= 4800 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?= 4801 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?= 4802 =?iso-8859-1?q?_xxxx_xxxx_?=""") 4803 eq(x, str(make_header(decode_header(s)))) 4804 4805 def test_base64_splittable(self): 4806 eq = self.ndiffAssertEqual 4807 h = Header(charset='koi8-r', 
maxlinelen=20) 4808 x = 'xxxx ' * 20 4809 h.append(x) 4810 s = h.encode() 4811 eq(s, """\ 4812 =?koi8-r?b?eHh4?= 4813 =?koi8-r?b?eCB4?= 4814 =?koi8-r?b?eHh4?= 4815 =?koi8-r?b?IHh4?= 4816 =?koi8-r?b?eHgg?= 4817 =?koi8-r?b?eHh4?= 4818 =?koi8-r?b?eCB4?= 4819 =?koi8-r?b?eHh4?= 4820 =?koi8-r?b?IHh4?= 4821 =?koi8-r?b?eHgg?= 4822 =?koi8-r?b?eHh4?= 4823 =?koi8-r?b?eCB4?= 4824 =?koi8-r?b?eHh4?= 4825 =?koi8-r?b?IHh4?= 4826 =?koi8-r?b?eHgg?= 4827 =?koi8-r?b?eHh4?= 4828 =?koi8-r?b?eCB4?= 4829 =?koi8-r?b?eHh4?= 4830 =?koi8-r?b?IHh4?= 4831 =?koi8-r?b?eHgg?= 4832 =?koi8-r?b?eHh4?= 4833 =?koi8-r?b?eCB4?= 4834 =?koi8-r?b?eHh4?= 4835 =?koi8-r?b?IHh4?= 4836 =?koi8-r?b?eHgg?= 4837 =?koi8-r?b?eHh4?= 4838 =?koi8-r?b?eCB4?= 4839 =?koi8-r?b?eHh4?= 4840 =?koi8-r?b?IHh4?= 4841 =?koi8-r?b?eHgg?= 4842 =?koi8-r?b?eHh4?= 4843 =?koi8-r?b?eCB4?= 4844 =?koi8-r?b?eHh4?= 4845 =?koi8-r?b?IA==?=""") 4846 eq(x, str(make_header(decode_header(s)))) 4847 h = Header(charset='koi8-r', maxlinelen=40) 4848 h.append(x) 4849 s = h.encode() 4850 eq(s, """\ 4851 =?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?= 4852 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?= 4853 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?= 4854 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?= 4855 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?= 4856 =?koi8-r?b?eHh4eCB4eHh4IA==?=""") 4857 eq(x, str(make_header(decode_header(s)))) 4858 4859 def test_us_ascii_header(self): 4860 eq = self.assertEqual 4861 s = 'hello' 4862 x = decode_header(s) 4863 eq(x, [('hello', None)]) 4864 h = make_header(x) 4865 eq(s, h.encode()) 4866 4867 def test_string_charset(self): 4868 eq = self.assertEqual 4869 h = Header() 4870 h.append('hello', 'iso-8859-1') 4871 eq(h, 'hello') 4872 4873 ## def test_unicode_error(self): 4874 ## raises = self.assertRaises 4875 ## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii') 4876 ## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii') 4877 ## h = Header() 4878 ## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii') 4879 ## raises(UnicodeError, h.append, 
'[P\xf6stal]', 'us-ascii') 4880 ## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1') 4881 4882 def test_utf8_shortest(self): 4883 eq = self.assertEqual 4884 h = Header('p\xf6stal', 'utf-8') 4885 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=') 4886 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8') 4887 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=') 4888 4889 def test_bad_8bit_header(self): 4890 raises = self.assertRaises 4891 eq = self.assertEqual 4892 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4893 raises(UnicodeError, Header, x) 4894 h = Header() 4895 raises(UnicodeError, h.append, x) 4896 e = x.decode('utf-8', 'replace') 4897 eq(str(Header(x, errors='replace')), e) 4898 h.append(x, errors='replace') 4899 eq(str(h), e) 4900 4901 def test_escaped_8bit_header(self): 4902 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4903 e = x.decode('ascii', 'surrogateescape') 4904 h = Header(e, charset=email.charset.UNKNOWN8BIT) 4905 self.assertEqual(str(h), 4906 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4907 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 4908 4909 def test_header_handles_binary_unknown8bit(self): 4910 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4911 h = Header(x, charset=email.charset.UNKNOWN8BIT) 4912 self.assertEqual(str(h), 4913 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4914 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 4915 4916 def test_make_header_handles_binary_unknown8bit(self): 4917 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4918 h = Header(x, charset=email.charset.UNKNOWN8BIT) 4919 h2 = email.header.make_header(email.header.decode_header(h)) 4920 self.assertEqual(str(h2), 4921 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4922 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')]) 4923 4924 def test_modify_returned_list_does_not_change_header(self): 4925 h = Header('test') 
4926 chunks = email.header.decode_header(h) 4927 chunks.append(('ascii', 'test2')) 4928 self.assertEqual(str(h), 'test') 4929 4930 def test_encoded_adjacent_nonencoded(self): 4931 eq = self.assertEqual 4932 h = Header() 4933 h.append('hello', 'iso-8859-1') 4934 h.append('world') 4935 s = h.encode() 4936 eq(s, '=?iso-8859-1?q?hello?= world') 4937 h = make_header(decode_header(s)) 4938 eq(h.encode(), s) 4939 4940 def test_whitespace_keeper(self): 4941 eq = self.assertEqual 4942 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' 4943 parts = decode_header(s) 4944 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)]) 4945 hdr = make_header(parts) 4946 eq(hdr.encode(), 4947 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') 4948 4949 def test_broken_base64_header(self): 4950 raises = self.assertRaises 4951 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' 4952 raises(errors.HeaderParseError, decode_header, s) 4953 4954 def test_shift_jis_charset(self): 4955 h = Header('', charset='shift_jis') 4956 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=') 4957 4958 def test_flatten_header_with_no_value(self): 4959 # Issue 11401 (regression from email 4.x) Note that the space after 4960 # the header doesn't reflect the input, but this is also the way 4961 # email 4.x behaved. At some point it would be nice to fix that. 
4962 msg = email.message_from_string("EmptyHeader:") 4963 self.assertEqual(str(msg), "EmptyHeader: \n\n") 4964 4965 def test_encode_preserves_leading_ws_on_value(self): 4966 msg = Message() 4967 msg['SomeHeader'] = ' value with leading ws' 4968 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n") 4969 4970 4971 4972 # Test RFC 2231 header parameters (en/de)coding 4973 class TestRFC2231(TestEmailBase): 4974 4975 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 4976 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 4977 def test_get_param(self): 4978 eq = self.assertEqual 4979 msg = self._msgobj('msg_29.txt') 4980 eq(msg.get_param('title'), 4981 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 4982 eq(msg.get_param('title', unquote=False), 4983 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"')) 4984 4985 def test_set_param(self): 4986 eq = self.ndiffAssertEqual 4987 msg = Message() 4988 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 4989 charset='us-ascii') 4990 eq(msg.get_param('title'), 4991 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!')) 4992 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 4993 charset='us-ascii', language='en') 4994 eq(msg.get_param('title'), 4995 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 4996 msg = self._msgobj('msg_01.txt') 4997 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 4998 charset='us-ascii', language='en') 4999 eq(msg.as_string(maxheaderlen=78), """\ 5000 Return-Path: <bbb (at] zzz.org> 5001 Delivered-To: bbb (at] zzz.org 5002 Received: by mail.zzz.org (Postfix, from userid 889) 5003 \tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5004 MIME-Version: 1.0 5005 Content-Transfer-Encoding: 7bit 5006 Message-ID: <15090.61304.110929.45684 (at] aaa.zzz.org> 5007 From: bbb (at] ddd.com (John X. 
Doe) 5008 To: bbb (at] zzz.org 5009 Subject: This is a test message 5010 Date: Fri, 4 May 2001 14:05:44 -0400 5011 Content-Type: text/plain; charset=us-ascii; 5012 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5013 5014 5015 Hi, 5016 5017 Do you like this message? 5018 5019 -Me 5020 """) 5021 5022 def test_set_param_requote(self): 5023 msg = Message() 5024 msg.set_param('title', 'foo') 5025 self.assertEqual(msg['content-type'], 'text/plain; title="foo"') 5026 msg.set_param('title', 'bar', requote=False) 5027 self.assertEqual(msg['content-type'], 'text/plain; title=bar') 5028 # tspecial is still quoted. 5029 msg.set_param('title', "(bar)bell", requote=False) 5030 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"') 5031 5032 def test_del_param(self): 5033 eq = self.ndiffAssertEqual 5034 msg = self._msgobj('msg_01.txt') 5035 msg.set_param('foo', 'bar', charset='us-ascii', language='en') 5036 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5037 charset='us-ascii', language='en') 5038 msg.del_param('foo', header='Content-Type') 5039 eq(msg.as_string(maxheaderlen=78), """\ 5040 Return-Path: <bbb (at] zzz.org> 5041 Delivered-To: bbb (at] zzz.org 5042 Received: by mail.zzz.org (Postfix, from userid 889) 5043 \tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5044 MIME-Version: 1.0 5045 Content-Transfer-Encoding: 7bit 5046 Message-ID: <15090.61304.110929.45684 (at] aaa.zzz.org> 5047 From: bbb (at] ddd.com (John X. Doe) 5048 To: bbb (at] zzz.org 5049 Subject: This is a test message 5050 Date: Fri, 4 May 2001 14:05:44 -0400 5051 Content-Type: text/plain; charset="us-ascii"; 5052 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5053 5054 5055 Hi, 5056 5057 Do you like this message? 
5058 5059 -Me 5060 """) 5061 5062 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset 5063 # I changed the charset name, though, because the one in the file isn't 5064 # a legal charset name. Should add a test for an illegal charset. 5065 def test_rfc2231_get_content_charset(self): 5066 eq = self.assertEqual 5067 msg = self._msgobj('msg_32.txt') 5068 eq(msg.get_content_charset(), 'us-ascii') 5069 5070 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes 5071 def test_rfc2231_parse_rfc_quoting(self): 5072 m = textwrap.dedent('''\ 5073 Content-Disposition: inline; 5074 \tfilename*0*=''This%20is%20even%20more%20; 5075 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20; 5076 \tfilename*2="is it not.pdf" 5077 5078 ''') 5079 msg = email.message_from_string(m) 5080 self.assertEqual(msg.get_filename(), 5081 'This is even more ***fun*** is it not.pdf') 5082 self.assertEqual(m, msg.as_string()) 5083 5084 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5085 def test_rfc2231_parse_extra_quoting(self): 5086 m = textwrap.dedent('''\ 5087 Content-Disposition: inline; 5088 \tfilename*0*="''This%20is%20even%20more%20"; 5089 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5090 \tfilename*2="is it not.pdf" 5091 5092 ''') 5093 msg = email.message_from_string(m) 5094 self.assertEqual(msg.get_filename(), 5095 'This is even more ***fun*** is it not.pdf') 5096 self.assertEqual(m, msg.as_string()) 5097 5098 # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset 5099 # but new test uses *0* because otherwise lang/charset is not valid. 
5100 # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values 5101 def test_rfc2231_no_language_or_charset(self): 5102 m = '''\ 5103 Content-Transfer-Encoding: 8bit 5104 Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm" 5105 Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm 5106 5107 ''' 5108 msg = email.message_from_string(m) 5109 param = msg.get_param('NAME') 5110 self.assertNotIsInstance(param, tuple) 5111 self.assertEqual( 5112 param, 5113 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm') 5114 5115 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset 5116 def test_rfc2231_no_language_or_charset_in_filename(self): 5117 m = '''\ 5118 Content-Disposition: inline; 5119 \tfilename*0*="''This%20is%20even%20more%20"; 5120 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5121 \tfilename*2="is it not.pdf" 5122 5123 ''' 5124 msg = email.message_from_string(m) 5125 self.assertEqual(msg.get_filename(), 5126 'This is even more ***fun*** is it not.pdf') 5127 5128 # Duplicate of previous test? 5129 def test_rfc2231_no_language_or_charset_in_filename_encoded(self): 5130 m = '''\ 5131 Content-Disposition: inline; 5132 \tfilename*0*="''This%20is%20even%20more%20"; 5133 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5134 \tfilename*2="is it not.pdf" 5135 5136 ''' 5137 msg = email.message_from_string(m) 5138 self.assertEqual(msg.get_filename(), 5139 'This is even more ***fun*** is it not.pdf') 5140 5141 # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded, 5142 # but the test below is wrong (the first part should be decoded). 
# NOTE(review): the defs in this span take ``self`` and belong to the
# TestRFC2231 class whose header appears earlier in the file; indent them one
# level when splicing this span back under that class.
def test_rfc2231_partly_encoded(self):
    # Only segment *1* carries the trailing '*' that marks it as encoded;
    # segments *0 and *2 are plain.  The expectation keeps the %20 escapes of
    # segment 0 verbatim (minus its leading '') while segment 1 is decoded.
    # The comment that precedes this method in the file records that this
    # expectation is believed to be wrong.
    m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
    msg = email.message_from_string(m)
    self.assertEqual(
        msg.get_filename(),
        'This%20is%20even%20more%20***fun*** is it not.pdf')

def test_rfc2231_partly_nonencoded(self):
    # No segment is marked as encoded, so every percent escape is expected
    # to come back untouched in the joined filename.
    m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
    msg = email.message_from_string(m)
    self.assertEqual(
        msg.get_filename(),
        'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

def test_rfc2231_no_language_or_charset_in_boundary(self):
    # Encoded segments with empty charset/language fields on the multipart
    # boundary parameter; get_boundary() should return the decoded, joined
    # value.
    m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
    msg = email.message_from_string(m)
    self.assertEqual(msg.get_boundary(),
                     'This is even more ***fun*** is it not.pdf')

def test_rfc2231_no_language_or_charset_in_charset(self):
    # This is a nonsensical charset value, but tests the code anyway
    # Note that the expected value is lower-case ('this ...') although the
    # header spells it 'This'.
    m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
    msg = email.message_from_string(m)
    self.assertEqual(msg.get_content_charset(),
                     'this is even more ***fun*** is it not.pdf')

# test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
def test_rfc2231_bad_encoding_in_filename(self):
    # The charset field names an unknown codec ('bogus'); the expected
    # filename is still the percent-decoded ASCII text.
    m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
    msg = email.message_from_string(m)
    self.assertEqual(msg.get_filename(),
                     'This is even more ***fun*** is it not.pdf')

def test_rfc2231_bad_encoding_in_charset(self):
    # Unknown charset field ('bogus') combined with percent-encoded
    # non-ASCII bytes (%E2%80%9D) inside the charset value itself.
    m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
    msg = email.message_from_string(m)
    # This should return None because non-ascii characters in the charset
    # are not allowed.
    self.assertEqual(msg.get_content_charset(), None)

def test_rfc2231_bad_character_in_charset(self):
    # The declared charset field is 'ascii', but the value carries the
    # non-ASCII bytes %E2%80%9D.
    m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
    msg = email.message_from_string(m)
    # This should return None because non-ascii characters in the charset
    # are not allowed.
    self.assertEqual(msg.get_content_charset(), None)

def test_rfc2231_bad_character_in_filename(self):
    # The last segment ends in %E2, which cannot be decoded with the declared
    # 'ascii' charset; the expectation is that it surfaces as U+FFFD in the
    # filename instead of raising.
    m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
    msg = email.message_from_string(m)
    self.assertEqual(msg.get_filename(),
                     'This is even more ***fun*** is it not.pdf\ufffd')

def test_rfc2231_unknown_encoding(self):
    # Unsegmented extended parameter whose charset field is an unrecognised
    # name (X-UNKNOWN); the plain ASCII value is expected back.
    m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
    msg = email.message_from_string(m)
    self.assertEqual(msg.get_filename(), 'myfile.txt')

def test_rfc2231_single_tick_in_filename_extended(self):
    # Encoded segments whose value contains a single apostrophe but no
    # charset'language' prefix: get_param() is expected to return a 3-tuple
    # with charset and language both None and the apostrophe left in the
    # value.
    eq = self.assertEqual
    m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
    msg = email.message_from_string(m)
    charset, language, s = msg.get_param('name')
    eq(charset, None)
    eq(language, None)
    eq(s, "Frank's Document")

# test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
def test_rfc2231_single_tick_in_filename(self):
    # Unencoded segments: the apostrophe is ordinary data and get_param()
    # is expected to return a plain string, not a tuple.
    m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
    msg = email.message_from_string(m)
    param = msg.get_param('name')
    self.assertNotIsInstance(param, tuple)
    self.assertEqual(param, "Frank's Document")

def test_rfc2231_missing_tick(self):
    # Encoded segment with only one apostrophe, i.e. no complete
    # charset'language' prefix.  The expected filename keeps the apostrophe
    # and still has %20 decoded.
    m = '''\
Content-Disposition: inline;
\tfilename*0*="'This%20is%20broken";
'''
    msg = email.message_from_string(m)
    self.assertEqual(
        msg.get_filename(),
        "'This is broken")

def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
    # Same incomplete prefix, plus a lone %E2 byte in the value; the byte is
    # expected to appear as U+FFFD in the result.
    m = '''\
Content-Disposition: inline;
\tfilename*0*="'This%20is%E2broken";
'''
    msg = email.message_from_string(m)
    self.assertEqual(
        msg.get_filename(),
        "'This is\ufffdbroken")

# test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
def test_rfc2231_tick_attack_extended(self):
    # Three apostrophes in an encoded first segment: the first two delimit
    # charset and language, the third is expected to stay in the value.
    eq = self.assertEqual
    m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
    msg = email.message_from_string(m)
    charset, language, s = msg.get_param('name')
    eq(charset, 'us-ascii')
    eq(language, 'en-us')
    eq(s, "Frank's Document")

# test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
def test_rfc2231_tick_attack(self):
    # The same text in unencoded segments: nothing is split off as
    # charset/language, and the whole string comes back as a plain str.
    m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
    msg = email.message_from_string(m)
    param = msg.get_param('name')
    self.assertNotIsInstance(param, tuple)
    self.assertEqual(param, "us-ascii'en-us'Frank's Document")

# test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
def test_rfc2231_no_extended_values(self):
    # Ordinary quoted parameter with no RFC 2231 segment or encoding markers.
    eq = self.assertEqual
    m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
    msg = email.message_from_string(m)
    eq(msg.get_param('name'), "Frank's Document")

# test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
def test_rfc2231_encoded_then_unencoded_segments(self):
    # Segment 0 is encoded and carries charset/language, segment 1 is plain,
    # segment 2 is encoded again.  The result is expected to be a 3-tuple
    # with the charset/language of segment 0 and all three values joined.
    eq = self.assertEqual
    m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
    msg = email.message_from_string(m)
    charset, language, s = msg.get_param('name')
    eq(charset, 'us-ascii')
    eq(language, 'en-us')
    eq(s, 'My Document For You')

# test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
# test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
def test_rfc2231_unencoded_then_encoded_segments(self):
    # Segment 0 is NOT marked as encoded yet starts with a charset'language'
    # prefix, and the later segments are encoded.  The expectation is that
    # the prefix is still split off into charset and language.
    eq = self.assertEqual
    m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
    msg = email.message_from_string(m)
    charset, language, s = msg.get_param('name')
    eq(charset, 'us-ascii')
    eq(language, 'en-us')
    eq(s, 'My Document For You')



# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1. Note that these are incomplete, because the
# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of msg_45.txt survives regeneration."""

    # Matches from a "--boundary" line up to the next line that repeats the
    # same boundary text; group(2) is everything in between.  Compiled once
    # here instead of on every comparison.
    _FIRST_PART_RE = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        """Return ``(raw_text, parsed_message)`` for the named data file."""
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        """Return the first boundary-delimited part found in *text*.

        Fails the test with a message naming *label* when *text* contains no
        such part, rather than raising AttributeError on a None match.
        """
        match = self._FIRST_PART_RE.search(text)
        self.assertIsNotNone(
            match, 'no boundary-delimited MIME part found in %s text' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert that *original* and *result* share an identical first part."""
        # Extract the first mime part of each message
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'regenerated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        # Regenerate with as_string() using its default header length.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Regenerate with an explicit maxheaderlen of 60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Regenerate by flattening through a Generator into a StringIO.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)



if __name__ == '__main__':
    unittest.main()