1 # Copyright (C) 2001-2007 Python Software Foundation 2 # Author: Barry Warsaw 3 # Contact: email-sig (at] python.org 4 5 """Basic message object for the email package object model.""" 6 7 __all__ = ['Message', 'EmailMessage'] 8 9 import re 10 import uu 11 import quopri 12 from io import BytesIO, StringIO 13 14 # Intrapackage imports 15 from email import utils 16 from email import errors 17 from email._policybase import Policy, compat32 18 from email import charset as _charset 19 from email._encoded_words import decode_b 20 Charset = _charset.Charset 21 22 SEMISPACE = '; ' 23 24 # Regular expression that matches `special' characters in parameters, the 25 # existence of which force quoting of the parameter value. 26 tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') 27 28 29 def _splitparam(param): 30 # Split header parameters. BAW: this may be too simple. It isn't 31 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers 32 # found in the wild. We may eventually need a full fledged parser. 33 # RDM: we might have a Header here; for now just stringify it. 34 a, sep, b = str(param).partition(';') 35 if not sep: 36 return a.strip(), None 37 return a.strip(), b.strip() 38 40 def _formatparam(param, value=None, quote=True): 41 """Convenience function to format and return a key=value pair. 42 43 This will quote the value if needed or if quote is true. If value is a 44 three tuple (charset, language, value), it will be encoded according 45 to RFC2231 rules. If it contains non-ascii characters it will likewise 46 be encoded according to RFC2231 rules, using the utf-8 charset and 47 a null language. 48 """ 49 if value is not None and len(value) > 0: 50 # A tuple is used for RFC 2231 encoded parameter values where items 51 # are (charset, language, value). charset is a string, not a Charset 52 # instance. RFC 2231 encoded values are never quoted, per RFC. 53 if isinstance(value, tuple): 54 # Encode as per RFC 2231 55 param += '*' 56 value = utils.encode_rfc2231(value[2], value[0], value[1]) 57 return '%s=%s' % (param, value) 58 else: 59 try: 60 value.encode('ascii') 61 except UnicodeEncodeError: 62 param += '*' 63 value = utils.encode_rfc2231(value, 'utf-8', '') 64 return '%s=%s' % (param, value) 65 # BAW: Please check this. I think that if quote is set it should 66 # force quoting even if not necessary. 67 if quote or tspecials.search(value): 68 return '%s="%s"' % (param, utils.quote(value)) 69 else: 70 return '%s=%s' % (param, value) 71 else: 72 return param 73 74 def _parseparam(s): 75 # RDM This might be a Header, so for now stringify it. 76 s = ';' + str(s) 77 plist = [] 78 while s[:1] == ';': 79 s = s[1:] 80 end = s.find(';') 81 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: 82 end = s.find(';', end + 1) 83 if end < 0: 84 end = len(s) 85 f = s[:end] 86 if '=' in f: 87 i = f.index('=') 88 f = f[:i].strip().lower() + '=' + f[i+1:].strip() 89 plist.append(f.strip()) 90 s = s[end:] 91 return plist 92 93 94 def _unquotevalue(value): 95 # This is different than utils.collapse_rfc2231_value() because it doesn't 96 # try to convert the value to a unicode. Message.get_param() and 97 # Message.get_params() are both currently defined to return the tuple in 98 # the face of RFC 2231 parameters. 99 if isinstance(value, tuple): 100 return value[0], value[1], utils.unquote(value[2]) 101 else: 102 return utils.unquote(value) 103 104 105 107 class Message: 108 """Basic message object. 109 110 A message object is defined as something that has a bunch of RFC 2822 111 headers and a payload. It may optionally have an envelope header 112 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a 113 multipart or a message/rfc822), then the payload is a list of Message 114 objects, otherwise it is a string. 115 116 Message objects implement part of the `mapping' interface, which assumes 117 there is exactly one occurrence of the header per message. Some headers 118 do in fact appear multiple times (e.g. Received) and for those headers, 119 you must use the explicit API to set or get all the headers. Not all of 120 the mapping methods are implemented. 121 """ 122 def __init__(self, policy=compat32): 123 self.policy = policy 124 self._headers = [] 125 self._unixfrom = None 126 self._payload = None 127 self._charset = None 128 # Defaults for multipart messages 129 self.preamble = self.epilogue = None 130 self.defects = [] 131 # Default content type 132 self._default_type = 'text/plain' 133 134 def __str__(self): 135 """Return the entire formatted message as a string. 136 """ 137 return self.as_string() 138 139 def as_string(self, unixfrom=False, maxheaderlen=0, policy=None): 140 """Return the entire formatted message as a string. 141 142 Optional 'unixfrom', when true, means include the Unix From_ envelope 143 header. For backward compatibility reasons, if maxheaderlen is 144 not specified it defaults to 0, so you must override it explicitly 145 if you want a different maxheaderlen. 'policy' is passed to the 146 Generator instance used to serialize the mesasge; if it is not 147 specified the policy associated with the message instance is used. 148 149 If the message object contains binary data that is not encoded 150 according to RFC standards, the non-compliant data will be replaced by 151 unicode "unknown character" code points. 152 """ 153 from email.generator import Generator 154 policy = self.policy if policy is None else policy 155 fp = StringIO() 156 g = Generator(fp, 157 mangle_from_=False, 158 maxheaderlen=maxheaderlen, 159 policy=policy) 160 g.flatten(self, unixfrom=unixfrom) 161 return fp.getvalue() 162 163 def __bytes__(self): 164 """Return the entire formatted message as a bytes object. 165 """ 166 return self.as_bytes() 167 168 def as_bytes(self, unixfrom=False, policy=None): 169 """Return the entire formatted message as a bytes object. 170 171 Optional 'unixfrom', when true, means include the Unix From_ envelope 172 header. 'policy' is passed to the BytesGenerator instance used to 173 serialize the message; if not specified the policy associated with 174 the message instance is used. 175 """ 176 from email.generator import BytesGenerator 177 policy = self.policy if policy is None else policy 178 fp = BytesIO() 179 g = BytesGenerator(fp, mangle_from_=False, policy=policy) 180 g.flatten(self, unixfrom=unixfrom) 181 return fp.getvalue() 182 183 def is_multipart(self): 184 """Return True if the message consists of multiple parts.""" 185 return isinstance(self._payload, list) 186 187 # 188 # Unix From_ line 189 # 190 def set_unixfrom(self, unixfrom): 191 self._unixfrom = unixfrom 192 193 def get_unixfrom(self): 194 return self._unixfrom 195 196 # 197 # Payload manipulation. 198 # 199 def attach(self, payload): 200 """Add the given payload to the current payload. 201 202 The current payload will always be a list of objects after this method 203 is called. If you want to set the payload to a scalar object, use 204 set_payload() instead. 205 """ 206 if self._payload is None: 207 self._payload = [payload] 208 else: 209 try: 210 self._payload.append(payload) 211 except AttributeError: 212 raise TypeError("Attach is not valid on a message with a" 213 " non-multipart payload") 214 215 def get_payload(self, i=None, decode=False): 216 """Return a reference to the payload. 217 218 The payload will either be a list object or a string. If you mutate 219 the list object, you modify the message's payload in place. Optional 220 i returns that index into the payload. 221 222 Optional decode is a flag indicating whether the payload should be 223 decoded or not, according to the Content-Transfer-Encoding header 224 (default is False). 225 226 When True and the message is not a multipart, the payload will be 227 decoded if this header's value is `quoted-printable' or `base64'. If 228 some other encoding is used, or the header is missing, or if the 229 payload has bogus data (i.e. bogus base64 or uuencoded data), the 230 payload is returned as-is. 231 232 If the message is a multipart and the decode flag is True, then None 233 is returned. 234 """ 235 # Here is the logic table for this code, based on the email5.0.0 code: 236 # i decode is_multipart result 237 # ------ ------ ------------ ------------------------------ 238 # None True True None 239 # i True True None 240 # None False True _payload (a list) 241 # i False True _payload element i (a Message) 242 # i False False error (not a list) 243 # i True False error (not a list) 244 # None False False _payload 245 # None True False _payload decoded (bytes) 246 # Note that Barry planned to factor out the 'decode' case, but that 247 # isn't so easy now that we handle the 8 bit data, which needs to be 248 # converted in both the decode and non-decode path. 249 if self.is_multipart(): 250 if decode: 251 return None 252 if i is None: 253 return self._payload 254 else: 255 return self._payload[i] 256 # For backward compatibility, Use isinstance and this error message 257 # instead of the more logical is_multipart test. 258 if i is not None and not isinstance(self._payload, list): 259 raise TypeError('Expected list, got %s' % type(self._payload)) 260 payload = self._payload 261 # cte might be a Header, so for now stringify it. 262 cte = str(self.get('content-transfer-encoding', '')).lower() 263 # payload may be bytes here. 264 if isinstance(payload, str): 265 if utils._has_surrogates(payload): 266 bpayload = payload.encode('ascii', 'surrogateescape') 267 if not decode: 268 try: 269 payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') 270 except LookupError: 271 payload = bpayload.decode('ascii', 'replace') 272 elif decode: 273 try: 274 bpayload = payload.encode('ascii') 275 except UnicodeError: 276 # This won't happen for RFC compliant messages (messages 277 # containing only ASCII code points in the unicode input). 278 # If it does happen, turn the string into bytes in a way 279 # guaranteed not to fail. 280 bpayload = payload.encode('raw-unicode-escape') 281 if not decode: 282 return payload 283 if cte == 'quoted-printable': 284 return quopri.decodestring(bpayload) 285 elif cte == 'base64': 286 # XXX: this is a bit of a hack; decode_b should probably be factored 287 # out somewhere, but I haven't figured out where yet. 288 value, defects = decode_b(b''.join(bpayload.splitlines())) 289 for defect in defects: 290 self.policy.handle_defect(self, defect) 291 return value 292 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): 293 in_file = BytesIO(bpayload) 294 out_file = BytesIO() 295 try: 296 uu.decode(in_file, out_file, quiet=True) 297 return out_file.getvalue() 298 except uu.Error: 299 # Some decoding problem 300 return bpayload 301 if isinstance(payload, str): 302 return bpayload 303 return payload 304 305 def set_payload(self, payload, charset=None): 306 """Set the payload to the given value. 307 308 Optional charset sets the message's default character set. See 309 set_charset() for details. 310 """ 311 if hasattr(payload, 'encode'): 312 if charset is None: 313 self._payload = payload 314 return 315 if not isinstance(charset, Charset): 316 charset = Charset(charset) 317 payload = payload.encode(charset.output_charset) 318 if hasattr(payload, 'decode'): 319 self._payload = payload.decode('ascii', 'surrogateescape') 320 else: 321 self._payload = payload 322 if charset is not None: 323 self.set_charset(charset) 324 325 def set_charset(self, charset): 326 """Set the charset of the payload to a given character set. 327 328 charset can be a Charset instance, a string naming a character set, or 329 None. If it is a string it will be converted to a Charset instance. 330 If charset is None, the charset parameter will be removed from the 331 Content-Type field. Anything else will generate a TypeError. 332 333 The message will be assumed to be of type text/* encoded with 334 charset.input_charset. It will be converted to charset.output_charset 335 and encoded properly, if needed, when generating the plain text 336 representation of the message. MIME headers (MIME-Version, 337 Content-Type, Content-Transfer-Encoding) will be added as needed. 338 """ 339 if charset is None: 340 self.del_param('charset') 341 self._charset = None 342 return 343 if not isinstance(charset, Charset): 344 charset = Charset(charset) 345 self._charset = charset 346 if 'MIME-Version' not in self: 347 self.add_header('MIME-Version', '1.0') 348 if 'Content-Type' not in self: 349 self.add_header('Content-Type', 'text/plain', 350 charset=charset.get_output_charset()) 351 else: 352 self.set_param('charset', charset.get_output_charset()) 353 if charset != charset.get_output_charset(): 354 self._payload = charset.body_encode(self._payload) 355 if 'Content-Transfer-Encoding' not in self: 356 cte = charset.get_body_encoding() 357 try: 358 cte(self) 359 except TypeError: 360 # This 'if' is for backward compatibility, it allows unicode 361 # through even though that won't work correctly if the 362 # message is serialized. 363 payload = self._payload 364 if payload: 365 try: 366 payload = payload.encode('ascii', 'surrogateescape') 367 except UnicodeError: 368 payload = payload.encode(charset.output_charset) 369 self._payload = charset.body_encode(payload) 370 self.add_header('Content-Transfer-Encoding', cte) 371 372 def get_charset(self): 373 """Return the Charset instance associated with the message's payload. 374 """ 375 return self._charset 376 377 # 378 # MAPPING INTERFACE (partial) 379 # 380 def __len__(self): 381 """Return the total number of headers, including duplicates.""" 382 return len(self._headers) 383 384 def __getitem__(self, name): 385 """Get a header value. 386 387 Return None if the header is missing instead of raising an exception. 388 389 Note that if the header appeared multiple times, exactly which 390 occurrence gets returned is undefined. Use get_all() to get all 391 the values matching a header field name. 392 """ 393 return self.get(name) 394 395 def __setitem__(self, name, val): 396 """Set the value of a header. 397 398 Note: this does not overwrite an existing header with the same field 399 name. Use __delitem__() first to delete any existing headers. 400 """ 401 max_count = self.policy.header_max_count(name) 402 if max_count: 403 lname = name.lower() 404 found = 0 405 for k, v in self._headers: 406 if k.lower() == lname: 407 found += 1 408 if found >= max_count: 409 raise ValueError("There may be at most {} {} headers " 410 "in a message".format(max_count, name)) 411 self._headers.append(self.policy.header_store_parse(name, val)) 412 413 def __delitem__(self, name): 414 """Delete all occurrences of a header, if present. 415 416 Does not raise an exception if the header is missing. 417 """ 418 name = name.lower() 419 newheaders = [] 420 for k, v in self._headers: 421 if k.lower() != name: 422 newheaders.append((k, v)) 423 self._headers = newheaders 424 425 def __contains__(self, name): 426 return name.lower() in [k.lower() for k, v in self._headers] 427 428 def __iter__(self): 429 for field, value in self._headers: 430 yield field 431 432 def keys(self): 433 """Return a list of all the message's header field names. 434 435 These will be sorted in the order they appeared in the original 436 message, or were added to the message, and may contain duplicates. 437 Any fields deleted and re-inserted are always appended to the header 438 list. 439 """ 440 return [k for k, v in self._headers] 441 442 def values(self): 443 """Return a list of all the message's header values. 444 445 These will be sorted in the order they appeared in the original 446 message, or were added to the message, and may contain duplicates. 447 Any fields deleted and re-inserted are always appended to the header 448 list. 449 """ 450 return [self.policy.header_fetch_parse(k, v) 451 for k, v in self._headers] 452 453 def items(self): 454 """Get all the message's header fields and values. 455 456 These will be sorted in the order they appeared in the original 457 message, or were added to the message, and may contain duplicates. 458 Any fields deleted and re-inserted are always appended to the header 459 list. 460 """ 461 return [(k, self.policy.header_fetch_parse(k, v)) 462 for k, v in self._headers] 463 464 def get(self, name, failobj=None): 465 """Get a header value. 466 467 Like __getitem__() but return failobj instead of None when the field 468 is missing. 469 """ 470 name = name.lower() 471 for k, v in self._headers: 472 if k.lower() == name: 473 return self.policy.header_fetch_parse(k, v) 474 return failobj 475 476 # 477 # "Internal" methods (public API, but only intended for use by a parser 478 # or generator, not normal application code. 479 # 480 481 def set_raw(self, name, value): 482 """Store name and value in the model without modification. 483 484 This is an "internal" API, intended only for use by a parser. 485 """ 486 self._headers.append((name, value)) 487 488 def raw_items(self): 489 """Return the (name, value) header pairs without modification. 490 491 This is an "internal" API, intended only for use by a generator. 492 """ 493 return iter(self._headers.copy()) 494 495 # 496 # Additional useful stuff 497 # 498 499 def get_all(self, name, failobj=None): 500 """Return a list of all the values for the named field. 501 502 These will be sorted in the order they appeared in the original 503 message, and may contain duplicates. Any fields deleted and 504 re-inserted are always appended to the header list. 505 506 If no such fields exist, failobj is returned (defaults to None). 507 """ 508 values = [] 509 name = name.lower() 510 for k, v in self._headers: 511 if k.lower() == name: 512 values.append(self.policy.header_fetch_parse(k, v)) 513 if not values: 514 return failobj 515 return values 516 517 def add_header(self, _name, _value, **_params): 518 """Extended header setting. 519 520 name is the header field to add. keyword arguments can be used to set 521 additional parameters for the header field, with underscores converted 522 to dashes. Normally the parameter will be added as key="value" unless 523 value is None, in which case only the key will be added. If a 524 parameter value contains non-ASCII characters it can be specified as a 525 three-tuple of (charset, language, value), in which case it will be 526 encoded according to RFC2231 rules. Otherwise it will be encoded using 527 the utf-8 charset and a language of ''. 528 529 Examples: 530 531 msg.add_header('content-disposition', 'attachment', filename='bud.gif') 532 msg.add_header('content-disposition', 'attachment', 533 filename=('utf-8', '', Fuballer.ppt')) 534 msg.add_header('content-disposition', 'attachment', 535 filename='Fuballer.ppt')) 536 """ 537 parts = [] 538 for k, v in _params.items(): 539 if v is None: 540 parts.append(k.replace('_', '-')) 541 else: 542 parts.append(_formatparam(k.replace('_', '-'), v)) 543 if _value is not None: 544 parts.insert(0, _value) 545 self[_name] = SEMISPACE.join(parts) 546 547 def replace_header(self, _name, _value): 548 """Replace a header. 549 550 Replace the first matching header found in the message, retaining 551 header order and case. If no matching header was found, a KeyError is 552 raised. 553 """ 554 _name = _name.lower() 555 for i, (k, v) in zip(range(len(self._headers)), self._headers): 556 if k.lower() == _name: 557 self._headers[i] = self.policy.header_store_parse(k, _value) 558 break 559 else: 560 raise KeyError(_name) 561 562 # 563 # Use these three methods instead of the three above. 564 # 565 566 def get_content_type(self): 567 """Return the message's content type. 568 569 The returned string is coerced to lower case of the form 570 `maintype/subtype'. If there was no Content-Type header in the 571 message, the default type as given by get_default_type() will be 572 returned. Since according to RFC 2045, messages always have a default 573 type this will always return a value. 574 575 RFC 2045 defines a message's default type to be text/plain unless it 576 appears inside a multipart/digest container, in which case it would be 577 message/rfc822. 578 """ 579 missing = object() 580 value = self.get('content-type', missing) 581 if value is missing: 582 # This should have no parameters 583 return self.get_default_type() 584 ctype = _splitparam(value)[0].lower() 585 # RFC 2045, section 5.2 says if its invalid, use text/plain 586 if ctype.count('/') != 1: 587 return 'text/plain' 588 return ctype 589 590 def get_content_maintype(self): 591 """Return the message's main content type. 592 593 This is the `maintype' part of the string returned by 594 get_content_type(). 595 """ 596 ctype = self.get_content_type() 597 return ctype.split('/')[0] 598 599 def get_content_subtype(self): 600 """Returns the message's sub-content type. 601 602 This is the `subtype' part of the string returned by 603 get_content_type(). 604 """ 605 ctype = self.get_content_type() 606 return ctype.split('/')[1] 607 608 def get_default_type(self): 609 """Return the `default' content type. 610 611 Most messages have a default content type of text/plain, except for 612 messages that are subparts of multipart/digest containers. Such 613 subparts have a default content type of message/rfc822. 614 """ 615 return self._default_type 616 617 def set_default_type(self, ctype): 618 """Set the `default' content type. 619 620 ctype should be either "text/plain" or "message/rfc822", although this 621 is not enforced. The default content type is not stored in the 622 Content-Type header. 623 """ 624 self._default_type = ctype 625 626 def _get_params_preserve(self, failobj, header): 627 # Like get_params() but preserves the quoting of values. BAW: 628 # should this be part of the public interface? 629 missing = object() 630 value = self.get(header, missing) 631 if value is missing: 632 return failobj 633 params = [] 634 for p in _parseparam(value): 635 try: 636 name, val = p.split('=', 1) 637 name = name.strip() 638 val = val.strip() 639 except ValueError: 640 # Must have been a bare attribute 641 name = p.strip() 642 val = '' 643 params.append((name, val)) 644 params = utils.decode_params(params) 645 return params 646 647 def get_params(self, failobj=None, header='content-type', unquote=True): 648 """Return the message's Content-Type parameters, as a list. 649 650 The elements of the returned list are 2-tuples of key/value pairs, as 651 split on the `=' sign. The left hand side of the `=' is the key, 652 while the right hand side is the value. If there is no `=' sign in 653 the parameter the value is the empty string. The value is as 654 described in the get_param() method. 655 656 Optional failobj is the object to return if there is no Content-Type 657 header. Optional header is the header to search instead of 658 Content-Type. If unquote is True, the value is unquoted. 659 """ 660 missing = object() 661 params = self._get_params_preserve(missing, header) 662 if params is missing: 663 return failobj 664 if unquote: 665 return [(k, _unquotevalue(v)) for k, v in params] 666 else: 667 return params 668 669 def get_param(self, param, failobj=None, header='content-type', 670 unquote=True): 671 """Return the parameter value if found in the Content-Type header. 672 673 Optional failobj is the object to return if there is no Content-Type 674 header, or the Content-Type header has no such parameter. Optional 675 header is the header to search instead of Content-Type. 676 677 Parameter keys are always compared case insensitively. The return 678 value can either be a string, or a 3-tuple if the parameter was RFC 679 2231 encoded. When it's a 3-tuple, the elements of the value are of 680 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and 681 LANGUAGE can be None, in which case you should consider VALUE to be 682 encoded in the us-ascii charset. You can usually ignore LANGUAGE. 683 The parameter value (either the returned string, or the VALUE item in 684 the 3-tuple) is always unquoted, unless unquote is set to False. 685 686 If your application doesn't care whether the parameter was RFC 2231 687 encoded, it can turn the return value into a string as follows: 688 689 rawparam = msg.get_param('foo') 690 param = email.utils.collapse_rfc2231_value(rawparam) 691 692 """ 693 if header not in self: 694 return failobj 695 for k, v in self._get_params_preserve(failobj, header): 696 if k.lower() == param.lower(): 697 if unquote: 698 return _unquotevalue(v) 699 else: 700 return v 701 return failobj 702 703 def set_param(self, param, value, header='Content-Type', requote=True, 704 charset=None, language='', replace=False): 705 """Set a parameter in the Content-Type header. 706 707 If the parameter already exists in the header, its value will be 708 replaced with the new value. 709 710 If header is Content-Type and has not yet been defined for this 711 message, it will be set to "text/plain" and the new parameter and 712 value will be appended as per RFC 2045. 713 714 An alternate header can be specified in the header argument, and all 715 parameters will be quoted as necessary unless requote is False. 716 717 If charset is specified, the parameter will be encoded according to RFC 718 2231. Optional language specifies the RFC 2231 language, defaulting 719 to the empty string. Both charset and language should be strings. 720 """ 721 if not isinstance(value, tuple) and charset: 722 value = (charset, language, value) 723 724 if header not in self and header.lower() == 'content-type': 725 ctype = 'text/plain' 726 else: 727 ctype = self.get(header) 728 if not self.get_param(param, header=header): 729 if not ctype: 730 ctype = _formatparam(param, value, requote) 731 else: 732 ctype = SEMISPACE.join( 733 [ctype, _formatparam(param, value, requote)]) 734 else: 735 ctype = '' 736 for old_param, old_value in self.get_params(header=header, 737 unquote=requote): 738 append_param = '' 739 if old_param.lower() == param.lower(): 740 append_param = _formatparam(param, value, requote) 741 else: 742 append_param = _formatparam(old_param, old_value, requote) 743 if not ctype: 744 ctype = append_param 745 else: 746 ctype = SEMISPACE.join([ctype, append_param]) 747 if ctype != self.get(header): 748 if replace: 749 self.replace_header(header, ctype) 750 else: 751 del self[header] 752 self[header] = ctype 753 754 def del_param(self, param, header='content-type', requote=True): 755 """Remove the given parameter completely from the Content-Type header. 756 757 The header will be re-written in place without the parameter or its 758 value. All values will be quoted as necessary unless requote is 759 False. Optional header specifies an alternative to the Content-Type 760 header. 761 """ 762 if header not in self: 763 return 764 new_ctype = '' 765 for p, v in self.get_params(header=header, unquote=requote): 766 if p.lower() != param.lower(): 767 if not new_ctype: 768 new_ctype = _formatparam(p, v, requote) 769 else: 770 new_ctype = SEMISPACE.join([new_ctype, 771 _formatparam(p, v, requote)]) 772 if new_ctype != self.get(header): 773 del self[header] 774 self[header] = new_ctype 775 776 def set_type(self, type, header='Content-Type', requote=True): 777 """Set the main type and subtype for the Content-Type header. 778 779 type must be a string in the form "maintype/subtype", otherwise a 780 ValueError is raised. 781 782 This method replaces the Content-Type header, keeping all the 783 parameters in place. If requote is False, this leaves the existing 784 header's quoting as is. Otherwise, the parameters will be quoted (the 785 default). 786 787 An alternative header can be specified in the header argument. When 788 the Content-Type header is set, we'll always also add a MIME-Version 789 header. 790 """ 791 # BAW: should we be strict? 792 if not type.count('/') == 1: 793 raise ValueError 794 # Set the Content-Type, you get a MIME-Version 795 if header.lower() == 'content-type': 796 del self['mime-version'] 797 self['MIME-Version'] = '1.0' 798 if header not in self: 799 self[header] = type 800 return 801 params = self.get_params(header=header, unquote=requote) 802 del self[header] 803 self[header] = type 804 # Skip the first param; it's the old type. 805 for p, v in params[1:]: 806 self.set_param(p, v, header, requote) 807 808 def get_filename(self, failobj=None): 809 """Return the filename associated with the payload if present. 810 811 The filename is extracted from the Content-Disposition header's 812 `filename' parameter, and it is unquoted. If that header is missing 813 the `filename' parameter, this method falls back to looking for the 814 `name' parameter. 815 """ 816 missing = object() 817 filename = self.get_param('filename', missing, 'content-disposition') 818 if filename is missing: 819 filename = self.get_param('name', missing, 'content-type') 820 if filename is missing: 821 return failobj 822 return utils.collapse_rfc2231_value(filename).strip() 823 824 def get_boundary(self, failobj=None): 825 """Return the boundary associated with the payload if present. 826 827 The boundary is extracted from the Content-Type header's `boundary' 828 parameter, and it is unquoted. 829 """ 830 missing = object() 831 boundary = self.get_param('boundary', missing) 832 if boundary is missing: 833 return failobj 834 # RFC 2046 says that boundaries may begin but not end in w/s 835 return utils.collapse_rfc2231_value(boundary).rstrip() 836 837 def set_boundary(self, boundary): 838 """Set the boundary parameter in Content-Type to 'boundary'. 839 840 This is subtly different than deleting the Content-Type header and 841 adding a new one with a new boundary parameter via add_header(). The 842 main difference is that using the set_boundary() method preserves the 843 order of the Content-Type header in the original message. 844 845 HeaderParseError is raised if the message has no Content-Type header. 846 """ 847 missing = object() 848 params = self._get_params_preserve(missing, 'content-type') 849 if params is missing: 850 # There was no Content-Type header, and we don't know what type 851 # to set it to, so raise an exception. 852 raise errors.HeaderParseError('No Content-Type header found') 853 newparams = [] 854 foundp = False 855 for pk, pv in params: 856 if pk.lower() == 'boundary': 857 newparams.append(('boundary', '"%s"' % boundary)) 858 foundp = True 859 else: 860 newparams.append((pk, pv)) 861 if not foundp: 862 # The original Content-Type header had no boundary attribute. 863 # Tack one on the end. BAW: should we raise an exception 864 # instead??? 865 newparams.append(('boundary', '"%s"' % boundary)) 866 # Replace the existing Content-Type header with the new value 867 newheaders = [] 868 for h, v in self._headers: 869 if h.lower() == 'content-type': 870 parts = [] 871 for k, v in newparams: 872 if v == '': 873 parts.append(k) 874 else: 875 parts.append('%s=%s' % (k, v)) 876 val = SEMISPACE.join(parts) 877 newheaders.append(self.policy.header_store_parse(h, val)) 878 879 else: 880 newheaders.append((h, v)) 881 self._headers = newheaders 882 883 def get_content_charset(self, failobj=None): 884 """Return the charset parameter of the Content-Type header. 885 886 The returned string is always coerced to lower case. If there is no 887 Content-Type header, or if that header has no charset parameter, 888 failobj is returned. 889 """ 890 missing = object() 891 charset = self.get_param('charset', missing) 892 if charset is missing: 893 return failobj 894 if isinstance(charset, tuple): 895 # RFC 2231 encoded, so decode it, and it better end up as ascii. 896 pcharset = charset[0] or 'us-ascii' 897 try: 898 # LookupError will be raised if the charset isn't known to 899 # Python. UnicodeError will be raised if the encoded text 900 # contains a character not in the charset. 901 as_bytes = charset[2].encode('raw-unicode-escape') 902 charset = str(as_bytes, pcharset) 903 except (LookupError, UnicodeError): 904 charset = charset[2] 905 # charset characters must be in us-ascii range 906 try: 907 charset.encode('us-ascii') 908 except UnicodeError: 909 return failobj 910 # RFC 2046, $4.1.2 says charsets are not case sensitive 911 return charset.lower() 912 913 def get_charsets(self, failobj=None): 914 """Return a list containing the charset(s) used in this message. 915 916 The returned list of items describes the Content-Type headers' 917 charset parameter for this message and all the subparts in its 918 payload. 919 920 Each item will either be a string (the value of the charset parameter 921 in the Content-Type header of that part) or the value of the 922 'failobj' parameter (defaults to None), if the part does not have a 923 main MIME type of "text", or the charset is not defined. 924 925 The list will contain one string for each part of the message, plus 926 one for the container message (i.e. self), so that a non-multipart 927 message will still return a list of length 1. 928 """ 929 return [part.get_content_charset(failobj) for part in self.walk()] 930 931 def get_content_disposition(self): 932 """Return the message's content-disposition if it exists, or None. 933 934 The return values can be either 'inline', 'attachment' or None 935 according to the rfc2183. 936 """ 937 value = self.get('content-disposition') 938 if value is None: 939 return None 940 c_d = _splitparam(value)[0].lower() 941 return c_d 942 943 # I.e. def walk(self): ... 944 from email.iterators import walk 945 946 947 class MIMEPart(Message): 948 949 def __init__(self, policy=None): 950 if policy is None: 951 from email.policy import default 952 policy = default 953 Message.__init__(self, policy) 954 955 956 def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): 957 """Return the entire formatted message as a string. 958 959 Optional 'unixfrom', when true, means include the Unix From_ envelope 960 header. maxheaderlen is retained for backward compatibility with the 961 base Message class, but defaults to None, meaning that the policy value 962 for max_line_length controls the header maximum length. 'policy' is 963 passed to the Generator instance used to serialize the mesasge; if it 964 is not specified the policy associated with the message instance is 965 used. 966 """ 967 policy = self.policy if policy is None else policy 968 if maxheaderlen is None: 969 maxheaderlen = policy.max_line_length 970 return super().as_string(maxheaderlen=maxheaderlen, policy=policy) 971 972 def __str__(self): 973 return self.as_string(policy=self.policy.clone(utf8=True)) 974 975 def is_attachment(self): 976 c_d = self.get('content-disposition') 977 return False if c_d is None else c_d.content_disposition == 'attachment' 978 979 def _find_body(self, part, preferencelist): 980 if part.is_attachment(): 981 return 982 maintype, subtype = part.get_content_type().split('/') 983 if maintype == 'text': 984 if subtype in preferencelist: 985 yield (preferencelist.index(subtype), part) 986 return 987 if maintype != 'multipart': 988 return 989 if subtype != 'related': 990 for subpart in part.iter_parts(): 991 yield from self._find_body(subpart, preferencelist) 992 return 993 if 'related' in preferencelist: 994 yield (preferencelist.index('related'), part) 995 candidate = None 996 start = part.get_param('start') 997 if start: 998 for subpart in part.iter_parts(): 999 if subpart['content-id'] == start: 1000 candidate = subpart 1001 break 1002 if candidate is None: 1003 subparts = part.get_payload() 1004 candidate = subparts[0] if subparts else None 1005 if candidate is not None: 1006 yield from self._find_body(candidate, preferencelist) 1007 1008 def get_body(self, preferencelist=('related', 'html', 'plain')): 1009 """Return best candidate mime part for display as 'body' of message. 1010 1011 Do a depth first search, starting with self, looking for the first part 1012 matching each of the items in preferencelist, and return the part 1013 corresponding to the first item that has a match, or None if no items 1014 have a match. If 'related' is not included in preferencelist, consider 1015 the root part of any multipart/related encountered as a candidate 1016 match. Ignore parts with 'Content-Disposition: attachment'. 1017 """ 1018 best_prio = len(preferencelist) 1019 body = None 1020 for prio, part in self._find_body(self, preferencelist): 1021 if prio < best_prio: 1022 best_prio = prio 1023 body = part 1024 if prio == 0: 1025 break 1026 return body 1027 1028 _body_types = {('text', 'plain'), 1029 ('text', 'html'), 1030 ('multipart', 'related'), 1031 ('multipart', 'alternative')} 1032 def iter_attachments(self): 1033 """Return an iterator over the non-main parts of a multipart. 1034 1035 Skip the first of each occurrence of text/plain, text/html, 1036 multipart/related, or multipart/alternative in the multipart (unless 1037 they have a 'Content-Disposition: attachment' header) and include all 1038 remaining subparts in the returned iterator. When applied to a 1039 multipart/related, return all parts except the root part. Return an 1040 empty iterator when applied to a multipart/alternative or a 1041 non-multipart. 1042 """ 1043 maintype, subtype = self.get_content_type().split('/') 1044 if maintype != 'multipart' or subtype == 'alternative': 1045 return 1046 parts = self.get_payload().copy() 1047 if maintype == 'multipart' and subtype == 'related': 1048 # For related, we treat everything but the root as an attachment. 1049 # The root may be indicated by 'start'; if there's no start or we 1050 # can't find the named start, treat the first subpart as the root. 1051 start = self.get_param('start') 1052 if start: 1053 found = False 1054 attachments = [] 1055 for part in parts: 1056 if part.get('content-id') == start: 1057 found = True 1058 else: 1059 attachments.append(part) 1060 if found: 1061 yield from attachments 1062 return 1063 parts.pop(0) 1064 yield from parts 1065 return 1066 # Otherwise we more or less invert the remaining logic in get_body. 1067 # This only really works in edge cases (ex: non-text related or 1068 # alternatives) if the sending agent sets content-disposition. 1069 seen = [] # Only skip the first example of each candidate type. 1070 for part in parts: 1071 maintype, subtype = part.get_content_type().split('/') 1072 if ((maintype, subtype) in self._body_types and 1073 not part.is_attachment() and subtype not in seen): 1074 seen.append(subtype) 1075 continue 1076 yield part 1077 1078 def iter_parts(self): 1079 """Return an iterator over all immediate subparts of a multipart. 1080 1081 Return an empty iterator for a non-multipart. 1082 """ 1083 if self.get_content_maintype() == 'multipart': 1084 yield from self.get_payload() 1085 1086 def get_content(self, *args, content_manager=None, **kw): 1087 if content_manager is None: 1088 content_manager = self.policy.content_manager 1089 return content_manager.get_content(self, *args, **kw) 1090 1091 def set_content(self, *args, content_manager=None, **kw): 1092 if content_manager is None: 1093 content_manager = self.policy.content_manager 1094 content_manager.set_content(self, *args, **kw) 1095 1096 def _make_multipart(self, subtype, disallowed_subtypes, boundary): 1097 if self.get_content_maintype() == 'multipart': 1098 existing_subtype = self.get_content_subtype() 1099 disallowed_subtypes = disallowed_subtypes + (subtype,) 1100 if existing_subtype in disallowed_subtypes: 1101 raise ValueError("Cannot convert {} to {}".format( 1102 existing_subtype, subtype)) 1103 keep_headers = [] 1104 part_headers = [] 1105 for name, value in self._headers: 1106 if name.lower().startswith('content-'): 1107 part_headers.append((name, value)) 1108 else: 1109 keep_headers.append((name, value)) 1110 if part_headers: 1111 # There is existing content, move it to the first subpart. 1112 part = type(self)(policy=self.policy) 1113 part._headers = part_headers 1114 part._payload = self._payload 1115 self._payload = [part] 1116 else: 1117 self._payload = [] 1118 self._headers = keep_headers 1119 self['Content-Type'] = 'multipart/' + subtype 1120 if boundary is not None: 1121 self.set_param('boundary', boundary) 1122 1123 def make_related(self, boundary=None): 1124 self._make_multipart('related', ('alternative', 'mixed'), boundary) 1125 1126 def make_alternative(self, boundary=None): 1127 self._make_multipart('alternative', ('mixed',), boundary) 1128 1129 def make_mixed(self, boundary=None): 1130 self._make_multipart('mixed', (), boundary) 1131 1132 def _add_multipart(self, _subtype, *args, _disp=None, **kw): 1133 if (self.get_content_maintype() != 'multipart' or 1134 self.get_content_subtype() != _subtype): 1135 getattr(self, 'make_' + _subtype)() 1136 part = type(self)(policy=self.policy) 1137 part.set_content(*args, **kw) 1138 if _disp and 'content-disposition' not in part: 1139 part['Content-Disposition'] = _disp 1140 self.attach(part) 1141 1142 def add_related(self, *args, **kw): 1143 self._add_multipart('related', *args, _disp='inline', **kw) 1144 1145 def add_alternative(self, *args, **kw): 1146 self._add_multipart('alternative', *args, **kw) 1147 1148 def add_attachment(self, *args, **kw): 1149 self._add_multipart('mixed', *args, _disp='attachment', **kw) 1150 1151 def clear(self): 1152 self._headers = [] 1153 self._payload = None 1154 1155 def clear_content(self): 1156 self._headers = [(n, v) for n, v in self._headers 1157 if not n.lower().startswith('content-')] 1158 self._payload = None 1159 1160 1161 class EmailMessage(MIMEPart): 1162 1163 def set_content(self, *args, **kw): 1164 super().set_content(*args, **kw) 1165 if 'MIME-Version' not in self: 1166 self['MIME-Version'] = '1.0' 1167