Home | History | Annotate | Download | only in email
      1 # Copyright (C) 2001-2007 Python Software Foundation
      2 # Author: Barry Warsaw
      3 # Contact: email-sig@python.org
      4 
      5 """Basic message object for the email package object model."""
      6 
      7 __all__ = ['Message', 'EmailMessage']
      8 
      9 import re
     10 import uu
     11 import quopri
     12 from io import BytesIO, StringIO
     13 
     14 # Intrapackage imports
     15 from email import utils
     16 from email import errors
     17 from email._policybase import Policy, compat32
     18 from email import charset as _charset
     19 from email._encoded_words import decode_b
# Module-level alias for the Charset class from the charset module.
Charset = _charset.Charset

# Separator used when joining a header value with its formatted parameters.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
     27 
     28 
     29 def _splitparam(param):
     30     # Split header parameters.  BAW: this may be too simple.  It isn't
     31     # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
     32     # found in the wild.  We may eventually need a full fledged parser.
     33     # RDM: we might have a Header here; for now just stringify it.
     34     a, sep, b = str(param).partition(';')
     35     if not sep:
     36         return a.strip(), None
     37     return a.strip(), b.strip()
     38 
     40 def _formatparam(param, value=None, quote=True):
     41     """Convenience function to format and return a key=value pair.
     42 
     43     This will quote the value if needed or if quote is true.  If value is a
     44     three tuple (charset, language, value), it will be encoded according
     45     to RFC2231 rules.  If it contains non-ascii characters it will likewise
     46     be encoded according to RFC2231 rules, using the utf-8 charset and
     47     a null language.
     48     """
     49     if value is not None and len(value) > 0:
     50         # A tuple is used for RFC 2231 encoded parameter values where items
     51         # are (charset, language, value).  charset is a string, not a Charset
     52         # instance.  RFC 2231 encoded values are never quoted, per RFC.
     53         if isinstance(value, tuple):
     54             # Encode as per RFC 2231
     55             param += '*'
     56             value = utils.encode_rfc2231(value[2], value[0], value[1])
     57             return '%s=%s' % (param, value)
     58         else:
     59             try:
     60                 value.encode('ascii')
     61             except UnicodeEncodeError:
     62                 param += '*'
     63                 value = utils.encode_rfc2231(value, 'utf-8', '')
     64                 return '%s=%s' % (param, value)
     65         # BAW: Please check this.  I think that if quote is set it should
     66         # force quoting even if not necessary.
     67         if quote or tspecials.search(value):
     68             return '%s="%s"' % (param, utils.quote(value))
     69         else:
     70             return '%s=%s' % (param, value)
     71     else:
     72         return param
     73 
     74 def _parseparam(s):
     75     # RDM This might be a Header, so for now stringify it.
     76     s = ';' + str(s)
     77     plist = []
     78     while s[:1] == ';':
     79         s = s[1:]
     80         end = s.find(';')
     81         while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
     82             end = s.find(';', end + 1)
     83         if end < 0:
     84             end = len(s)
     85         f = s[:end]
     86         if '=' in f:
     87             i = f.index('=')
     88             f = f[:i].strip().lower() + '=' + f[i+1:].strip()
     89         plist.append(f.strip())
     90         s = s[end:]
     91     return plist
     92 
     93 
     94 def _unquotevalue(value):
     95     # This is different than utils.collapse_rfc2231_value() because it doesn't
     96     # try to convert the value to a unicode.  Message.get_param() and
     97     # Message.get_params() are both currently defined to return the tuple in
     98     # the face of RFC 2231 parameters.
     99     if isinstance(value, tuple):
    100         return value[0], value[1], utils.unquote(value[2])
    101     else:
    102         return utils.unquote(value)
    103 
    104 
    105 
    107 class Message:
    108     """Basic message object.
    109 
    110     A message object is defined as something that has a bunch of RFC 2822
    111     headers and a payload.  It may optionally have an envelope header
    112     (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    113     multipart or a message/rfc822), then the payload is a list of Message
    114     objects, otherwise it is a string.
    115 
    116     Message objects implement part of the `mapping' interface, which assumes
    117     there is exactly one occurrence of the header per message.  Some headers
    118     do in fact appear multiple times (e.g. Received) and for those headers,
    119     you must use the explicit API to set or get all the headers.  Not all of
    120     the mapping methods are implemented.
    121     """
    122     def __init__(self, policy=compat32):
    123         self.policy = policy
    124         self._headers = []
    125         self._unixfrom = None
    126         self._payload = None
    127         self._charset = None
    128         # Defaults for multipart messages
    129         self.preamble = self.epilogue = None
    130         self.defects = []
    131         # Default content type
    132         self._default_type = 'text/plain'
    133 
    134     def __str__(self):
    135         """Return the entire formatted message as a string.
    136         """
    137         return self.as_string()
    138 
    139     def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    140         """Return the entire formatted message as a string.
    141 
    142         Optional 'unixfrom', when true, means include the Unix From_ envelope
    143         header.  For backward compatibility reasons, if maxheaderlen is
    144         not specified it defaults to 0, so you must override it explicitly
    145         if you want a different maxheaderlen.  'policy' is passed to the
    146         Generator instance used to serialize the mesasge; if it is not
    147         specified the policy associated with the message instance is used.
    148 
    149         If the message object contains binary data that is not encoded
    150         according to RFC standards, the non-compliant data will be replaced by
    151         unicode "unknown character" code points.
    152         """
    153         from email.generator import Generator
    154         policy = self.policy if policy is None else policy
    155         fp = StringIO()
    156         g = Generator(fp,
    157                       mangle_from_=False,
    158                       maxheaderlen=maxheaderlen,
    159                       policy=policy)
    160         g.flatten(self, unixfrom=unixfrom)
    161         return fp.getvalue()
    162 
    163     def __bytes__(self):
    164         """Return the entire formatted message as a bytes object.
    165         """
    166         return self.as_bytes()
    167 
    168     def as_bytes(self, unixfrom=False, policy=None):
    169         """Return the entire formatted message as a bytes object.
    170 
    171         Optional 'unixfrom', when true, means include the Unix From_ envelope
    172         header.  'policy' is passed to the BytesGenerator instance used to
    173         serialize the message; if not specified the policy associated with
    174         the message instance is used.
    175         """
    176         from email.generator import BytesGenerator
    177         policy = self.policy if policy is None else policy
    178         fp = BytesIO()
    179         g = BytesGenerator(fp, mangle_from_=False, policy=policy)
    180         g.flatten(self, unixfrom=unixfrom)
    181         return fp.getvalue()
    182 
    183     def is_multipart(self):
    184         """Return True if the message consists of multiple parts."""
    185         return isinstance(self._payload, list)
    186 
    187     #
    188     # Unix From_ line
    189     #
    190     def set_unixfrom(self, unixfrom):
    191         self._unixfrom = unixfrom
    192 
    193     def get_unixfrom(self):
    194         return self._unixfrom
    195 
    196     #
    197     # Payload manipulation.
    198     #
    199     def attach(self, payload):
    200         """Add the given payload to the current payload.
    201 
    202         The current payload will always be a list of objects after this method
    203         is called.  If you want to set the payload to a scalar object, use
    204         set_payload() instead.
    205         """
    206         if self._payload is None:
    207             self._payload = [payload]
    208         else:
    209             try:
    210                 self._payload.append(payload)
    211             except AttributeError:
    212                 raise TypeError("Attach is not valid on a message with a"
    213                                 " non-multipart payload")
    214 
    215     def get_payload(self, i=None, decode=False):
    216         """Return a reference to the payload.
    217 
    218         The payload will either be a list object or a string.  If you mutate
    219         the list object, you modify the message's payload in place.  Optional
    220         i returns that index into the payload.
    221 
    222         Optional decode is a flag indicating whether the payload should be
    223         decoded or not, according to the Content-Transfer-Encoding header
    224         (default is False).
    225 
    226         When True and the message is not a multipart, the payload will be
    227         decoded if this header's value is `quoted-printable' or `base64'.  If
    228         some other encoding is used, or the header is missing, or if the
    229         payload has bogus data (i.e. bogus base64 or uuencoded data), the
    230         payload is returned as-is.
    231 
    232         If the message is a multipart and the decode flag is True, then None
    233         is returned.
    234         """
    235         # Here is the logic table for this code, based on the email5.0.0 code:
    236         #   i     decode  is_multipart  result
    237         # ------  ------  ------------  ------------------------------
    238         #  None   True    True          None
    239         #   i     True    True          None
    240         #  None   False   True          _payload (a list)
    241         #   i     False   True          _payload element i (a Message)
    242         #   i     False   False         error (not a list)
    243         #   i     True    False         error (not a list)
    244         #  None   False   False         _payload
    245         #  None   True    False         _payload decoded (bytes)
    246         # Note that Barry planned to factor out the 'decode' case, but that
    247         # isn't so easy now that we handle the 8 bit data, which needs to be
    248         # converted in both the decode and non-decode path.
    249         if self.is_multipart():
    250             if decode:
    251                 return None
    252             if i is None:
    253                 return self._payload
    254             else:
    255                 return self._payload[i]
    256         # For backward compatibility, Use isinstance and this error message
    257         # instead of the more logical is_multipart test.
    258         if i is not None and not isinstance(self._payload, list):
    259             raise TypeError('Expected list, got %s' % type(self._payload))
    260         payload = self._payload
    261         # cte might be a Header, so for now stringify it.
    262         cte = str(self.get('content-transfer-encoding', '')).lower()
    263         # payload may be bytes here.
    264         if isinstance(payload, str):
    265             if utils._has_surrogates(payload):
    266                 bpayload = payload.encode('ascii', 'surrogateescape')
    267                 if not decode:
    268                     try:
    269                         payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
    270                     except LookupError:
    271                         payload = bpayload.decode('ascii', 'replace')
    272             elif decode:
    273                 try:
    274                     bpayload = payload.encode('ascii')
    275                 except UnicodeError:
    276                     # This won't happen for RFC compliant messages (messages
    277                     # containing only ASCII code points in the unicode input).
    278                     # If it does happen, turn the string into bytes in a way
    279                     # guaranteed not to fail.
    280                     bpayload = payload.encode('raw-unicode-escape')
    281         if not decode:
    282             return payload
    283         if cte == 'quoted-printable':
    284             return quopri.decodestring(bpayload)
    285         elif cte == 'base64':
    286             # XXX: this is a bit of a hack; decode_b should probably be factored
    287             # out somewhere, but I haven't figured out where yet.
    288             value, defects = decode_b(b''.join(bpayload.splitlines()))
    289             for defect in defects:
    290                 self.policy.handle_defect(self, defect)
    291             return value
    292         elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
    293             in_file = BytesIO(bpayload)
    294             out_file = BytesIO()
    295             try:
    296                 uu.decode(in_file, out_file, quiet=True)
    297                 return out_file.getvalue()
    298             except uu.Error:
    299                 # Some decoding problem
    300                 return bpayload
    301         if isinstance(payload, str):
    302             return bpayload
    303         return payload
    304 
    305     def set_payload(self, payload, charset=None):
    306         """Set the payload to the given value.
    307 
    308         Optional charset sets the message's default character set.  See
    309         set_charset() for details.
    310         """
    311         if hasattr(payload, 'encode'):
    312             if charset is None:
    313                 self._payload = payload
    314                 return
    315             if not isinstance(charset, Charset):
    316                 charset = Charset(charset)
    317             payload = payload.encode(charset.output_charset)
    318         if hasattr(payload, 'decode'):
    319             self._payload = payload.decode('ascii', 'surrogateescape')
    320         else:
    321             self._payload = payload
    322         if charset is not None:
    323             self.set_charset(charset)
    324 
    325     def set_charset(self, charset):
    326         """Set the charset of the payload to a given character set.
    327 
    328         charset can be a Charset instance, a string naming a character set, or
    329         None.  If it is a string it will be converted to a Charset instance.
    330         If charset is None, the charset parameter will be removed from the
    331         Content-Type field.  Anything else will generate a TypeError.
    332 
    333         The message will be assumed to be of type text/* encoded with
    334         charset.input_charset.  It will be converted to charset.output_charset
    335         and encoded properly, if needed, when generating the plain text
    336         representation of the message.  MIME headers (MIME-Version,
    337         Content-Type, Content-Transfer-Encoding) will be added as needed.
    338         """
    339         if charset is None:
    340             self.del_param('charset')
    341             self._charset = None
    342             return
    343         if not isinstance(charset, Charset):
    344             charset = Charset(charset)
    345         self._charset = charset
    346         if 'MIME-Version' not in self:
    347             self.add_header('MIME-Version', '1.0')
    348         if 'Content-Type' not in self:
    349             self.add_header('Content-Type', 'text/plain',
    350                             charset=charset.get_output_charset())
    351         else:
    352             self.set_param('charset', charset.get_output_charset())
    353         if charset != charset.get_output_charset():
    354             self._payload = charset.body_encode(self._payload)
    355         if 'Content-Transfer-Encoding' not in self:
    356             cte = charset.get_body_encoding()
    357             try:
    358                 cte(self)
    359             except TypeError:
    360                 # This 'if' is for backward compatibility, it allows unicode
    361                 # through even though that won't work correctly if the
    362                 # message is serialized.
    363                 payload = self._payload
    364                 if payload:
    365                     try:
    366                         payload = payload.encode('ascii', 'surrogateescape')
    367                     except UnicodeError:
    368                         payload = payload.encode(charset.output_charset)
    369                 self._payload = charset.body_encode(payload)
    370                 self.add_header('Content-Transfer-Encoding', cte)
    371 
    372     def get_charset(self):
    373         """Return the Charset instance associated with the message's payload.
    374         """
    375         return self._charset
    376 
    377     #
    378     # MAPPING INTERFACE (partial)
    379     #
    380     def __len__(self):
    381         """Return the total number of headers, including duplicates."""
    382         return len(self._headers)
    383 
    384     def __getitem__(self, name):
    385         """Get a header value.
    386 
    387         Return None if the header is missing instead of raising an exception.
    388 
    389         Note that if the header appeared multiple times, exactly which
    390         occurrence gets returned is undefined.  Use get_all() to get all
    391         the values matching a header field name.
    392         """
    393         return self.get(name)
    394 
    395     def __setitem__(self, name, val):
    396         """Set the value of a header.
    397 
    398         Note: this does not overwrite an existing header with the same field
    399         name.  Use __delitem__() first to delete any existing headers.
    400         """
    401         max_count = self.policy.header_max_count(name)
    402         if max_count:
    403             lname = name.lower()
    404             found = 0
    405             for k, v in self._headers:
    406                 if k.lower() == lname:
    407                     found += 1
    408                     if found >= max_count:
    409                         raise ValueError("There may be at most {} {} headers "
    410                                          "in a message".format(max_count, name))
    411         self._headers.append(self.policy.header_store_parse(name, val))
    412 
    413     def __delitem__(self, name):
    414         """Delete all occurrences of a header, if present.
    415 
    416         Does not raise an exception if the header is missing.
    417         """
    418         name = name.lower()
    419         newheaders = []
    420         for k, v in self._headers:
    421             if k.lower() != name:
    422                 newheaders.append((k, v))
    423         self._headers = newheaders
    424 
    425     def __contains__(self, name):
    426         return name.lower() in [k.lower() for k, v in self._headers]
    427 
    428     def __iter__(self):
    429         for field, value in self._headers:
    430             yield field
    431 
    432     def keys(self):
    433         """Return a list of all the message's header field names.
    434 
    435         These will be sorted in the order they appeared in the original
    436         message, or were added to the message, and may contain duplicates.
    437         Any fields deleted and re-inserted are always appended to the header
    438         list.
    439         """
    440         return [k for k, v in self._headers]
    441 
    442     def values(self):
    443         """Return a list of all the message's header values.
    444 
    445         These will be sorted in the order they appeared in the original
    446         message, or were added to the message, and may contain duplicates.
    447         Any fields deleted and re-inserted are always appended to the header
    448         list.
    449         """
    450         return [self.policy.header_fetch_parse(k, v)
    451                 for k, v in self._headers]
    452 
    453     def items(self):
    454         """Get all the message's header fields and values.
    455 
    456         These will be sorted in the order they appeared in the original
    457         message, or were added to the message, and may contain duplicates.
    458         Any fields deleted and re-inserted are always appended to the header
    459         list.
    460         """
    461         return [(k, self.policy.header_fetch_parse(k, v))
    462                 for k, v in self._headers]
    463 
    464     def get(self, name, failobj=None):
    465         """Get a header value.
    466 
    467         Like __getitem__() but return failobj instead of None when the field
    468         is missing.
    469         """
    470         name = name.lower()
    471         for k, v in self._headers:
    472             if k.lower() == name:
    473                 return self.policy.header_fetch_parse(k, v)
    474         return failobj
    475 
    476     #
    477     # "Internal" methods (public API, but only intended for use by a parser
    478     # or generator, not normal application code.
    479     #
    480 
    481     def set_raw(self, name, value):
    482         """Store name and value in the model without modification.
    483 
    484         This is an "internal" API, intended only for use by a parser.
    485         """
    486         self._headers.append((name, value))
    487 
    488     def raw_items(self):
    489         """Return the (name, value) header pairs without modification.
    490 
    491         This is an "internal" API, intended only for use by a generator.
    492         """
    493         return iter(self._headers.copy())
    494 
    495     #
    496     # Additional useful stuff
    497     #
    498 
    499     def get_all(self, name, failobj=None):
    500         """Return a list of all the values for the named field.
    501 
    502         These will be sorted in the order they appeared in the original
    503         message, and may contain duplicates.  Any fields deleted and
    504         re-inserted are always appended to the header list.
    505 
    506         If no such fields exist, failobj is returned (defaults to None).
    507         """
    508         values = []
    509         name = name.lower()
    510         for k, v in self._headers:
    511             if k.lower() == name:
    512                 values.append(self.policy.header_fetch_parse(k, v))
    513         if not values:
    514             return failobj
    515         return values
    516 
    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it can be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.  Otherwise it will be encoded using
        the utf-8 charset and a language of ''.

        Examples:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        msg.add_header('content-disposition', 'attachment',
                       filename=('utf-8', '', 'Fußballer.ppt'))
        msg.add_header('content-disposition', 'attachment',
                       filename='Fußballer.ppt')
        """
        # The main value (when given) comes first, then every parameter.
        parts = [] if _value is None else [_value]
        for key, val in _params.items():
            dashed = key.replace('_', '-')
            if val is None:
                parts.append(dashed)
            else:
                parts.append(_formatparam(dashed, val))
        self[_name] = SEMISPACE.join(parts)
    546 
    547     def replace_header(self, _name, _value):
    548         """Replace a header.
    549 
    550         Replace the first matching header found in the message, retaining
    551         header order and case.  If no matching header was found, a KeyError is
    552         raised.
    553         """
    554         _name = _name.lower()
    555         for i, (k, v) in zip(range(len(self._headers)), self._headers):
    556             if k.lower() == _name:
    557                 self._headers[i] = self.policy.header_store_parse(k, _value)
    558                 break
    559         else:
    560             raise KeyError(_name)
    561 
    562     #
    563     # Use these three methods instead of the three above.
    564     #
    565 
    566     def get_content_type(self):
    567         """Return the message's content type.
    568 
    569         The returned string is coerced to lower case of the form
    570         `maintype/subtype'.  If there was no Content-Type header in the
    571         message, the default type as given by get_default_type() will be
    572         returned.  Since according to RFC 2045, messages always have a default
    573         type this will always return a value.
    574 
    575         RFC 2045 defines a message's default type to be text/plain unless it
    576         appears inside a multipart/digest container, in which case it would be
    577         message/rfc822.
    578         """
    579         missing = object()
    580         value = self.get('content-type', missing)
    581         if value is missing:
    582             # This should have no parameters
    583             return self.get_default_type()
    584         ctype = _splitparam(value)[0].lower()
    585         # RFC 2045, section 5.2 says if its invalid, use text/plain
    586         if ctype.count('/') != 1:
    587             return 'text/plain'
    588         return ctype
    589 
    590     def get_content_maintype(self):
    591         """Return the message's main content type.
    592 
    593         This is the `maintype' part of the string returned by
    594         get_content_type().
    595         """
    596         ctype = self.get_content_type()
    597         return ctype.split('/')[0]
    598 
    599     def get_content_subtype(self):
    600         """Returns the message's sub-content type.
    601 
    602         This is the `subtype' part of the string returned by
    603         get_content_type().
    604         """
    605         ctype = self.get_content_type()
    606         return ctype.split('/')[1]
    607 
    608     def get_default_type(self):
    609         """Return the `default' content type.
    610 
    611         Most messages have a default content type of text/plain, except for
    612         messages that are subparts of multipart/digest containers.  Such
    613         subparts have a default content type of message/rfc822.
    614         """
    615         return self._default_type
    616 
    617     def set_default_type(self, ctype):
    618         """Set the `default' content type.
    619 
    620         ctype should be either "text/plain" or "message/rfc822", although this
    621         is not enforced.  The default content type is not stored in the
    622         Content-Type header.
    623         """
    624         self._default_type = ctype
    625 
    626     def _get_params_preserve(self, failobj, header):
    627         # Like get_params() but preserves the quoting of values.  BAW:
    628         # should this be part of the public interface?
    629         missing = object()
    630         value = self.get(header, missing)
    631         if value is missing:
    632             return failobj
    633         params = []
    634         for p in _parseparam(value):
    635             try:
    636                 name, val = p.split('=', 1)
    637                 name = name.strip()
    638                 val = val.strip()
    639             except ValueError:
    640                 # Must have been a bare attribute
    641                 name = p.strip()
    642                 val = ''
    643             params.append((name, val))
    644         params = utils.decode_params(params)
    645         return params
    646 
    647     def get_params(self, failobj=None, header='content-type', unquote=True):
    648         """Return the message's Content-Type parameters, as a list.
    649 
    650         The elements of the returned list are 2-tuples of key/value pairs, as
    651         split on the `=' sign.  The left hand side of the `=' is the key,
    652         while the right hand side is the value.  If there is no `=' sign in
    653         the parameter the value is the empty string.  The value is as
    654         described in the get_param() method.
    655 
    656         Optional failobj is the object to return if there is no Content-Type
    657         header.  Optional header is the header to search instead of
    658         Content-Type.  If unquote is True, the value is unquoted.
    659         """
    660         missing = object()
    661         params = self._get_params_preserve(missing, header)
    662         if params is missing:
    663             return failobj
    664         if unquote:
    665             return [(k, _unquotevalue(v)) for k, v in params]
    666         else:
    667             return params
    668 
    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Look up a single parameter of the Content-Type header.

        failobj is returned when the header is missing or does not carry the
        requested parameter.  header selects a header other than
        Content-Type.

        Parameter names are matched case insensitively.  The result is
        either a string or, for an RFC 2231 encoded parameter, a 3-tuple of
        the form (CHARSET, LANGUAGE, VALUE).  CHARSET and LANGUAGE may both
        be None, in which case VALUE should be treated as us-ascii text;
        LANGUAGE can usually be ignored.  The string (or the VALUE element
        of the tuple) is unquoted unless unquote is false.

        Applications that do not care whether the parameter was RFC 2231
        encoded can flatten the result to a string like this:

            rawparam = msg.get_param('foo')
            param = email.utils.collapse_rfc2231_value(rawparam)

        """
        if header not in self:
            return failobj
        wanted = param.lower()
        for key, raw in self._get_params_preserve(failobj, header):
            if key.lower() != wanted:
                continue
            # First match wins.
            return _unquotevalue(raw) if unquote else raw
        return failobj
    702 
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language='', replace=False):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value is replaced
        with the new value; otherwise the parameter is appended.

        If header is Content-Type and the message does not have one yet, the
        header is created as "text/plain" with the new parameter appended,
        as per RFC 2045.

        header may name a different header.  All parameters are quoted as
        necessary unless requote is False.

        If charset is given (and value is not already a tuple) the parameter
        is encoded according to RFC 2231; language is the RFC 2231 language
        and defaults to the empty string.  Both should be strings.

        When the resulting header text differs from the current one, it is
        written back with replace_header() if replace is true; otherwise the
        header is deleted and added again.
        """
        if charset and not isinstance(value, tuple):
            value = (charset, language, value)

        default_ctype = (header not in self
                         and header.lower() == 'content-type')
        ctype = 'text/plain' if default_ctype else self.get(header)

        if not self.get_param(param, header=header):
            # Parameter not present: tack it onto whatever is there.
            formatted = _formatparam(param, value, requote)
            ctype = SEMISPACE.join([ctype, formatted]) if ctype else formatted
        else:
            # Rebuild the header, substituting the new value for the old.
            pieces = []
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    piece = _formatparam(param, value, requote)
                else:
                    piece = _formatparam(old_param, old_value, requote)
                # Empty pieces are dropped until the first non-empty one.
                if piece or pieces:
                    pieces.append(piece)
            ctype = SEMISPACE.join(pieces)

        if ctype != self.get(header):
            if not replace:
                del self[header]
                self[header] = ctype
            else:
                self.replace_header(header, ctype)
    753 
    def del_param(self, param, header='content-type', requote=True):
        """Remove the given parameter completely from the Content-Type header.

        The header is rebuilt from its remaining parameters, skipping every
        parameter whose name matches param case insensitively.  Values are
        quoted as necessary unless requote is False.  header names an
        alternative to the Content-Type header.  Nothing happens when the
        header is absent or the rebuilt text equals the current value.
        """
        if header not in self:
            return
        unwanted = param.lower()
        kept = []
        for name, val in self.get_params(header=header, unquote=requote):
            if name.lower() == unwanted:
                continue
            piece = _formatparam(name, val, requote)
            # Empty pieces are dropped until the first non-empty one.
            if piece or kept:
                kept.append(piece)
        new_ctype = SEMISPACE.join(kept)
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype
    775 
    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string of the form "maintype/subtype" (exactly one
        slash); otherwise ValueError is raised.

        The header is replaced while its existing parameters are carried
        over.  If requote is False the parameters' existing quoting is left
        alone; otherwise they are quoted (the default).

        header may name an alternative header.  Whenever the header is
        Content-Type, a "MIME-Version: 1.0" header is (re)set as well.
        """
        # BAW: should we be strict?
        if type.count('/') != 1:
            raise ValueError
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header in self:
            old_params = self.get_params(header=header, unquote=requote)
            del self[header]
            self[header] = type
            # The first entry is the old type itself, so skip it.
            for name, val in old_params[1:]:
                self.set_param(name, val, header, requote)
        else:
            self[header] = type
    807 
    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The Content-Disposition header's `filename' parameter is tried
        first; if it is not there, the Content-Type header's `name'
        parameter is used instead.  The value is unquoted, collapsed from
        its RFC 2231 form if necessary, and stripped of surrounding
        whitespace.  failobj is returned when neither parameter exists.
        """
        sentinel = object()
        found = self.get_param('filename', sentinel, 'content-disposition')
        if found is sentinel:
            found = self.get_param('name', sentinel, 'content-type')
            if found is sentinel:
                return failobj
        return utils.collapse_rfc2231_value(found).strip()
    823 
    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The value comes from the Content-Type header's `boundary' parameter;
        it is unquoted and has trailing whitespace removed.  failobj is
        returned when there is no such parameter.
        """
        sentinel = object()
        raw = self.get_param('boundary', sentinel)
        if raw is sentinel:
            return failobj
        collapsed = utils.collapse_rfc2231_value(raw)
        # RFC 2046 says that boundaries may begin but not end in w/s
        return collapsed.rstrip()
    836 
    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        Unlike deleting the Content-Type header and adding a fresh one via
        add_header(), this rewrites the header where it already sits, so the
        order of the headers in the message is preserved.  An existing
        boundary parameter is overwritten in place; if there is none, one is
        appended after the other parameters.  The new value is always
        wrapped in double quotes.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        sentinel = object()
        params = self._get_params_preserve(sentinel, 'content-type')
        if params is sentinel:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        quoted = '"%s"' % boundary
        updated = []
        replaced = False
        for name, val in params:
            if name.lower() != 'boundary':
                updated.append((name, val))
                continue
            updated.append(('boundary', quoted))
            replaced = True
        if not replaced:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            updated.append(('boundary', quoted))
        # Render the new header text; valueless parameters are written bare.
        rendered = SEMISPACE.join(
            [name if val == '' else '%s=%s' % (name, val)
             for name, val in updated])
        # Replace the existing Content-Type header with the new value
        self._headers = [
            self.policy.header_store_parse(hname, rendered)
            if hname.lower() == 'content-type' else (hname, hval)
            for hname, hval in self._headers]
    882 
    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

        The result is always lower case.  failobj is returned when there is
        no Content-Type header, when it has no charset parameter, or when
        the charset name contains characters outside the us-ascii range.
        """
        sentinel = object()
        charset = self.get_param('charset', sentinel)
        if charset is sentinel:
            return failobj
        if isinstance(charset, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            codec = charset[0] or 'us-ascii'
            try:
                # LookupError: the codec is unknown to Python.
                # UnicodeError: the text has a character not in that codec.
                raw = charset[2].encode('raw-unicode-escape')
                charset = raw.decode(codec)
            except (LookupError, UnicodeError):
                # Fall back to the undecoded text.
                charset = charset[2]
        # charset characters must be in us-ascii range
        try:
            charset.encode('us-ascii')
        except UnicodeError:
            return failobj
        else:
            # RFC 2046, $4.1.2 says charsets are not case sensitive
            return charset.lower()
    912 
    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The list holds one entry per part visited by walk(): this message
        itself first, followed by every subpart in its payload, so a
        non-multipart message yields a list of length 1.

        Each entry is the result of that part's get_content_charset(): the
        charset parameter of its Content-Type header as a string, or failobj
        (None by default) when the part defines no usable charset.
        """
        charsets = []
        for part in self.walk():
            charsets.append(part.get_content_charset(failobj))
        return charsets
    930 
    def get_content_disposition(self):
        """Return the message's content-disposition if it exists, or None.

        The result is the part of the Content-Disposition header before the
        first `;', stripped and lower-cased.  RFC 2183 defines 'inline' and
        'attachment' as the possible values, but whatever the header carries
        is returned without validation.
        """
        raw = self.get('content-disposition')
        if raw is None:
            return None
        disposition, _params = _splitparam(raw)
        return disposition.lower()
    942 
    943     # I.e. def walk(self): ...
    944     from email.iterators import walk
    945 
    946 
    947 class MIMEPart(Message):
    948 
    949     def __init__(self, policy=None):
    950         if policy is None:
    951             from email.policy import default
    952             policy = default
    953         Message.__init__(self, policy)
    954 
    955 
    956     def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
    957         """Return the entire formatted message as a string.
    958 
    959         Optional 'unixfrom', when true, means include the Unix From_ envelope
    960         header.  maxheaderlen is retained for backward compatibility with the
    961         base Message class, but defaults to None, meaning that the policy value
    962         for max_line_length controls the header maximum length.  'policy' is
    963         passed to the Generator instance used to serialize the mesasge; if it
    964         is not specified the policy associated with the message instance is
    965         used.
    966         """
    967         policy = self.policy if policy is None else policy
    968         if maxheaderlen is None:
    969             maxheaderlen = policy.max_line_length
    970         return super().as_string(maxheaderlen=maxheaderlen, policy=policy)
    971 
    972     def __str__(self):
    973         return self.as_string(policy=self.policy.clone(utf8=True))
    974 
    975     def is_attachment(self):
    976         c_d = self.get('content-disposition')
    977         return False if c_d is None else c_d.content_disposition == 'attachment'
    978 
    979     def _find_body(self, part, preferencelist):
    980         if part.is_attachment():
    981             return
    982         maintype, subtype = part.get_content_type().split('/')
    983         if maintype == 'text':
    984             if subtype in preferencelist:
    985                 yield (preferencelist.index(subtype), part)
    986             return
    987         if maintype != 'multipart':
    988             return
    989         if subtype != 'related':
    990             for subpart in part.iter_parts():
    991                 yield from self._find_body(subpart, preferencelist)
    992             return
    993         if 'related' in preferencelist:
    994             yield (preferencelist.index('related'), part)
    995         candidate = None
    996         start = part.get_param('start')
    997         if start:
    998             for subpart in part.iter_parts():
    999                 if subpart['content-id'] == start:
   1000                     candidate = subpart
   1001                     break
   1002         if candidate is None:
   1003             subparts = part.get_payload()
   1004             candidate = subparts[0] if subparts else None
   1005         if candidate is not None:
   1006             yield from self._find_body(candidate, preferencelist)
   1007 
   1008     def get_body(self, preferencelist=('related', 'html', 'plain')):
   1009         """Return best candidate mime part for display as 'body' of message.
   1010 
   1011         Do a depth first search, starting with self, looking for the first part
   1012         matching each of the items in preferencelist, and return the part
   1013         corresponding to the first item that has a match, or None if no items
   1014         have a match.  If 'related' is not included in preferencelist, consider
   1015         the root part of any multipart/related encountered as a candidate
   1016         match.  Ignore parts with 'Content-Disposition: attachment'.
   1017         """
   1018         best_prio = len(preferencelist)
   1019         body = None
   1020         for prio, part in self._find_body(self, preferencelist):
   1021             if prio < best_prio:
   1022                 best_prio = prio
   1023                 body = part
   1024                 if prio == 0:
   1025                     break
   1026         return body
   1027 
   1028     _body_types = {('text', 'plain'),
   1029                    ('text', 'html'),
   1030                    ('multipart', 'related'),
   1031                    ('multipart', 'alternative')}
   1032     def iter_attachments(self):
   1033         """Return an iterator over the non-main parts of a multipart.
   1034 
   1035         Skip the first of each occurrence of text/plain, text/html,
   1036         multipart/related, or multipart/alternative in the multipart (unless
   1037         they have a 'Content-Disposition: attachment' header) and include all
   1038         remaining subparts in the returned iterator.  When applied to a
   1039         multipart/related, return all parts except the root part.  Return an
   1040         empty iterator when applied to a multipart/alternative or a
   1041         non-multipart.
   1042         """
   1043         maintype, subtype = self.get_content_type().split('/')
   1044         if maintype != 'multipart' or subtype == 'alternative':
   1045             return
   1046         parts = self.get_payload().copy()
   1047         if maintype == 'multipart' and subtype == 'related':
   1048             # For related, we treat everything but the root as an attachment.
   1049             # The root may be indicated by 'start'; if there's no start or we
   1050             # can't find the named start, treat the first subpart as the root.
   1051             start = self.get_param('start')
   1052             if start:
   1053                 found = False
   1054                 attachments = []
   1055                 for part in parts:
   1056                     if part.get('content-id') == start:
   1057                         found = True
   1058                     else:
   1059                         attachments.append(part)
   1060                 if found:
   1061                     yield from attachments
   1062                     return
   1063             parts.pop(0)
   1064             yield from parts
   1065             return
   1066         # Otherwise we more or less invert the remaining logic in get_body.
   1067         # This only really works in edge cases (ex: non-text related or
   1068         # alternatives) if the sending agent sets content-disposition.
   1069         seen = []   # Only skip the first example of each candidate type.
   1070         for part in parts:
   1071             maintype, subtype = part.get_content_type().split('/')
   1072             if ((maintype, subtype) in self._body_types and
   1073                     not part.is_attachment() and subtype not in seen):
   1074                 seen.append(subtype)
   1075                 continue
   1076             yield part
   1077 
   1078     def iter_parts(self):
   1079         """Return an iterator over all immediate subparts of a multipart.
   1080 
   1081         Return an empty iterator for a non-multipart.
   1082         """
   1083         if self.get_content_maintype() == 'multipart':
   1084             yield from self.get_payload()
   1085 
   1086     def get_content(self, *args, content_manager=None, **kw):
   1087         if content_manager is None:
   1088             content_manager = self.policy.content_manager
   1089         return content_manager.get_content(self, *args, **kw)
   1090 
   1091     def set_content(self, *args, content_manager=None, **kw):
   1092         if content_manager is None:
   1093             content_manager = self.policy.content_manager
   1094         content_manager.set_content(self, *args, **kw)
   1095 
   1096     def _make_multipart(self, subtype, disallowed_subtypes, boundary):
   1097         if self.get_content_maintype() == 'multipart':
   1098             existing_subtype = self.get_content_subtype()
   1099             disallowed_subtypes = disallowed_subtypes + (subtype,)
   1100             if existing_subtype in disallowed_subtypes:
   1101                 raise ValueError("Cannot convert {} to {}".format(
   1102                     existing_subtype, subtype))
   1103         keep_headers = []
   1104         part_headers = []
   1105         for name, value in self._headers:
   1106             if name.lower().startswith('content-'):
   1107                 part_headers.append((name, value))
   1108             else:
   1109                 keep_headers.append((name, value))
   1110         if part_headers:
   1111             # There is existing content, move it to the first subpart.
   1112             part = type(self)(policy=self.policy)
   1113             part._headers = part_headers
   1114             part._payload = self._payload
   1115             self._payload = [part]
   1116         else:
   1117             self._payload = []
   1118         self._headers = keep_headers
   1119         self['Content-Type'] = 'multipart/' + subtype
   1120         if boundary is not None:
   1121             self.set_param('boundary', boundary)
   1122 
   1123     def make_related(self, boundary=None):
   1124         self._make_multipart('related', ('alternative', 'mixed'), boundary)
   1125 
   1126     def make_alternative(self, boundary=None):
   1127         self._make_multipart('alternative', ('mixed',), boundary)
   1128 
   1129     def make_mixed(self, boundary=None):
   1130         self._make_multipart('mixed', (), boundary)
   1131 
   1132     def _add_multipart(self, _subtype, *args, _disp=None, **kw):
   1133         if (self.get_content_maintype() != 'multipart' or
   1134                 self.get_content_subtype() != _subtype):
   1135             getattr(self, 'make_' + _subtype)()
   1136         part = type(self)(policy=self.policy)
   1137         part.set_content(*args, **kw)
   1138         if _disp and 'content-disposition' not in part:
   1139             part['Content-Disposition'] = _disp
   1140         self.attach(part)
   1141 
   1142     def add_related(self, *args, **kw):
   1143         self._add_multipart('related', *args, _disp='inline', **kw)
   1144 
   1145     def add_alternative(self, *args, **kw):
   1146         self._add_multipart('alternative', *args, **kw)
   1147 
   1148     def add_attachment(self, *args, **kw):
   1149         self._add_multipart('mixed', *args, _disp='attachment', **kw)
   1150 
   1151     def clear(self):
   1152         self._headers = []
   1153         self._payload = None
   1154 
   1155     def clear_content(self):
   1156         self._headers = [(n, v) for n, v in self._headers
   1157                          if not n.lower().startswith('content-')]
   1158         self._payload = None
   1159 
   1160 
   1161 class EmailMessage(MIMEPart):
   1162 
   1163     def set_content(self, *args, **kw):
   1164         super().set_content(*args, **kw)
   1165         if 'MIME-Version' not in self:
   1166             self['MIME-Version'] = '1.0'
   1167