Home | History | Annotate | Download | only in email
      1 """Representing and manipulating email headers via custom objects.
      2 
      3 This module provides an implementation of the HeaderRegistry API.
      4 The implementation is designed to flexibly follow RFC5322 rules.
      5 
      6 Eventually HeaderRegistry will be a public API, but it isn't yet,
      7 and will probably change some before that happens.
      8 
      9 """
     10 from types import MappingProxyType
     11 
     12 from email import utils
     13 from email import errors
     14 from email import _header_value_parser as parser
     15 
     16 class Address:
     17 
     18     def __init__(self, display_name='', username='', domain='', addr_spec=None):
     19         """Create an object representing a full email address.
     20 
     21         An address can have a 'display_name', a 'username', and a 'domain'.  In
     22         addition to specifying the username and domain separately, they may be
     23         specified together by using the addr_spec keyword *instead of* the
     24         username and domain keywords.  If an addr_spec string is specified it
     25         must be properly quoted according to RFC 5322 rules; an error will be
     26         raised if it is not.
     27 
     28         An Address object has display_name, username, domain, and addr_spec
     29         attributes, all of which are read-only.  The addr_spec and the string
     30         value of the object are both quoted according to RFC5322 rules, but
     31         without any Content Transfer Encoding.
     32 
     33         """
     34         # This clause with its potential 'raise' may only happen when an
     35         # application program creates an Address object using an addr_spec
     36         # keyword.  The email library code itself must always supply username
     37         # and domain.
     38         if addr_spec is not None:
     39             if username or domain:
     40                 raise TypeError("addrspec specified when username and/or "
     41                                 "domain also specified")
     42             a_s, rest = parser.get_addr_spec(addr_spec)
     43             if rest:
     44                 raise ValueError("Invalid addr_spec; only '{}' "
     45                                  "could be parsed from '{}'".format(
     46                                     a_s, addr_spec))
     47             if a_s.all_defects:
     48                 raise a_s.all_defects[0]
     49             username = a_s.local_part
     50             domain = a_s.domain
     51         self._display_name = display_name
     52         self._username = username
     53         self._domain = domain
     54 
     55     @property
     56     def display_name(self):
     57         return self._display_name
     58 
     59     @property
     60     def username(self):
     61         return self._username
     62 
     63     @property
     64     def domain(self):
     65         return self._domain
     66 
     67     @property
     68     def addr_spec(self):
     69         """The addr_spec (username@domain) portion of the address, quoted
     70         according to RFC 5322 rules, but with no Content Transfer Encoding.
     71         """
     72         nameset = set(self.username)
     73         if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
     74             lp = parser.quote_string(self.username)
     75         else:
     76             lp = self.username
     77         if self.domain:
     78             return lp + '@' + self.domain
     79         if not lp:
     80             return '<>'
     81         return lp
     82 
     83     def __repr__(self):
     84         return "{}(display_name={!r}, username={!r}, domain={!r})".format(
     85                         self.__class__.__name__,
     86                         self.display_name, self.username, self.domain)
     87 
     88     def __str__(self):
     89         nameset = set(self.display_name)
     90         if len(nameset) > len(nameset-parser.SPECIALS):
     91             disp = parser.quote_string(self.display_name)
     92         else:
     93             disp = self.display_name
     94         if disp:
     95             addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
     96             return "{} <{}>".format(disp, addr_spec)
     97         return self.addr_spec
     98 
     99     def __eq__(self, other):
    100         if type(other) != type(self):
    101             return False
    102         return (self.display_name == other.display_name and
    103                 self.username == other.username and
    104                 self.domain == other.domain)
    105 
    106 
    107 class Group:
    108 
    109     def __init__(self, display_name=None, addresses=None):
    110         """Create an object representing an address group.
    111 
    112         An address group consists of a display_name followed by colon and a
    113         list of addresses (see Address) terminated by a semi-colon.  The Group
    114         is created by specifying a display_name and a possibly empty list of
    115         Address objects.  A Group can also be used to represent a single
    116         address that is not in a group, which is convenient when manipulating
    117         lists that are a combination of Groups and individual Addresses.  In
    118         this case the display_name should be set to None.  In particular, the
    119         string representation of a Group whose display_name is None is the same
    120         as the Address object, if there is one and only one Address object in
    121         the addresses list.
    122 
    123         """
    124         self._display_name = display_name
    125         self._addresses = tuple(addresses) if addresses else tuple()
    126 
    127     @property
    128     def display_name(self):
    129         return self._display_name
    130 
    131     @property
    132     def addresses(self):
    133         return self._addresses
    134 
    135     def __repr__(self):
    136         return "{}(display_name={!r}, addresses={!r}".format(
    137                  self.__class__.__name__,
    138                  self.display_name, self.addresses)
    139 
    140     def __str__(self):
    141         if self.display_name is None and len(self.addresses)==1:
    142             return str(self.addresses[0])
    143         disp = self.display_name
    144         if disp is not None:
    145             nameset = set(disp)
    146             if len(nameset) > len(nameset-parser.SPECIALS):
    147                 disp = parser.quote_string(disp)
    148         adrstr = ", ".join(str(x) for x in self.addresses)
    149         adrstr = ' ' + adrstr if adrstr else adrstr
    150         return "{}:{};".format(disp, adrstr)
    151 
    152     def __eq__(self, other):
    153         if type(other) != type(self):
    154             return False
    155         return (self.display_name == other.display_name and
    156                 self.addresses == other.addresses)
    157 
    158 
    159 # Header Classes #
    160 
    161 class BaseHeader(str):
    162 
    163     """Base class for message headers.
    164 
    165     Implements generic behavior and provides tools for subclasses.
    166 
    167     A subclass must define a classmethod named 'parse' that takes an unfolded
    168     value string and a dictionary as its arguments.  The dictionary will
    169     contain one key, 'defects', initialized to an empty list.  After the call
    170     the dictionary must contain two additional keys: parse_tree, set to the
    171     parse tree obtained from parsing the header, and 'decoded', set to the
    172     string value of the idealized representation of the data from the value.
    173     (That is, encoded words are decoded, and values that have canonical
    174     representations are so represented.)
    175 
    176     The defects key is intended to collect parsing defects, which the message
    177     parser will subsequently dispose of as appropriate.  The parser should not,
    178     insofar as practical, raise any errors.  Defects should be added to the
    179     list instead.  The standard header parsers register defects for RFC
    180     compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    181     errors.
    182 
    183     The parse method may add additional keys to the dictionary.  In this case
    184     the subclass must define an 'init' method, which will be passed the
    185     dictionary as its keyword arguments.  The method should use (usually by
    186     setting them as the value of similarly named attributes) and remove all the
    187     extra keys added by its parse method, and then use super to call its parent
    188     class with the remaining arguments and keywords.
    189 
    190     The subclass should also make sure that a 'max_count' attribute is defined
    191     that is either None or 1. XXX: need to better define this API.
    192 
    193     """
    194 
    195     def __new__(cls, name, value):
    196         kwds = {'defects': []}
    197         cls.parse(value, kwds)
    198         if utils._has_surrogates(kwds['decoded']):
    199             kwds['decoded'] = utils._sanitize(kwds['decoded'])
    200         self = str.__new__(cls, kwds['decoded'])
    201         del kwds['decoded']
    202         self.init(name, **kwds)
    203         return self
    204 
    205     def init(self, name, *, parse_tree, defects):
    206         self._name = name
    207         self._parse_tree = parse_tree
    208         self._defects = defects
    209 
    210     @property
    211     def name(self):
    212         return self._name
    213 
    214     @property
    215     def defects(self):
    216         return tuple(self._defects)
    217 
    218     def __reduce__(self):
    219         return (
    220             _reconstruct_header,
    221             (
    222                 self.__class__.__name__,
    223                 self.__class__.__bases__,
    224                 str(self),
    225             ),
    226             self.__dict__)
    227 
    228     @classmethod
    229     def _reconstruct(cls, value):
    230         return str.__new__(cls, value)
    231 
    232     def fold(self, *, policy):
    233         """Fold header according to policy.
    234 
    235         The parsed representation of the header is folded according to
    236         RFC5322 rules, as modified by the policy.  If the parse tree
    237         contains surrogateescaped bytes, the bytes are CTE encoded using
    238         the charset 'unknown-8bit".
    239 
    240         Any non-ASCII characters in the parse tree are CTE encoded using
    241         charset utf-8. XXX: make this a policy setting.
    242 
    243         The returned value is an ASCII-only string possibly containing linesep
    244         characters, and ending with a linesep character.  The string includes
    245         the header name and the ': ' separator.
    246 
    247         """
    248         # At some point we need to put fws here iif it was in the source.
    249         header = parser.Header([
    250             parser.HeaderLabel([
    251                 parser.ValueTerminal(self.name, 'header-name'),
    252                 parser.ValueTerminal(':', 'header-sep')]),
    253             ])
    254         if self._parse_tree:
    255             header.append(
    256                 parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
    257         header.append(self._parse_tree)
    258         return header.fold(policy=policy)
    259 
    260 
    261 def _reconstruct_header(cls_name, bases, value):
    262     return type(cls_name, bases, {})._reconstruct(value)
    263 
    264 
    265 class UnstructuredHeader:
    266 
    267     max_count = None
    268     value_parser = staticmethod(parser.get_unstructured)
    269 
    270     @classmethod
    271     def parse(cls, value, kwds):
    272         kwds['parse_tree'] = cls.value_parser(value)
    273         kwds['decoded'] = str(kwds['parse_tree'])
    274 
    275 
class UniqueUnstructuredHeader(UnstructuredHeader):

    # Identical to UnstructuredHeader except for max_count.  Presumably
    # max_count is the number of times the header may appear in a message;
    # nothing in this module enforces it -- TODO confirm against the policy
    # code that reads it.
    max_count = 1
    279 
    280 
    281 class DateHeader:
    282 
    283     """Header whose value consists of a single timestamp.
    284 
    285     Provides an additional attribute, datetime, which is either an aware
    286     datetime using a timezone, or a naive datetime if the timezone
    287     in the input string is -0000.  Also accepts a datetime as input.
    288     The 'value' attribute is the normalized form of the timestamp,
    289     which means it is the output of format_datetime on the datetime.
    290     """
    291 
    292     max_count = None
    293 
    294     # This is used only for folding, not for creating 'decoded'.
    295     value_parser = staticmethod(parser.get_unstructured)
    296 
    297     @classmethod
    298     def parse(cls, value, kwds):
    299         if not value:
    300             kwds['defects'].append(errors.HeaderMissingRequiredValue())
    301             kwds['datetime'] = None
    302             kwds['decoded'] = ''
    303             kwds['parse_tree'] = parser.TokenList()
    304             return
    305         if isinstance(value, str):
    306             value = utils.parsedate_to_datetime(value)
    307         kwds['datetime'] = value
    308         kwds['decoded'] = utils.format_datetime(kwds['datetime'])
    309         kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
    310 
    311     def init(self, *args, **kw):
    312         self._datetime = kw.pop('datetime')
    313         super().init(*args, **kw)
    314 
    315     @property
    316     def datetime(self):
    317         return self._datetime
    318 
    319 
class UniqueDateHeader(DateHeader):

    # Identical to DateHeader except for max_count.  Presumably max_count is
    # the number of times the header may appear in a message; nothing in this
    # module enforces it -- TODO confirm against the policy code that reads it.
    max_count = 1
    323 
    324 
    325 class AddressHeader:
    326 
    327     max_count = None
    328 
    329     @staticmethod
    330     def value_parser(value):
    331         address_list, value = parser.get_address_list(value)
    332         assert not value, 'this should not happen'
    333         return address_list
    334 
    335     @classmethod
    336     def parse(cls, value, kwds):
    337         if isinstance(value, str):
    338             # We are translating here from the RFC language (address/mailbox)
    339             # to our API language (group/address).
    340             kwds['parse_tree'] = address_list = cls.value_parser(value)
    341             groups = []
    342             for addr in address_list.addresses:
    343                 groups.append(Group(addr.display_name,
    344                                     [Address(mb.display_name or '',
    345                                              mb.local_part or '',
    346                                              mb.domain or '')
    347                                      for mb in addr.all_mailboxes]))
    348             defects = list(address_list.all_defects)
    349         else:
    350             # Assume it is Address/Group stuff
    351             if not hasattr(value, '__iter__'):
    352                 value = [value]
    353             groups = [Group(None, [item]) if not hasattr(item, 'addresses')
    354                                           else item
    355                                     for item in value]
    356             defects = []
    357         kwds['groups'] = groups
    358         kwds['defects'] = defects
    359         kwds['decoded'] = ', '.join([str(item) for item in groups])
    360         if 'parse_tree' not in kwds:
    361             kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
    362 
    363     def init(self, *args, **kw):
    364         self._groups = tuple(kw.pop('groups'))
    365         self._addresses = None
    366         super().init(*args, **kw)
    367 
    368     @property
    369     def groups(self):
    370         return self._groups
    371 
    372     @property
    373     def addresses(self):
    374         if self._addresses is None:
    375             self._addresses = tuple(address for group in self._groups
    376                                             for address in group.addresses)
    377         return self._addresses
    378 
    379 
class UniqueAddressHeader(AddressHeader):

    # Identical to AddressHeader except for max_count.  Presumably max_count
    # is the number of times the header may appear in a message; nothing in
    # this module enforces it -- TODO confirm against the policy code that
    # reads it.
    max_count = 1
    383 
    384 
    385 class SingleAddressHeader(AddressHeader):
    386 
    387     @property
    388     def address(self):
    389         if len(self.addresses)!=1:
    390             raise ValueError(("value of single address header {} is not "
    391                 "a single address").format(self.name))
    392         return self.addresses[0]
    393 
    394 
class UniqueSingleAddressHeader(SingleAddressHeader):

    # Identical to SingleAddressHeader except for max_count.  Presumably
    # max_count is the number of times the header may appear in a message;
    # nothing in this module enforces it -- TODO confirm against the policy
    # code that reads it.
    max_count = 1
    398 
    399 
    400 class MIMEVersionHeader:
    401 
    402     max_count = 1
    403 
    404     value_parser = staticmethod(parser.parse_mime_version)
    405 
    406     @classmethod
    407     def parse(cls, value, kwds):
    408         kwds['parse_tree'] = parse_tree = cls.value_parser(value)
    409         kwds['decoded'] = str(parse_tree)
    410         kwds['defects'].extend(parse_tree.all_defects)
    411         kwds['major'] = None if parse_tree.minor is None else parse_tree.major
    412         kwds['minor'] = parse_tree.minor
    413         if parse_tree.minor is not None:
    414             kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
    415         else:
    416             kwds['version'] = None
    417 
    418     def init(self, *args, **kw):
    419         self._version = kw.pop('version')
    420         self._major = kw.pop('major')
    421         self._minor = kw.pop('minor')
    422         super().init(*args, **kw)
    423 
    424     @property
    425     def major(self):
    426         return self._major
    427 
    428     @property
    429     def minor(self):
    430         return self._minor
    431 
    432     @property
    433     def version(self):
    434         return self._version
    435 
    436 
    437 class ParameterizedMIMEHeader:
    438 
    439     # Mixin that handles the params dict.  Must be subclassed and
    440     # a property value_parser for the specific header provided.
    441 
    442     max_count = 1
    443 
    444     @classmethod
    445     def parse(cls, value, kwds):
    446         kwds['parse_tree'] = parse_tree = cls.value_parser(value)
    447         kwds['decoded'] = str(parse_tree)
    448         kwds['defects'].extend(parse_tree.all_defects)
    449         if parse_tree.params is None:
    450             kwds['params'] = {}
    451         else:
    452             # The MIME RFCs specify that parameter ordering is arbitrary.
    453             kwds['params'] = {utils._sanitize(name).lower():
    454                                     utils._sanitize(value)
    455                                for name, value in parse_tree.params}
    456 
    457     def init(self, *args, **kw):
    458         self._params = kw.pop('params')
    459         super().init(*args, **kw)
    460 
    461     @property
    462     def params(self):
    463         return MappingProxyType(self._params)
    464 
    465 
    466 class ContentTypeHeader(ParameterizedMIMEHeader):
    467 
    468     value_parser = staticmethod(parser.parse_content_type_header)
    469 
    470     def init(self, *args, **kw):
    471         super().init(*args, **kw)
    472         self._maintype = utils._sanitize(self._parse_tree.maintype)
    473         self._subtype = utils._sanitize(self._parse_tree.subtype)
    474 
    475     @property
    476     def maintype(self):
    477         return self._maintype
    478 
    479     @property
    480     def subtype(self):
    481         return self._subtype
    482 
    483     @property
    484     def content_type(self):
    485         return self.maintype + '/' + self.subtype
    486 
    487 
    488 class ContentDispositionHeader(ParameterizedMIMEHeader):
    489 
    490     value_parser = staticmethod(parser.parse_content_disposition_header)
    491 
    492     def init(self, *args, **kw):
    493         super().init(*args, **kw)
    494         cd = self._parse_tree.content_disposition
    495         self._content_disposition = cd if cd is None else utils._sanitize(cd)
    496 
    497     @property
    498     def content_disposition(self):
    499         return self._content_disposition
    500 
    501 
    502 class ContentTransferEncodingHeader:
    503 
    504     max_count = 1
    505 
    506     value_parser = staticmethod(parser.parse_content_transfer_encoding_header)
    507 
    508     @classmethod
    509     def parse(cls, value, kwds):
    510         kwds['parse_tree'] = parse_tree = cls.value_parser(value)
    511         kwds['decoded'] = str(parse_tree)
    512         kwds['defects'].extend(parse_tree.all_defects)
    513 
    514     def init(self, *args, **kw):
    515         super().init(*args, **kw)
    516         self._cte = utils._sanitize(self._parse_tree.cte)
    517 
    518     @property
    519     def cte(self):
    520         return self._cte
    521 
    522 
    523 # The header factory #
    524 
# Default mapping from lower-cased header name to the specialized class that
# parses it.  HeaderRegistry copies this table into its own registry when
# use_default_map is true; names that are not listed fall back to the
# registry's default_class.
_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
    546 
    547 class HeaderRegistry:
    548 
    549     """A header_factory and header registry."""
    550 
    551     def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
    552                        use_default_map=True):
    553         """Create a header_factory that works with the Policy API.
    554 
    555         base_class is the class that will be the last class in the created
    556         header class's __bases__ list.  default_class is the class that will be
    557         used if "name" (see __call__) does not appear in the registry.
    558         use_default_map controls whether or not the default mapping of names to
    559         specialized classes is copied in to the registry when the factory is
    560         created.  The default is True.
    561 
    562         """
    563         self.registry = {}
    564         self.base_class = base_class
    565         self.default_class = default_class
    566         if use_default_map:
    567             self.registry.update(_default_header_map)
    568 
    569     def map_to_type(self, name, cls):
    570         """Register cls as the specialized class for handling "name" headers.
    571 
    572         """
    573         self.registry[name.lower()] = cls
    574 
    575     def __getitem__(self, name):
    576         cls = self.registry.get(name.lower(), self.default_class)
    577         return type('_'+cls.__name__, (cls, self.base_class), {})
    578 
    579     def __call__(self, name, value):
    580         """Create a header instance for header 'name' from 'value'.
    581 
    582         Creates a header instance by creating a specialized class for parsing
    583         and representing the specified header by combining the factory
    584         base_class with a specialized class from the registry or the
    585         default_class, and passing the name and value to the constructed
    586         class's constructor.
    587 
    588         """
    589         return self[name](name, value)
    590