Home | History | Annotate | Download | only in email
      1 # Copyright (C) 2001-2010 Python Software Foundation
      2 # Author: Barry Warsaw
      3 # Contact: email-sig@python.org
      4 
      5 """Miscellaneous utilities."""
      6 
# Names exported by a star-import of this module.  Anything defined below
# but not listed here (for example fix_eols and the underscore-prefixed
# helpers) is not picked up by ``import *``.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
     22 
     23 import os
     24 import re
     25 import time
     26 import base64
     27 import random
     28 import socket
     29 import urllib
     30 import warnings
     31 
     32 from email._parseaddr import quote
     33 from email._parseaddr import AddressList as _AddressList
     34 from email._parseaddr import mktime_tz
     35 
     36 # We need wormarounds for bugs in these methods in older Pythons (see below)
     37 from email._parseaddr import parsedate as _parsedate
     38 from email._parseaddr import parsedate_tz as _parsedate_tz
     39 
     40 from quopri import decodestring as _qdecode
     41 
     42 # Intrapackage imports
     43 from email.encoders import _bencode, _qencode
     44 
# String constants shared by the helpers in this module.
COMMASPACE = ', '       # joins header field values in getaddresses()
EMPTYSTRING = ''        # joins RFC 2231 continuation segments
UEMPTYSTRING = u''
CRLF = '\r\n'           # line ending written by fix_eols()
TICK = "'"              # field delimiter split on by decode_rfc2231()

# Any character in this class makes formataddr() wrap the display name in
# double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash in the display name.
escapesre = re.compile(r'[][\\()"]')
     53 
     54 
     55 
     57 # Helpers
     58 
     59 def _identity(s):
     60     return s
     61 
     62 
     63 def _bdecode(s):
     64     """Decodes a base64 string.
     65 
     66     This function is equivalent to base64.decodestring and it's retained only
     67     for backward compatibility. It used to remove the last \\n of the decoded
     68     string, if it had any (see issue 7143).
     69     """
     70     if not s:
     71         return s
     72     return base64.decodestring(s)
     73 
     74 
     75 
     77 def fix_eols(s):
     78     """Replace all line-ending characters with \\r\\n."""
     79     # Fix newlines with no preceding carriage return
     80     s = re.sub(r'(?<!\r)\n', CRLF, s)
     81     # Fix carriage returns with no following newline
     82     s = re.sub(r'\r(?!\n)', CRLF, s)
     83     return s
     84 
     85 
     86 
     88 def formataddr(pair):
     89     """The inverse of parseaddr(), this takes a 2-tuple of the form
     90     (realname, email_address) and returns the string value suitable
     91     for an RFC 2822 From, To or Cc header.
     92 
     93     If the first element of pair is false, then the second element is
     94     returned unmodified.
     95     """
     96     name, address = pair
     97     if name:
     98         quotes = ''
     99         if specialsre.search(name):
    100             quotes = '"'
    101         name = escapesre.sub(r'\\\g<0>', name)
    102         return '%s%s%s <%s>' % (quotes, name, quotes, address)
    103     return address
    104 
    105 
    106 
    108 def getaddresses(fieldvalues):
    109     """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    110     all = COMMASPACE.join(fieldvalues)
    111     a = _AddressList(all)
    112     return a.addresslist
    113 
    114 
    115 
# Matches one encoded-word of the form "=?charset?encoding?atom?=" (the
# syntax defined by RFC 2047) and exposes its three fields as the named
# groups 'charset', 'encoding' and 'atom'.  The encoding letter is matched
# case-insensitively because of re.IGNORECASE.
# NOTE(review): no function visible in this file references ecre; it is
# presumably kept for external users -- confirm before removing.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
    126 
    127 
    128 
    130 def formatdate(timeval=None, localtime=False, usegmt=False):
    131     """Returns a date string as specified by RFC 2822, e.g.:
    132 
    133     Fri, 09 Nov 2001 01:08:47 -0000
    134 
    135     Optional timeval if given is a floating point time value as accepted by
    136     gmtime() and localtime(), otherwise the current time is used.
    137 
    138     Optional localtime is a flag that when True, interprets timeval, and
    139     returns a date relative to the local timezone instead of UTC, properly
    140     taking daylight savings time into account.
    141 
    142     Optional argument usegmt means that the timezone is written out as
    143     an ascii string, not numeric one (so "GMT" instead of "+0000"). This
    144     is needed for HTTP, and is only used when localtime==False.
    145     """
    146     # Note: we cannot use strftime() because that honors the locale and RFC
    147     # 2822 requires that day and month names be the English abbreviations.
    148     if timeval is None:
    149         timeval = time.time()
    150     if localtime:
    151         now = time.localtime(timeval)
    152         # Calculate timezone offset, based on whether the local zone has
    153         # daylight savings time, and whether DST is in effect.
    154         if time.daylight and now[-1]:
    155             offset = time.altzone
    156         else:
    157             offset = time.timezone
    158         hours, minutes = divmod(abs(offset), 3600)
    159         # Remember offset is in seconds west of UTC, but the timezone is in
    160         # minutes east of UTC, so the signs differ.
    161         if offset > 0:
    162             sign = '-'
    163         else:
    164             sign = '+'
    165         zone = '%s%02d%02d' % (sign, hours, minutes // 60)
    166     else:
    167         now = time.gmtime(timeval)
    168         # Timezone offset is always -0000
    169         if usegmt:
    170             zone = 'GMT'
    171         else:
    172             zone = '-0000'
    173     return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
    174         ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
    175         now[2],
    176         ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    177          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
    178         now[0], now[3], now[4], now[5],
    179         zone)
    180 
    181 
    182 
    184 def make_msgid(idstring=None):
    185     """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
    186 
    187     <20020201195627.33539.96671 (at] nightshade.la.mastaler.com>
    188 
    189     Optional idstring if given is a string used to strengthen the
    190     uniqueness of the message id.
    191     """
    192     timeval = time.time()
    193     utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
    194     pid = os.getpid()
    195     randint = random.randrange(100000)
    196     if idstring is None:
    197         idstring = ''
    198     else:
    199         idstring = '.' + idstring
    200     idhost = socket.getfqdn()
    201     msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
    202     return msgid
    203 
    204 
    205 
    207 # These functions are in the standalone mimelib version only because they've
    208 # subsequently been fixed in the latest Python versions.  We use this to worm
    209 # around broken older Pythons.
    210 def parsedate(data):
    211     if not data:
    212         return None
    213     return _parsedate(data)
    214 
    215 
    216 def parsedate_tz(data):
    217     if not data:
    218         return None
    219     return _parsedate_tz(data)
    220 
    221 
    222 def parseaddr(addr):
    223     addrs = _AddressList(addr).addresslist
    224     if not addrs:
    225         return '', ''
    226     return addrs[0]
    227 
    228 
    229 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
    230 def unquote(str):
    231     """Remove quotes from a string."""
    232     if len(str) > 1:
    233         if str.startswith('"') and str.endswith('"'):
    234             return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
    235         if str.startswith('<') and str.endswith('>'):
    236             return str[1:-1]
    237     return str
    238 
    239 
    240 
    242 # RFC2231-related functions - parameter encoding and decoding
    243 def decode_rfc2231(s):
    244     """Decode string according to RFC 2231"""
    245     parts = s.split(TICK, 2)
    246     if len(parts) <= 2:
    247         return None, None, s
    248     return parts
    249 
    250 
    251 def encode_rfc2231(s, charset=None, language=None):
    252     """Encode string according to RFC 2231.
    253 
    254     If neither charset nor language is given, then s is returned as-is.  If
    255     charset is given but not language, the string is encoded using the empty
    256     string for language.
    257     """
    258     import urllib
    259     s = urllib.quote(s, safe='')
    260     if charset is None and language is None:
    261         return s
    262     if language is None:
    263         language = ''
    264     return "%s'%s'%s" % (charset, language, s)
    265 
    266 
    267 rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
    268 
    269 def decode_params(params):
    270     """Decode parameters list according to RFC 2231.
    271 
    272     params is a sequence of 2-tuples containing (param name, string value).
    273     """
    274     # Copy params so we don't mess with the original
    275     params = params[:]
    276     new_params = []
    277     # Map parameter's name to a list of continuations.  The values are a
    278     # 3-tuple of the continuation number, the string value, and a flag
    279     # specifying whether a particular segment is %-encoded.
    280     rfc2231_params = {}
    281     name, value = params.pop(0)
    282     new_params.append((name, value))
    283     while params:
    284         name, value = params.pop(0)
    285         if name.endswith('*'):
    286             encoded = True
    287         else:
    288             encoded = False
    289         value = unquote(value)
    290         mo = rfc2231_continuation.match(name)
    291         if mo:
    292             name, num = mo.group('name', 'num')
    293             if num is not None:
    294                 num = int(num)
    295             rfc2231_params.setdefault(name, []).append((num, value, encoded))
    296         else:
    297             new_params.append((name, '"%s"' % quote(value)))
    298     if rfc2231_params:
    299         for name, continuations in rfc2231_params.items():
    300             value = []
    301             extended = False
    302             # Sort by number
    303             continuations.sort()
    304             # And now append all values in numerical order, converting
    305             # %-encodings for the encoded segments.  If any of the
    306             # continuation names ends in a *, then the entire string, after
    307             # decoding segments and concatenating, must have the charset and
    308             # language specifiers at the beginning of the string.
    309             for num, s, encoded in continuations:
    310                 if encoded:
    311                     s = urllib.unquote(s)
    312                     extended = True
    313                 value.append(s)
    314             value = quote(EMPTYSTRING.join(value))
    315             if extended:
    316                 charset, language, value = decode_rfc2231(value)
    317                 new_params.append((name, (charset, language, '"%s"' % value)))
    318             else:
    319                 new_params.append((name, '"%s"' % value))
    320     return new_params
    321 
    322 def collapse_rfc2231_value(value, errors='replace',
    323                            fallback_charset='us-ascii'):
    324     if isinstance(value, tuple):
    325         rawval = unquote(value[2])
    326         charset = value[0] or 'us-ascii'
    327         try:
    328             return unicode(rawval, charset, errors)
    329         except LookupError:
    330             # XXX charset is unknown to Python.
    331             return unicode(rawval, fallback_charset, errors)
    332     else:
    333         return unquote(value)
    334