Home | History | Annotate | Download | only in email
      1 # Copyright (C) 2001-2010 Python Software Foundation
      2 # Author: Barry Warsaw
      3 # Contact: email-sig@python.org
      4 
      5 """Miscellaneous utilities."""
      6 
# Names exported by a star-import of this module.  Helpers defined below
# that are not listed here (for example fix_eols) are not exported that way.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
     22 
     23 import os
     24 import re
     25 import time
     26 import base64
     27 import random
     28 import socket
     29 import urllib
     30 import warnings
     31 
     32 from email._parseaddr import quote
     33 from email._parseaddr import AddressList as _AddressList
     34 from email._parseaddr import mktime_tz
     35 
     36 # We need workarounds for bugs in these methods in older Pythons (see below)
     37 from email._parseaddr import parsedate as _parsedate
     38 from email._parseaddr import parsedate_tz as _parsedate_tz
     39 
     40 from quopri import decodestring as _qdecode
     41 
     42 # Intrapackage imports
     43 from email.encoders import _bencode, _qencode
     44 
# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# Matches any single one of:  ] [ \ ( ) < > @ , : ; " .
# formataddr() wraps the real name in double quotes when this matches.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Matches any single one of:  ] [ \ ( ) "
# formataddr() prefixes every match with a backslash.
escapesre = re.compile(r'[][\\()"]')
     53 
     54 
     55 
     57 # Helpers
     58 
def _identity(s):
    """Return s unchanged."""
    return s
     61 
     62 
     63 def _bdecode(s):
     64     """Decodes a base64 string.
     65 
     66     This function is equivalent to base64.decodestring and it's retained only
     67     for backward compatibility. It used to remove the last \\n of the decoded
     68     string, if it had any (see issue 7143).
     69     """
     70     if not s:
     71         return s
     72     return base64.decodestring(s)
     73 
     74 
     75 
     77 def fix_eols(s):
     78     """Replace all line-ending characters with \\r\\n."""
     79     # Fix newlines with no preceding carriage return
     80     s = re.sub(r'(?<!\r)\n', CRLF, s)
     81     # Fix carriage returns with no following newline
     82     s = re.sub(r'\r(?!\n)', CRLF, s)
     83     return s
     84 
     85 
     86 
     88 def formataddr(pair):
     89     """The inverse of parseaddr(), this takes a 2-tuple of the form
     90     (realname, email_address) and returns the string value suitable
     91     for an RFC 2822 From, To or Cc header.
     92 
     93     If the first element of pair is false, then the second element is
     94     returned unmodified.
     95     """
     96     name, address = pair
     97     if name:
     98         quotes = ''
     99         if specialsre.search(name):
    100             quotes = '"'
    101         name = escapesre.sub(r'\\\g<0>', name)
    102         return '%s%s%s <%s>' % (quotes, name, quotes, address)
    103     return address
    104 
    105 
    106 
    108 def getaddresses(fieldvalues):
    109     """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    110     all = COMMASPACE.join(fieldvalues)
    111     a = _AddressList(all)
    112     return a.addresslist
    113 
    114 
    115 
# Matches text of the form =?charset?encoding?atom?= and exposes the three
# fields as the named groups "charset", "encoding" and "atom".
# NOTE(review): this looks like the RFC 2047 encoded-word syntax -- confirm
# against the code that uses this pattern (no user is visible in this part
# of the module).
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
    126 
    127 
    128 
    130 def formatdate(timeval=None, localtime=False, usegmt=False):
    131     """Returns a date string as specified by RFC 2822, e.g.:
    132 
    133     Fri, 09 Nov 2001 01:08:47 -0000
    134 
    135     Optional timeval if given is a floating point time value as accepted by
    136     gmtime() and localtime(), otherwise the current time is used.
    137 
    138     Optional localtime is a flag that when True, interprets timeval, and
    139     returns a date relative to the local timezone instead of UTC, properly
    140     taking daylight savings time into account.
    141 
    142     Optional argument usegmt means that the timezone is written out as
    143     an ascii string, not numeric one (so "GMT" instead of "+0000"). This
    144     is needed for HTTP, and is only used when localtime==False.
    145     """
    146     # Note: we cannot use strftime() because that honors the locale and RFC
    147     # 2822 requires that day and month names be the English abbreviations.
    148     if timeval is None:
    149         timeval = time.time()
    150     if localtime:
    151         now = time.localtime(timeval)
    152         # Calculate timezone offset, based on whether the local zone has
    153         # daylight savings time, and whether DST is in effect.
    154         if time.daylight and now[-1]:
    155             offset = time.altzone
    156         else:
    157             offset = time.timezone
    158         hours, minutes = divmod(abs(offset), 3600)
    159         # Remember offset is in seconds west of UTC, but the timezone is in
    160         # minutes east of UTC, so the signs differ.
    161         if offset > 0:
    162             sign = '-'
    163         else:
    164             sign = '+'
    165         zone = '%s%02d%02d' % (sign, hours, minutes // 60)
    166     else:
    167         now = time.gmtime(timeval)
    168         # Timezone offset is always -0000
    169         if usegmt:
    170             zone = 'GMT'
    171         else:
    172             zone = '-0000'
    173     return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
    174         ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
    175         now[2],
    176         ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    177          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
    178         now[0], now[3], now[4], now[5],
    179         zone)
    180 
    181 
    182 
    184 def make_msgid(idstring=None):
    185     """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
    186 
    187     <142480216486.20800.16526388040877946887 (at] nightshade.la.mastaler.com>
    188 
    189     Optional idstring if given is a string used to strengthen the
    190     uniqueness of the message id.
    191     """
    192     timeval = int(time.time()*100)
    193     pid = os.getpid()
    194     randint = random.getrandbits(64)
    195     if idstring is None:
    196         idstring = ''
    197     else:
    198         idstring = '.' + idstring
    199     idhost = socket.getfqdn()
    200     msgid = '<%d.%d.%d%s@%s>' % (timeval, pid, randint, idstring, idhost)
    201     return msgid
    202 
    203 
    204 
    206 # These functions are in the standalone mimelib version only because they've
    207 # subsequently been fixed in the latest Python versions.  We use this to worm
    208 # around broken older Pythons.
    209 def parsedate(data):
    210     if not data:
    211         return None
    212     return _parsedate(data)
    213 
    214 
    215 def parsedate_tz(data):
    216     if not data:
    217         return None
    218     return _parsedate_tz(data)
    219 
    220 
    221 def parseaddr(addr):
    222     addrs = _AddressList(addr).addresslist
    223     if not addrs:
    224         return '', ''
    225     return addrs[0]
    226 
    227 
    228 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
    229 def unquote(str):
    230     """Remove quotes from a string."""
    231     if len(str) > 1:
    232         if str.startswith('"') and str.endswith('"'):
    233             return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
    234         if str.startswith('<') and str.endswith('>'):
    235             return str[1:-1]
    236     return str
    237 
    238 
    239 
    241 # RFC2231-related functions - parameter encoding and decoding
    242 def decode_rfc2231(s):
    243     """Decode string according to RFC 2231"""
    244     parts = s.split(TICK, 2)
    245     if len(parts) <= 2:
    246         return None, None, s
    247     return parts
    248 
    249 
    250 def encode_rfc2231(s, charset=None, language=None):
    251     """Encode string according to RFC 2231.
    252 
    253     If neither charset nor language is given, then s is returned as-is.  If
    254     charset is given but not language, the string is encoded using the empty
    255     string for language.
    256     """
    257     import urllib
    258     s = urllib.quote(s, safe='')
    259     if charset is None and language is None:
    260         return s
    261     if language is None:
    262         language = ''
    263     return "%s'%s'%s" % (charset, language, s)
    264 
    265 
# Matches parameter names of the form  name*,  name*N  or  name*N*  (N being
# decimal digits).  Group "name" captures the base name and group "num" the
# digits, or None when they are absent.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
    267 
    268 def decode_params(params):
    269     """Decode parameters list according to RFC 2231.
    270 
    271     params is a sequence of 2-tuples containing (param name, string value).
    272     """
    273     # Copy params so we don't mess with the original
    274     params = params[:]
    275     new_params = []
    276     # Map parameter's name to a list of continuations.  The values are a
    277     # 3-tuple of the continuation number, the string value, and a flag
    278     # specifying whether a particular segment is %-encoded.
    279     rfc2231_params = {}
    280     name, value = params.pop(0)
    281     new_params.append((name, value))
    282     while params:
    283         name, value = params.pop(0)
    284         if name.endswith('*'):
    285             encoded = True
    286         else:
    287             encoded = False
    288         value = unquote(value)
    289         mo = rfc2231_continuation.match(name)
    290         if mo:
    291             name, num = mo.group('name', 'num')
    292             if num is not None:
    293                 num = int(num)
    294             rfc2231_params.setdefault(name, []).append((num, value, encoded))
    295         else:
    296             new_params.append((name, '"%s"' % quote(value)))
    297     if rfc2231_params:
    298         for name, continuations in rfc2231_params.items():
    299             value = []
    300             extended = False
    301             # Sort by number
    302             continuations.sort()
    303             # And now append all values in numerical order, converting
    304             # %-encodings for the encoded segments.  If any of the
    305             # continuation names ends in a *, then the entire string, after
    306             # decoding segments and concatenating, must have the charset and
    307             # language specifiers at the beginning of the string.
    308             for num, s, encoded in continuations:
    309                 if encoded:
    310                     s = urllib.unquote(s)
    311                     extended = True
    312                 value.append(s)
    313             value = quote(EMPTYSTRING.join(value))
    314             if extended:
    315                 charset, language, value = decode_rfc2231(value)
    316                 new_params.append((name, (charset, language, '"%s"' % value)))
    317             else:
    318                 new_params.append((name, '"%s"' % value))
    319     return new_params
    320 
    321 def collapse_rfc2231_value(value, errors='replace',
    322                            fallback_charset='us-ascii'):
    323     if isinstance(value, tuple):
    324         rawval = unquote(value[2])
    325         charset = value[0] or 'us-ascii'
    326         try:
    327             return unicode(rawval, charset, errors)
    328         except LookupError:
    329             # XXX charset is unknown to Python.
    330             return unicode(rawval, fallback_charset, errors)
    331     else:
    332         return unquote(value)
    333