# Copyright (C) 2001-2010 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Miscellaneous utilities."""

__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]

import os
import re
import time
import base64
import random
import socket
import urllib
import warnings

from email._parseaddr import quote
from email._parseaddr import AddressList as _AddressList
from email._parseaddr import mktime_tz

# We need workarounds for bugs in these methods in older Pythons (see below)
from email._parseaddr import parsedate as _parsedate
from email._parseaddr import parsedate_tz as _parsedate_tz

from quopri import decodestring as _qdecode

# Intrapackage imports
from email.encoders import _bencode, _qencode

COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# Characters that force a real name to be wrapped in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that must be backslash-escaped inside a quoted real name.
escapesre = re.compile(r'[][\\()"]')


# Helpers

def _identity(s):
    """Return s unchanged."""
    return s


def _bdecode(s):
    """Decodes a base64 string.

    This function is equivalent to base64.decodestring and it's retained only
    for backward compatibility. It used to remove the last \\n of the decoded
    string, if it had any (see issue 7143).
    """
    # A false value (e.g. '' or None) is handed back untouched.
    if not s:
        return s
    return base64.decodestring(s)


def fix_eols(s):
    """Replace all line-ending characters with \\r\\n."""
    # Fix newlines with no preceding carriage return
    s = re.sub(r'(?<!\r)\n', CRLF, s)
    # Fix carriage returns with no following newline
    s = re.sub(r'\r(?!\n)', CRLF, s)
    return s


def formataddr(pair):
    """The inverse of parseaddr(), this takes a 2-tuple of the form
    (realname, email_address) and returns the string value suitable
    for an RFC 2822 From, To or Cc header.

    If the first element of pair is false, then the second element is
    returned unmodified.
    """
    name, address = pair
    if not name:
        return address
    # Quote the real name only when it contains a special character; the
    # characters matched by escapesre are always backslash-escaped.
    quotes = '"' if specialsre.search(name) else ''
    name = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, name, quotes, address)


def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist


ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)


def formatdate(timeval=None, localtime=False, usegmt=False):
    """Returns a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    Optional timeval if given is a floating point time value as accepted by
    gmtime() and localtime(), otherwise the current time is used.

    Optional localtime is a flag that when True, interprets timeval, and
    returns a date relative to the local timezone instead of UTC, properly
    taking daylight savings time into account.

    Optional argument usegmt means that the timezone is written out as
    an ascii string, not numeric one (so "GMT" instead of "+0000"). This
    is needed for HTTP, and is only used when localtime==False.
    """
    # Note: we cannot use strftime() because that honors the locale and RFC
    # 2822 requires that day and month names be the English abbreviations.
    if timeval is None:
        timeval = time.time()
    if localtime:
        now = time.localtime(timeval)
        # Calculate timezone offset, based on whether the local zone has
        # daylight savings time, and whether DST is in effect.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        # The divmod remainder is in seconds; it is converted to whole
        # minutes when the zone string is built.
        hours, seconds = divmod(abs(offset), 3600)
        # Remember offset is in seconds west of UTC, but the timezone is in
        # minutes east of UTC, so the signs differ.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, seconds // 60)
    else:
        now = time.gmtime(timeval)
        # Timezone offset is always -0000
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
        now[2],
        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
        now[0], now[3], now[4], now[5],
        zone)


def make_msgid(idstring=None):
    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:

    <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>

    Optional idstring if given is a string used to strengthen the
    uniqueness of the message id.
    """
    # Current time in hundredths of a second.
    timeval = int(time.time()*100)
    pid = os.getpid()
    randint = random.getrandbits(64)
    if idstring is None:
        idstring = ''
    else:
        idstring = '.' + idstring
    idhost = socket.getfqdn()
    msgid = '<%d.%d.%d%s@%s>' % (timeval, pid, randint, idstring, idhost)
    return msgid


# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Return None for false data, else the result of _parsedate(data)."""
    if not data:
        return None
    return _parsedate(data)


def parsedate_tz(data):
    """Return None for false data, else the result of _parsedate_tz(data)."""
    if not data:
        return None
    return _parsedate_tz(data)


def parseaddr(addr):
    """Return the first address parsed from addr.

    The result is the first entry of the address list built from addr, or
    the pair ('', '') when that list is empty.
    """
    addrs = _AddressList(addr).addresslist
    if not addrs:
        return '', ''
    return addrs[0]


# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes loses them and has backslash-escaped
    backslashes and double quotes un-escaped; a string wrapped in angle
    brackets loses the brackets.  Anything else, including strings shorter
    than two characters, is returned unchanged.
    """
    # NOTE: the parameter name shadows the builtin but is part of the public
    # signature, so it is kept as-is.
    if len(str) > 1:
        if str.startswith('"') and str.endswith('"'):
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str


# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    Splits s on the first two single quotes into charset, language and
    value.  When s contains fewer than two single quotes the tuple
    (None, None, s) is returned; otherwise the three-element list produced
    by the split is returned.
    """
    parts = s.split(TICK, 2)
    if len(parts) <= 2:
        return None, None, s
    return parts


def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    If neither charset nor language is given, then s is returned as-is.  If
    charset is given but not language, the string is encoded using the empty
    string for language.
    """
    # Every character outside urllib.quote's always-safe set is %-encoded.
    s = urllib.quote(s, safe='')
    if charset is None and language is None:
        return s
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)


rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')


def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).

    The first pair is passed through untouched.  Every other plain
    parameter is unquoted and re-quoted; RFC 2231 continuations sharing a
    name are joined in numerical order into a single parameter, whose value
    is a (charset, language, value) 3-tuple when any segment was %-encoded.
    An empty params sequence yields an empty list.
    """
    # Copy params so we don't mess with the original
    params = list(params)
    if not params:
        return []
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        value = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            value.append(s)
        value = quote(EMPTYSTRING.join(value))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params


def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single string.

    When value is a tuple, value[0] is taken as the charset (defaulting to
    'us-ascii' when false) and value[2] as the raw string; the raw string is
    unquoted and decoded to unicode using errors as the error handler.  If
    the charset is unknown to Python, fallback_charset is used instead.

    Any other value is simply unquoted and returned.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(rawval, fallback_charset, errors)