# Copyright (C) 2001-2010 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Miscellaneous utilities."""

__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]

import os
import re
import time
import base64
import random
import socket
import urllib
import warnings

from email._parseaddr import quote
from email._parseaddr import AddressList as _AddressList
from email._parseaddr import mktime_tz

# We need workarounds for bugs in these methods in older Pythons (see below)
from email._parseaddr import parsedate as _parsedate
from email._parseaddr import parsedate_tz as _parsedate_tz

from quopri import decodestring as _qdecode

# Intrapackage imports
from email.encoders import _bencode, _qencode

COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# Characters that force a display name to be wrapped in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that must be backslash-escaped inside a display name.
escapesre = re.compile(r'[][\\()"]')

# English abbreviations used by formatdate(); indexed by the struct_time
# weekday field (Monday == 0) and by month number minus one.
_DAYNAMES = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
_MONTHNAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']



# Helpers

def _identity(s):
    """Return s unchanged."""
    return s


def _bdecode(s):
    """Decodes a base64 string.

    This function is equivalent to base64.decodestring and it's retained only
    for backward compatibility.  It used to remove the last newline of the
    decoded string, if it had any (see issue 7143).

    A false value (empty string or None) is returned as-is.
    """
    if not s:
        return s
    return base64.decodestring(s)



def fix_eols(s):
    """Replace all line-ending characters with CRLF."""
    # Fix newlines with no preceding carriage return
    s = re.sub(r'(?<!\r)\n', CRLF, s)
    # Fix carriage returns with no following newline
    s = re.sub(r'\r(?!\n)', CRLF, s)
    return s



def formataddr(pair):
    """The inverse of parseaddr(), this takes a 2-tuple of the form
    (realname, email_address) and returns the string value suitable
    for an RFC 2822 From, To or Cc header.

    If the first element of pair is false, then the second element is
    returned unmodified.
    """
    name, address = pair
    if not name:
        return address
    # Only quote the real name when it contains an RFC 2822 "special".
    quotes = ''
    if specialsre.search(name):
        quotes = '"'
    # Backslash-escape brackets, parentheses, backslashes and double quotes.
    name = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, name, quotes, address)



def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # Join all header values into one comma-separated address list and let
    # the address parser split it back into individual addresses.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist



ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)



def formatdate(timeval=None, localtime=False, usegmt=False):
    """Returns a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    Optional timeval if given is a floating point time value as accepted by
    gmtime() and localtime(), otherwise the current time is used.

    Optional localtime is a flag that when True, interprets timeval, and
    returns a date relative to the local timezone instead of UTC, properly
    taking daylight savings time into account.

    Optional argument usegmt means that the timezone is written out as
    an ascii string, not numeric one (so "GMT" instead of "+0000"). This
    is needed for HTTP, and is only used when localtime==False.
    """
    # Note: we cannot use strftime() because that honors the locale and RFC
    # 2822 requires that day and month names be the English abbreviations.
    if timeval is None:
        timeval = time.time()
    if localtime:
        now = time.localtime(timeval)
        # Calculate timezone offset, based on whether the local zone has
        # daylight savings time, and whether DST is in effect.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        # divmod() leaves the remainder in seconds; it is converted to
        # minutes when the zone string is built.
        hours, seconds = divmod(abs(offset), 3600)
        # Remember offset is in seconds west of UTC, but the timezone is in
        # minutes east of UTC, so the signs differ.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, seconds // 60)
    else:
        now = time.gmtime(timeval)
        # Timezone offset is always -0000
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        _DAYNAMES[now[6]],
        now[2],
        _MONTHNAMES[now[1] - 1],
        now[0], now[3], now[4], now[5],
        zone)



def make_msgid(idstring=None):
    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    Optional idstring if given is a string used to strengthen the
    uniqueness of the message id.
    """
    timeval = time.time()
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
    pid = os.getpid()
    randint = random.randrange(100000)
    if idstring is None:
        idstring = ''
    else:
        idstring = '.' + idstring
    idhost = socket.getfqdn()
    msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
    return msgid



# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Parse a date string with _parsedate(); return None for false input."""
    if not data:
        return None
    return _parsedate(data)


def parsedate_tz(data):
    """Parse a date string with _parsedate_tz(); return None for false input."""
    if not data:
        return None
    return _parsedate_tz(data)


def parseaddr(addr):
    """Return the first (realname, email_address) pair parsed from addr.

    Returns ('', '') when no address can be parsed.
    """
    addrs = _AddressList(addr).addresslist
    if not addrs:
        return '', ''
    return addrs[0]


# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
# NOTE: the parameter name shadows the builtin `str`, but it is part of the
# public signature and is therefore kept as-is.
def unquote(str):
    """Remove quotes from a string.

    A value wrapped in double quotes has the quotes stripped and the
    backslash escapes for backslash and double quote undone; a value wrapped
    in angle brackets just has the brackets stripped.  Anything else is
    returned unchanged.
    """
    if len(str) > 1:
        if str.startswith('"') and str.endswith('"'):
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str



# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    Splits s at the first two single quotes into charset, language and
    value.  When s holds fewer than two single quotes, the tuple
    (None, None, s) is returned; otherwise the three-element list produced
    by the split is returned.
    """
    parts = s.split(TICK, 2)
    if len(parts) <= 2:
        return None, None, s
    return parts


def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    If neither charset nor language is given, then s is returned as-is.  If
    charset is given but not language, the string is encoded using the empty
    string for language.
    """
    # Percent-encode everything that is not unreserved; nothing is "safe".
    s = urllib.quote(s, safe='')
    if charset is None and language is None:
        return s
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)


rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')

def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    The first item is passed through untouched; the remaining parameters
    are unquoted, RFC 2231 continuations are reassembled, and every value is
    re-quoted.  Extended (%-encoded) parameters are returned as
    (name, (charset, language, '"value"')).

    Returns a new list; params itself is not modified.  An empty params
    yields an empty list.
    """
    # Copy params so we don't mess with the original
    params = list(params)
    if not params:
        return []
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    for name, value in params[1:]:
        # A trailing asterisk marks a %-encoded (extended) segment.
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    if rfc2231_params:
        for name, continuations in rfc2231_params.items():
            value = []
            extended = False
            # Sort by number
            continuations.sort()
            # And now append all values in numerical order, converting
            # %-encodings for the encoded segments.  If any of the
            # continuation names ends in a *, then the entire string, after
            # decoding segments and concatenating, must have the charset and
            # language specifiers at the beginning of the string.
            for num, s, encoded in continuations:
                if encoded:
                    s = urllib.unquote(s)
                    extended = True
                value.append(s)
            value = quote(EMPTYSTRING.join(value))
            if extended:
                charset, language, value = decode_rfc2231(value)
                new_params.append((name, (charset, language, '"%s"' % value)))
            else:
                new_params.append((name, '"%s"' % value))
    return new_params

def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse an RFC 2231 parameter value into a single string.

    value is either a plain string, which is simply unquoted, or a
    (charset, language, text) 3-tuple.  For a tuple, the unquoted text is
    decoded to unicode with the given charset ('us-ascii' when the charset
    is false) using the errors handler.  If the charset is unknown to
    Python, fallback_charset is used instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(rawval, fallback_charset, errors)