1 # Copyright (C) 2001-2010 Python Software Foundation 2 # Contact: email-sig (at] python.org 3 4 """Classes to generate plain text from a message object tree.""" 5 6 __all__ = ['Generator', 'DecodedGenerator'] 7 8 import re 9 import sys 10 import time 11 import random 12 import warnings 13 14 from cStringIO import StringIO 15 from email.header import Header 16 17 UNDERSCORE = '_' 18 NL = '\n' 19 20 fcre = re.compile(r'^From ', re.MULTILINE) 21 22 def _is8bitstring(s): 23 if isinstance(s, str): 24 try: 25 unicode(s, 'us-ascii') 26 except UnicodeError: 27 return True 28 return False 29 30 31 33 class Generator: 34 """Generates output from a Message object tree. 35 36 This basic generator writes the message to the given file object as plain 37 text. 38 """ 39 # 40 # Public interface 41 # 42 43 def __init__(self, outfp, mangle_from_=True, maxheaderlen=78): 44 """Create the generator for message flattening. 45 46 outfp is the output file-like object for writing the message to. It 47 must have a write() method. 48 49 Optional mangle_from_ is a flag that, when True (the default), escapes 50 From_ lines in the body of the message by putting a `>' in front of 51 them. 52 53 Optional maxheaderlen specifies the longest length for a non-continued 54 header. When a header line is longer (in characters, with tabs 55 expanded to 8 spaces) than maxheaderlen, the header will split as 56 defined in the Header class. Set maxheaderlen to zero to disable 57 header wrapping. The default is 78, as recommended (but not required) 58 by RFC 2822. 59 """ 60 self._fp = outfp 61 self._mangle_from_ = mangle_from_ 62 self._maxheaderlen = maxheaderlen 63 64 def write(self, s): 65 # Just delegate to the file object 66 self._fp.write(s) 67 68 def flatten(self, msg, unixfrom=False): 69 """Print the message object tree rooted at msg to the output file 70 specified when the Generator instance was created. 71 72 unixfrom is a flag that forces the printing of a Unix From_ delimiter 73 before the first object in the message tree. If the original message 74 has no From_ delimiter, a `standard' one is crafted. By default, this 75 is False to inhibit the printing of any From_ delimiter. 76 77 Note that for subobjects, no From_ line is printed. 78 """ 79 if unixfrom: 80 ufrom = msg.get_unixfrom() 81 if not ufrom: 82 ufrom = 'From nobody ' + time.ctime(time.time()) 83 print >> self._fp, ufrom 84 self._write(msg) 85 86 def clone(self, fp): 87 """Clone this generator with the exact same options.""" 88 return self.__class__(fp, self._mangle_from_, self._maxheaderlen) 89 90 # 91 # Protected interface - undocumented ;/ 92 # 93 94 def _write(self, msg): 95 # We can't write the headers yet because of the following scenario: 96 # say a multipart message includes the boundary string somewhere in 97 # its body. We'd have to calculate the new boundary /before/ we write 98 # the headers so that we can write the correct Content-Type: 99 # parameter. 100 # 101 # The way we do this, so as to make the _handle_*() methods simpler, 102 # is to cache any subpart writes into a StringIO. The we write the 103 # headers and the StringIO contents. That way, subpart handlers can 104 # Do The Right Thing, and can still modify the Content-Type: header if 105 # necessary. 106 oldfp = self._fp 107 try: 108 self._fp = sfp = StringIO() 109 self._dispatch(msg) 110 finally: 111 self._fp = oldfp 112 # Write the headers. First we see if the message object wants to 113 # handle that itself. If not, we'll do it generically. 114 meth = getattr(msg, '_write_headers', None) 115 if meth is None: 116 self._write_headers(msg) 117 else: 118 meth(self) 119 self._fp.write(sfp.getvalue()) 120 121 def _dispatch(self, msg): 122 # Get the Content-Type: for the message, then try to dispatch to 123 # self._handle_<maintype>_<subtype>(). If there's no handler for the 124 # full MIME type, then dispatch to self._handle_<maintype>(). If 125 # that's missing too, then dispatch to self._writeBody(). 126 main = msg.get_content_maintype() 127 sub = msg.get_content_subtype() 128 specific = UNDERSCORE.join((main, sub)).replace('-', '_') 129 meth = getattr(self, '_handle_' + specific, None) 130 if meth is None: 131 generic = main.replace('-', '_') 132 meth = getattr(self, '_handle_' + generic, None) 133 if meth is None: 134 meth = self._writeBody 135 meth(msg) 136 137 # 138 # Default handlers 139 # 140 141 def _write_headers(self, msg): 142 for h, v in msg.items(): 143 print >> self._fp, '%s:' % h, 144 if self._maxheaderlen == 0: 145 # Explicit no-wrapping 146 print >> self._fp, v 147 elif isinstance(v, Header): 148 # Header instances know what to do 149 print >> self._fp, v.encode() 150 elif _is8bitstring(v): 151 # If we have raw 8bit data in a byte string, we have no idea 152 # what the encoding is. There is no safe way to split this 153 # string. If it's ascii-subset, then we could do a normal 154 # ascii split, but if it's multibyte then we could break the 155 # string. There's no way to know so the least harm seems to 156 # be to not split the string and risk it being too long. 157 print >> self._fp, v 158 else: 159 # Header's got lots of smarts, so use it. Note that this is 160 # fundamentally broken though because we lose idempotency when 161 # the header string is continued with tabs. It will now be 162 # continued with spaces. This was reversedly broken before we 163 # fixed bug 1974. Either way, we lose. 164 print >> self._fp, Header( 165 v, maxlinelen=self._maxheaderlen, header_name=h).encode() 166 # A blank line always separates headers from body 167 print >> self._fp 168 169 # 170 # Handlers for writing types and subtypes 171 # 172 173 def _handle_text(self, msg): 174 payload = msg.get_payload() 175 if payload is None: 176 return 177 if not isinstance(payload, basestring): 178 raise TypeError('string payload expected: %s' % type(payload)) 179 if self._mangle_from_: 180 payload = fcre.sub('>From ', payload) 181 self._fp.write(payload) 182 183 # Default body handler 184 _writeBody = _handle_text 185 186 def _handle_multipart(self, msg): 187 # The trick here is to write out each part separately, merge them all 188 # together, and then make sure that the boundary we've chosen isn't 189 # present in the payload. 190 msgtexts = [] 191 subparts = msg.get_payload() 192 if subparts is None: 193 subparts = [] 194 elif isinstance(subparts, basestring): 195 # e.g. a non-strict parse of a message with no starting boundary. 196 self._fp.write(subparts) 197 return 198 elif not isinstance(subparts, list): 199 # Scalar payload 200 subparts = [subparts] 201 for part in subparts: 202 s = StringIO() 203 g = self.clone(s) 204 g.flatten(part, unixfrom=False) 205 msgtexts.append(s.getvalue()) 206 # BAW: What about boundaries that are wrapped in double-quotes? 207 boundary = msg.get_boundary() 208 if not boundary: 209 # Create a boundary that doesn't appear in any of the 210 # message texts. 211 alltext = NL.join(msgtexts) 212 boundary = _make_boundary(alltext) 213 msg.set_boundary(boundary) 214 # If there's a preamble, write it out, with a trailing CRLF 215 if msg.preamble is not None: 216 if self._mangle_from_: 217 preamble = fcre.sub('>From ', msg.preamble) 218 else: 219 preamble = msg.preamble 220 print >> self._fp, preamble 221 # dash-boundary transport-padding CRLF 222 print >> self._fp, '--' + boundary 223 # body-part 224 if msgtexts: 225 self._fp.write(msgtexts.pop(0)) 226 # *encapsulation 227 # --> delimiter transport-padding 228 # --> CRLF body-part 229 for body_part in msgtexts: 230 # delimiter transport-padding CRLF 231 print >> self._fp, '\n--' + boundary 232 # body-part 233 self._fp.write(body_part) 234 # close-delimiter transport-padding 235 self._fp.write('\n--' + boundary + '--') 236 if msg.epilogue is not None: 237 print >> self._fp 238 if self._mangle_from_: 239 epilogue = fcre.sub('>From ', msg.epilogue) 240 else: 241 epilogue = msg.epilogue 242 self._fp.write(epilogue) 243 244 def _handle_multipart_signed(self, msg): 245 # The contents of signed parts has to stay unmodified in order to keep 246 # the signature intact per RFC1847 2.1, so we disable header wrapping. 247 # RDM: This isn't enough to completely preserve the part, but it helps. 248 old_maxheaderlen = self._maxheaderlen 249 try: 250 self._maxheaderlen = 0 251 self._handle_multipart(msg) 252 finally: 253 self._maxheaderlen = old_maxheaderlen 254 255 def _handle_message_delivery_status(self, msg): 256 # We can't just write the headers directly to self's file object 257 # because this will leave an extra newline between the last header 258 # block and the boundary. Sigh. 259 blocks = [] 260 for part in msg.get_payload(): 261 s = StringIO() 262 g = self.clone(s) 263 g.flatten(part, unixfrom=False) 264 text = s.getvalue() 265 lines = text.split('\n') 266 # Strip off the unnecessary trailing empty line 267 if lines and lines[-1] == '': 268 blocks.append(NL.join(lines[:-1])) 269 else: 270 blocks.append(text) 271 # Now join all the blocks with an empty line. This has the lovely 272 # effect of separating each block with an empty line, but not adding 273 # an extra one after the last one. 274 self._fp.write(NL.join(blocks)) 275 276 def _handle_message(self, msg): 277 s = StringIO() 278 g = self.clone(s) 279 # The payload of a message/rfc822 part should be a multipart sequence 280 # of length 1. The zeroth element of the list should be the Message 281 # object for the subpart. Extract that object, stringify it, and 282 # write it out. 283 # Except, it turns out, when it's a string instead, which happens when 284 # and only when HeaderParser is used on a message of mime type 285 # message/rfc822. Such messages are generated by, for example, 286 # Groupwise when forwarding unadorned messages. (Issue 7970.) So 287 # in that case we just emit the string body. 288 payload = msg.get_payload() 289 if isinstance(payload, list): 290 g.flatten(msg.get_payload(0), unixfrom=False) 291 payload = s.getvalue() 292 self._fp.write(payload) 293 294 295 297 _FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' 298 299 class DecodedGenerator(Generator): 300 """Generates a text representation of a message. 301 302 Like the Generator base class, except that non-text parts are substituted 303 with a format string representing the part. 304 """ 305 def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None): 306 """Like Generator.__init__() except that an additional optional 307 argument is allowed. 308 309 Walks through all subparts of a message. If the subpart is of main 310 type `text', then it prints the decoded payload of the subpart. 311 312 Otherwise, fmt is a format string that is used instead of the message 313 payload. fmt is expanded with the following keywords (in 314 %(keyword)s format): 315 316 type : Full MIME type of the non-text part 317 maintype : Main MIME type of the non-text part 318 subtype : Sub-MIME type of the non-text part 319 filename : Filename of the non-text part 320 description: Description associated with the non-text part 321 encoding : Content transfer encoding of the non-text part 322 323 The default value for fmt is None, meaning 324 325 [Non-text (%(type)s) part of message omitted, filename %(filename)s] 326 """ 327 Generator.__init__(self, outfp, mangle_from_, maxheaderlen) 328 if fmt is None: 329 self._fmt = _FMT 330 else: 331 self._fmt = fmt 332 333 def _dispatch(self, msg): 334 for part in msg.walk(): 335 maintype = part.get_content_maintype() 336 if maintype == 'text': 337 print >> self, part.get_payload(decode=True) 338 elif maintype == 'multipart': 339 # Just skip this 340 pass 341 else: 342 print >> self, self._fmt % { 343 'type' : part.get_content_type(), 344 'maintype' : part.get_content_maintype(), 345 'subtype' : part.get_content_subtype(), 346 'filename' : part.get_filename('[no filename]'), 347 'description': part.get('Content-Description', 348 '[no description]'), 349 'encoding' : part.get('Content-Transfer-Encoding', 350 '[no encoding]'), 351 } 352 353 354 356 # Helper 357 _width = len(repr(sys.maxint-1)) 358 _fmt = '%%0%dd' % _width 359 360 def _make_boundary(text=None): 361 # Craft a random boundary. If text is given, ensure that the chosen 362 # boundary doesn't appear in the text. 363 token = random.randrange(sys.maxint) 364 boundary = ('=' * 15) + (_fmt % token) + '==' 365 if text is None: 366 return boundary 367 b = boundary 368 counter = 0 369 while True: 370 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE) 371 if not cre.search(text): 372 break 373 b = boundary + '.' + str(counter) 374 counter += 1 375 return b 376