# Copyright 2011, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


"""WebSocket utilities.
"""


import array
import errno

# Import hash classes from a module available and recommended for each Python
# version and re-export those symbols. Use sha and md5 module in Python 2.4,
# and hashlib module in Python 2.6.
try:
    import hashlib
    md5_hash = hashlib.md5
    sha1_hash = hashlib.sha1
except ImportError:
    import md5
    import sha
    md5_hash = md5.md5
    sha1_hash = sha.sha

# Fall back to io when the StringIO module is missing so that importing this
# module does not fail; io provides a StringIO class under the same attribute
# name, so StringIO.StringIO() keeps working.
try:
    import StringIO
except ImportError:
    import io as StringIO
import logging
import os
import re
import socket
import sys
import traceback
import zlib


def get_stack_trace():
    """Get the current stack trace as string.

    This is needed to support Python 2.3.
    TODO: Remove this when we only support Python 2.4 and above.
          Use traceback.format_exc instead.
    """

    out = StringIO.StringIO()
    traceback.print_exc(file=out)
    return out.getvalue()


def prepend_message_to_exception(message, exc):
    """Prepend message to the exception.

    Replaces exc.args with a one-element tuple holding message followed by
    str(exc). The exception object is modified in place.

    Args:
        message: string to put in front of the exception text
        exc: exception instance to modify
    """

    exc.args = (message + str(exc),)


def __translate_interp(interp, cygwin_path):
    """Translate interp program path for Win32 python to run cygwin program
    (e.g. perl).  Note that it doesn't support path that contains space,
    which is typically true for Unix, where #!-script is written.
    For Win32 python, cygwin_path is a directory of cygwin binaries.

    Args:
      interp: interp command line
      cygwin_path: directory name of cygwin binary, or None
    Returns:
      translated interp command line.
    """
    if not cygwin_path:
        return interp
    m = re.match('^[^ ]*/([^ ]+)( .*)?', interp)
    if m:
        cmd = os.path.join(cygwin_path, m.group(1))
        # The argument group is optional; it is None when the interpreter
        # line carries no arguments, which must not be concatenated.
        return cmd + (m.group(2) or '')
    return interp


def get_script_interp(script_path, cygwin_path=None):
    """Gets #!-interpreter command line from the script.

    It also fixes command path.  When Cygwin Python is used, e.g. in WebKit,
    it could run "/usr/bin/perl -wT hello.pl".
    When Win32 Python is used, e.g. in Chromium, it couldn't.  So, fix
    "/usr/bin/perl" to "<cygwin_path>\\perl.exe".

    Args:
      script_path: pathname of the script
      cygwin_path: directory name of cygwin binary, or None
    Returns:
      #!-interpreter command line, or None if it is not #!-script.
    """
    fp = open(script_path)
    try:
        # Only the first line can hold the #! marker.
        line = fp.readline()
    finally:
        # Close the file even when reading fails.
        fp.close()
    m = re.match('^#!(.*)', line)
    if m:
        return __translate_interp(m.group(1), cygwin_path)
    return None


def wrap_popen3_for_win(cygwin_path):
    """Wrap popen3 to support #!-script on Windows.

    Replaces os.popen3 with a wrapper that looks up the #!-interpreter of
    the first space-separated word of the command and, when there is one,
    prepends the interpreter command line to the command before calling the
    original os.popen3.

    Args:
      cygwin_path:  path for cygwin binary if command path is needed to be
                    translated.  None if no translation required.
    """

    __orig_popen3 = os.popen3

    def __wrap_popen3(cmd, mode='t', bufsize=-1):
        cmdline = cmd.split(' ')
        interp = get_script_interp(cmdline[0], cygwin_path)
        if interp:
            cmd = interp + ' ' + cmd
        return __orig_popen3(cmd, mode, bufsize)

    os.popen3 = __wrap_popen3


def hexify(s):
    """Return the ordinal of each character of s formatted as two-digit
    lowercase hexadecimal, separated by single spaces.
    """

    return ' '.join(['%02x' % ord(c) for c in s])


def get_class_logger(o):
    """Return the logger named "<module>.<class name>" of the class of o."""

    return logging.getLogger(
        '%s.%s' % (o.__class__.__module__, o.__class__.__name__))


class NoopMasker(object):
    """A masking object that has the same interface as RepeatedXorMasker but
    just returns the string passed in without making any change.
    """

    def __init__(self):
        pass

    def mask(self, s):
        """Return s unchanged."""

        return s


class RepeatedXorMasker(object):
    """A masking object that applies XOR on the string given to mask method
    with the masking bytes given to the constructor repeatedly. This object
    remembers the position in the masking bytes the last mask method call
    ended and resumes from that point on the next mask method call.
    """

    def __init__(self, mask):
        """Store the ordinals of the characters of mask as masking bytes and
        start at the first of them.
        """

        self._mask = map(ord, mask)
        self._mask_size = len(self._mask)
        self._count = 0

    def mask(self, s):
        """Return s with every byte XORed with the next masking byte.

        The position in the masking bytes is saved so that the next call
        continues where this one ended.
        """

        result = array.array('B')
        result.fromstring(s)
        # Use temporary local variables to eliminate the cost to access
        # attributes
        count = self._count
        mask = self._mask
        mask_size = self._mask_size
        for i in xrange(len(result)):
            result[i] ^= mask[count]
            count = (count + 1) % mask_size
        self._count = count

        return result.tostring()


class DeflateRequest(object):
    """A wrapper class for request object to intercept send and recv to perform
    deflate compression and decompression transparently.

    Only the attributes _request and connection live on the wrapper itself;
    connection is a DeflateConnection built from request.connection. Every
    other attribute read or write is forwarded to the wrapped request.
    """

    def __init__(self, request):
        self._request = request
        self.connection = DeflateConnection(request.connection)

    def __getattribute__(self, name):
        if name in ('_request', 'connection'):
            return object.__getattribute__(self, name)
        return self._request.__getattribute__(name)

    def __setattr__(self, name, value):
        if name in ('_request', 'connection'):
            return object.__setattr__(self, name, value)
        return self._request.__setattr__(name, value)


# By making wbits option negative, we can suppress CMF/FLG (2 octet) and
# ADLER32 (4 octet) fields of zlib so that we can use zlib module just as
# deflate library. DICTID won't be added as far as we don't set dictionary.
# LZ77 window of 32K will be used for both compression and decompression.
# For decompression, we can just use 32K to cover any windows size. For
# compression, we use 32K so receivers must use 32K.
#
# Compression level is Z_DEFAULT_COMPRESSION. We don't have to match level
# to decode.
#
# See zconf.h, deflate.cc, inflate.cc of zlib library, and zlibmodule.c of
# Python. See also RFC1950 (ZLIB 3.3).


class _Deflater(object):
    """A compressor that keeps one zlib compression object created with a
    negative window size (see the comment above) and the default level.
    """

    def __init__(self, window_bits):
        self._logger = get_class_logger(self)

        self._compress = zlib.compressobj(
            zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -window_bits)

    def compress_and_flush(self, bytes):
        """Compress bytes, flush with Z_SYNC_FLUSH and return the compressed
        output including the flushed part.
        """

        compressed_bytes = self._compress.compress(bytes)
        compressed_bytes += self._compress.flush(zlib.Z_SYNC_FLUSH)
        self._logger.debug('Compress input %r', bytes)
        self._logger.debug('Compress result %r', compressed_bytes)
        return compressed_bytes


class _Inflater(object):
    """A decompressor that buffers compressed input given to append and
    hands out decompressed data on decompress.
    """

    def __init__(self):
        self._logger = get_class_logger(self)

        # Compressed bytes that have not been consumed by zlib yet.
        self._unconsumed = ''

        self.reset()

    def decompress(self, size):
        """Decompress buffered input.

        Args:
            size: -1 to decompress all buffered input, or a positive number
                to get at most that many decompressed bytes.
        Returns:
            decompressed data, possibly empty.
        Raises:
            Exception: when size is neither -1 nor positive.
        """

        if not (size == -1 or size > 0):
            raise Exception('size must be -1 or positive')

        data = ''

        while True:
            if size == -1:
                data += self._decompress.decompress(self._unconsumed)
                # See Python bug http://bugs.python.org/issue12050 to
                # understand why the same code cannot be used for updating
                # self._unconsumed for here and else block.
                self._unconsumed = ''
            else:
                data += self._decompress.decompress(
                    self._unconsumed, size - len(data))
                self._unconsumed = self._decompress.unconsumed_tail
            if self._decompress.unused_data:
                # Encountered a last block (i.e. a block with BFINAL = 1) and
                # found a new stream (unused_data). We cannot use the same
                # zlib.Decompress object for the new stream. Create a new
                # Decompress object to decompress the new one.
                #
                # It's fine to ignore unconsumed_tail if unused_data is not
                # empty.
                self._unconsumed = self._decompress.unused_data
                self.reset()
                if size >= 0 and len(data) == size:
                    # data is filled. Don't call decompress again.
                    break
                else:
                    # Re-invoke Decompress.decompress to try to decompress all
                    # available bytes before invoking read which blocks until
                    # any new byte is available.
                    continue
            else:
                # Here, since unused_data is empty, even if unconsumed_tail is
                # not empty, bytes of requested length are already in data. We
                # don't have to "continue" here.
                break

        if data:
            self._logger.debug('Decompressed %r', data)
        return data

    def append(self, data):
        """Add compressed data to the input buffer."""

        self._logger.debug('Appended %r', data)
        self._unconsumed += data

    def reset(self):
        """Replace the zlib decompression object with a new one. Buffered
        input is kept.
        """

        self._logger.debug('Reset')
        self._decompress = zlib.decompressobj(-zlib.MAX_WBITS)


# Compresses/decompresses given octets using the method introduced in RFC1979.


class _RFC1979Deflater(object):
    """A compressor class that applies DEFLATE to given byte sequence and
    flushes using the algorithm described in the RFC1979 section 2.1.
    """

    def __init__(self, window_bits, no_context_takeover):
        """Args:
            window_bits: window size passed to _Deflater; zlib.MAX_WBITS is
                used when it is None.
            no_context_takeover: when true, filter creates a new _Deflater
                on every call instead of reusing the previous one.
        """

        self._deflater = None
        if window_bits is None:
            window_bits = zlib.MAX_WBITS
        self._window_bits = window_bits
        self._no_context_takeover = no_context_takeover

    def filter(self, bytes):
        """Return bytes compressed, without the trailing 4 octets."""

        if self._deflater is None or self._no_context_takeover:
            self._deflater = _Deflater(self._window_bits)

        # Strip last 4 octets which is LEN and NLEN field of a non-compressed
        # block added for Z_SYNC_FLUSH.
        return self._deflater.compress_and_flush(bytes)[:-4]


class _RFC1979Inflater(object):
    """A decompressor class for byte sequence compressed and flushed following
    the algorithm described in the RFC1979 section 2.1.
    """

    def __init__(self):
        self._inflater = _Inflater()

    def filter(self, bytes):
        """Return everything that can be decompressed after feeding bytes."""

        # Restore stripped LEN and NLEN field of a non-compressed block added
        # for Z_SYNC_FLUSH.
        self._inflater.append(bytes + '\x00\x00\xff\xff')
        return self._inflater.decompress(-1)


class DeflateSocket(object):
    """A wrapper class for socket object to intercept send and recv to perform
    deflate compression and decompression transparently.
    """

    # Size of the buffer passed to recv to receive compressed data.
    _RECV_SIZE = 4096

    def __init__(self, socket):
        self._socket = socket

        self._logger = get_class_logger(self)

        self._deflater = _Deflater(zlib.MAX_WBITS)
        self._inflater = _Inflater()

    def recv(self, size):
        """Receives data from the socket specified on the construction up
        to the specified size. Once any data is available, returns it even
        if it's smaller than the specified size.

        Returns an empty string when the wrapped socket returns no data.
        Raises Exception when size is not positive.
        """

        # TODO(tyoshino): Allow call with size=0. It should block until any
        # decompressed data is available.
        if size <= 0:
            raise Exception('Non-positive size passed')
        while True:
            data = self._inflater.decompress(size)
            if len(data) != 0:
                return data

            read_data = self._socket.recv(DeflateSocket._RECV_SIZE)
            if not read_data:
                return ''
            self._inflater.append(read_data)

    def sendall(self, bytes):
        """Compress bytes and send all of the result; same as send but
        without a return value.
        """

        self.send(bytes)

    def send(self, bytes):
        """Compress bytes, send all of the result with the wrapped socket's
        sendall, and return the length of the uncompressed input.
        """

        self._socket.sendall(self._deflater.compress_and_flush(bytes))
        return len(bytes)


class DeflateConnection(object):
    """A wrapper class for connection object to intercept write and read to
    perform deflate compression and decompression transparently.
    """

    def __init__(self, connection):
        self._connection = connection

        self._logger = get_class_logger(self)

        self._deflater = _Deflater(zlib.MAX_WBITS)
        self._inflater = _Inflater()

    def get_remote_addr(self):
        """Return remote_addr of the wrapped connection."""

        return self._connection.remote_addr
    remote_addr = property(get_remote_addr)

    def put_bytes(self, bytes):
        """Same as write."""

        self.write(bytes)

    def read(self, size=-1):
        """Reads at most size bytes. Blocks until there's at least one byte
        available.

        With size -1, reads until the wrapped connection returns no data.
        Raises Exception when size is neither -1 nor positive.
        """

        # TODO(tyoshino): Allow call with size=0.
        if not (size == -1 or size > 0):
            raise Exception('size must be -1 or positive')

        data = ''
        while True:
            if size == -1:
                data += self._inflater.decompress(-1)
            else:
                data += self._inflater.decompress(size - len(data))

            if size >= 0 and len(data) != 0:
                break

            # TODO(tyoshino): Make this read efficient by some workaround.
            #
            # In 3.0.3 and prior of mod_python, read blocks until length bytes
            # was read. We don't know the exact size to read while using
            # deflate, so read byte-by-byte.
            #
            # _StandaloneRequest.read that ultimately performs
            # socket._fileobject.read also blocks until length bytes was read
            read_data = self._connection.read(1)
            if not read_data:
                break
            self._inflater.append(read_data)
        return data

    def write(self, bytes):
        """Compress bytes and write the result to the wrapped connection."""

        self._connection.write(self._deflater.compress_and_flush(bytes))


def _is_ewouldblock_errno(error_number):
    """Returns True iff error_number indicates that receive operation would
    block. To make this portable, we check availability of errno and then
    compare them.
    """

    for error_name in ['WSAEWOULDBLOCK', 'EWOULDBLOCK', 'EAGAIN']:
        if (hasattr(errno, error_name) and
            error_number == getattr(errno, error_name)):
            return True
    return False


def drain_received_data(raw_socket):
    """Read everything that can be read from raw_socket without blocking.

    The socket timeout is set to 0.0 while draining and set back to its
    previous value before returning, also when an error is raised.

    Args:
        raw_socket: socket object to drain.
    Returns:
        the drained data joined into one string.
    Raises:
        socket.error: when recv fails with anything other than an error
            telling that the operation would block.
    """

    # Set the socket non-blocking.
    original_timeout = raw_socket.gettimeout()
    raw_socket.settimeout(0.0)

    drained_data = []

    try:
        # Drain until the socket is closed or no data is immediately
        # available for read.
        while True:
            try:
                data = raw_socket.recv(1)
            except socket.error:
                # Fetch the exception this way to stay independent of the
                # "except X, e" / "except X as e" syntax difference.
                e = sys.exc_info()[1]
                # e can be either a pair (errno, string) or just a string (or
                # something else) telling what went wrong. We suppress only
                # the errors that indicate that the socket blocks. Those
                # exceptions can be parsed as a pair (errno, string).
                if len(e.args) == 2 and _is_ewouldblock_errno(e.args[0]):
                    break
                raise
            if not data:
                break
            drained_data.append(data)
    finally:
        # Rollback timeout value.
        raw_socket.settimeout(original_timeout)

    return ''.join(drained_data)


# vi:sts=4 sw=4 et