1 r"""HTTP/1.1 client library 2 3 <intro stuff goes here> 4 <other stuff, too> 5 6 HTTPConnection goes through a number of "states", which define when a client 7 may legally make another request or fetch the response for a particular 8 request. This diagram details these state transitions: 9 10 (null) 11 | 12 | HTTPConnection() 13 v 14 Idle 15 | 16 | putrequest() 17 v 18 Request-started 19 | 20 | ( putheader() )* endheaders() 21 v 22 Request-sent 23 | 24 | response = getresponse() 25 v 26 Unread-response [Response-headers-read] 27 |\____________________ 28 | | 29 | response.read() | putrequest() 30 v v 31 Idle Req-started-unread-response 32 ______/| 33 / | 34 response.read() | | ( putheader() )* endheaders() 35 v v 36 Request-started Req-sent-unread-response 37 | 38 | response.read() 39 v 40 Request-sent 41 42 This diagram presents the following rules: 43 -- a second request may not be started until {response-headers-read} 44 -- a response [object] cannot be retrieved until {request-sent} 45 -- there is no differentiation between an unread response body and a 46 partially read response body 47 48 Note: this enforcement is applied by the HTTPConnection class. The 49 HTTPResponse class does not enforce this state machine, which 50 implies sophisticated clients may accelerate the request/response 51 pipeline. Caution should be taken, though: accelerating the states 52 beyond the above pattern may imply knowledge of the server's 53 connection-close behavior for certain requests. For example, it 54 is impossible to tell whether the server will close the connection 55 UNTIL the response headers have been read; this means that further 56 requests cannot be placed into the pipeline until it is known that 57 the server will NOT be closing the connection. 
58 59 Logical State __state __response 60 ------------- ------- ---------- 61 Idle _CS_IDLE None 62 Request-started _CS_REQ_STARTED None 63 Request-sent _CS_REQ_SENT None 64 Unread-response _CS_IDLE <response_class> 65 Req-started-unread-response _CS_REQ_STARTED <response_class> 66 Req-sent-unread-response _CS_REQ_SENT <response_class> 67 """ 68 69 from array import array 70 import os 71 import socket 72 from sys import py3kwarning 73 from urlparse import urlsplit 74 import warnings 75 with warnings.catch_warnings(): 76 if py3kwarning: 77 warnings.filterwarnings("ignore", ".*mimetools has been removed", 78 DeprecationWarning) 79 import mimetools 80 81 try: 82 from cStringIO import StringIO 83 except ImportError: 84 from StringIO import StringIO 85 86 __all__ = ["HTTP", "HTTPResponse", "HTTPConnection", 87 "HTTPException", "NotConnected", "UnknownProtocol", 88 "UnknownTransferEncoding", "UnimplementedFileMode", 89 "IncompleteRead", "InvalidURL", "ImproperConnectionState", 90 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", 91 "BadStatusLine", "error", "responses"] 92 93 HTTP_PORT = 80 94 HTTPS_PORT = 443 95 96 _UNKNOWN = 'UNKNOWN' 97 98 # connection states 99 _CS_IDLE = 'Idle' 100 _CS_REQ_STARTED = 'Request-started' 101 _CS_REQ_SENT = 'Request-sent' 102 103 # status codes 104 # informational 105 CONTINUE = 100 106 SWITCHING_PROTOCOLS = 101 107 PROCESSING = 102 108 109 # successful 110 OK = 200 111 CREATED = 201 112 ACCEPTED = 202 113 NON_AUTHORITATIVE_INFORMATION = 203 114 NO_CONTENT = 204 115 RESET_CONTENT = 205 116 PARTIAL_CONTENT = 206 117 MULTI_STATUS = 207 118 IM_USED = 226 119 120 # redirection 121 MULTIPLE_CHOICES = 300 122 MOVED_PERMANENTLY = 301 123 FOUND = 302 124 SEE_OTHER = 303 125 NOT_MODIFIED = 304 126 USE_PROXY = 305 127 TEMPORARY_REDIRECT = 307 128 129 # client error 130 BAD_REQUEST = 400 131 UNAUTHORIZED = 401 132 PAYMENT_REQUIRED = 402 133 FORBIDDEN = 403 134 NOT_FOUND = 404 135 METHOD_NOT_ALLOWED = 405 136 NOT_ACCEPTABLE = 406 137 
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to their standard reason phrases.
# Every status code this module names with a constant has an entry here,
# including the extension codes (102, 207, 226, 422-426, 507, 510), so a
# lookup through one of the constants never raises KeyError.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576
214 215 # maximal line length when calling readline(). 216 _MAXLINE = 65536 217 218 class HTTPMessage(mimetools.Message): 219 220 def addheader(self, key, value): 221 """Add header for field key handling repeats.""" 222 prev = self.dict.get(key) 223 if prev is None: 224 self.dict[key] = value 225 else: 226 combined = ", ".join((prev, value)) 227 self.dict[key] = combined 228 229 def addcontinue(self, key, more): 230 """Add more field data from a continuation line.""" 231 prev = self.dict[key] 232 self.dict[key] = prev + "\n " + more 233 234 def readheaders(self): 235 """Read header lines. 236 237 Read header lines up to the entirely blank line that terminates them. 238 The (normally blank) line that ends the headers is skipped, but not 239 included in the returned list. If a non-header line ends the headers, 240 (which is an error), an attempt is made to backspace over it; it is 241 never included in the returned list. 242 243 The variable self.status is set to the empty string if all went well, 244 otherwise it is an error message. The variable self.headers is a 245 completely uninterpreted list of lines contained in the header (so 246 printing them will reproduce the header exactly as it appears in the 247 file). 248 249 If multiple header fields with the same name occur, they are combined 250 according to the rules in RFC 2616 sec 4.2: 251 252 Appending each subsequent field-value to the first, each separated 253 by a comma. The order in which header fields with the same field-name 254 are received is significant to the interpretation of the combined 255 field value. 256 """ 257 # XXX The implementation overrides the readheaders() method of 258 # rfc822.Message. The base class design isn't amenable to 259 # customized behavior here so the method here is a copy of the 260 # base class code with a few small changes. 
261 262 self.dict = {} 263 self.unixfrom = '' 264 self.headers = hlist = [] 265 self.status = '' 266 headerseen = "" 267 firstline = 1 268 startofline = unread = tell = None 269 if hasattr(self.fp, 'unread'): 270 unread = self.fp.unread 271 elif self.seekable: 272 tell = self.fp.tell 273 while True: 274 if tell: 275 try: 276 startofline = tell() 277 except IOError: 278 startofline = tell = None 279 self.seekable = 0 280 line = self.fp.readline(_MAXLINE + 1) 281 if len(line) > _MAXLINE: 282 raise LineTooLong("header line") 283 if not line: 284 self.status = 'EOF in headers' 285 break 286 # Skip unix From name time lines 287 if firstline and line.startswith('From '): 288 self.unixfrom = self.unixfrom + line 289 continue 290 firstline = 0 291 if headerseen and line[0] in ' \t': 292 # XXX Not sure if continuation lines are handled properly 293 # for http and/or for repeating headers 294 # It's a continuation line. 295 hlist.append(line) 296 self.addcontinue(headerseen, line.strip()) 297 continue 298 elif self.iscomment(line): 299 # It's a comment. Ignore it. 300 continue 301 elif self.islast(line): 302 # Note! No pushback here! The delimiter line gets eaten. 303 break 304 headerseen = self.isheader(line) 305 if headerseen: 306 # It's a legal header line, save it. 307 hlist.append(line) 308 self.addheader(headerseen, line[len(headerseen)+1:].strip()) 309 continue 310 else: 311 # It's not a header line; throw it back and stop here. 312 if not self.dict: 313 self.status = 'No headers' 314 else: 315 self.status = 'Non-header line where header expected' 316 # Try to undo the read. 317 if unread: 318 unread(line) 319 elif tell: 320 self.fp.seek(startofline) 321 else: 322 self.status = self.status + '; bad seek' 323 break 324 325 class HTTPResponse: 326 327 # strict: If true, raise BadStatusLine if the status line can't be 328 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is 329 # false because it prevents clients from talking to HTTP/0.9 330 # servers. 
Note that a response with a sufficiently corrupted 331 # status line will look like an HTTP/0.9 response. 332 333 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. 334 335 def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False): 336 if buffering: 337 # The caller won't be using any sock.recv() calls, so buffering 338 # is fine and recommended for performance. 339 self.fp = sock.makefile('rb') 340 else: 341 # The buffer size is specified as zero, because the headers of 342 # the response are read with readline(). If the reads were 343 # buffered the readline() calls could consume some of the 344 # response, which make be read via a recv() on the underlying 345 # socket. 346 self.fp = sock.makefile('rb', 0) 347 self.debuglevel = debuglevel 348 self.strict = strict 349 self._method = method 350 351 self.msg = None 352 353 # from the Status-Line of the response 354 self.version = _UNKNOWN # HTTP-Version 355 self.status = _UNKNOWN # Status-Code 356 self.reason = _UNKNOWN # Reason-Phrase 357 358 self.chunked = _UNKNOWN # is "chunked" being used? 359 self.chunk_left = _UNKNOWN # bytes left to read in current chunk 360 self.length = _UNKNOWN # number of bytes left in response 361 self.will_close = _UNKNOWN # conn will close at end of response 362 363 def _read_status(self): 364 # Initialize with Simple-Response defaults 365 line = self.fp.readline(_MAXLINE + 1) 366 if len(line) > _MAXLINE: 367 raise LineTooLong("header line") 368 if self.debuglevel > 0: 369 print "reply:", repr(line) 370 if not line: 371 # Presumably, the server closed the connection before 372 # sending a valid response. 373 raise BadStatusLine(line) 374 try: 375 [version, status, reason] = line.split(None, 2) 376 except ValueError: 377 try: 378 [version, status] = line.split(None, 1) 379 reason = "" 380 except ValueError: 381 # empty version will cause next test to fail and status 382 # will be treated as 0.9 response. 
383 version = "" 384 if not version.startswith('HTTP/'): 385 if self.strict: 386 self.close() 387 raise BadStatusLine(line) 388 else: 389 # assume it's a Simple-Response from an 0.9 server 390 self.fp = LineAndFileWrapper(line, self.fp) 391 return "HTTP/0.9", 200, "" 392 393 # The status code is a three-digit number 394 try: 395 status = int(status) 396 if status < 100 or status > 999: 397 raise BadStatusLine(line) 398 except ValueError: 399 raise BadStatusLine(line) 400 return version, status, reason 401 402 def begin(self): 403 if self.msg is not None: 404 # we've already started reading the response 405 return 406 407 # read until we get a non-100 response 408 while True: 409 version, status, reason = self._read_status() 410 if status != CONTINUE: 411 break 412 # skip the header from the 100 response 413 while True: 414 skip = self.fp.readline(_MAXLINE + 1) 415 if len(skip) > _MAXLINE: 416 raise LineTooLong("header line") 417 skip = skip.strip() 418 if not skip: 419 break 420 if self.debuglevel > 0: 421 print "header:", skip 422 423 self.status = status 424 self.reason = reason.strip() 425 if version == 'HTTP/1.0': 426 self.version = 10 427 elif version.startswith('HTTP/1.'): 428 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 429 elif version == 'HTTP/0.9': 430 self.version = 9 431 else: 432 raise UnknownProtocol(version) 433 434 if self.version == 9: 435 self.length = None 436 self.chunked = 0 437 self.will_close = 1 438 self.msg = HTTPMessage(StringIO()) 439 return 440 441 self.msg = HTTPMessage(self.fp, 0) 442 if self.debuglevel > 0: 443 for hdr in self.msg.headers: 444 print "header:", hdr, 445 446 # don't let the msg keep an fp 447 self.msg.fp = None 448 449 # are we using the chunked-style of transfer encoding? 
450 tr_enc = self.msg.getheader('transfer-encoding') 451 if tr_enc and tr_enc.lower() == "chunked": 452 self.chunked = 1 453 self.chunk_left = None 454 else: 455 self.chunked = 0 456 457 # will the connection close at the end of the response? 458 self.will_close = self._check_close() 459 460 # do we have a Content-Length? 461 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" 462 length = self.msg.getheader('content-length') 463 if length and not self.chunked: 464 try: 465 self.length = int(length) 466 except ValueError: 467 self.length = None 468 else: 469 if self.length < 0: # ignore nonsensical negative lengths 470 self.length = None 471 else: 472 self.length = None 473 474 # does the body have a fixed length? (of zero) 475 if (status == NO_CONTENT or status == NOT_MODIFIED or 476 100 <= status < 200 or # 1xx codes 477 self._method == 'HEAD'): 478 self.length = 0 479 480 # if the connection remains open, and we aren't using chunked, and 481 # a content-length was not provided, then assume that the connection 482 # WILL close. 483 if not self.will_close and \ 484 not self.chunked and \ 485 self.length is None: 486 self.will_close = 1 487 488 def _check_close(self): 489 conn = self.msg.getheader('connection') 490 if self.version == 11: 491 # An HTTP/1.1 proxy is assumed to stay open unless 492 # explicitly closed. 493 conn = self.msg.getheader('connection') 494 if conn and "close" in conn.lower(): 495 return True 496 return False 497 498 # Some HTTP/1.0 implementations have support for persistent 499 # connections, using rules different than HTTP/1.1. 500 501 # For older HTTP, Keep-Alive indicates persistent connection. 502 if self.msg.getheader('keep-alive'): 503 return False 504 505 # At least Akamai returns a "Connection: Keep-Alive" header, 506 # which was supposed to be sent by the client. 507 if conn and "keep-alive" in conn.lower(): 508 return False 509 510 # Proxy-Connection is a netscape hack. 
511 pconn = self.msg.getheader('proxy-connection') 512 if pconn and "keep-alive" in pconn.lower(): 513 return False 514 515 # otherwise, assume it will close 516 return True 517 518 def close(self): 519 if self.fp: 520 self.fp.close() 521 self.fp = None 522 523 def isclosed(self): 524 # NOTE: it is possible that we will not ever call self.close(). This 525 # case occurs when will_close is TRUE, length is None, and we 526 # read up to the last byte, but NOT past it. 527 # 528 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be 529 # called, meaning self.isclosed() is meaningful. 530 return self.fp is None 531 532 # XXX It would be nice to have readline and __iter__ for this, too. 533 534 def read(self, amt=None): 535 if self.fp is None: 536 return '' 537 538 if self._method == 'HEAD': 539 self.close() 540 return '' 541 542 if self.chunked: 543 return self._read_chunked(amt) 544 545 if amt is None: 546 # unbounded read 547 if self.length is None: 548 s = self.fp.read() 549 else: 550 try: 551 s = self._safe_read(self.length) 552 except IncompleteRead: 553 self.close() 554 raise 555 self.length = 0 556 self.close() # we read everything 557 return s 558 559 if self.length is not None: 560 if amt > self.length: 561 # clip the read to the "end of response" 562 amt = self.length 563 564 # we do not use _safe_read() here because this may be a .will_close 565 # connection, and the user is reading more bytes than will be provided 566 # (for example, reading in 1k chunks) 567 s = self.fp.read(amt) 568 if not s: 569 # Ideally, we would raise IncompleteRead if the content-length 570 # wasn't satisfied, but it might break compatibility. 
571 self.close() 572 if self.length is not None: 573 self.length -= len(s) 574 if not self.length: 575 self.close() 576 577 return s 578 579 def _read_chunked(self, amt): 580 assert self.chunked != _UNKNOWN 581 chunk_left = self.chunk_left 582 value = [] 583 while True: 584 if chunk_left is None: 585 line = self.fp.readline(_MAXLINE + 1) 586 if len(line) > _MAXLINE: 587 raise LineTooLong("chunk size") 588 i = line.find(';') 589 if i >= 0: 590 line = line[:i] # strip chunk-extensions 591 try: 592 chunk_left = int(line, 16) 593 except ValueError: 594 # close the connection as protocol synchronisation is 595 # probably lost 596 self.close() 597 raise IncompleteRead(''.join(value)) 598 if chunk_left == 0: 599 break 600 if amt is None: 601 value.append(self._safe_read(chunk_left)) 602 elif amt < chunk_left: 603 value.append(self._safe_read(amt)) 604 self.chunk_left = chunk_left - amt 605 return ''.join(value) 606 elif amt == chunk_left: 607 value.append(self._safe_read(amt)) 608 self._safe_read(2) # toss the CRLF at the end of the chunk 609 self.chunk_left = None 610 return ''.join(value) 611 else: 612 value.append(self._safe_read(chunk_left)) 613 amt -= chunk_left 614 615 # we read the whole chunk, get another 616 self._safe_read(2) # toss the CRLF at the end of the chunk 617 chunk_left = None 618 619 # read and discard trailer up to the CRLF terminator 620 ### note: we shouldn't have any trailers! 621 while True: 622 line = self.fp.readline(_MAXLINE + 1) 623 if len(line) > _MAXLINE: 624 raise LineTooLong("trailer line") 625 if not line: 626 # a vanishingly small number of sites EOF without 627 # sending the trailer 628 break 629 if line == '\r\n': 630 break 631 632 # we read everything; close the "file" 633 self.close() 634 635 return ''.join(value) 636 637 def _safe_read(self, amt): 638 """Read the number of bytes requested, compensating for partial reads. 
639 640 Normally, we have a blocking socket, but a read() can be interrupted 641 by a signal (resulting in a partial read). 642 643 Note that we cannot distinguish between EOF and an interrupt when zero 644 bytes have been read. IncompleteRead() will be raised in this 645 situation. 646 647 This function should be used when <amt> bytes "should" be present for 648 reading. If the bytes are truly not available (due to EOF), then the 649 IncompleteRead exception can be used to detect the problem. 650 """ 651 # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never 652 # return less than x bytes unless EOF is encountered. It now handles 653 # signal interruptions (socket.error EINTR) internally. This code 654 # never caught that exception anyways. It seems largely pointless. 655 # self.fp.read(amt) will work fine. 656 s = [] 657 while amt > 0: 658 chunk = self.fp.read(min(amt, MAXAMOUNT)) 659 if not chunk: 660 raise IncompleteRead(''.join(s), amt) 661 s.append(chunk) 662 amt -= len(chunk) 663 return ''.join(s) 664 665 def fileno(self): 666 return self.fp.fileno() 667 668 def getheader(self, name, default=None): 669 if self.msg is None: 670 raise ResponseNotReady() 671 return self.msg.getheader(name, default) 672 673 def getheaders(self): 674 """Return list of (header, value) tuples.""" 675 if self.msg is None: 676 raise ResponseNotReady() 677 return self.msg.items() 678 679 680 class HTTPConnection: 681 682 _http_vsn = 11 683 _http_vsn_str = 'HTTP/1.1' 684 685 response_class = HTTPResponse 686 default_port = HTTP_PORT 687 auto_open = 1 688 debuglevel = 0 689 strict = 0 690 691 def __init__(self, host, port=None, strict=None, 692 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): 693 self.timeout = timeout 694 self.source_address = source_address 695 self.sock = None 696 self._buffer = [] 697 self.__response = None 698 self.__state = _CS_IDLE 699 self._method = None 700 self._tunnel_host = None 701 self._tunnel_port = None 702 self._tunnel_headers 
= {} 703 704 self._set_hostport(host, port) 705 if strict is not None: 706 self.strict = strict 707 708 def set_tunnel(self, host, port=None, headers=None): 709 """ Sets up the host and the port for the HTTP CONNECT Tunnelling. 710 711 The headers argument should be a mapping of extra HTTP headers 712 to send with the CONNECT request. 713 """ 714 self._tunnel_host = host 715 self._tunnel_port = port 716 if headers: 717 self._tunnel_headers = headers 718 else: 719 self._tunnel_headers.clear() 720 721 def _set_hostport(self, host, port): 722 if port is None: 723 i = host.rfind(':') 724 j = host.rfind(']') # ipv6 addresses have [...] 725 if i > j: 726 try: 727 port = int(host[i+1:]) 728 except ValueError: 729 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ 730 port = self.default_port 731 else: 732 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) 733 host = host[:i] 734 else: 735 port = self.default_port 736 if host and host[0] == '[' and host[-1] == ']': 737 host = host[1:-1] 738 self.host = host 739 self.port = port 740 741 def set_debuglevel(self, level): 742 self.debuglevel = level 743 744 def _tunnel(self): 745 self._set_hostport(self._tunnel_host, self._tunnel_port) 746 self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)) 747 for header, value in self._tunnel_headers.iteritems(): 748 self.send("%s: %s\r\n" % (header, value)) 749 self.send("\r\n") 750 response = self.response_class(self.sock, strict = self.strict, 751 method = self._method) 752 (version, code, message) = response._read_status() 753 754 if code != 200: 755 self.close() 756 raise socket.error("Tunnel connection failed: %d %s" % (code, 757 message.strip())) 758 while True: 759 line = response.fp.readline(_MAXLINE + 1) 760 if len(line) > _MAXLINE: 761 raise LineTooLong("header line") 762 if not line: 763 # for sites which EOF without sending trailer 764 break 765 if line == '\r\n': 766 break 767 768 769 def connect(self): 770 """Connect to the host and port specified in 
__init__.""" 771 self.sock = socket.create_connection((self.host,self.port), 772 self.timeout, self.source_address) 773 774 if self._tunnel_host: 775 self._tunnel() 776 777 def close(self): 778 """Close the connection to the HTTP server.""" 779 if self.sock: 780 self.sock.close() # close it manually... there may be other refs 781 self.sock = None 782 if self.__response: 783 self.__response.close() 784 self.__response = None 785 self.__state = _CS_IDLE 786 787 def send(self, data): 788 """Send `data' to the server.""" 789 if self.sock is None: 790 if self.auto_open: 791 self.connect() 792 else: 793 raise NotConnected() 794 795 if self.debuglevel > 0: 796 print "send:", repr(data) 797 blocksize = 8192 798 if hasattr(data,'read') and not isinstance(data, array): 799 if self.debuglevel > 0: print "sendIng a read()able" 800 datablock = data.read(blocksize) 801 while datablock: 802 self.sock.sendall(datablock) 803 datablock = data.read(blocksize) 804 else: 805 self.sock.sendall(data) 806 807 def _output(self, s): 808 """Add a line of output to the current request buffer. 809 810 Assumes that the line does *not* end with \\r\\n. 811 """ 812 self._buffer.append(s) 813 814 def _send_output(self, message_body=None): 815 """Send the currently buffered request and clear the buffer. 816 817 Appends an extra \\r\\n to the buffer. 818 A message_body may be specified, to be appended to the request. 819 """ 820 self._buffer.extend(("", "")) 821 msg = "\r\n".join(self._buffer) 822 del self._buffer[:] 823 # If msg and message_body are sent in a single send() call, 824 # it will avoid performance problems caused by the interaction 825 # between delayed ack and the Nagle algorithm. 826 if isinstance(message_body, str): 827 msg += message_body 828 message_body = None 829 self.send(msg) 830 if message_body is not None: 831 #message_body was not a string (i.e. 
it is a file) and 832 #we must run the risk of Nagle 833 self.send(message_body) 834 835 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): 836 """Send a request to the server. 837 838 `method' specifies an HTTP request method, e.g. 'GET'. 839 `url' specifies the object being requested, e.g. '/index.html'. 840 `skip_host' if True does not add automatically a 'Host:' header 841 `skip_accept_encoding' if True does not add automatically an 842 'Accept-Encoding:' header 843 """ 844 845 # if a prior response has been completed, then forget about it. 846 if self.__response and self.__response.isclosed(): 847 self.__response = None 848 849 850 # in certain cases, we cannot issue another request on this connection. 851 # this occurs when: 852 # 1) we are in the process of sending a request. (_CS_REQ_STARTED) 853 # 2) a response to a previous request has signalled that it is going 854 # to close the connection upon completion. 855 # 3) the headers for the previous response have not been read, thus 856 # we cannot determine whether point (2) is true. (_CS_REQ_SENT) 857 # 858 # if there is no prior response, then we can request at will. 859 # 860 # if point (2) is true, then we will have passed the socket to the 861 # response (effectively meaning, "there is no prior response"), and 862 # will open a new one when a new request is made. 863 # 864 # Note: if a prior response exists, then we *can* start a new request. 865 # We are not allowed to begin fetching the response to this new 866 # request, however, until that prior response is complete. 
867 # 868 if self.__state == _CS_IDLE: 869 self.__state = _CS_REQ_STARTED 870 else: 871 raise CannotSendRequest() 872 873 # Save the method we use, we need it later in the response phase 874 self._method = method 875 if not url: 876 url = '/' 877 hdr = '%s %s %s' % (method, url, self._http_vsn_str) 878 879 self._output(hdr) 880 881 if self._http_vsn == 11: 882 # Issue some standard headers for better HTTP/1.1 compliance 883 884 if not skip_host: 885 # this header is issued *only* for HTTP/1.1 886 # connections. more specifically, this means it is 887 # only issued when the client uses the new 888 # HTTPConnection() class. backwards-compat clients 889 # will be using HTTP/1.0 and those clients may be 890 # issuing this header themselves. we should NOT issue 891 # it twice; some web servers (such as Apache) barf 892 # when they see two Host: headers 893 894 # If we need a non-standard port,include it in the 895 # header. If the request is going through a proxy, 896 # but the host of the actual URL, not the host of the 897 # proxy. 898 899 netloc = '' 900 if url.startswith('http'): 901 nil, netloc, nil, nil, nil = urlsplit(url) 902 903 if netloc: 904 try: 905 netloc_enc = netloc.encode("ascii") 906 except UnicodeEncodeError: 907 netloc_enc = netloc.encode("idna") 908 self.putheader('Host', netloc_enc) 909 else: 910 try: 911 host_enc = self.host.encode("ascii") 912 except UnicodeEncodeError: 913 host_enc = self.host.encode("idna") 914 # Wrap the IPv6 Host Header with [] (RFC 2732) 915 if host_enc.find(':') >= 0: 916 host_enc = "[" + host_enc + "]" 917 if self.port == self.default_port: 918 self.putheader('Host', host_enc) 919 else: 920 self.putheader('Host', "%s:%s" % (host_enc, self.port)) 921 922 # note: we are assuming that clients will not attempt to set these 923 # headers since *this* library must deal with the 924 # consequences. 
this also means that when the supporting 925 # libraries are updated to recognize other forms, then this 926 # code should be changed (removed or updated). 927 928 # we only want a Content-Encoding of "identity" since we don't 929 # support encodings such as x-gzip or x-deflate. 930 if not skip_accept_encoding: 931 self.putheader('Accept-Encoding', 'identity') 932 933 # we can accept "chunked" Transfer-Encodings, but no others 934 # NOTE: no TE header implies *only* "chunked" 935 #self.putheader('TE', 'chunked') 936 937 # if TE is supplied in the header, then it must appear in a 938 # Connection header. 939 #self.putheader('Connection', 'TE') 940 941 else: 942 # For HTTP/1.0, the server will assume "not chunked" 943 pass 944 945 def putheader(self, header, *values): 946 """Send a request header line to the server. 947 948 For example: h.putheader('Accept', 'text/html') 949 """ 950 if self.__state != _CS_REQ_STARTED: 951 raise CannotSendHeader() 952 953 hdr = '%s: %s' % (header, '\r\n\t'.join([str(v) for v in values])) 954 self._output(hdr) 955 956 def endheaders(self, message_body=None): 957 """Indicate that the last header line has been sent to the server. 958 959 This method sends the request to the server. The optional 960 message_body argument can be used to pass a message body 961 associated with the request. The message body will be sent in 962 the same packet as the message headers if it is string, otherwise it is 963 sent as a separate packet. 964 """ 965 if self.__state == _CS_REQ_STARTED: 966 self.__state = _CS_REQ_SENT 967 else: 968 raise CannotSendHeader() 969 self._send_output(message_body) 970 971 def request(self, method, url, body=None, headers={}): 972 """Send a complete request to the server.""" 973 self._send_request(method, url, body, headers) 974 975 def _set_content_length(self, body): 976 # Set the content-length based on the body. 
977 thelen = None 978 try: 979 thelen = str(len(body)) 980 except TypeError, te: 981 # If this is a file-like object, try to 982 # fstat its file descriptor 983 try: 984 thelen = str(os.fstat(body.fileno()).st_size) 985 except (AttributeError, OSError): 986 # Don't send a length if this failed 987 if self.debuglevel > 0: print "Cannot stat!!" 988 989 if thelen is not None: 990 self.putheader('Content-Length', thelen) 991 992 def _send_request(self, method, url, body, headers): 993 # Honor explicitly requested Host: and Accept-Encoding: headers. 994 header_names = dict.fromkeys([k.lower() for k in headers]) 995 skips = {} 996 if 'host' in header_names: 997 skips['skip_host'] = 1 998 if 'accept-encoding' in header_names: 999 skips['skip_accept_encoding'] = 1 1000 1001 self.putrequest(method, url, **skips) 1002 1003 if body is not None and 'content-length' not in header_names: 1004 self._set_content_length(body) 1005 for hdr, value in headers.iteritems(): 1006 self.putheader(hdr, value) 1007 self.endheaders(body) 1008 1009 def getresponse(self, buffering=False): 1010 "Get the response from the server." 1011 1012 # if a prior response has been completed, then forget about it. 1013 if self.__response and self.__response.isclosed(): 1014 self.__response = None 1015 1016 # 1017 # if a prior response exists, then it must be completed (otherwise, we 1018 # cannot read this response's header to determine the connection-close 1019 # behavior) 1020 # 1021 # note: if a prior response existed, but was connection-close, then the 1022 # socket and response were made independent of this HTTPConnection 1023 # object since a new request requires that we open a whole new 1024 # connection 1025 # 1026 # this means the prior response had one of two states: 1027 # 1) will_close: this connection was reset and the prior socket and 1028 # response operate independently 1029 # 2) persistent: the response was retained and we await its 1030 # isclosed() status to become true. 
1031 # 1032 if self.__state != _CS_REQ_SENT or self.__response: 1033 raise ResponseNotReady() 1034 1035 args = (self.sock,) 1036 kwds = {"strict":self.strict, "method":self._method} 1037 if self.debuglevel > 0: 1038 args += (self.debuglevel,) 1039 if buffering: 1040 #only add this keyword if non-default, for compatibility with 1041 #other response_classes. 1042 kwds["buffering"] = True; 1043 response = self.response_class(*args, **kwds) 1044 1045 response.begin() 1046 assert response.will_close != _UNKNOWN 1047 self.__state = _CS_IDLE 1048 1049 if response.will_close: 1050 # this effectively passes the connection to the response 1051 self.close() 1052 else: 1053 # remember this, so we can tell when it is complete 1054 self.__response = response 1055 1056 return response 1057 1058 1059 class HTTP: 1060 "Compatibility class with httplib.py from 1.5." 1061 1062 _http_vsn = 10 1063 _http_vsn_str = 'HTTP/1.0' 1064 1065 debuglevel = 0 1066 1067 _connection_class = HTTPConnection 1068 1069 def __init__(self, host='', port=None, strict=None): 1070 "Provide a default host, since the superclass requires one." 1071 1072 # some joker passed 0 explicitly, meaning default port 1073 if port == 0: 1074 port = None 1075 1076 # Note that we may pass an empty string as the host; this will raise 1077 # an error when we attempt to connect. Presumably, the client code 1078 # will call connect before then, with a proper host. 
1079 self._setup(self._connection_class(host, port, strict)) 1080 1081 def _setup(self, conn): 1082 self._conn = conn 1083 1084 # set up delegation to flesh out interface 1085 self.send = conn.send 1086 self.putrequest = conn.putrequest 1087 self.putheader = conn.putheader 1088 self.endheaders = conn.endheaders 1089 self.set_debuglevel = conn.set_debuglevel 1090 1091 conn._http_vsn = self._http_vsn 1092 conn._http_vsn_str = self._http_vsn_str 1093 1094 self.file = None 1095 1096 def connect(self, host=None, port=None): 1097 "Accept arguments to set the host/port, since the superclass doesn't." 1098 1099 if host is not None: 1100 self._conn._set_hostport(host, port) 1101 self._conn.connect() 1102 1103 def getfile(self): 1104 "Provide a getfile, since the superclass' does not use this concept." 1105 return self.file 1106 1107 def getreply(self, buffering=False): 1108 """Compat definition since superclass does not define it. 1109 1110 Returns a tuple consisting of: 1111 - server status code (e.g. '200' if all goes well) 1112 - server "reason" corresponding to status code 1113 - any RFC822 headers in the response from the server 1114 """ 1115 try: 1116 if not buffering: 1117 response = self._conn.getresponse() 1118 else: 1119 #only add this keyword if non-default for compatibility 1120 #with other connection classes 1121 response = self._conn.getresponse(buffering) 1122 except BadStatusLine, e: 1123 ### hmm. if getresponse() ever closes the socket on a bad request, 1124 ### then we are going to have problems with self.sock 1125 1126 ### should we keep this behavior? do people use it? 
1127 # keep the socket open (as a file), and return it 1128 self.file = self._conn.sock.makefile('rb', 0) 1129 1130 # close our socket -- we want to restart after any protocol error 1131 self.close() 1132 1133 self.headers = None 1134 return -1, e.line, None 1135 1136 self.headers = response.msg 1137 self.file = response.fp 1138 return response.status, response.reason, response.msg 1139 1140 def close(self): 1141 self._conn.close() 1142 1143 # note that self.file == response.fp, which gets closed by the 1144 # superclass. just clear the object ref here. 1145 ### hmm. messy. if status==-1, then self.file is owned by us. 1146 ### well... we aren't explicitly closing, but losing this ref will 1147 ### do it 1148 self.file = None 1149 1150 try: 1151 import ssl 1152 except ImportError: 1153 pass 1154 else: 1155 class HTTPSConnection(HTTPConnection): 1156 "This class allows communication via SSL." 1157 1158 default_port = HTTPS_PORT 1159 1160 def __init__(self, host, port=None, key_file=None, cert_file=None, 1161 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 1162 source_address=None): 1163 HTTPConnection.__init__(self, host, port, strict, timeout, 1164 source_address) 1165 self.key_file = key_file 1166 self.cert_file = cert_file 1167 1168 def connect(self): 1169 "Connect to a host on a given (SSL) port." 1170 1171 sock = socket.create_connection((self.host, self.port), 1172 self.timeout, self.source_address) 1173 if self._tunnel_host: 1174 self.sock = sock 1175 self._tunnel() 1176 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) 1177 1178 __all__.append("HTTPSConnection") 1179 1180 class HTTPS(HTTP): 1181 """Compatibility with 1.5 httplib interface 1182 1183 Python 1.5.2 did not have an HTTPS class, but it defined an 1184 interface for sending http requests that is also useful for 1185 https. 
1186 """ 1187 1188 _connection_class = HTTPSConnection 1189 1190 def __init__(self, host='', port=None, key_file=None, cert_file=None, 1191 strict=None): 1192 # provide a default host, pass the X509 cert info 1193 1194 # urf. compensate for bad input. 1195 if port == 0: 1196 port = None 1197 self._setup(self._connection_class(host, port, key_file, 1198 cert_file, strict)) 1199 1200 # we never actually use these for anything, but we keep them 1201 # here for compatibility with post-1.5.2 CVS. 1202 self.key_file = key_file 1203 self.cert_file = cert_file 1204 1205 1206 def FakeSocket (sock, sslobj): 1207 warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " + 1208 "Use the result of ssl.wrap_socket() directly instead.", 1209 DeprecationWarning, stacklevel=2) 1210 return sslobj 1211 1212 1213 class HTTPException(Exception): 1214 # Subclasses that define an __init__ must call Exception.__init__ 1215 # or define self.args. Otherwise, str() will fail. 1216 pass 1217 1218 class NotConnected(HTTPException): 1219 pass 1220 1221 class InvalidURL(HTTPException): 1222 pass 1223 1224 class UnknownProtocol(HTTPException): 1225 def __init__(self, version): 1226 self.args = version, 1227 self.version = version 1228 1229 class UnknownTransferEncoding(HTTPException): 1230 pass 1231 1232 class UnimplementedFileMode(HTTPException): 1233 pass 1234 1235 class IncompleteRead(HTTPException): 1236 def __init__(self, partial, expected=None): 1237 self.args = partial, 1238 self.partial = partial 1239 self.expected = expected 1240 def __repr__(self): 1241 if self.expected is not None: 1242 e = ', %i more expected' % self.expected 1243 else: 1244 e = '' 1245 return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) 1246 def __str__(self): 1247 return repr(self) 1248 1249 class ImproperConnectionState(HTTPException): 1250 pass 1251 1252 class CannotSendRequest(ImproperConnectionState): 1253 pass 1254 1255 class CannotSendHeader(ImproperConnectionState): 1256 pass 1257 1258 
class ResponseNotReady(ImproperConnectionState):
    pass

class BadStatusLine(HTTPException):
    # line: the offending status line.  A false value (e.g. '') is stored
    # as its repr() so the exception message is not blank.
    def __init__(self, line):
        if not line:
            line = repr(line)
        self.args = line,
        self.line = line

class LineTooLong(HTTPException):
    # line_type: short description of what was being read; it is
    # interpolated into the message together with the module-level
    # _MAXLINE limit.
    def __init__(self, line_type):
        HTTPException.__init__(self, "got more than %d bytes when reading %s"
                                     % (_MAXLINE, line_type))

# for backwards compatibility
error = HTTPException

class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses."""

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        # line: data already read from file, to be served first
        # file: underlying file object supplying everything after line
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # anything not defined here is looked up on the wrapped file
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt bytes, serving the saved line before the file.

        As with regular file objects, an omitted or negative amt means
        "read everything": the rest of the saved line plus the whole
        remainder of the underlying file.
        """
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        # A negative amt used to fall into the partial-line branch below,
        # where it produced a bogus slice and corrupted _line_left.
        read_all = amt is None or amt < 0
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the unread rest of the saved line, then defer to the file."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the saved line followed by the file's lines.

        size, when given, is forwarded unchanged to the file's readlines().
        """
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)