Home | History | Annotate | Download | only in python2.7
      1 r"""HTTP/1.1 client library
      2 
      3 <intro stuff goes here>
      4 <other stuff, too>
      5 
      6 HTTPConnection goes through a number of "states", which define when a client
      7 may legally make another request or fetch the response for a particular
      8 request. This diagram details these state transitions:
      9 
     10     (null)
     11       |
     12       | HTTPConnection()
     13       v
     14     Idle
     15       |
     16       | putrequest()
     17       v
     18     Request-started
     19       |
     20       | ( putheader() )*  endheaders()
     21       v
     22     Request-sent
     23       |
     24       | response = getresponse()
     25       v
     26     Unread-response   [Response-headers-read]
     27       |\____________________
     28       |                     |
     29       | response.read()     | putrequest()
     30       v                     v
     31     Idle                  Req-started-unread-response
     32                      ______/|
     33                    /        |
     34    response.read() |        | ( putheader() )*  endheaders()
     35                    v        v
     36        Request-started    Req-sent-unread-response
     37                             |
     38                             | response.read()
     39                             v
     40                           Request-sent
     41 
     42 This diagram presents the following rules:
     43   -- a second request may not be started until {response-headers-read}
     44   -- a response [object] cannot be retrieved until {request-sent}
     45   -- there is no differentiation between an unread response body and a
     46      partially read response body
     47 
     48 Note: this enforcement is applied by the HTTPConnection class. The
     49       HTTPResponse class does not enforce this state machine, which
     50       implies sophisticated clients may accelerate the request/response
     51       pipeline. Caution should be taken, though: accelerating the states
     52       beyond the above pattern may imply knowledge of the server's
     53       connection-close behavior for certain requests. For example, it
     54       is impossible to tell whether the server will close the connection
     55       UNTIL the response headers have been read; this means that further
     56       requests cannot be placed into the pipeline until it is known that
     57       the server will NOT be closing the connection.
     58 
     59 Logical State                  __state            __response
     60 -------------                  -------            ----------
     61 Idle                           _CS_IDLE           None
     62 Request-started                _CS_REQ_STARTED    None
     63 Request-sent                   _CS_REQ_SENT       None
     64 Unread-response                _CS_IDLE           <response_class>
     65 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
     66 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
     67 """
     68 
     69 from array import array
     70 import os
     71 import socket
     72 from sys import py3kwarning
     73 from urlparse import urlsplit
     74 import warnings
     75 with warnings.catch_warnings():
     76     if py3kwarning:
     77         warnings.filterwarnings("ignore", ".*mimetools has been removed",
     78                                 DeprecationWarning)
     79     import mimetools
     80 
     81 try:
     82     from cStringIO import StringIO
     83 except ImportError:
     84     from StringIO import StringIO
     85 
# Names exported by "from httplib import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Well-known TCP ports; HTTP_PORT is HTTPConnection.default_port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes until the response has
# actually been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states (the values of HTTPConnection.__state; see the state
# table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
    102 
# status codes
# Symbolic names for numeric HTTP status codes, grouped by class (1xx-5xx).
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
    162 
# Mapping status codes to official W3C names
#
# Keys are the integer status codes, values the reason phrases.  Note that
# the mapping is not in one-to-one correspondence with the constants above:
# 102, 207, 226, 422, 423, 424, 426, 507 and 510 have a constant but no entry
# here, while 306 has an entry but no constant.  Use responses.get(code) when
# the code may be one of those.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
}
    211 
# Largest number of bytes HTTPResponse._safe_read() asks the file object
# for in a single read() call (1 MiB); bigger requests are split up.
MAXAMOUNT = 1048576

# Longest line accepted from readline().  Callers read up to _MAXLINE + 1
# bytes and raise LineTooLong when the result is longer than _MAXLINE.
_MAXLINE = 65536
    217 
    218 class HTTPMessage(mimetools.Message):
    219 
    220     def addheader(self, key, value):
    221         """Add header for field key handling repeats."""
    222         prev = self.dict.get(key)
    223         if prev is None:
    224             self.dict[key] = value
    225         else:
    226             combined = ", ".join((prev, value))
    227             self.dict[key] = combined
    228 
    229     def addcontinue(self, key, more):
    230         """Add more field data from a continuation line."""
    231         prev = self.dict[key]
    232         self.dict[key] = prev + "\n " + more
    233 
    def readheaders(self):
        """Read header lines from self.fp and populate this message.

        Header lines are read up to the line that terminates them (as decided
        by self.islast()).  That terminating line is consumed but is not
        appended to self.headers.  If a non-header line ends the headers
        (which is an error), an attempt is made to push it back, using
        self.fp.unread() or self.fp.seek(); it is never appended to
        self.headers.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of the header and continuation lines
        that were accepted.  self.dict maps each header name (as returned by
        self.isheader()) to its combined value.

        If multiple header fields with the same name occur, they are combined
        according to the rules in RFC 2616 sec 4.2:

        Appending each subsequent field-value to the first, each separated
        by a comma. The order in which header fields with the same field-name
        are received is significant to the interpretation of the combined
        field value.

        Nothing is returned.  LineTooLong is raised when a single line is
        longer than _MAXLINE.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message.  The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = hlist = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Work out how a wrongly consumed line can be given back: prefer an
        # unread() method on the file object, otherwise remember the offset
        # of each line with tell() so that seek() can rewind to it.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    # tell() failed: stop treating the file as seekable.
                    startofline = tell = None
                    self.seekable = 0
            # Read one byte more than the limit so an over-long line can be
            # told apart from one that is exactly _MAXLINE long.
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                hlist.append(line)
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                hlist.append(line)
                # The value is whatever follows the name and the one
                # separator character after it, with whitespace stripped.
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
    324 
    325 class HTTPResponse:
    326 
    327     # strict: If true, raise BadStatusLine if the status line can't be
    328     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    329     # false because it prevents clients from talking to HTTP/0.9
    330     # servers.  Note that a response with a sufficiently corrupted
    331     # status line will look like an HTTP/0.9 response.
    332 
    333     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
    334 
    def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
        """Wrap `sock` in a file object and reset all response state.

        sock       -- object providing makefile(); the response is read
                      from the file object it returns.
        debuglevel -- when greater than zero, received lines are printed.
        strict     -- when true, a malformed status line raises
                      BadStatusLine instead of being treated as HTTP/0.9.
        method     -- the request method; 'HEAD' responses have no body.
        buffering  -- when true a buffered file object is used.
        """
        if not buffering:
            # Unbuffered on purpose: the headers are read with readline(),
            # and a buffered reader could swallow part of the body that the
            # caller may want to fetch with recv() on the underlying socket.
            self.fp = sock.makefile('rb', 0)
        else:
            # The caller promises not to use sock.recv() directly, so the
            # faster buffered file object is safe.
            self.fp = sock.makefile('rb')

        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        # Parsed headers; stays None until begin() has run.
        self.msg = None

        # Status-Line fields: HTTP-Version, Status-Code, Reason-Phrase.
        self.version = self.status = self.reason = _UNKNOWN

        # Body bookkeeping, all unknown until the headers have been read:
        #   chunked    -- is "chunked" transfer encoding in use?
        #   chunk_left -- bytes left to read in the current chunk
        #   length     -- number of bytes left in the response
        #   will_close -- will the connection close at end of response?
        self.chunked = self.chunk_left = _UNKNOWN
        self.length = self.will_close = _UNKNOWN
    362 
    def _read_status(self):
        """Read and parse the Status-Line of the response.

        Returns a (version, status, reason) tuple in which status is an int.
        When the line does not start with 'HTTP/' and self.strict is false,
        the response is treated as an HTTP/0.9 Simple-Response: the line is
        put back by wrapping self.fp in a LineAndFileWrapper, and
        ("HTTP/0.9", 200, "") is returned.

        Raises LineTooLong when the line is longer than _MAXLINE, and
        BadStatusLine when the line is empty, when self.strict is set and
        the line is not an HTTP status line, or when the status code is not
        an integer between 100 and 999.
        """
        # Initialize with Simple-Response defaults
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        if self.debuglevel > 0:
            print "reply:", repr(line)
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        # Try "version status reason" first, then "version status" with an
        # empty reason; anything shorter is handled by the version check.
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith('HTTP/'):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # assume it's a Simple-Response from an 0.9 server
                self.fp = LineAndFileWrapper(line, self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason
    401 
    def begin(self):
        """Read the status line and headers and set up body-reading state.

        Does nothing if self.msg is already set.  Otherwise any number of
        100 (Continue) responses are skipped, then self.status, self.reason,
        self.version (9, 10 or 11), self.msg, self.chunked, self.will_close
        and self.length are filled in from the final response.

        Raises UnknownProtocol for an unrecognised HTTP version, LineTooLong
        for an over-long line in a skipped 100 response, and whatever
        _read_status() raises.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline(_MAXLINE + 1)
                if len(skip) > _MAXLINE:
                    raise LineTooLong("header line")
                skip = skip.strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print "header:", skip

        self.status = status
        self.reason = reason.strip()
        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # A Simple-Response has no headers: the body runs until the
            # connection closes, so give it an empty message and stop.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(StringIO())
            return

        # Constructing the message reads the header block from self.fp.
        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print "header:", hdr,

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                # Unparseable Content-Length: treat the length as unknown.
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1
    487 
    488     def _check_close(self):
    489         conn = self.msg.getheader('connection')
    490         if self.version == 11:
    491             # An HTTP/1.1 proxy is assumed to stay open unless
    492             # explicitly closed.
    493             conn = self.msg.getheader('connection')
    494             if conn and "close" in conn.lower():
    495                 return True
    496             return False
    497 
    498         # Some HTTP/1.0 implementations have support for persistent
    499         # connections, using rules different than HTTP/1.1.
    500 
    501         # For older HTTP, Keep-Alive indicates persistent connection.
    502         if self.msg.getheader('keep-alive'):
    503             return False
    504 
    505         # At least Akamai returns a "Connection: Keep-Alive" header,
    506         # which was supposed to be sent by the client.
    507         if conn and "keep-alive" in conn.lower():
    508             return False
    509 
    510         # Proxy-Connection is a netscape hack.
    511         pconn = self.msg.getheader('proxy-connection')
    512         if pconn and "keep-alive" in pconn.lower():
    513             return False
    514 
    515         # otherwise, assume it will close
    516         return True
    517 
    518     def close(self):
    519         if self.fp:
    520             self.fp.close()
    521             self.fp = None
    522 
    def isclosed(self):
        """Return True once close() has dropped the file object."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None
    531 
    532     # XXX It would be nice to have readline and __iter__ for this, too.
    533 
    534     def read(self, amt=None):
    535         if self.fp is None:
    536             return ''
    537 
    538         if self._method == 'HEAD':
    539             self.close()
    540             return ''
    541 
    542         if self.chunked:
    543             return self._read_chunked(amt)
    544 
    545         if amt is None:
    546             # unbounded read
    547             if self.length is None:
    548                 s = self.fp.read()
    549             else:
    550                 try:
    551                     s = self._safe_read(self.length)
    552                 except IncompleteRead:
    553                     self.close()
    554                     raise
    555                 self.length = 0
    556             self.close()        # we read everything
    557             return s
    558 
    559         if self.length is not None:
    560             if amt > self.length:
    561                 # clip the read to the "end of response"
    562                 amt = self.length
    563 
    564         # we do not use _safe_read() here because this may be a .will_close
    565         # connection, and the user is reading more bytes than will be provided
    566         # (for example, reading in 1k chunks)
    567         s = self.fp.read(amt)
    568         if not s:
    569             # Ideally, we would raise IncompleteRead if the content-length
    570             # wasn't satisfied, but it might break compatibility.
    571             self.close()
    572         if self.length is not None:
    573             self.length -= len(s)
    574             if not self.length:
    575                 self.close()
    576 
    577         return s
    578 
    def _read_chunked(self, amt):
        """Read from a body sent with "chunked" transfer encoding.

        amt -- None to read every remaining chunk, otherwise the maximum
               number of bytes to return.

        When `amt` bytes are satisfied before the final chunk, the unread
        remainder of the current chunk is saved in self.chunk_left (None
        when the chunk was consumed exactly) and the data is returned.
        Once the zero-size final chunk is seen, the trailer is read and
        discarded, the response is closed and the collected data returned.

        Raises LineTooLong for an over-long chunk-size or trailer line, and
        IncompleteRead (after closing the response) when a chunk-size line
        is not valid hexadecimal.  _safe_read() may also raise
        IncompleteRead on premature EOF.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        value = []
        while True:
            if chunk_left is None:
                # At a chunk boundary: the next line carries the chunk size.
                line = self.fp.readline(_MAXLINE + 1)
                if len(line) > _MAXLINE:
                    raise LineTooLong("chunk size")
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(''.join(value))
                if chunk_left == 0:
                    break
            if amt is None:
                value.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                # The request ends inside this chunk; remember how much of
                # the chunk is still unread for the next call.
                value.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(value)
            elif amt == chunk_left:
                value.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(value)
            else:
                value.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("trailer line")
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(value)
    636 
    def _safe_read(self, amt):
        """Return exactly `amt` bytes from self.fp, looping over short reads.

        The data is requested in pieces of at most MAXAMOUNT bytes and the
        pieces are joined once the full amount has arrived.  A read() that
        comes back empty before that point raises IncompleteRead, carrying
        the data gathered so far and the number of bytes still missing; an
        interrupted zero-byte read cannot be told apart from EOF here.

        Use this when `amt` bytes "should" be available, so that a
        premature EOF shows up as an exception rather than as short data.
        """
        # Historical note: since svn r74426 socket._fileobject.read(x)
        # already returns fewer than x bytes only at EOF and copes with
        # EINTR itself, so the loop mostly matters for the MAXAMOUNT cap.
        pieces = []
        remaining = amt
        while remaining > 0:
            request = min(remaining, MAXAMOUNT)
            data = self.fp.read(request)
            if not data:
                raise IncompleteRead(''.join(pieces), remaining)
            pieces.append(data)
            remaining -= len(data)
        return ''.join(pieces)
    664 
    def fileno(self):
        """Return the value of fileno() on the response's file object."""
        # NOTE(review): self.fp is None after close(), in which case this
        # raises AttributeError -- confirm callers never rely on it then.
        return self.fp.fileno()
    667 
    668     def getheader(self, name, default=None):
    669         if self.msg is None:
    670             raise ResponseNotReady()
    671         return self.msg.getheader(name, default)
    672 
    673     def getheaders(self):
    674         """Return list of (header, value) tuples."""
    675         if self.msg is None:
    676             raise ResponseNotReady()
    677         return self.msg.items()
    678 
    679 
    680 class HTTPConnection:
    681 
    682     _http_vsn = 11
    683     _http_vsn_str = 'HTTP/1.1'
    684 
    685     response_class = HTTPResponse
    686     default_port = HTTP_PORT
    687     auto_open = 1
    688     debuglevel = 0
    689     strict = 0
    690 
    def __init__(self, host, port=None, strict=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
        """Record the target of the connection; no socket is opened here.

        host           -- server name, optionally with ":port" appended.
        port           -- port number; when None it is taken from `host`
                          or falls back to default_port.
        strict         -- when not None, overrides the class-level strict.
        timeout        -- passed to socket.create_connection() by connect().
        source_address -- passed to socket.create_connection() by connect().
        """
        # Parameters consumed later by connect().
        self.source_address = source_address
        self.timeout = timeout

        # Nothing is connected or queued yet.
        self.sock = None
        self._buffer = []
        self._method = None

        # Request/response state machine (see the module docstring).
        self.__state = _CS_IDLE
        self.__response = None

        # CONNECT tunnel settings, filled in by set_tunnel().
        self._tunnel_host = self._tunnel_port = None
        self._tunnel_headers = {}

        self._set_hostport(host, port)
        if strict is not None:
            self.strict = strict
    707 
    708     def set_tunnel(self, host, port=None, headers=None):
    709         """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
    710 
    711         The headers argument should be a mapping of extra HTTP headers
    712         to send with the CONNECT request.
    713         """
    714         self._tunnel_host = host
    715         self._tunnel_port = port
    716         if headers:
    717             self._tunnel_headers = headers
    718         else:
    719             self._tunnel_headers.clear()
    720 
    721     def _set_hostport(self, host, port):
    722         if port is None:
    723             i = host.rfind(':')
    724             j = host.rfind(']')         # ipv6 addresses have [...]
    725             if i > j:
    726                 try:
    727                     port = int(host[i+1:])
    728                 except ValueError:
    729                     if host[i+1:] == "":  # http://foo.com:/ == http://foo.com/
    730                         port = self.default_port
    731                     else:
    732                         raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
    733                 host = host[:i]
    734             else:
    735                 port = self.default_port
    736             if host and host[0] == '[' and host[-1] == ']':
    737                 host = host[1:-1]
    738         self.host = host
    739         self.port = port
    740 
    def set_debuglevel(self, level):
        """Set the debug level; values above zero make send() print its data."""
        self.debuglevel = level
    743 
    def _tunnel(self):
        """Issue an HTTP CONNECT request over the freshly opened socket.

        self.host and self.port are replaced by the tunnel target before
        the request is sent, together with the extra headers registered by
        set_tunnel().  The status line of the reply is read and the rest of
        its header block is consumed and discarded.

        Raises socket.error (after closing the connection) when the reply
        status is not 200, and LineTooLong when a reply header line is
        longer than _MAXLINE.
        """
        # From here on self.host/self.port name the tunnel target.
        self._set_hostport(self._tunnel_host, self._tunnel_port)
        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
        for header, value in self._tunnel_headers.iteritems():
            self.send("%s: %s\r\n" % (header, value))
        self.send("\r\n")
        response = self.response_class(self.sock, strict = self.strict,
                                       method = self._method)
        (version, code, message) = response._read_status()

        if code != 200:
            self.close()
            raise socket.error("Tunnel connection failed: %d %s" % (code,
                                                                    message.strip()))
        # Drain the remaining header lines of the CONNECT reply.
        # NOTE(review): only an exact '\r\n' line (or EOF) ends this loop; a
        # blank line terminated by a bare '\n' would not -- confirm whether
        # such replies need to be tolerated.
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                # for sites which EOF without sending trailer
                break
            if line == '\r\n':
                break
    767 
    768 
    def connect(self):
        """Open a socket to self.host:self.port and tunnel if configured."""
        address = (self.host, self.port)
        self.sock = socket.create_connection(address, self.timeout,
                                             self.source_address)

        # A tunnel host was configured: issue the CONNECT handshake now.
        if self._tunnel_host:
            self._tunnel()
    776 
    def close(self):
        """Close the socket and any retained response, then go idle."""
        sock = self.sock
        if sock:
            # closed explicitly because other references may still exist
            sock.close()
            self.sock = None
        pending = self.__response
        if pending:
            pending.close()
            self.__response = None
        self.__state = _CS_IDLE
    786 
    787     def send(self, data):
    788         """Send `data' to the server."""
    789         if self.sock is None:
    790             if self.auto_open:
    791                 self.connect()
    792             else:
    793                 raise NotConnected()
    794 
    795         if self.debuglevel > 0:
    796             print "send:", repr(data)
    797         blocksize = 8192
    798         if hasattr(data,'read') and not isinstance(data, array):
    799             if self.debuglevel > 0: print "sendIng a read()able"
    800             datablock = data.read(blocksize)
    801             while datablock:
    802                 self.sock.sendall(datablock)
    803                 datablock = data.read(blocksize)
    804         else:
    805             self.sock.sendall(data)
    806 
    807     def _output(self, s):
    808         """Add a line of output to the current request buffer.
    809 
    810         Assumes that the line does *not* end with \\r\\n.
    811         """
    812         self._buffer.append(s)
    813 
    814     def _send_output(self, message_body=None):
    815         """Send the currently buffered request and clear the buffer.
    816 
    817         Appends an extra \\r\\n to the buffer.
    818         A message_body may be specified, to be appended to the request.
    819         """
    820         self._buffer.extend(("", ""))
    821         msg = "\r\n".join(self._buffer)
    822         del self._buffer[:]
    823         # If msg and message_body are sent in a single send() call,
    824         # it will avoid performance problems caused by the interaction
    825         # between delayed ack and the Nagle algorithm.
    826         if isinstance(message_body, str):
    827             msg += message_body
    828             message_body = None
    829         self.send(msg)
    830         if message_body is not None:
    831             #message_body was not a string (i.e. it is a file) and
    832             #we must run the risk of Nagle
    833             self.send(message_body)
    834 
    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
        """Begin a new request by buffering its request line.

        `method' is the HTTP request method, e.g. 'GET'.
        `url' is the object being requested, e.g. '/index.html'; an empty
           value is replaced by '/'.
        `skip_host' if true, no 'Host:' header is added automatically
        `skip_accept_encoding' if true, no 'Accept-Encoding:' header is
           added automatically

        Raises CannotSendRequest unless the connection is idle.
        """

        # a prior response that has been fully consumed can be forgotten.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # There are cases in which another request cannot be issued on this
        # connection:
        #   1) a request is currently being sent.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it will
        #      close the connection upon completion.
        #   3) the headers of the previous response have not been read, so
        #      we cannot tell whether point (2) holds.   (_CS_REQ_SENT)
        #
        # Without a prior response we can request at will.
        #
        # When point (2) holds, the socket has been handed to the response
        # (effectively meaning "there is no prior response") and a new
        # socket is opened when a new request is made.
        #
        # Note: if a prior response exists, a new request *can* be started.
        #       Fetching the response to that new request is not allowed,
        #       however, until the prior response is complete.
        #
        if self.__state != _CS_IDLE:
            raise CannotSendRequest()
        self.__state = _CS_REQ_STARTED

        # The method is remembered because the response phase needs it.
        self._method = method
        url = url or '/'
        self._output('%s %s %s' % (method, url, self._http_vsn_str))

        if self._http_vsn != 11:
            # For HTTP/1.0, the server will assume "not chunked"
            return

        # HTTP/1.1 only: issue some standard headers for better compliance.

        if not skip_host:
            # The Host: header is issued *only* for HTTP/1.1 connections,
            # i.e. when the client uses the HTTPConnection() class directly.
            # Backwards-compat clients use HTTP/1.0 and may issue the header
            # themselves; it must NOT be sent twice because some web servers
            # (such as Apache) barf when they see two Host: headers.

            # A non-standard port is included in the header.  If the request
            # goes through a proxy, the host of the actual URL is used, not
            # the host of the proxy.

            netloc = ''
            if url.startswith('http'):
                netloc = urlsplit(url)[1]

            if netloc:
                try:
                    host_value = netloc.encode("ascii")
                except UnicodeEncodeError:
                    host_value = netloc.encode("idna")
            else:
                try:
                    host_value = self.host.encode("ascii")
                except UnicodeEncodeError:
                    host_value = self.host.encode("idna")
                # Wrap the IPv6 Host Header with [] (RFC 2732)
                if ':' in host_value:
                    host_value = "[" + host_value + "]"
                if self.port != self.default_port:
                    host_value = "%s:%s" % (host_value, self.port)
            self.putheader('Host', host_value)

        # note: clients are assumed not to set the headers below themselves,
        #       since *this* library must deal with the consequences.  When
        #       the supporting libraries learn to recognize other forms,
        #       this code should be changed (removed or updated).

        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        if not skip_accept_encoding:
            self.putheader('Accept-Encoding', 'identity')

        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked"
        #self.putheader('TE', 'chunked')

        # if TE is supplied in the header, then it must appear in a
        # Connection header.
        #self.putheader('Connection', 'TE')
    944 
    def putheader(self, header, *values):
        """Buffer one request header line.

        For example: h.putheader('Accept', 'text/html')

        Several values are converted with str() and joined as continuation
        lines ("\\r\\n\\t").  Raises CannotSendHeader unless a request has
        been started and its headers are still open.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        folded = '\r\n\t'.join(map(str, values))
        self._output('%s: %s' % (header, folded))
    955 
    def endheaders(self, message_body=None):
        """Finish the header section and send the request to the server.

        The optional message_body argument is the message body associated
        with the request.  A string body is sent in the same packet as the
        message headers; any other body is sent as a separate packet.

        Raises CannotSendHeader unless a request has been started.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()
        self.__state = _CS_REQ_SENT
        self._send_output(message_body)
    970 
    def request(self, method, url, body=None, headers=None):
        """Send a complete request to the server.

        `method' and `url' form the request line, `body' is the optional
        message body and `headers' is a mapping of extra header names to
        values.  Omitting `headers' (or passing None) sends no extra
        headers.
        """
        # A fresh dict per call: a shared mutable default could leak
        # headers between requests if anything downstream modified it.
        if headers is None:
            headers = {}
        self._send_request(method, url, body, headers)
    974 
    975     def _set_content_length(self, body):
    976         # Set the content-length based on the body.
    977         thelen = None
    978         try:
    979             thelen = str(len(body))
    980         except TypeError, te:
    981             # If this is a file-like object, try to
    982             # fstat its file descriptor
    983             try:
    984                 thelen = str(os.fstat(body.fileno()).st_size)
    985             except (AttributeError, OSError):
    986                 # Don't send a length if this failed
    987                 if self.debuglevel > 0: print "Cannot stat!!"
    988 
    989         if thelen is not None:
    990             self.putheader('Content-Length', thelen)
    991 
    992     def _send_request(self, method, url, body, headers):
    993         # Honor explicitly requested Host: and Accept-Encoding: headers.
    994         header_names = dict.fromkeys([k.lower() for k in headers])
    995         skips = {}
    996         if 'host' in header_names:
    997             skips['skip_host'] = 1
    998         if 'accept-encoding' in header_names:
    999             skips['skip_accept_encoding'] = 1
   1000 
   1001         self.putrequest(method, url, **skips)
   1002 
   1003         if body is not None and 'content-length' not in header_names:
   1004             self._set_content_length(body)
   1005         for hdr, value in headers.iteritems():
   1006             self.putheader(hdr, value)
   1007         self.endheaders(body)
   1008 
   1009     def getresponse(self, buffering=False):
   1010         "Get the response from the server."
   1011 
   1012         # if a prior response has been completed, then forget about it.
   1013         if self.__response and self.__response.isclosed():
   1014             self.__response = None
   1015 
   1016         #
   1017         # if a prior response exists, then it must be completed (otherwise, we
   1018         # cannot read this response's header to determine the connection-close
   1019         # behavior)
   1020         #
   1021         # note: if a prior response existed, but was connection-close, then the
   1022         # socket and response were made independent of this HTTPConnection
   1023         # object since a new request requires that we open a whole new
   1024         # connection
   1025         #
   1026         # this means the prior response had one of two states:
   1027         #   1) will_close: this connection was reset and the prior socket and
   1028         #                  response operate independently
   1029         #   2) persistent: the response was retained and we await its
   1030         #                  isclosed() status to become true.
   1031         #
   1032         if self.__state != _CS_REQ_SENT or self.__response:
   1033             raise ResponseNotReady()
   1034 
   1035         args = (self.sock,)
   1036         kwds = {"strict":self.strict, "method":self._method}
   1037         if self.debuglevel > 0:
   1038             args += (self.debuglevel,)
   1039         if buffering:
   1040             #only add this keyword if non-default, for compatibility with
   1041             #other response_classes.
   1042             kwds["buffering"] = True;
   1043         response = self.response_class(*args, **kwds)
   1044 
   1045         response.begin()
   1046         assert response.will_close != _UNKNOWN
   1047         self.__state = _CS_IDLE
   1048 
   1049         if response.will_close:
   1050             # this effectively passes the connection to the response
   1051             self.close()
   1052         else:
   1053             # remember this, so we can tell when it is complete
   1054             self.__response = response
   1055 
   1056         return response
   1057 
   1058 
   1059 class HTTP:
   1060     "Compatibility class with httplib.py from 1.5."
   1061 
   1062     _http_vsn = 10
   1063     _http_vsn_str = 'HTTP/1.0'
   1064 
   1065     debuglevel = 0
   1066 
   1067     _connection_class = HTTPConnection
   1068 
   1069     def __init__(self, host='', port=None, strict=None):
   1070         "Provide a default host, since the superclass requires one."
   1071 
   1072         # some joker passed 0 explicitly, meaning default port
   1073         if port == 0:
   1074             port = None
   1075 
   1076         # Note that we may pass an empty string as the host; this will raise
   1077         # an error when we attempt to connect. Presumably, the client code
   1078         # will call connect before then, with a proper host.
   1079         self._setup(self._connection_class(host, port, strict))
   1080 
   1081     def _setup(self, conn):
   1082         self._conn = conn
   1083 
   1084         # set up delegation to flesh out interface
   1085         self.send = conn.send
   1086         self.putrequest = conn.putrequest
   1087         self.putheader = conn.putheader
   1088         self.endheaders = conn.endheaders
   1089         self.set_debuglevel = conn.set_debuglevel
   1090 
   1091         conn._http_vsn = self._http_vsn
   1092         conn._http_vsn_str = self._http_vsn_str
   1093 
   1094         self.file = None
   1095 
   1096     def connect(self, host=None, port=None):
   1097         "Accept arguments to set the host/port, since the superclass doesn't."
   1098 
   1099         if host is not None:
   1100             self._conn._set_hostport(host, port)
   1101         self._conn.connect()
   1102 
   1103     def getfile(self):
   1104         "Provide a getfile, since the superclass' does not use this concept."
   1105         return self.file
   1106 
   1107     def getreply(self, buffering=False):
   1108         """Compat definition since superclass does not define it.
   1109 
   1110         Returns a tuple consisting of:
   1111         - server status code (e.g. '200' if all goes well)
   1112         - server "reason" corresponding to status code
   1113         - any RFC822 headers in the response from the server
   1114         """
   1115         try:
   1116             if not buffering:
   1117                 response = self._conn.getresponse()
   1118             else:
   1119                 #only add this keyword if non-default for compatibility
   1120                 #with other connection classes
   1121                 response = self._conn.getresponse(buffering)
   1122         except BadStatusLine, e:
   1123             ### hmm. if getresponse() ever closes the socket on a bad request,
   1124             ### then we are going to have problems with self.sock
   1125 
   1126             ### should we keep this behavior? do people use it?
   1127             # keep the socket open (as a file), and return it
   1128             self.file = self._conn.sock.makefile('rb', 0)
   1129 
   1130             # close our socket -- we want to restart after any protocol error
   1131             self.close()
   1132 
   1133             self.headers = None
   1134             return -1, e.line, None
   1135 
   1136         self.headers = response.msg
   1137         self.file = response.fp
   1138         return response.status, response.reason, response.msg
   1139 
   1140     def close(self):
   1141         self._conn.close()
   1142 
   1143         # note that self.file == response.fp, which gets closed by the
   1144         # superclass. just clear the object ref here.
   1145         ### hmm. messy. if status==-1, then self.file is owned by us.
   1146         ### well... we aren't explicitly closing, but losing this ref will
   1147         ### do it
   1148         self.file = None
   1149 
   1150 try:
   1151     import ssl
   1152 except ImportError:
   1153     pass
   1154 else:
   1155     class HTTPSConnection(HTTPConnection):
   1156         "This class allows communication via SSL."
   1157 
   1158         default_port = HTTPS_PORT
   1159 
   1160         def __init__(self, host, port=None, key_file=None, cert_file=None,
   1161                      strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
   1162                      source_address=None):
   1163             HTTPConnection.__init__(self, host, port, strict, timeout,
   1164                                     source_address)
   1165             self.key_file = key_file
   1166             self.cert_file = cert_file
   1167 
   1168         def connect(self):
   1169             "Connect to a host on a given (SSL) port."
   1170 
   1171             sock = socket.create_connection((self.host, self.port),
   1172                                             self.timeout, self.source_address)
   1173             if self._tunnel_host:
   1174                 self.sock = sock
   1175                 self._tunnel()
   1176             self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
   1177 
   1178     __all__.append("HTTPSConnection")
   1179 
   1180     class HTTPS(HTTP):
   1181         """Compatibility with 1.5 httplib interface
   1182 
   1183         Python 1.5.2 did not have an HTTPS class, but it defined an
   1184         interface for sending http requests that is also useful for
   1185         https.
   1186         """
   1187 
   1188         _connection_class = HTTPSConnection
   1189 
   1190         def __init__(self, host='', port=None, key_file=None, cert_file=None,
   1191                      strict=None):
   1192             # provide a default host, pass the X509 cert info
   1193 
   1194             # urf. compensate for bad input.
   1195             if port == 0:
   1196                 port = None
   1197             self._setup(self._connection_class(host, port, key_file,
   1198                                                cert_file, strict))
   1199 
   1200             # we never actually use these for anything, but we keep them
   1201             # here for compatibility with post-1.5.2 CVS.
   1202             self.key_file = key_file
   1203             self.cert_file = cert_file
   1204 
   1205 
   1206     def FakeSocket (sock, sslobj):
   1207         warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
   1208                       "Use the result of ssl.wrap_socket() directly instead.",
   1209                       DeprecationWarning, stacklevel=2)
   1210         return sslobj
   1211 
   1212 
   1213 class HTTPException(Exception):
   1214     # Subclasses that define an __init__ must call Exception.__init__
   1215     # or define self.args.  Otherwise, str() will fail.
   1216     pass
   1217 
   1218 class NotConnected(HTTPException):
   1219     pass
   1220 
   1221 class InvalidURL(HTTPException):
   1222     pass
   1223 
   1224 class UnknownProtocol(HTTPException):
   1225     def __init__(self, version):
   1226         self.args = version,
   1227         self.version = version
   1228 
   1229 class UnknownTransferEncoding(HTTPException):
   1230     pass
   1231 
   1232 class UnimplementedFileMode(HTTPException):
   1233     pass
   1234 
   1235 class IncompleteRead(HTTPException):
   1236     def __init__(self, partial, expected=None):
   1237         self.args = partial,
   1238         self.partial = partial
   1239         self.expected = expected
   1240     def __repr__(self):
   1241         if self.expected is not None:
   1242             e = ', %i more expected' % self.expected
   1243         else:
   1244             e = ''
   1245         return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
   1246     def __str__(self):
   1247         return repr(self)
   1248 
   1249 class ImproperConnectionState(HTTPException):
   1250     pass
   1251 
   1252 class CannotSendRequest(ImproperConnectionState):
   1253     pass
   1254 
   1255 class CannotSendHeader(ImproperConnectionState):
   1256     pass
   1257 
   1258 class ResponseNotReady(ImproperConnectionState):
   1259     pass
   1260 
   1261 class BadStatusLine(HTTPException):
   1262     def __init__(self, line):
   1263         if not line:
   1264             line = repr(line)
   1265         self.args = line,
   1266         self.line = line
   1267 
   1268 class LineTooLong(HTTPException):
   1269     def __init__(self, line_type):
   1270         HTTPException.__init__(self, "got more than %d bytes when reading %s"
   1271                                      % (_MAXLINE, line_type))
   1272 
# for backwards compatibility: `error` is an alias that refers to the same
# class object as HTTPException
error = HTTPException
   1275 
   1276 class LineAndFileWrapper:
   1277     """A limited file-like object for HTTP/0.9 responses."""
   1278 
   1279     # The status-line parsing code calls readline(), which normally
   1280     # get the HTTP status line.  For a 0.9 response, however, this is
   1281     # actually the first line of the body!  Clients need to get a
   1282     # readable file object that contains that line.
   1283 
   1284     def __init__(self, line, file):
   1285         self._line = line
   1286         self._file = file
   1287         self._line_consumed = 0
   1288         self._line_offset = 0
   1289         self._line_left = len(line)
   1290 
   1291     def __getattr__(self, attr):
   1292         return getattr(self._file, attr)
   1293 
   1294     def _done(self):
   1295         # called when the last byte is read from the line.  After the
   1296         # call, all read methods are delegated to the underlying file
   1297         # object.
   1298         self._line_consumed = 1
   1299         self.read = self._file.read
   1300         self.readline = self._file.readline
   1301         self.readlines = self._file.readlines
   1302 
   1303     def read(self, amt=None):
   1304         if self._line_consumed:
   1305             return self._file.read(amt)
   1306         assert self._line_left
   1307         if amt is None or amt > self._line_left:
   1308             s = self._line[self._line_offset:]
   1309             self._done()
   1310             if amt is None:
   1311                 return s + self._file.read()
   1312             else:
   1313                 return s + self._file.read(amt - len(s))
   1314         else:
   1315             assert amt <= self._line_left
   1316             i = self._line_offset
   1317             j = i + amt
   1318             s = self._line[i:j]
   1319             self._line_offset = j
   1320             self._line_left -= amt
   1321             if self._line_left == 0:
   1322                 self._done()
   1323             return s
   1324 
   1325     def readline(self):
   1326         if self._line_consumed:
   1327             return self._file.readline()
   1328         assert self._line_left
   1329         s = self._line[self._line_offset:]
   1330         self._done()
   1331         return s
   1332 
   1333     def readlines(self, size=None):
   1334         if self._line_consumed:
   1335             return self._file.readlines(size)
   1336         assert self._line_left
   1337         L = [self._line[self._line_offset:]]
   1338         self._done()
   1339         if size is None:
   1340             return L + self._file.readlines()
   1341         else:
   1342             return L + self._file.readlines(size)
   1343