Home | History | Annotate | Download | only in Lib
      1 """HTTP/1.1 client library
      2 
      3 <intro stuff goes here>
      4 <other stuff, too>
      5 
      6 HTTPConnection goes through a number of "states", which define when a client
      7 may legally make another request or fetch the response for a particular
      8 request. This diagram details these state transitions:
      9 
     10     (null)
     11       |
     12       | HTTPConnection()
     13       v
     14     Idle
     15       |
     16       | putrequest()
     17       v
     18     Request-started
     19       |
     20       | ( putheader() )*  endheaders()
     21       v
     22     Request-sent
     23       |
     24       | response = getresponse()
     25       v
     26     Unread-response   [Response-headers-read]
     27       |\____________________
     28       |                     |
     29       | response.read()     | putrequest()
     30       v                     v
     31     Idle                  Req-started-unread-response
     32                      ______/|
     33                    /        |
     34    response.read() |        | ( putheader() )*  endheaders()
     35                    v        v
     36        Request-started    Req-sent-unread-response
     37                             |
     38                             | response.read()
     39                             v
     40                           Request-sent
     41 
     42 This diagram presents the following rules:
     43   -- a second request may not be started until {response-headers-read}
     44   -- a response [object] cannot be retrieved until {request-sent}
     45   -- there is no differentiation between an unread response body and a
     46      partially read response body
     47 
     48 Note: this enforcement is applied by the HTTPConnection class. The
     49       HTTPResponse class does not enforce this state machine, which
     50       implies sophisticated clients may accelerate the request/response
     51       pipeline. Caution should be taken, though: accelerating the states
     52       beyond the above pattern may imply knowledge of the server's
     53       connection-close behavior for certain requests. For example, it
     54       is impossible to tell whether the server will close the connection
     55       UNTIL the response headers have been read; this means that further
     56       requests cannot be placed into the pipeline until it is known that
     57       the server will NOT be closing the connection.
     58 
     59 Logical State                  __state            __response
     60 -------------                  -------            ----------
     61 Idle                           _CS_IDLE           None
     62 Request-started                _CS_REQ_STARTED    None
     63 Request-sent                   _CS_REQ_SENT       None
     64 Unread-response                _CS_IDLE           <response_class>
     65 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
     66 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
     67 """
     68 
     69 from array import array
     70 import os
     71 import socket
     72 from sys import py3kwarning
     73 from urlparse import urlsplit
     74 import warnings
     75 with warnings.catch_warnings():
     76     if py3kwarning:
     77         warnings.filterwarnings("ignore", ".*mimetools has been removed",
     78                                 DeprecationWarning)
     79     import mimetools
     80 
     81 try:
     82     from cStringIO import StringIO
     83 except ImportError:
     84     from StringIO import StringIO
     85 
     86 __all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
     87            "HTTPException", "NotConnected", "UnknownProtocol",
     88            "UnknownTransferEncoding", "UnimplementedFileMode",
     89            "IncompleteRead", "InvalidURL", "ImproperConnectionState",
     90            "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
     91            "BadStatusLine", "error", "responses"]
     92 
     93 HTTP_PORT = 80
     94 HTTPS_PORT = 443
     95 
     96 _UNKNOWN = 'UNKNOWN'
     97 
     98 # connection states

     99 _CS_IDLE = 'Idle'
    100 _CS_REQ_STARTED = 'Request-started'
    101 _CS_REQ_SENT = 'Request-sent'
    102 
    103 # status codes

    104 # informational

    105 CONTINUE = 100
    106 SWITCHING_PROTOCOLS = 101
    107 PROCESSING = 102
    108 
    109 # successful

    110 OK = 200
    111 CREATED = 201
    112 ACCEPTED = 202
    113 NON_AUTHORITATIVE_INFORMATION = 203
    114 NO_CONTENT = 204
    115 RESET_CONTENT = 205
    116 PARTIAL_CONTENT = 206
    117 MULTI_STATUS = 207
    118 IM_USED = 226
    119 
    120 # redirection

    121 MULTIPLE_CHOICES = 300
    122 MOVED_PERMANENTLY = 301
    123 FOUND = 302
    124 SEE_OTHER = 303
    125 NOT_MODIFIED = 304
    126 USE_PROXY = 305
    127 TEMPORARY_REDIRECT = 307
    128 
    129 # client error

    130 BAD_REQUEST = 400
    131 UNAUTHORIZED = 401
    132 PAYMENT_REQUIRED = 402
    133 FORBIDDEN = 403
    134 NOT_FOUND = 404
    135 METHOD_NOT_ALLOWED = 405
    136 NOT_ACCEPTABLE = 406
    137 PROXY_AUTHENTICATION_REQUIRED = 407
    138 REQUEST_TIMEOUT = 408
    139 CONFLICT = 409
    140 GONE = 410
    141 LENGTH_REQUIRED = 411
    142 PRECONDITION_FAILED = 412
    143 REQUEST_ENTITY_TOO_LARGE = 413
    144 REQUEST_URI_TOO_LONG = 414
    145 UNSUPPORTED_MEDIA_TYPE = 415
    146 REQUESTED_RANGE_NOT_SATISFIABLE = 416
    147 EXPECTATION_FAILED = 417
    148 UNPROCESSABLE_ENTITY = 422
    149 LOCKED = 423
    150 FAILED_DEPENDENCY = 424
    151 UPGRADE_REQUIRED = 426
    152 
    153 # server error

    154 INTERNAL_SERVER_ERROR = 500
    155 NOT_IMPLEMENTED = 501
    156 BAD_GATEWAY = 502
    157 SERVICE_UNAVAILABLE = 503
    158 GATEWAY_TIMEOUT = 504
    159 HTTP_VERSION_NOT_SUPPORTED = 505
    160 INSUFFICIENT_STORAGE = 507
    161 NOT_EXTENDED = 510
    162 
    163 # Mapping status codes to official W3C names

    164 responses = {
    165     100: 'Continue',
    166     101: 'Switching Protocols',
    167 
    168     200: 'OK',
    169     201: 'Created',
    170     202: 'Accepted',
    171     203: 'Non-Authoritative Information',
    172     204: 'No Content',
    173     205: 'Reset Content',
    174     206: 'Partial Content',
    175 
    176     300: 'Multiple Choices',
    177     301: 'Moved Permanently',
    178     302: 'Found',
    179     303: 'See Other',
    180     304: 'Not Modified',
    181     305: 'Use Proxy',
    182     306: '(Unused)',
    183     307: 'Temporary Redirect',
    184 
    185     400: 'Bad Request',
    186     401: 'Unauthorized',
    187     402: 'Payment Required',
    188     403: 'Forbidden',
    189     404: 'Not Found',
    190     405: 'Method Not Allowed',
    191     406: 'Not Acceptable',
    192     407: 'Proxy Authentication Required',
    193     408: 'Request Timeout',
    194     409: 'Conflict',
    195     410: 'Gone',
    196     411: 'Length Required',
    197     412: 'Precondition Failed',
    198     413: 'Request Entity Too Large',
    199     414: 'Request-URI Too Long',
    200     415: 'Unsupported Media Type',
    201     416: 'Requested Range Not Satisfiable',
    202     417: 'Expectation Failed',
    203 
    204     500: 'Internal Server Error',
    205     501: 'Not Implemented',
    206     502: 'Bad Gateway',
    207     503: 'Service Unavailable',
    208     504: 'Gateway Timeout',
    209     505: 'HTTP Version Not Supported',
    210 }
    211 
    212 # maximal amount of data to read at one time in _safe_read

    213 MAXAMOUNT = 1048576
    214 
    215 # maximal line length when calling readline().

    216 _MAXLINE = 65536
    217 
    218 class HTTPMessage(mimetools.Message):
    219 
    220     def addheader(self, key, value):
    221         """Add header for field key handling repeats."""
    222         prev = self.dict.get(key)
    223         if prev is None:
    224             self.dict[key] = value
    225         else:
    226             combined = ", ".join((prev, value))
    227             self.dict[key] = combined
    228 
    229     def addcontinue(self, key, more):
    230         """Add more field data from a continuation line."""
    231         prev = self.dict[key]
    232         self.dict[key] = prev + "\n " + more
    233 
    234     def readheaders(self):
    235         """Read header lines.
    236 
    237         Read header lines up to the entirely blank line that terminates them.
    238         The (normally blank) line that ends the headers is skipped, but not
    239         included in the returned list.  If a non-header line ends the headers,
    240         (which is an error), an attempt is made to backspace over it; it is
    241         never included in the returned list.
    242 
    243         The variable self.status is set to the empty string if all went well,
    244         otherwise it is an error message.  The variable self.headers is a
    245         completely uninterpreted list of lines contained in the header (so
    246         printing them will reproduce the header exactly as it appears in the
    247         file).
    248 
    249         If multiple header fields with the same name occur, they are combined
    250         according to the rules in RFC 2616 sec 4.2:
    251 
    252         Appending each subsequent field-value to the first, each separated
    253         by a comma. The order in which header fields with the same field-name
    254         are received is significant to the interpretation of the combined
    255         field value.
    256         """
    257         # XXX The implementation overrides the readheaders() method of

    258         # rfc822.Message.  The base class design isn't amenable to

    259         # customized behavior here so the method here is a copy of the

    260         # base class code with a few small changes.

    261 
    262         self.dict = {}
    263         self.unixfrom = ''
    264         self.headers = hlist = []
    265         self.status = ''
    266         headerseen = ""
    267         firstline = 1
    268         startofline = unread = tell = None
    269         if hasattr(self.fp, 'unread'):
    270             unread = self.fp.unread
    271         elif self.seekable:
    272             tell = self.fp.tell
    273         while True:
    274             if tell:
    275                 try:
    276                     startofline = tell()
    277                 except IOError:
    278                     startofline = tell = None
    279                     self.seekable = 0
    280             line = self.fp.readline(_MAXLINE + 1)
    281             if len(line) > _MAXLINE:
    282                 raise LineTooLong("header line")
    283             if not line:
    284                 self.status = 'EOF in headers'
    285                 break
    286             # Skip unix From name time lines

    287             if firstline and line.startswith('From '):
    288                 self.unixfrom = self.unixfrom + line
    289                 continue
    290             firstline = 0
    291             if headerseen and line[0] in ' \t':
    292                 # XXX Not sure if continuation lines are handled properly

    293                 # for http and/or for repeating headers

    294                 # It's a continuation line.

    295                 hlist.append(line)
    296                 self.addcontinue(headerseen, line.strip())
    297                 continue
    298             elif self.iscomment(line):
    299                 # It's a comment.  Ignore it.

    300                 continue
    301             elif self.islast(line):
    302                 # Note! No pushback here!  The delimiter line gets eaten.

    303                 break
    304             headerseen = self.isheader(line)
    305             if headerseen:
    306                 # It's a legal header line, save it.

    307                 hlist.append(line)
    308                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
    309                 continue
    310             else:
    311                 # It's not a header line; throw it back and stop here.

    312                 if not self.dict:
    313                     self.status = 'No headers'
    314                 else:
    315                     self.status = 'Non-header line where header expected'
    316                 # Try to undo the read.

    317                 if unread:
    318                     unread(line)
    319                 elif tell:
    320                     self.fp.seek(startofline)
    321                 else:
    322                     self.status = self.status + '; bad seek'
    323                 break
    324 
    325 class HTTPResponse:
    326 
    327     # strict: If true, raise BadStatusLine if the status line can't be

    328     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is

    329     # false because it prevents clients from talking to HTTP/0.9

    330     # servers.  Note that a response with a sufficiently corrupted

    331     # status line will look like an HTTP/0.9 response.

    332 
    333     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    334 
    335     def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
    336         if buffering:
    337             # The caller won't be using any sock.recv() calls, so buffering

    338             # is fine and recommended for performance.

    339             self.fp = sock.makefile('rb')
    340         else:
    341             # The buffer size is specified as zero, because the headers of

    342             # the response are read with readline().  If the reads were

    343             # buffered the readline() calls could consume some of the

    344             # response, which make be read via a recv() on the underlying

    345             # socket.

    346             self.fp = sock.makefile('rb', 0)
    347         self.debuglevel = debuglevel
    348         self.strict = strict
    349         self._method = method
    350 
    351         self.msg = None
    352 
    353         # from the Status-Line of the response

    354         self.version = _UNKNOWN # HTTP-Version

    355         self.status = _UNKNOWN  # Status-Code

    356         self.reason = _UNKNOWN  # Reason-Phrase

    357 
    358         self.chunked = _UNKNOWN         # is "chunked" being used?

    359         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk

    360         self.length = _UNKNOWN          # number of bytes left in response

    361         self.will_close = _UNKNOWN      # conn will close at end of response

    362 
    363     def _read_status(self):
    364         # Initialize with Simple-Response defaults

    365         line = self.fp.readline()
    366         if self.debuglevel > 0:
    367             print "reply:", repr(line)
    368         if not line:
    369             # Presumably, the server closed the connection before

    370             # sending a valid response.

    371             raise BadStatusLine(line)
    372         try:
    373             [version, status, reason] = line.split(None, 2)
    374         except ValueError:
    375             try:
    376                 [version, status] = line.split(None, 1)
    377                 reason = ""
    378             except ValueError:
    379                 # empty version will cause next test to fail and status

    380                 # will be treated as 0.9 response.

    381                 version = ""
    382         if not version.startswith('HTTP/'):
    383             if self.strict:
    384                 self.close()
    385                 raise BadStatusLine(line)
    386             else:
    387                 # assume it's a Simple-Response from an 0.9 server

    388                 self.fp = LineAndFileWrapper(line, self.fp)
    389                 return "HTTP/0.9", 200, ""
    390 
    391         # The status code is a three-digit number

    392         try:
    393             status = int(status)
    394             if status < 100 or status > 999:
    395                 raise BadStatusLine(line)
    396         except ValueError:
    397             raise BadStatusLine(line)
    398         return version, status, reason
    399 
    400     def begin(self):
    401         if self.msg is not None:
    402             # we've already started reading the response

    403             return
    404 
    405         # read until we get a non-100 response

    406         while True:
    407             version, status, reason = self._read_status()
    408             if status != CONTINUE:
    409                 break
    410             # skip the header from the 100 response

    411             while True:
    412                 skip = self.fp.readline(_MAXLINE + 1)
    413                 if len(skip) > _MAXLINE:
    414                     raise LineTooLong("header line")
    415                 skip = skip.strip()
    416                 if not skip:
    417                     break
    418                 if self.debuglevel > 0:
    419                     print "header:", skip
    420 
    421         self.status = status
    422         self.reason = reason.strip()
    423         if version == 'HTTP/1.0':
    424             self.version = 10
    425         elif version.startswith('HTTP/1.'):
    426             self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1

    427         elif version == 'HTTP/0.9':
    428             self.version = 9
    429         else:
    430             raise UnknownProtocol(version)
    431 
    432         if self.version == 9:
    433             self.length = None
    434             self.chunked = 0
    435             self.will_close = 1
    436             self.msg = HTTPMessage(StringIO())
    437             return
    438 
    439         self.msg = HTTPMessage(self.fp, 0)
    440         if self.debuglevel > 0:
    441             for hdr in self.msg.headers:
    442                 print "header:", hdr,
    443 
    444         # don't let the msg keep an fp

    445         self.msg.fp = None
    446 
    447         # are we using the chunked-style of transfer encoding?

    448         tr_enc = self.msg.getheader('transfer-encoding')
    449         if tr_enc and tr_enc.lower() == "chunked":
    450             self.chunked = 1
    451             self.chunk_left = None
    452         else:
    453             self.chunked = 0
    454 
    455         # will the connection close at the end of the response?

    456         self.will_close = self._check_close()
    457 
    458         # do we have a Content-Length?

    459         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"

    460         length = self.msg.getheader('content-length')
    461         if length and not self.chunked:
    462             try:
    463                 self.length = int(length)
    464             except ValueError:
    465                 self.length = None
    466             else:
    467                 if self.length < 0:  # ignore nonsensical negative lengths

    468                     self.length = None
    469         else:
    470             self.length = None
    471 
    472         # does the body have a fixed length? (of zero)

    473         if (status == NO_CONTENT or status == NOT_MODIFIED or
    474             100 <= status < 200 or      # 1xx codes

    475             self._method == 'HEAD'):
    476             self.length = 0
    477 
    478         # if the connection remains open, and we aren't using chunked, and

    479         # a content-length was not provided, then assume that the connection

    480         # WILL close.

    481         if not self.will_close and \
    482            not self.chunked and \
    483            self.length is None:
    484             self.will_close = 1
    485 
    486     def _check_close(self):
    487         conn = self.msg.getheader('connection')
    488         if self.version == 11:
    489             # An HTTP/1.1 proxy is assumed to stay open unless

    490             # explicitly closed.

    491             conn = self.msg.getheader('connection')
    492             if conn and "close" in conn.lower():
    493                 return True
    494             return False
    495 
    496         # Some HTTP/1.0 implementations have support for persistent

    497         # connections, using rules different than HTTP/1.1.

    498 
    499         # For older HTTP, Keep-Alive indicates persistent connection.

    500         if self.msg.getheader('keep-alive'):
    501             return False
    502 
    503         # At least Akamai returns a "Connection: Keep-Alive" header,

    504         # which was supposed to be sent by the client.

    505         if conn and "keep-alive" in conn.lower():
    506             return False
    507 
    508         # Proxy-Connection is a netscape hack.

    509         pconn = self.msg.getheader('proxy-connection')
    510         if pconn and "keep-alive" in pconn.lower():
    511             return False
    512 
    513         # otherwise, assume it will close

    514         return True
    515 
    516     def close(self):
    517         if self.fp:
    518             self.fp.close()
    519             self.fp = None
    520 
    521     def isclosed(self):
    522         # NOTE: it is possible that we will not ever call self.close(). This

    523         #       case occurs when will_close is TRUE, length is None, and we

    524         #       read up to the last byte, but NOT past it.

    525         #

    526         # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be

    527         #          called, meaning self.isclosed() is meaningful.

    528         return self.fp is None
    529 
    530     # XXX It would be nice to have readline and __iter__ for this, too.

    531 
    532     def read(self, amt=None):
    533         if self.fp is None:
    534             return ''
    535 
    536         if self._method == 'HEAD':
    537             self.close()
    538             return ''
    539 
    540         if self.chunked:
    541             return self._read_chunked(amt)
    542 
    543         if amt is None:
    544             # unbounded read

    545             if self.length is None:
    546                 s = self.fp.read()
    547             else:
    548                 s = self._safe_read(self.length)
    549                 self.length = 0
    550             self.close()        # we read everything

    551             return s
    552 
    553         if self.length is not None:
    554             if amt > self.length:
    555                 # clip the read to the "end of response"

    556                 amt = self.length
    557 
    558         # we do not use _safe_read() here because this may be a .will_close

    559         # connection, and the user is reading more bytes than will be provided

    560         # (for example, reading in 1k chunks)

    561         s = self.fp.read(amt)
    562         if self.length is not None:
    563             self.length -= len(s)
    564             if not self.length:
    565                 self.close()
    566         return s
    567 
    568     def _read_chunked(self, amt):
    569         assert self.chunked != _UNKNOWN
    570         chunk_left = self.chunk_left
    571         value = []
    572         while True:
    573             if chunk_left is None:
    574                 line = self.fp.readline(_MAXLINE + 1)
    575                 if len(line) > _MAXLINE:
    576                     raise LineTooLong("chunk size")
    577                 i = line.find(';')
    578                 if i >= 0:
    579                     line = line[:i] # strip chunk-extensions

    580                 try:
    581                     chunk_left = int(line, 16)
    582                 except ValueError:
    583                     # close the connection as protocol synchronisation is

    584                     # probably lost

    585                     self.close()
    586                     raise IncompleteRead(''.join(value))
    587                 if chunk_left == 0:
    588                     break
    589             if amt is None:
    590                 value.append(self._safe_read(chunk_left))
    591             elif amt < chunk_left:
    592                 value.append(self._safe_read(amt))
    593                 self.chunk_left = chunk_left - amt
    594                 return ''.join(value)
    595             elif amt == chunk_left:
    596                 value.append(self._safe_read(amt))
    597                 self._safe_read(2)  # toss the CRLF at the end of the chunk

    598                 self.chunk_left = None
    599                 return ''.join(value)
    600             else:
    601                 value.append(self._safe_read(chunk_left))
    602                 amt -= chunk_left
    603 
    604             # we read the whole chunk, get another

    605             self._safe_read(2)      # toss the CRLF at the end of the chunk

    606             chunk_left = None
    607 
    608         # read and discard trailer up to the CRLF terminator

    609         ### note: we shouldn't have any trailers!

    610         while True:
    611             line = self.fp.readline(_MAXLINE + 1)
    612             if len(line) > _MAXLINE:
    613                 raise LineTooLong("trailer line")
    614             if not line:
    615                 # a vanishingly small number of sites EOF without

    616                 # sending the trailer

    617                 break
    618             if line == '\r\n':
    619                 break
    620 
    621         # we read everything; close the "file"

    622         self.close()
    623 
    624         return ''.join(value)
    625 
    626     def _safe_read(self, amt):
    627         """Read the number of bytes requested, compensating for partial reads.
    628 
    629         Normally, we have a blocking socket, but a read() can be interrupted
    630         by a signal (resulting in a partial read).
    631 
    632         Note that we cannot distinguish between EOF and an interrupt when zero
    633         bytes have been read. IncompleteRead() will be raised in this
    634         situation.
    635 
    636         This function should be used when <amt> bytes "should" be present for
    637         reading. If the bytes are truly not available (due to EOF), then the
    638         IncompleteRead exception can be used to detect the problem.
    639         """
    640         # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never

    641         # return less than x bytes unless EOF is encountered.  It now handles

    642         # signal interruptions (socket.error EINTR) internally.  This code

    643         # never caught that exception anyways.  It seems largely pointless.

    644         # self.fp.read(amt) will work fine.

    645         s = []
    646         while amt > 0:
    647             chunk = self.fp.read(min(amt, MAXAMOUNT))
    648             if not chunk:
    649                 raise IncompleteRead(''.join(s), amt)
    650             s.append(chunk)
    651             amt -= len(chunk)
    652         return ''.join(s)
    653 
    654     def fileno(self):
    655         return self.fp.fileno()
    656 
    657     def getheader(self, name, default=None):
    658         if self.msg is None:
    659             raise ResponseNotReady()
    660         return self.msg.getheader(name, default)
    661 
    662     def getheaders(self):
    663         """Return list of (header, value) tuples."""
    664         if self.msg is None:
    665             raise ResponseNotReady()
    666         return self.msg.items()
    667 
    668 
    669 class HTTPConnection:
    670 
    671     _http_vsn = 11
    672     _http_vsn_str = 'HTTP/1.1'
    673 
    674     response_class = HTTPResponse
    675     default_port = HTTP_PORT
    676     auto_open = 1
    677     debuglevel = 0
    678     strict = 0
    679 
    def __init__(self, host, port=None, strict=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
        """Record connection parameters; no socket is opened here.

        host           -- host name, optionally with a ':port' suffix.
        port           -- port number; None lets _set_hostport() decide.
        strict         -- overrides the class-level default when not None.
        timeout        -- stored and passed to socket.create_connection().
        source_address -- stored and passed to socket.create_connection().
        """
        # How to reach the peer.
        self.timeout = timeout
        self.source_address = source_address

        # Connection and request/response state machine.
        self.sock = None
        self._buffer = []
        self._method = None
        self.__response = None
        self.__state = _CS_IDLE

        # CONNECT tunnelling is off until set_tunnel() is called.
        self._tunnel_host = None
        self._tunnel_port = None
        self._tunnel_headers = {}

        self._set_hostport(host, port)
        if strict is not None:
            self.strict = strict
    696 
    697     def set_tunnel(self, host, port=None, headers=None):
    698         """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
    699 
    700         The headers argument should be a mapping of extra HTTP headers
    701         to send with the CONNECT request.
    702         """
    703         self._tunnel_host = host
    704         self._tunnel_port = port
    705         if headers:
    706             self._tunnel_headers = headers
    707         else:
    708             self._tunnel_headers.clear()
    709 
    710     def _set_hostport(self, host, port):
    711         if port is None:
    712             i = host.rfind(':')
    713             j = host.rfind(']')         # ipv6 addresses have [...]

    714             if i > j:
    715                 try:
    716                     port = int(host[i+1:])
    717                 except ValueError:
    718                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
    719                 host = host[:i]
    720             else:
    721                 port = self.default_port
    722             if host and host[0] == '[' and host[-1] == ']':
    723                 host = host[1:-1]
    724         self.host = host
    725         self.port = port
    726 
    727     def set_debuglevel(self, level):
    728         self.debuglevel = level
    729 
    def _tunnel(self):
        """Ask the proxy to open a CONNECT tunnel to the configured target.

        Replaces self.host/self.port with the tunnel target, sends the
        CONNECT request with any extra tunnel headers, and consumes the
        proxy's reply headers so the socket is positioned at the start of
        the tunnelled stream.

        Raises socket.error if the proxy answers with a status other than
        200 (the connection is closed first) and LineTooLong if a reply
        header line exceeds _MAXLINE characters.
        """
        self._set_hostport(self._tunnel_host, self._tunnel_port)
        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
        for header, value in self._tunnel_headers.iteritems():
            self.send("%s: %s\r\n" % (header, value))
        self.send("\r\n")
        response = self.response_class(self.sock, strict = self.strict,
                                       method = self._method)
        (version, code, message) = response._read_status()

        if code != 200:
            self.close()
            raise socket.error("Tunnel connection failed: %d %s" % (code,
                                                                    message.strip()))
        # Discard the rest of the proxy's reply headers.
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                # The proxy hit EOF before the blank line.  readline() keeps
                # returning '' from here on, so stop instead of spinning.
                break
            if line == '\r\n':
                break
    749 
    750 
    def connect(self):
        """Connect to the host and port specified in __init__."""
        address = (self.host, self.port)
        self.sock = socket.create_connection(address, self.timeout,
                                             self.source_address)

        # A configured tunnel is negotiated as soon as the socket is up.
        if self._tunnel_host:
            self._tunnel()
    758 
    def close(self):
        """Close the connection to the HTTP server."""
        sock = self.sock
        if sock:
            # close it manually... there may be other refs
            sock.close()
            self.sock = None
        pending = self.__response
        if pending:
            pending.close()
            self.__response = None
        # whatever happened before, the connection is idle again
        self.__state = _CS_IDLE
    768 
    769     def send(self, data):
    770         """Send `data' to the server."""
    771         if self.sock is None:
    772             if self.auto_open:
    773                 self.connect()
    774             else:
    775                 raise NotConnected()
    776 
    777         if self.debuglevel > 0:
    778             print "send:", repr(data)
    779         blocksize = 8192
    780         if hasattr(data,'read') and not isinstance(data, array):
    781             if self.debuglevel > 0: print "sendIng a read()able"
    782             datablock = data.read(blocksize)
    783             while datablock:
    784                 self.sock.sendall(datablock)
    785                 datablock = data.read(blocksize)
    786         else:
    787             self.sock.sendall(data)
    788 
    789     def _output(self, s):
    790         """Add a line of output to the current request buffer.
    791 
    792         Assumes that the line does *not* end with \\r\\n.
    793         """
    794         self._buffer.append(s)
    795 
    796     def _send_output(self, message_body=None):
    797         """Send the currently buffered request and clear the buffer.
    798 
    799         Appends an extra \\r\\n to the buffer.
    800         A message_body may be specified, to be appended to the request.
    801         """
    802         self._buffer.extend(("", ""))
    803         msg = "\r\n".join(self._buffer)
    804         del self._buffer[:]
    805         # If msg and message_body are sent in a single send() call,
    806         # it will avoid performance problems caused by the interaction
    807         # between delayed ack and the Nagle algorithm.
    808         if isinstance(message_body, str):
    809             msg += message_body
    810             message_body = None
    811         self.send(msg)
    812         if message_body is not None:
    813             #message_body was not a string (i.e. it is a file) and
    814             #we must run the risk of Nagle
    815             self.send(message_body)
    816 
    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
        """Start a new request by buffering its request line.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header

        Raises CannotSendRequest unless the connection is idle.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state != _CS_IDLE:
            raise CannotSendRequest()
        self.__state = _CS_REQ_STARTED

        # Save the method we use, we need it later in the response phase
        self._method = method
        url = url or '/'
        self._output('%s %s %s' % (method, url, self._http_vsn_str))

        if self._http_vsn != 11:
            # For HTTP/1.0, the server will assume "not chunked"
            return

        # Issue some standard headers for better HTTP/1.1 compliance

        if not skip_host:
            # this header is issued *only* for HTTP/1.1
            # connections. more specifically, this means it is
            # only issued when the client uses the new
            # HTTPConnection() class. backwards-compat clients
            # will be using HTTP/1.0 and those clients may be
            # issuing this header themselves. we should NOT issue
            # it twice; some web servers (such as Apache) barf
            # when they see two Host: headers

            # If we need a non-standard port, include it in the
            # header.  If the request is going through a proxy,
            # use the host of the actual URL, not the host of the
            # proxy.

            netloc = ''
            if url.startswith('http'):
                netloc = urlsplit(url)[1]

            if netloc:
                try:
                    host_value = netloc.encode("ascii")
                except UnicodeEncodeError:
                    host_value = netloc.encode("idna")
            else:
                try:
                    host_value = self.host.encode("ascii")
                except UnicodeEncodeError:
                    host_value = self.host.encode("idna")
                # Wrap the IPv6 Host Header with [] (RFC 2732)
                if ':' in host_value:
                    host_value = "[" + host_value + "]"
                if self.port != self.default_port:
                    host_value = "%s:%s" % (host_value, self.port)
            self.putheader('Host', host_value)

        # note: we are assuming that clients will not attempt to set these
        #       headers since *this* library must deal with the
        #       consequences. this also means that when the supporting
        #       libraries are updated to recognize other forms, then this
        #       code should be changed (removed or updated).

        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        if not skip_accept_encoding:
            self.putheader('Accept-Encoding', 'identity')

        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked"
        #self.putheader('TE', 'chunked')

        # if TE is supplied in the header, then it must appear in a
        # Connection header.
        #self.putheader('Connection', 'TE')
    926 
    def putheader(self, header, *values):
        """Buffer one request header line.

        For example: h.putheader('Accept', 'text/html')

        Several values are joined with '\\r\\n\\t', i.e. as continuation
        lines.  Raises CannotSendHeader unless a request has been started.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        joined = '\r\n\t'.join(map(str, values))
        self._output('%s: %s' % (header, joined))
    937 
    def endheaders(self, message_body=None):
        """Indicate that the last header line has been sent to the server.

        This method sends the request to the server.  The optional
        message_body argument can be used to pass message body
        associated with the request.  The message body will be sent in
        the same packet as the message headers if possible.  The
        message_body should be a string.

        Raises CannotSendHeader unless a request has been started.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()
        self.__state = _CS_REQ_SENT
        self._send_output(message_body)
    952 
    def request(self, method, url, body=None, headers=None):
        """Send a complete request to the server.

        `method' and `url' form the request line, `body' is the optional
        message body and `headers' is a mapping of extra header names to
        values (omitted or None means no extra headers).
        """
        # A fresh dict per call instead of one mutable default shared by
        # every call.
        if headers is None:
            headers = {}
        self._send_request(method, url, body, headers)
    956 
    957     def _set_content_length(self, body):
    958         # Set the content-length based on the body.
    959         thelen = None
    960         try:
    961             thelen = str(len(body))
    962         except TypeError, te:
    963             # If this is a file-like object, try to
    964             # fstat its file descriptor
    965             try:
    966                 thelen = str(os.fstat(body.fileno()).st_size)
    967             except (AttributeError, OSError):
    968                 # Don't send a length if this failed
    969                 if self.debuglevel > 0: print "Cannot stat!!"
    970 
    971         if thelen is not None:
    972             self.putheader('Content-Length', thelen)
    973 
    974     def _send_request(self, method, url, body, headers):
    975         # Honor explicitly requested Host: and Accept-Encoding: headers.
    976         header_names = dict.fromkeys([k.lower() for k in headers])
    977         skips = {}
    978         if 'host' in header_names:
    979             skips['skip_host'] = 1
    980         if 'accept-encoding' in header_names:
    981             skips['skip_accept_encoding'] = 1
    982 
    983         self.putrequest(method, url, **skips)
    984 
    985         if body and ('content-length' not in header_names):
    986             self._set_content_length(body)
    987         for hdr, value in headers.iteritems():
    988             self.putheader(hdr, value)
    989         self.endheaders(body)
    990 
    def getresponse(self, buffering=False):
        """Get the response from the server.

        Builds a response_class instance on the current socket, reads its
        status line and headers via begin(), and returns it.  `buffering'
        is forwarded to response_class only when true.

        Raises ResponseNotReady unless a request has been fully sent and no
        earlier response is still pending.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        args = (self.sock,)
        kwds = {"strict":self.strict, "method":self._method}
        if self.debuglevel > 0:
            args += (self.debuglevel,)
        if buffering:
            #only add this keyword if non-default, for compatibility with
            #other response_classes.
            kwds["buffering"] = True
        response = self.response_class(*args, **kwds)

        try:
            response.begin()
            assert response.will_close != _UNKNOWN
            self.__state = _CS_IDLE

            if response.will_close:
                # this effectively passes the connection to the response
                self.close()
            else:
                # remember this, so we can tell when it is complete
                self.__response = response

            return response
        except:
            # Do not leak the half-initialised response when reading the
            # status line or headers fails; the error itself still
            # propagates to the caller unchanged.
            response.close()
            raise
   1039 
   1040 
   1041 class HTTP:
   1042     "Compatibility class with httplib.py from 1.5."
   1043 
   1044     _http_vsn = 10
   1045     _http_vsn_str = 'HTTP/1.0'
   1046 
   1047     debuglevel = 0
   1048 
   1049     _connection_class = HTTPConnection
   1050 
   1051     def __init__(self, host='', port=None, strict=None):
   1052         "Provide a default host, since the superclass requires one."
   1053 
   1054         # some joker passed 0 explicitly, meaning default port
   1055         if port == 0:
   1056             port = None
   1057 
   1058         # Note that we may pass an empty string as the host; this will throw
   1059         # an error when we attempt to connect. Presumably, the client code
   1060         # will call connect before then, with a proper host.
   1061         self._setup(self._connection_class(host, port, strict))
   1062 
   1063     def _setup(self, conn):
   1064         self._conn = conn
   1065 
   1066         # set up delegation to flesh out interface
   1067         self.send = conn.send
   1068         self.putrequest = conn.putrequest
   1069         self.putheader = conn.putheader
   1070         self.endheaders = conn.endheaders
   1071         self.set_debuglevel = conn.set_debuglevel
   1072 
   1073         conn._http_vsn = self._http_vsn
   1074         conn._http_vsn_str = self._http_vsn_str
   1075 
   1076         self.file = None
   1077 
   1078     def connect(self, host=None, port=None):
   1079         "Accept arguments to set the host/port, since the superclass doesn't."
   1080 
   1081         if host is not None:
   1082             self._conn._set_hostport(host, port)
   1083         self._conn.connect()
   1084 
   1085     def getfile(self):
   1086         "Provide a getfile, since the superclass' does not use this concept."
   1087         return self.file
   1088 
   1089     def getreply(self, buffering=False):
   1090         """Compat definition since superclass does not define it.
   1091 
   1092         Returns a tuple consisting of:
   1093         - server status code (e.g. '200' if all goes well)
   1094         - server "reason" corresponding to status code
   1095         - any RFC822 headers in the response from the server
   1096         """
   1097         try:
   1098             if not buffering:
   1099                 response = self._conn.getresponse()
   1100             else:
   1101                 #only add this keyword if non-default for compatibility
   1102                 #with other connection classes
   1103                 response = self._conn.getresponse(buffering)
   1104         except BadStatusLine, e:
   1105             ### hmm. if getresponse() ever closes the socket on a bad request,
   1106             ### then we are going to have problems with self.sock
   1107 
   1108             ### should we keep this behavior? do people use it?
   1109             # keep the socket open (as a file), and return it
   1110             self.file = self._conn.sock.makefile('rb', 0)
   1111 
   1112             # close our socket -- we want to restart after any protocol error
   1113             self.close()
   1114 
   1115             self.headers = None
   1116             return -1, e.line, None
   1117 
   1118         self.headers = response.msg
   1119         self.file = response.fp
   1120         return response.status, response.reason, response.msg
   1121 
   1122     def close(self):
   1123         self._conn.close()
   1124 
   1125         # note that self.file == response.fp, which gets closed by the
   1126         # superclass. just clear the object ref here.
   1127         ### hmm. messy. if status==-1, then self.file is owned by us.
   1128         ### well... we aren't explicitly closing, but losing this ref will
   1129         ### do it
   1130         self.file = None
   1131 
   1132 try:
   1133     import ssl
   1134 except ImportError:
   1135     pass
   1136 else:
   1137     class HTTPSConnection(HTTPConnection):
   1138         "This class allows communication via SSL."
   1139 
   1140         default_port = HTTPS_PORT
   1141 
   1142         def __init__(self, host, port=None, key_file=None, cert_file=None,
   1143                      strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
   1144                      source_address=None):
   1145             HTTPConnection.__init__(self, host, port, strict, timeout,
   1146                                     source_address)
   1147             self.key_file = key_file
   1148             self.cert_file = cert_file
   1149 
   1150         def connect(self):
   1151             "Connect to a host on a given (SSL) port."
   1152 
   1153             sock = socket.create_connection((self.host, self.port),
   1154                                             self.timeout, self.source_address)
   1155             if self._tunnel_host:
   1156                 self.sock = sock
   1157                 self._tunnel()
   1158             self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
   1159 
   1160     __all__.append("HTTPSConnection")
   1161 
   1162     class HTTPS(HTTP):
   1163         """Compatibility with 1.5 httplib interface
   1164 
   1165         Python 1.5.2 did not have an HTTPS class, but it defined an
   1166         interface for sending http requests that is also useful for
   1167         https.
   1168         """
   1169 
   1170         _connection_class = HTTPSConnection
   1171 
   1172         def __init__(self, host='', port=None, key_file=None, cert_file=None,
   1173                      strict=None):
   1174             # provide a default host, pass the X509 cert info
   1175 
   1176             # urf. compensate for bad input.
   1177             if port == 0:
   1178                 port = None
   1179             self._setup(self._connection_class(host, port, key_file,
   1180                                                cert_file, strict))
   1181 
   1182             # we never actually use these for anything, but we keep them
   1183             # here for compatibility with post-1.5.2 CVS.
   1184             self.key_file = key_file
   1185             self.cert_file = cert_file
   1186 
   1187 
   1188     def FakeSocket (sock, sslobj):
   1189         warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
   1190                       "Use the result of ssl.wrap_socket() directly instead.",
   1191                       DeprecationWarning, stacklevel=2)
   1192         return sslobj
   1193 
   1194 
   1195 class HTTPException(Exception):
   1196     # Subclasses that define an __init__ must call Exception.__init__
   1197     # or define self.args.  Otherwise, str() will fail.
   1198     pass
   1199 
   1200 class NotConnected(HTTPException):
   1201     pass
   1202 
   1203 class InvalidURL(HTTPException):
   1204     pass
   1205 
   1206 class UnknownProtocol(HTTPException):
   1207     def __init__(self, version):
   1208         self.args = version,
   1209         self.version = version
   1210 
   1211 class UnknownTransferEncoding(HTTPException):
   1212     pass
   1213 
   1214 class UnimplementedFileMode(HTTPException):
   1215     pass
   1216 
   1217 class IncompleteRead(HTTPException):
   1218     def __init__(self, partial, expected=None):
   1219         self.args = partial,
   1220         self.partial = partial
   1221         self.expected = expected
   1222     def __repr__(self):
   1223         if self.expected is not None:
   1224             e = ', %i more expected' % self.expected
   1225         else:
   1226             e = ''
   1227         return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
   1228     def __str__(self):
   1229         return repr(self)
   1230 
   1231 class ImproperConnectionState(HTTPException):
   1232     pass
   1233 
   1234 class CannotSendRequest(ImproperConnectionState):
   1235     pass
   1236 
   1237 class CannotSendHeader(ImproperConnectionState):
   1238     pass
   1239 
   1240 class ResponseNotReady(ImproperConnectionState):
   1241     pass
   1242 
   1243 class BadStatusLine(HTTPException):
   1244     def __init__(self, line):
   1245         if not line:
   1246             line = repr(line)
   1247         self.args = line,
   1248         self.line = line
   1249 
   1250 class LineTooLong(HTTPException):
   1251     def __init__(self, line_type):
   1252         HTTPException.__init__(self, "got more than %d bytes when reading %s"
   1253                                      % (_MAXLINE, line_type))
   1254 
   1255 # for backwards compatibility
   1256 error = HTTPException
   1257 
   1258 class LineAndFileWrapper:
   1259     """A limited file-like object for HTTP/0.9 responses."""
   1260 
   1261     # The status-line parsing code calls readline(), which normally
   1262     # get the HTTP status line.  For a 0.9 response, however, this is
   1263     # actually the first line of the body!  Clients need to get a
   1264     # readable file object that contains that line.
   1265 
   1266     def __init__(self, line, file):
   1267         self._line = line
   1268         self._file = file
   1269         self._line_consumed = 0
   1270         self._line_offset = 0
   1271         self._line_left = len(line)
   1272 
   1273     def __getattr__(self, attr):
   1274         return getattr(self._file, attr)
   1275 
   1276     def _done(self):
   1277         # called when the last byte is read from the line.  After the
   1278         # call, all read methods are delegated to the underlying file
   1279         # object.
   1280         self._line_consumed = 1
   1281         self.read = self._file.read
   1282         self.readline = self._file.readline
   1283         self.readlines = self._file.readlines
   1284 
   1285     def read(self, amt=None):
   1286         if self._line_consumed:
   1287             return self._file.read(amt)
   1288         assert self._line_left
   1289         if amt is None or amt > self._line_left:
   1290             s = self._line[self._line_offset:]
   1291             self._done()
   1292             if amt is None:
   1293                 return s + self._file.read()
   1294             else:
   1295                 return s + self._file.read(amt - len(s))
   1296         else:
   1297             assert amt <= self._line_left
   1298             i = self._line_offset
   1299             j = i + amt
   1300             s = self._line[i:j]
   1301             self._line_offset = j
   1302             self._line_left -= amt
   1303             if self._line_left == 0:
   1304                 self._done()
   1305             return s
   1306 
   1307     def readline(self):
   1308         if self._line_consumed:
   1309             return self._file.readline()
   1310         assert self._line_left
   1311         s = self._line[self._line_offset:]
   1312         self._done()
   1313         return s
   1314 
   1315     def readlines(self, size=None):
   1316         if self._line_consumed:
   1317             return self._file.readlines(size)
   1318         assert self._line_left
   1319         L = [self._line[self._line_offset:]]
   1320         self._done()
   1321         if size is None:
   1322             return L + self._file.readlines()
   1323         else:
   1324             return L + self._file.readlines(size)
   1325 
   1326 def test():
   1327     """Test this module.
   1328 
   1329     A hodge podge of tests collected here, because they have too many
   1330     external dependencies for the regular test suite.
   1331     """
   1332 
   1333     import sys
   1334     import getopt
   1335     opts, args = getopt.getopt(sys.argv[1:], 'd')
   1336     dl = 0
   1337     for o, a in opts:
   1338         if o == '-d': dl = dl + 1
   1339     host = 'www.python.org'
   1340     selector = '/'
   1341     if args[0:]: host = args[0]
   1342     if args[1:]: selector = args[1]
   1343     h = HTTP()
   1344     h.set_debuglevel(dl)
   1345     h.connect(host)
   1346     h.putrequest('GET', selector)
   1347     h.endheaders()
   1348     status, reason, headers = h.getreply()
   1349     print 'status =', status
   1350     print 'reason =', reason
   1351     print "read", len(h.getfile().read())
   1352     print
   1353     if headers:
   1354         for header in headers.headers: print header.strip()
   1355     print
   1356 
   1357     # minimal test that code to extract host from url works
   1358     class HTTP11(HTTP):
   1359         _http_vsn = 11
   1360         _http_vsn_str = 'HTTP/1.1'
   1361 
   1362     h = HTTP11('www.python.org')
   1363     h.putrequest('GET', 'http://www.python.org/~jeremy/')
   1364     h.endheaders()
   1365     h.getreply()
   1366     h.close()
   1367 
   1368     try:
   1369         import ssl
   1370     except ImportError:
   1371         pass
   1372     else:
   1373 
   1374         for host, selector in (('sourceforge.net', '/projects/python'),
   1375                                ):
   1376             print "https://%s%s" % (host, selector)
   1377             hs = HTTPS()
   1378             hs.set_debuglevel(dl)
   1379             hs.connect(host)
   1380             hs.putrequest('GET', selector)
   1381             hs.endheaders()
   1382             status, reason, headers = hs.getreply()
   1383             print 'status =', status
   1384             print 'reason =', reason
   1385             print "read", len(hs.getfile().read())
   1386             print
   1387             if headers:
   1388                 for header in headers.headers: print header.strip()
   1389             print
   1390 
   1391 if __name__ == '__main__':
   1392     test()
   1393