Home | History | Annotate | Download | only in Lib
      1 """HTTP server base class.
      2 
      3 Note: the class in this module doesn't implement any HTTP request; see
      4 SimpleHTTPServer for simple implementations of GET, HEAD and POST
      5 (including CGI scripts).  It does, however, optionally implement HTTP/1.1
      6 persistent connections, as of version 0.3.
      7 
      8 Contents:
      9 
- HTTPServer: TCP server class that records its own host name and port
- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function
     12 
     13 XXX To do:
     14 
     15 - log requests even later (to capture byte count)
     16 - log user-agent header and other interesting goodies
     17 - send error log to separate file
     18 """
     19 
     20 
     21 # See also:

     22 #

     23 # HTTP Working Group                                        T. Berners-Lee

     24 # INTERNET-DRAFT                                            R. T. Fielding

     25 # <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen

     26 # Expires September 8, 1995                                  March 8, 1995

     27 #

     28 # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt

     29 #

     30 # and

     31 #

     32 # Network Working Group                                      R. Fielding

     33 # Request for Comments: 2616                                       et al

     34 # Obsoletes: 2068                                              June 1999

     35 # Category: Standards Track

     36 #

     37 # URL: http://www.faqs.org/rfcs/rfc2616.html

     38 
     39 # Log files

     40 # ---------

     41 #

     42 # Here's a quote from the NCSA httpd docs about log file format.

     43 #

     44 # | The logfile format is as follows. Each line consists of:

     45 # |

     46 # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb

     47 # |

     48 # |        host: Either the DNS name or the IP number of the remote client

     49 # |        rfc931: Any information returned by identd for this person,

     50 # |                - otherwise.

     51 # |        authuser: If user sent a userid for authentication, the user name,

     52 # |                  - otherwise.

     53 # |        DD: Day

     54 # |        Mon: Month (calendar name)

     55 # |        YYYY: Year

     56 # |        hh: hour (24-hour format, the machine's timezone)

     57 # |        mm: minutes

     58 # |        ss: seconds

     59 # |        request: The first line of the HTTP request as sent by the client.

     60 # |        ddd: the status code returned by the server, - if not available.

     61 # |        bbbb: the total number of bytes sent,

     62 # |              *not including the HTTP/1.0 header*, - if not available

     63 # |

     64 # | You can determine the name of the file accessed through request.

     65 #

     66 # (Actually, the latter is only true if you know the server configuration

     67 # at the time the request was made!)

     68 
# Version of this module.  It is appended to "BaseHTTP/" to build
# BaseHTTPRequestHandler.server_version.
__version__ = "0.3"

# Names exported by a star-import of this module.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
     72 
     73 import sys
     74 import time
     75 import socket # For gethostbyaddr()

     76 from warnings import filterwarnings, catch_warnings
     77 with catch_warnings():
     78     if sys.py3kwarning:
     79         filterwarnings("ignore", ".*mimetools has been removed",
     80                         DeprecationWarning)
     81     import mimetools
     82 import SocketServer
     83 
# Default error message template.
#
# BaseHTTPRequestHandler.send_error() fills this in with %-formatting from a
# mapping that provides the keys 'code', 'message' and 'explain'.  'code' is
# used twice: once as %(code)d and once as %(code)s.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Value of the Content-Type header sent along with the error page; it is the
# default for BaseHTTPRequestHandler.error_content_type.
DEFAULT_ERROR_CONTENT_TYPE = "text/html"
     98 
     99 def _quote_html(html):
    100     return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    101 
    102 class HTTPServer(SocketServer.TCPServer):
    103 
    104     allow_reuse_address = 1    # Seems to make sense in testing environment

    105 
    106     def server_bind(self):
    107         """Override server_bind to store the server name."""
    108         SocketServer.TCPServer.server_bind(self)
    109         host, port = self.socket.getsockname()[:2]
    110         self.server_name = socket.getfqdn(host)
    111         self.server_port = port
    112 
    113 
    114 class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
    115 
    116     """HTTP request handler base class.
    117 
    118     The following explanation of HTTP serves to guide you through the
    119     code as well as to expose any misunderstandings I may have about
    120     HTTP (so you don't need to read the code to figure out I'm wrong
    121     :-).
    122 
    123     HTTP (HyperText Transfer Protocol) is an extensible protocol on
    124     top of a reliable stream transport (e.g. TCP/IP).  The protocol
    125     recognizes three parts to a request:
    126 
    127     1. One line identifying the request type and path
    128     2. An optional set of RFC-822-style headers
    129     3. An optional data part
    130 
    131     The headers and data are separated by a blank line.
    132 
    133     The first line of the request has the form
    134 
    135     <command> <path> <version>
    136 
    137     where <command> is a (case-sensitive) keyword such as GET or POST,
    138     <path> is a string containing path information for the request,
    139     and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    140     <path> is encoded using the URL encoding scheme (using %xx to signify
    141     the ASCII character with hex code xx).
    142 
    143     The specification specifies that lines are separated by CRLF but
    144     for compatibility with the widest range of clients recommends
    145     servers also handle LF.  Similarly, whitespace in the request line
    146     is treated sensibly (allowing multiple spaces between components
    147     and allowing trailing whitespace).
    148 
    149     Similarly, for output, lines ought to be separated by CRLF pairs
    150     but most clients grok LF characters just fine.
    151 
    152     If the first line of the request has the form
    153 
    154     <command> <path>
    155 
    156     (i.e. <version> is left out) then this is assumed to be an HTTP
    157     0.9 request; this form has no optional headers and data part and
    158     the reply consists of just the data.
    159 
    160     The reply form of the HTTP 1.x protocol again has three parts:
    161 
    162     1. One line giving the response code
    163     2. An optional set of RFC-822-style headers
    164     3. The data
    165 
    166     Again, the headers and data are separated by a blank line.
    167 
    168     The response code line has the form
    169 
    170     <version> <responsecode> <responsestring>
    171 
    172     where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    173     <responsecode> is a 3-digit response code indicating success or
    174     failure of the request, and <responsestring> is an optional
    175     human-readable string explaining what the response code means.
    176 
    177     This server parses the request and the headers, and then calls a
    178     function specific to the request type (<command>).  Specifically,
    179     a request SPAM will be handled by a method do_SPAM().  If no
    180     such method exists the server sends an error response to the
    181     client.  If it exists, it is called with no arguments:
    182 
    183     do_SPAM()
    184 
    185     Note that the request name is case sensitive (i.e. SPAM and spam
    186     are different requests).
    187 
    188     The various request details are stored in instance variables:
    189 
    190     - client_address is the client IP address in the form (host,
    191     port);
    192 
    193     - command, path and version are the broken-down request line;
    194 
    195     - headers is an instance of mimetools.Message (or a derived
    196     class) containing the header information;
    197 
    198     - rfile is a file object open for reading positioned at the
    199     start of the optional input data part;
    200 
    201     - wfile is a file object open for writing.
    202 
    203     IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
    204 
    205     The first thing to be written must be the response line.  Then
    206     follow 0 or more header lines, then a blank line, and then the
    207     actual data (if any).  The meaning of the header lines depends on
    208     the command executed by the server; in most cases, when data is
    209     returned, there should be at least one header line of the form
    210 
    211     Content-type: <type>/<subtype>
    212 
    213     where <type> and <subtype> should be registered MIME types,
    214     e.g. "text/html" or "text/plain".
    215 
    216     """
    217 
    218     # The Python system version, truncated to its first component.

    219     sys_version = "Python/" + sys.version.split()[0]
    220 
    221     # The server software version.  You may want to override this.

    222     # The format is multiple whitespace-separated strings,

    223     # where each string is of the form name[/version].

    224     server_version = "BaseHTTP/" + __version__
    225 
    226     # The default request version.  This only affects responses up until

    227     # the point where the request line is parsed, so it mainly decides what

    228     # the client gets back when sending a malformed request line.

    229     # Most web servers default to HTTP 0.9, i.e. don't send a status line.

    230     default_request_version = "HTTP/0.9"
    231 
    232     def parse_request(self):
    233         """Parse a request (internal).
    234 
    235         The request should be stored in self.raw_requestline; the results
    236         are in self.command, self.path, self.request_version and
    237         self.headers.
    238 
    239         Return True for success, False for failure; on failure, an
    240         error is sent back.
    241 
    242         """
    243         self.command = None  # set in case of error on the first line

    244         self.request_version = version = self.default_request_version
    245         self.close_connection = 1
    246         requestline = self.raw_requestline
    247         if requestline[-2:] == '\r\n':
    248             requestline = requestline[:-2]
    249         elif requestline[-1:] == '\n':
    250             requestline = requestline[:-1]
    251         self.requestline = requestline
    252         words = requestline.split()
    253         if len(words) == 3:
    254             [command, path, version] = words
    255             if version[:5] != 'HTTP/':
    256                 self.send_error(400, "Bad request version (%r)" % version)
    257                 return False
    258             try:
    259                 base_version_number = version.split('/', 1)[1]
    260                 version_number = base_version_number.split(".")
    261                 # RFC 2145 section 3.1 says there can be only one "." and

    262                 #   - major and minor numbers MUST be treated as

    263                 #      separate integers;

    264                 #   - HTTP/2.4 is a lower version than HTTP/2.13, which in

    265                 #      turn is lower than HTTP/12.3;

    266                 #   - Leading zeros MUST be ignored by recipients.

    267                 if len(version_number) != 2:
    268                     raise ValueError
    269                 version_number = int(version_number[0]), int(version_number[1])
    270             except (ValueError, IndexError):
    271                 self.send_error(400, "Bad request version (%r)" % version)
    272                 return False
    273             if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
    274                 self.close_connection = 0
    275             if version_number >= (2, 0):
    276                 self.send_error(505,
    277                           "Invalid HTTP Version (%s)" % base_version_number)
    278                 return False
    279         elif len(words) == 2:
    280             [command, path] = words
    281             self.close_connection = 1
    282             if command != 'GET':
    283                 self.send_error(400,
    284                                 "Bad HTTP/0.9 request type (%r)" % command)
    285                 return False
    286         elif not words:
    287             return False
    288         else:
    289             self.send_error(400, "Bad request syntax (%r)" % requestline)
    290             return False
    291         self.command, self.path, self.request_version = command, path, version
    292 
    293         # Examine the headers and look for a Connection directive

    294         self.headers = self.MessageClass(self.rfile, 0)
    295 
    296         conntype = self.headers.get('Connection', "")
    297         if conntype.lower() == 'close':
    298             self.close_connection = 1
    299         elif (conntype.lower() == 'keep-alive' and
    300               self.protocol_version >= "HTTP/1.1"):
    301             self.close_connection = 0
    302         return True
    303 
    304     def handle_one_request(self):
    305         """Handle a single HTTP request.
    306 
    307         You normally don't need to override this method; see the class
    308         __doc__ string for information on how to handle specific HTTP
    309         commands such as GET and POST.
    310 
    311         """
    312         try:
    313             self.raw_requestline = self.rfile.readline(65537)
    314             if len(self.raw_requestline) > 65536:
    315                 self.requestline = ''
    316                 self.request_version = ''
    317                 self.command = ''
    318                 self.send_error(414)
    319                 return
    320             if not self.raw_requestline:
    321                 self.close_connection = 1
    322                 return
    323             if not self.parse_request():
    324                 # An error code has been sent, just exit

    325                 return
    326             mname = 'do_' + self.command
    327             if not hasattr(self, mname):
    328                 self.send_error(501, "Unsupported method (%r)" % self.command)
    329                 return
    330             method = getattr(self, mname)
    331             method()
    332             self.wfile.flush() #actually send the response if not already done.

    333         except socket.timeout, e:
    334             #a read or a write timed out.  Discard this connection

    335             self.log_error("Request timed out: %r", e)
    336             self.close_connection = 1
    337             return
    338 
    339     def handle(self):
    340         """Handle multiple requests if necessary."""
    341         self.close_connection = 1
    342 
    343         self.handle_one_request()
    344         while not self.close_connection:
    345             self.handle_one_request()
    346 
    347     def send_error(self, code, message=None):
    348         """Send and log an error reply.
    349 
    350         Arguments are the error code, and a detailed message.
    351         The detailed message defaults to the short entry matching the
    352         response code.
    353 
    354         This sends an error response (so it must be called before any
    355         output has been generated), logs the error, and finally sends
    356         a piece of HTML explaining the error to the user.
    357 
    358         """
    359 
    360         try:
    361             short, long = self.responses[code]
    362         except KeyError:
    363             short, long = '???', '???'
    364         if message is None:
    365             message = short
    366         explain = long
    367         self.log_error("code %d, message %s", code, message)
    368         # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)

    369         content = (self.error_message_format %
    370                    {'code': code, 'message': _quote_html(message), 'explain': explain})
    371         self.send_response(code, message)
    372         self.send_header("Content-Type", self.error_content_type)
    373         self.send_header('Connection', 'close')
    374         self.end_headers()
    375         if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
    376             self.wfile.write(content)
    377 
    378     error_message_format = DEFAULT_ERROR_MESSAGE
    379     error_content_type = DEFAULT_ERROR_CONTENT_TYPE
    380 
    381     def send_response(self, code, message=None):
    382         """Send the response header and log the response code.
    383 
    384         Also send two standard headers with the server software
    385         version and the current date.
    386 
    387         """
    388         self.log_request(code)
    389         if message is None:
    390             if code in self.responses:
    391                 message = self.responses[code][0]
    392             else:
    393                 message = ''
    394         if self.request_version != 'HTTP/0.9':
    395             self.wfile.write("%s %d %s\r\n" %
    396                              (self.protocol_version, code, message))
    397             # print (self.protocol_version, code, message)

    398         self.send_header('Server', self.version_string())
    399         self.send_header('Date', self.date_time_string())
    400 
    401     def send_header(self, keyword, value):
    402         """Send a MIME header."""
    403         if self.request_version != 'HTTP/0.9':
    404             self.wfile.write("%s: %s\r\n" % (keyword, value))
    405 
    406         if keyword.lower() == 'connection':
    407             if value.lower() == 'close':
    408                 self.close_connection = 1
    409             elif value.lower() == 'keep-alive':
    410                 self.close_connection = 0
    411 
    412     def end_headers(self):
    413         """Send the blank line ending the MIME headers."""
    414         if self.request_version != 'HTTP/0.9':
    415             self.wfile.write("\r\n")
    416 
    417     def log_request(self, code='-', size='-'):
    418         """Log an accepted request.
    419 
    420         This is called by send_response().
    421 
    422         """
    423 
    424         self.log_message('"%s" %s %s',
    425                          self.requestline, str(code), str(size))
    426 
    427     def log_error(self, format, *args):
    428         """Log an error.
    429 
    430         This is called when a request cannot be fulfilled.  By
    431         default it passes the message on to log_message().
    432 
    433         Arguments are the same as for log_message().
    434 
    435         XXX This should go to the separate error log.
    436 
    437         """
    438 
    439         self.log_message(format, *args)
    440 
    441     def log_message(self, format, *args):
    442         """Log an arbitrary message.
    443 
    444         This is used by all other logging functions.  Override
    445         it if you have specific logging wishes.
    446 
    447         The first argument, FORMAT, is a format string for the
    448         message to be logged.  If the format string contains
    449         any % escapes requiring parameters, they should be
    450         specified as subsequent arguments (it's just like
    451         printf!).
    452 
    453         The client host and current date/time are prefixed to
    454         every message.
    455 
    456         """
    457 
    458         sys.stderr.write("%s - - [%s] %s\n" %
    459                          (self.address_string(),
    460                           self.log_date_time_string(),
    461                           format%args))
    462 
    463     def version_string(self):
    464         """Return the server software version string."""
    465         return self.server_version + ' ' + self.sys_version
    466 
    467     def date_time_string(self, timestamp=None):
    468         """Return the current date and time formatted for a message header."""
    469         if timestamp is None:
    470             timestamp = time.time()
    471         year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
    472         s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
    473                 self.weekdayname[wd],
    474                 day, self.monthname[month], year,
    475                 hh, mm, ss)
    476         return s
    477 
    478     def log_date_time_string(self):
    479         """Return the current time formatted for logging."""
    480         now = time.time()
    481         year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
    482         s = "%02d/%3s/%04d %02d:%02d:%02d" % (
    483                 day, self.monthname[month], year, hh, mm, ss)
    484         return s
    485 
    486     weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    487 
    488     monthname = [None,
    489                  'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    490                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    491 
    492     def address_string(self):
    493         """Return the client address formatted for logging.
    494 
    495         This version looks up the full hostname using gethostbyaddr(),
    496         and tries to find a name that contains at least one dot.
    497 
    498         """
    499 
    500         host, port = self.client_address[:2]
    501         return socket.getfqdn(host)
    502 
    503     # Essentially static class variables

    504 
    505     # The version of the HTTP protocol we support.

    506     # Set this to HTTP/1.1 to enable automatic keepalive

    507     protocol_version = "HTTP/1.0"
    508 
    509     # The Message-like class used to parse headers

    510     MessageClass = mimetools.Message
    511 
    512     # Table mapping response codes to messages; entries have the

    513     # form {code: (shortmessage, longmessage)}.

    514     # See RFC 2616.

    515     responses = {
    516         100: ('Continue', 'Request received, please continue'),
    517         101: ('Switching Protocols',
    518               'Switching to new protocol; obey Upgrade header'),
    519 
    520         200: ('OK', 'Request fulfilled, document follows'),
    521         201: ('Created', 'Document created, URL follows'),
    522         202: ('Accepted',
    523               'Request accepted, processing continues off-line'),
    524         203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
    525         204: ('No Content', 'Request fulfilled, nothing follows'),
    526         205: ('Reset Content', 'Clear input form for further input.'),
    527         206: ('Partial Content', 'Partial content follows.'),
    528 
    529         300: ('Multiple Choices',
    530               'Object has several resources -- see URI list'),
    531         301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
    532         302: ('Found', 'Object moved temporarily -- see URI list'),
    533         303: ('See Other', 'Object moved -- see Method and URL list'),
    534         304: ('Not Modified',
    535               'Document has not changed since given time'),
    536         305: ('Use Proxy',
    537               'You must use proxy specified in Location to access this '
    538               'resource.'),
    539         307: ('Temporary Redirect',
    540               'Object moved temporarily -- see URI list'),
    541 
    542         400: ('Bad Request',
    543               'Bad request syntax or unsupported method'),
    544         401: ('Unauthorized',
    545               'No permission -- see authorization schemes'),
    546         402: ('Payment Required',
    547               'No payment -- see charging schemes'),
    548         403: ('Forbidden',
    549               'Request forbidden -- authorization will not help'),
    550         404: ('Not Found', 'Nothing matches the given URI'),
    551         405: ('Method Not Allowed',
    552               'Specified method is invalid for this resource.'),
    553         406: ('Not Acceptable', 'URI not available in preferred format.'),
    554         407: ('Proxy Authentication Required', 'You must authenticate with '
    555               'this proxy before proceeding.'),
    556         408: ('Request Timeout', 'Request timed out; try again later.'),
    557         409: ('Conflict', 'Request conflict.'),
    558         410: ('Gone',
    559               'URI no longer exists and has been permanently removed.'),
    560         411: ('Length Required', 'Client must specify Content-Length.'),
    561         412: ('Precondition Failed', 'Precondition in headers is false.'),
    562         413: ('Request Entity Too Large', 'Entity is too large.'),
    563         414: ('Request-URI Too Long', 'URI is too long.'),
    564         415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
    565         416: ('Requested Range Not Satisfiable',
    566               'Cannot satisfy request range.'),
    567         417: ('Expectation Failed',
    568               'Expect condition could not be satisfied.'),
    569 
    570         500: ('Internal Server Error', 'Server got itself in trouble'),
    571         501: ('Not Implemented',
    572               'Server does not support this operation'),
    573         502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
    574         503: ('Service Unavailable',
    575               'The server cannot process the request due to a high load'),
    576         504: ('Gateway Timeout',
    577               'The gateway server did not receive a timely response'),
    578         505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
    579         }
    580 
    581 
    582 def test(HandlerClass = BaseHTTPRequestHandler,
    583          ServerClass = HTTPServer, protocol="HTTP/1.0"):
    584     """Test the HTTP request handler class.
    585 
    586     This runs an HTTP server on port 8000 (or the first command line
    587     argument).
    588 
    589     """
    590 
    591     if sys.argv[1:]:
    592         port = int(sys.argv[1])
    593     else:
    594         port = 8000
    595     server_address = ('', port)
    596 
    597     HandlerClass.protocol_version = protocol
    598     httpd = ServerClass(server_address, HandlerClass)
    599 
    600     sa = httpd.socket.getsockname()
    601     print "Serving HTTP on", sa[0], "port", sa[1], "..."
    602     httpd.serve_forever()
    603 
    604 
    605 if __name__ == '__main__':
    606     test()
    607