Home | History | Annotate | Download | only in Lib
      1 """HTTP server base class.
      2 
      3 Note: the class in this module doesn't implement any HTTP request; see
      4 SimpleHTTPServer for simple implementations of GET, HEAD and POST
      5 (including CGI scripts).  It does, however, optionally implement HTTP/1.1
      6 persistent connections, as of version 0.3.
      7 
      8 Contents:
      9 
     10 - BaseHTTPRequestHandler: HTTP request handler base class
     11 - test: test function
     12 
     13 XXX To do:
     14 
     15 - log requests even later (to capture byte count)
     16 - log user-agent header and other interesting goodies
     17 - send error log to separate file
     18 """
     19 
     20 
     21 # See also:
     22 #
     23 # HTTP Working Group                                        T. Berners-Lee
     24 # INTERNET-DRAFT                                            R. T. Fielding
     25 # <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
     26 # Expires September 8, 1995                                  March 8, 1995
     27 #
     28 # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
     29 #
     30 # and
     31 #
     32 # Network Working Group                                      R. Fielding
     33 # Request for Comments: 2616                                       et al
     34 # Obsoletes: 2068                                              June 1999
     35 # Category: Standards Track
     36 #
     37 # URL: http://www.faqs.org/rfcs/rfc2616.html
     38 
     39 # Log files
     40 # ---------
     41 #
     42 # Here's a quote from the NCSA httpd docs about log file format.
     43 #
     44 # | The logfile format is as follows. Each line consists of:
     45 # |
     46 # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
     47 # |
     48 # |        host: Either the DNS name or the IP number of the remote client
     49 # |        rfc931: Any information returned by identd for this person,
     50 # |                - otherwise.
     51 # |        authuser: If user sent a userid for authentication, the user name,
     52 # |                  - otherwise.
     53 # |        DD: Day
     54 # |        Mon: Month (calendar name)
     55 # |        YYYY: Year
     56 # |        hh: hour (24-hour format, the machine's timezone)
     57 # |        mm: minutes
     58 # |        ss: seconds
     59 # |        request: The first line of the HTTP request as sent by the client.
     60 # |        ddd: the status code returned by the server, - if not available.
     61 # |        bbbb: the total number of bytes sent,
     62 # |              *not including the HTTP/1.0 header*, - if not available
     63 # |
     64 # | You can determine the name of the file accessed through request.
     65 #
     66 # (Actually, the latter is only true if you know the server configuration
     67 # at the time the request was made!)
     68 
# Version of this module; BaseHTTPRequestHandler.server_version embeds it
# as "BaseHTTP/<version>".
__version__ = "0.3"

# Public names exported by a star-import of this module.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
     72 
     73 import sys
     74 import time
     75 import socket # For gethostbyaddr()
     76 from warnings import filterwarnings, catch_warnings
     77 with catch_warnings():
     78     if sys.py3kwarning:
     79         filterwarnings("ignore", ".*mimetools has been removed",
     80                         DeprecationWarning)
     81     import mimetools
     82 import SocketServer
     83 
# Default error message template.
# BaseHTTPRequestHandler.send_error() fills its error template in with a
# mapping that provides the keys 'code', 'message' and 'explain'; this
# template is the default value of that handler attribute.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Default value of the Content-Type header sent together with the error page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html"
     98 
     99 def _quote_html(html):
    100     return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    101 
    102 class HTTPServer(SocketServer.TCPServer):
    103 
    104     allow_reuse_address = 1    # Seems to make sense in testing environment
    105 
    106     def server_bind(self):
    107         """Override server_bind to store the server name."""
    108         SocketServer.TCPServer.server_bind(self)
    109         host, port = self.socket.getsockname()[:2]
    110         self.server_name = socket.getfqdn(host)
    111         self.server_port = port
    112 
    113 
    114 class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
    115 
    116     """HTTP request handler base class.
    117 
    118     The following explanation of HTTP serves to guide you through the
    119     code as well as to expose any misunderstandings I may have about
    120     HTTP (so you don't need to read the code to figure out I'm wrong
    121     :-).
    122 
    123     HTTP (HyperText Transfer Protocol) is an extensible protocol on
    124     top of a reliable stream transport (e.g. TCP/IP).  The protocol
    125     recognizes three parts to a request:
    126 
    127     1. One line identifying the request type and path
    128     2. An optional set of RFC-822-style headers
    129     3. An optional data part
    130 
    131     The headers and data are separated by a blank line.
    132 
    133     The first line of the request has the form
    134 
    135     <command> <path> <version>
    136 
    137     where <command> is a (case-sensitive) keyword such as GET or POST,
    138     <path> is a string containing path information for the request,
    139     and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    140     <path> is encoded using the URL encoding scheme (using %xx to signify
    141     the ASCII character with hex code xx).
    142 
    143     The specification specifies that lines are separated by CRLF but
    144     for compatibility with the widest range of clients recommends
    145     servers also handle LF.  Similarly, whitespace in the request line
    146     is treated sensibly (allowing multiple spaces between components
    147     and allowing trailing whitespace).
    148 
    149     Similarly, for output, lines ought to be separated by CRLF pairs
    150     but most clients grok LF characters just fine.
    151 
    152     If the first line of the request has the form
    153 
    154     <command> <path>
    155 
    156     (i.e. <version> is left out) then this is assumed to be an HTTP
    157     0.9 request; this form has no optional headers and data part and
    158     the reply consists of just the data.
    159 
    160     The reply form of the HTTP 1.x protocol again has three parts:
    161 
    162     1. One line giving the response code
    163     2. An optional set of RFC-822-style headers
    164     3. The data
    165 
    166     Again, the headers and data are separated by a blank line.
    167 
    168     The response code line has the form
    169 
    170     <version> <responsecode> <responsestring>
    171 
    172     where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    173     <responsecode> is a 3-digit response code indicating success or
    174     failure of the request, and <responsestring> is an optional
    175     human-readable string explaining what the response code means.
    176 
    177     This server parses the request and the headers, and then calls a
    178     function specific to the request type (<command>).  Specifically,
    179     a request SPAM will be handled by a method do_SPAM().  If no
    180     such method exists the server sends an error response to the
    181     client.  If it exists, it is called with no arguments:
    182 
    183     do_SPAM()
    184 
    185     Note that the request name is case sensitive (i.e. SPAM and spam
    186     are different requests).
    187 
    188     The various request details are stored in instance variables:
    189 
    190     - client_address is the client IP address in the form (host,
    191     port);
    192 
    - command, path and request_version are the broken-down request line;
    194 
    195     - headers is an instance of mimetools.Message (or a derived
    196     class) containing the header information;
    197 
    198     - rfile is a file object open for reading positioned at the
    199     start of the optional input data part;
    200 
    201     - wfile is a file object open for writing.
    202 
    203     IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
    204 
    205     The first thing to be written must be the response line.  Then
    206     follow 0 or more header lines, then a blank line, and then the
    207     actual data (if any).  The meaning of the header lines depends on
    208     the command executed by the server; in most cases, when data is
    209     returned, there should be at least one header line of the form
    210 
    211     Content-type: <type>/<subtype>
    212 
    213     where <type> and <subtype> should be registered MIME types,
    214     e.g. "text/html" or "text/plain".
    215 
    216     """
    217 
    # The Python system version, truncated to its first component.
    # version_string() appends it to server_version.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    # parse_request() installs this value as self.request_version before it
    # looks at the request line.
    default_request_version = "HTTP/0.9"
    231 
    232     def parse_request(self):
    233         """Parse a request (internal).
    234 
    235         The request should be stored in self.raw_requestline; the results
    236         are in self.command, self.path, self.request_version and
    237         self.headers.
    238 
    239         Return True for success, False for failure; on failure, an
    240         error is sent back.
    241 
    242         """
    243         self.command = None  # set in case of error on the first line
    244         self.request_version = version = self.default_request_version
    245         self.close_connection = 1
    246         requestline = self.raw_requestline
    247         requestline = requestline.rstrip('\r\n')
    248         self.requestline = requestline
    249         words = requestline.split()
    250         if len(words) == 3:
    251             command, path, version = words
    252             if version[:5] != 'HTTP/':
    253                 self.send_error(400, "Bad request version (%r)" % version)
    254                 return False
    255             try:
    256                 base_version_number = version.split('/', 1)[1]
    257                 version_number = base_version_number.split(".")
    258                 # RFC 2145 section 3.1 says there can be only one "." and
    259                 #   - major and minor numbers MUST be treated as
    260                 #      separate integers;
    261                 #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
    262                 #      turn is lower than HTTP/12.3;
    263                 #   - Leading zeros MUST be ignored by recipients.
    264                 if len(version_number) != 2:
    265                     raise ValueError
    266                 version_number = int(version_number[0]), int(version_number[1])
    267             except (ValueError, IndexError):
    268                 self.send_error(400, "Bad request version (%r)" % version)
    269                 return False
    270             if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
    271                 self.close_connection = 0
    272             if version_number >= (2, 0):
    273                 self.send_error(505,
    274                           "Invalid HTTP Version (%s)" % base_version_number)
    275                 return False
    276         elif len(words) == 2:
    277             command, path = words
    278             self.close_connection = 1
    279             if command != 'GET':
    280                 self.send_error(400,
    281                                 "Bad HTTP/0.9 request type (%r)" % command)
    282                 return False
    283         elif not words:
    284             return False
    285         else:
    286             self.send_error(400, "Bad request syntax (%r)" % requestline)
    287             return False
    288         self.command, self.path, self.request_version = command, path, version
    289 
    290         # Examine the headers and look for a Connection directive
    291         self.headers = self.MessageClass(self.rfile, 0)
    292 
    293         conntype = self.headers.get('Connection', "")
    294         if conntype.lower() == 'close':
    295             self.close_connection = 1
    296         elif (conntype.lower() == 'keep-alive' and
    297               self.protocol_version >= "HTTP/1.1"):
    298             self.close_connection = 0
    299         return True
    300 
    301     def handle_one_request(self):
    302         """Handle a single HTTP request.
    303 
    304         You normally don't need to override this method; see the class
    305         __doc__ string for information on how to handle specific HTTP
    306         commands such as GET and POST.
    307 
    308         """
    309         try:
    310             self.raw_requestline = self.rfile.readline(65537)
    311             if len(self.raw_requestline) > 65536:
    312                 self.requestline = ''
    313                 self.request_version = ''
    314                 self.command = ''
    315                 self.send_error(414)
    316                 return
    317             if not self.raw_requestline:
    318                 self.close_connection = 1
    319                 return
    320             if not self.parse_request():
    321                 # An error code has been sent, just exit
    322                 return
    323             mname = 'do_' + self.command
    324             if not hasattr(self, mname):
    325                 self.send_error(501, "Unsupported method (%r)" % self.command)
    326                 return
    327             method = getattr(self, mname)
    328             method()
    329             self.wfile.flush() #actually send the response if not already done.
    330         except socket.timeout, e:
    331             #a read or a write timed out.  Discard this connection
    332             self.log_error("Request timed out: %r", e)
    333             self.close_connection = 1
    334             return
    335 
    336     def handle(self):
    337         """Handle multiple requests if necessary."""
    338         self.close_connection = 1
    339 
    340         self.handle_one_request()
    341         while not self.close_connection:
    342             self.handle_one_request()
    343 
    344     def send_error(self, code, message=None):
    345         """Send and log an error reply.
    346 
    347         Arguments are the error code, and a detailed message.
    348         The detailed message defaults to the short entry matching the
    349         response code.
    350 
    351         This sends an error response (so it must be called before any
    352         output has been generated), logs the error, and finally sends
    353         a piece of HTML explaining the error to the user.
    354 
    355         """
    356 
    357         try:
    358             short, long = self.responses[code]
    359         except KeyError:
    360             short, long = '???', '???'
    361         if message is None:
    362             message = short
    363         explain = long
    364         self.log_error("code %d, message %s", code, message)
    365         self.send_response(code, message)
    366         self.send_header('Connection', 'close')
    367 
    368         # Message body is omitted for cases described in:
    369         #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
    370         #  - RFC7231: 6.3.6. 205(Reset Content)
    371         content = None
    372         if code >= 200 and code not in (204, 205, 304):
    373             # HTML encode to prevent Cross Site Scripting attacks
    374             # (see bug #1100201)
    375             content = (self.error_message_format % {
    376                 'code': code,
    377                 'message': _quote_html(message),
    378                 'explain': explain
    379             })
    380             self.send_header("Content-Type", self.error_content_type)
    381         self.end_headers()
    382 
    383         if self.command != 'HEAD' and content:
    384             self.wfile.write(content)
    385 
    # Template and Content-Type value that send_error() uses for the HTML
    # error page; both default to the module-level constants.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE
    388 
    389     def send_response(self, code, message=None):
    390         """Send the response header and log the response code.
    391 
    392         Also send two standard headers with the server software
    393         version and the current date.
    394 
    395         """
    396         self.log_request(code)
    397         if message is None:
    398             if code in self.responses:
    399                 message = self.responses[code][0]
    400             else:
    401                 message = ''
    402         if self.request_version != 'HTTP/0.9':
    403             self.wfile.write("%s %d %s\r\n" %
    404                              (self.protocol_version, code, message))
    405             # print (self.protocol_version, code, message)
    406         self.send_header('Server', self.version_string())
    407         self.send_header('Date', self.date_time_string())
    408 
    409     def send_header(self, keyword, value):
    410         """Send a MIME header."""
    411         if self.request_version != 'HTTP/0.9':
    412             self.wfile.write("%s: %s\r\n" % (keyword, value))
    413 
    414         if keyword.lower() == 'connection':
    415             if value.lower() == 'close':
    416                 self.close_connection = 1
    417             elif value.lower() == 'keep-alive':
    418                 self.close_connection = 0
    419 
    420     def end_headers(self):
    421         """Send the blank line ending the MIME headers."""
    422         if self.request_version != 'HTTP/0.9':
    423             self.wfile.write("\r\n")
    424 
    425     def log_request(self, code='-', size='-'):
    426         """Log an accepted request.
    427 
    428         This is called by send_response().
    429 
    430         """
    431 
    432         self.log_message('"%s" %s %s',
    433                          self.requestline, str(code), str(size))
    434 
    435     def log_error(self, format, *args):
    436         """Log an error.
    437 
    438         This is called when a request cannot be fulfilled.  By
    439         default it passes the message on to log_message().
    440 
    441         Arguments are the same as for log_message().
    442 
    443         XXX This should go to the separate error log.
    444 
    445         """
    446 
    447         self.log_message(format, *args)
    448 
    449     def log_message(self, format, *args):
    450         """Log an arbitrary message.
    451 
    452         This is used by all other logging functions.  Override
    453         it if you have specific logging wishes.
    454 
    455         The first argument, FORMAT, is a format string for the
    456         message to be logged.  If the format string contains
    457         any % escapes requiring parameters, they should be
    458         specified as subsequent arguments (it's just like
    459         printf!).
    460 
    461         The client ip address and current date/time are prefixed to every
    462         message.
    463 
    464         """
    465 
    466         sys.stderr.write("%s - - [%s] %s\n" %
    467                          (self.client_address[0],
    468                           self.log_date_time_string(),
    469                           format%args))
    470 
    471     def version_string(self):
    472         """Return the server software version string."""
    473         return self.server_version + ' ' + self.sys_version
    474 
    475     def date_time_string(self, timestamp=None):
    476         """Return the current date and time formatted for a message header."""
    477         if timestamp is None:
    478             timestamp = time.time()
    479         year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
    480         s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
    481                 self.weekdayname[wd],
    482                 day, self.monthname[month], year,
    483                 hh, mm, ss)
    484         return s
    485 
    486     def log_date_time_string(self):
    487         """Return the current time formatted for logging."""
    488         now = time.time()
    489         year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
    490         s = "%02d/%3s/%04d %02d:%02d:%02d" % (
    491                 day, self.monthname[month], year, hh, mm, ss)
    492         return s
    493 
    # Abbreviated day names, indexed by the weekday field of a time tuple
    # (see date_time_string()).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated month names.  The None placeholder at index 0 lets the
    # month field of a time tuple index the list directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    499 
    def address_string(self):
        """Return the client address formatted for logging.

        The host part of self.client_address is handed to
        socket.getfqdn() and the name it returns is the result.

        """
        endpoint = self.client_address[:2]
        host, _port = endpoint
        return socket.getfqdn(host)
    510 
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # (parse_request() only keeps a connection open when this value
    # compares >= "HTTP/1.1").  It is also the version written in the
    # status line by send_response().
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers; parse_request()
    # instantiates it as MessageClass(self.rfile, 0).
    MessageClass = mimetools.Message
    519 
    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    # send_response() takes its default status text from shortmessage;
    # send_error() additionally puts longmessage into the error page as
    # the explanation.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
    588 
    589 
    590 def test(HandlerClass = BaseHTTPRequestHandler,
    591          ServerClass = HTTPServer, protocol="HTTP/1.0"):
    592     """Test the HTTP request handler class.
    593 
    594     This runs an HTTP server on port 8000 (or the first command line
    595     argument).
    596 
    597     """
    598 
    599     if sys.argv[1:]:
    600         port = int(sys.argv[1])
    601     else:
    602         port = 8000
    603     server_address = ('', port)
    604 
    605     HandlerClass.protocol_version = protocol
    606     httpd = ServerClass(server_address, HandlerClass)
    607 
    608     sa = httpd.socket.getsockname()
    609     print "Serving HTTP on", sa[0], "port", sa[1], "..."
    610     httpd.serve_forever()
    611 
    612 
# Start the demo server when this file is run as a script.
if __name__ == '__main__':
    test()
    615