Home | History | Annotate | Download | only in Lib
      1 """HTTP server base class.
      2 
      3 Note: the class in this module doesn't implement any HTTP request; see
      4 SimpleHTTPServer for simple implementations of GET, HEAD and POST
      5 (including CGI scripts).  It does, however, optionally implement HTTP/1.1
      6 persistent connections, as of version 0.3.
      7 
      8 Contents:
      9 
     10 - BaseHTTPRequestHandler: HTTP request handler base class
     11 - test: test function
     12 
     13 XXX To do:
     14 
     15 - log requests even later (to capture byte count)
     16 - log user-agent header and other interesting goodies
     17 - send error log to separate file
     18 """
     19 
     20 
     21 # See also:

     22 #

     23 # HTTP Working Group                                        T. Berners-Lee

     24 # INTERNET-DRAFT                                            R. T. Fielding

     25 # <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen

     26 # Expires September 8, 1995                                  March 8, 1995

     27 #

     28 # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt

     29 #

     30 # and

     31 #

     32 # Network Working Group                                      R. Fielding

     33 # Request for Comments: 2616                                       et al

     34 # Obsoletes: 2068                                              June 1999

     35 # Category: Standards Track

     36 #

     37 # URL: http://www.faqs.org/rfcs/rfc2616.html

     38 
     39 # Log files

     40 # ---------

     41 #

     42 # Here's a quote from the NCSA httpd docs about log file format.

     43 #

     44 # | The logfile format is as follows. Each line consists of:

     45 # |

     46 # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb

     47 # |

     48 # |        host: Either the DNS name or the IP number of the remote client

     49 # |        rfc931: Any information returned by identd for this person,

     50 # |                - otherwise.

     51 # |        authuser: If user sent a userid for authentication, the user name,

     52 # |                  - otherwise.

     53 # |        DD: Day

     54 # |        Mon: Month (calendar name)

     55 # |        YYYY: Year

     56 # |        hh: hour (24-hour format, the machine's timezone)

     57 # |        mm: minutes

     58 # |        ss: seconds

     59 # |        request: The first line of the HTTP request as sent by the client.

     60 # |        ddd: the status code returned by the server, - if not available.

     61 # |        bbbb: the total number of bytes sent,

     62 # |              *not including the HTTP/1.0 header*, - if not available

     63 # |

     64 # | You can determine the name of the file accessed through request.

     65 #

     66 # (Actually, the latter is only true if you know the server configuration

     67 # at the time the request was made!)

     68 
     69 __version__ = "0.3"
     70 
     71 __all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
     72 
     73 import sys
     74 import time
     75 import socket # For gethostbyaddr()

     76 from warnings import filterwarnings, catch_warnings
     77 with catch_warnings():
     78     if sys.py3kwarning:
     79         filterwarnings("ignore", ".*mimetools has been removed",
     80                         DeprecationWarning)
     81     import mimetools
     82 import SocketServer
     83 
# Default error message template.
# BaseHTTPRequestHandler.send_error() fills this in with a mapping that
# supplies three keys: 'code' (the numeric status code), 'message' (the
# detail text, passed through _quote_html() first) and 'explain' (the long
# description looked up in the handler's responses table).
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Value of the Content-Type header that send_error() sends with the page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html"
     98 
     99 def _quote_html(html):
    100     return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    101 
    102 class HTTPServer(SocketServer.TCPServer):
    103 
    104     allow_reuse_address = 1    # Seems to make sense in testing environment

    105 
    106     def server_bind(self):
    107         """Override server_bind to store the server name."""
    108         SocketServer.TCPServer.server_bind(self)
    109         host, port = self.socket.getsockname()[:2]
    110         self.server_name = socket.getfqdn(host)
    111         self.server_port = port
    112 
    113 
    114 class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
    115 
    116     """HTTP request handler base class.
    117 
    118     The following explanation of HTTP serves to guide you through the
    119     code as well as to expose any misunderstandings I may have about
    120     HTTP (so you don't need to read the code to figure out I'm wrong
    121     :-).
    122 
    123     HTTP (HyperText Transfer Protocol) is an extensible protocol on
    124     top of a reliable stream transport (e.g. TCP/IP).  The protocol
    125     recognizes three parts to a request:
    126 
    127     1. One line identifying the request type and path
    128     2. An optional set of RFC-822-style headers
    129     3. An optional data part
    130 
    131     The headers and data are separated by a blank line.
    132 
    133     The first line of the request has the form
    134 
    135     <command> <path> <version>
    136 
    137     where <command> is a (case-sensitive) keyword such as GET or POST,
    138     <path> is a string containing path information for the request,
    139     and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    140     <path> is encoded using the URL encoding scheme (using %xx to signify
    141     the ASCII character with hex code xx).
    142 
    143     The specification specifies that lines are separated by CRLF but
    144     for compatibility with the widest range of clients recommends
    145     servers also handle LF.  Similarly, whitespace in the request line
    146     is treated sensibly (allowing multiple spaces between components
    147     and allowing trailing whitespace).
    148 
    149     Similarly, for output, lines ought to be separated by CRLF pairs
    150     but most clients grok LF characters just fine.
    151 
    152     If the first line of the request has the form
    153 
    154     <command> <path>
    155 
    156     (i.e. <version> is left out) then this is assumed to be an HTTP
    157     0.9 request; this form has no optional headers and data part and
    158     the reply consists of just the data.
    159 
    160     The reply form of the HTTP 1.x protocol again has three parts:
    161 
    162     1. One line giving the response code
    163     2. An optional set of RFC-822-style headers
    164     3. The data
    165 
    166     Again, the headers and data are separated by a blank line.
    167 
    168     The response code line has the form
    169 
    170     <version> <responsecode> <responsestring>
    171 
    172     where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    173     <responsecode> is a 3-digit response code indicating success or
    174     failure of the request, and <responsestring> is an optional
    175     human-readable string explaining what the response code means.
    176 
    177     This server parses the request and the headers, and then calls a
    178     function specific to the request type (<command>).  Specifically,
    179     a request SPAM will be handled by a method do_SPAM().  If no
    180     such method exists the server sends an error response to the
    181     client.  If it exists, it is called with no arguments:
    182 
    183     do_SPAM()
    184 
    185     Note that the request name is case sensitive (i.e. SPAM and spam
    186     are different requests).
    187 
    188     The various request details are stored in instance variables:
    189 
    190     - client_address is the client IP address in the form (host,
    191     port);
    192 
    193     - command, path and version are the broken-down request line;
    194 
    195     - headers is an instance of mimetools.Message (or a derived
    196     class) containing the header information;
    197 
    198     - rfile is a file object open for reading positioned at the
    199     start of the optional input data part;
    200 
    201     - wfile is a file object open for writing.
    202 
    203     IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
    204 
    205     The first thing to be written must be the response line.  Then
    206     follow 0 or more header lines, then a blank line, and then the
    207     actual data (if any).  The meaning of the header lines depends on
    208     the command executed by the server; in most cases, when data is
    209     returned, there should be at least one header line of the form
    210 
    211     Content-type: <type>/<subtype>
    212 
    213     where <type> and <subtype> should be registered MIME types,
    214     e.g. "text/html" or "text/plain".
    215 
    216     """
    217 
    # The Python system version, truncated to its first component
    # (the first whitespace-separated word of sys.version).
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    # version_string() joins this with sys_version, and send_response()
    # sends the result as the Server header.
    server_version = "BaseHTTP/" + __version__

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
    231 
    232     def parse_request(self):
    233         """Parse a request (internal).
    234 
    235         The request should be stored in self.raw_requestline; the results
    236         are in self.command, self.path, self.request_version and
    237         self.headers.
    238 
    239         Return True for success, False for failure; on failure, an
    240         error is sent back.
    241 
    242         """
    243         self.command = None  # set in case of error on the first line

    244         self.request_version = version = self.default_request_version
    245         self.close_connection = 1
    246         requestline = self.raw_requestline
    247         requestline = requestline.rstrip('\r\n')
    248         self.requestline = requestline
    249         words = requestline.split()
    250         if len(words) == 3:
    251             command, path, version = words
    252             if version[:5] != 'HTTP/':
    253                 self.send_error(400, "Bad request version (%r)" % version)
    254                 return False
    255             try:
    256                 base_version_number = version.split('/', 1)[1]
    257                 version_number = base_version_number.split(".")
    258                 # RFC 2145 section 3.1 says there can be only one "." and

    259                 #   - major and minor numbers MUST be treated as

    260                 #      separate integers;

    261                 #   - HTTP/2.4 is a lower version than HTTP/2.13, which in

    262                 #      turn is lower than HTTP/12.3;

    263                 #   - Leading zeros MUST be ignored by recipients.

    264                 if len(version_number) != 2:
    265                     raise ValueError
    266                 version_number = int(version_number[0]), int(version_number[1])
    267             except (ValueError, IndexError):
    268                 self.send_error(400, "Bad request version (%r)" % version)
    269                 return False
    270             if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
    271                 self.close_connection = 0
    272             if version_number >= (2, 0):
    273                 self.send_error(505,
    274                           "Invalid HTTP Version (%s)" % base_version_number)
    275                 return False
    276         elif len(words) == 2:
    277             command, path = words
    278             self.close_connection = 1
    279             if command != 'GET':
    280                 self.send_error(400,
    281                                 "Bad HTTP/0.9 request type (%r)" % command)
    282                 return False
    283         elif not words:
    284             return False
    285         else:
    286             self.send_error(400, "Bad request syntax (%r)" % requestline)
    287             return False
    288         self.command, self.path, self.request_version = command, path, version
    289 
    290         # Examine the headers and look for a Connection directive

    291         self.headers = self.MessageClass(self.rfile, 0)
    292 
    293         conntype = self.headers.get('Connection', "")
    294         if conntype.lower() == 'close':
    295             self.close_connection = 1
    296         elif (conntype.lower() == 'keep-alive' and
    297               self.protocol_version >= "HTTP/1.1"):
    298             self.close_connection = 0
    299         return True
    300 
    301     def handle_one_request(self):
    302         """Handle a single HTTP request.
    303 
    304         You normally don't need to override this method; see the class
    305         __doc__ string for information on how to handle specific HTTP
    306         commands such as GET and POST.
    307 
    308         """
    309         try:
    310             self.raw_requestline = self.rfile.readline(65537)
    311             if len(self.raw_requestline) > 65536:
    312                 self.requestline = ''
    313                 self.request_version = ''
    314                 self.command = ''
    315                 self.send_error(414)
    316                 return
    317             if not self.raw_requestline:
    318                 self.close_connection = 1
    319                 return
    320             if not self.parse_request():
    321                 # An error code has been sent, just exit

    322                 return
    323             mname = 'do_' + self.command
    324             if not hasattr(self, mname):
    325                 self.send_error(501, "Unsupported method (%r)" % self.command)
    326                 return
    327             method = getattr(self, mname)
    328             method()
    329             self.wfile.flush() #actually send the response if not already done.

    330         except socket.timeout, e:
    331             #a read or a write timed out.  Discard this connection

    332             self.log_error("Request timed out: %r", e)
    333             self.close_connection = 1
    334             return
    335 
    336     def handle(self):
    337         """Handle multiple requests if necessary."""
    338         self.close_connection = 1
    339 
    340         self.handle_one_request()
    341         while not self.close_connection:
    342             self.handle_one_request()
    343 
    344     def send_error(self, code, message=None):
    345         """Send and log an error reply.
    346 
    347         Arguments are the error code, and a detailed message.
    348         The detailed message defaults to the short entry matching the
    349         response code.
    350 
    351         This sends an error response (so it must be called before any
    352         output has been generated), logs the error, and finally sends
    353         a piece of HTML explaining the error to the user.
    354 
    355         """
    356 
    357         try:
    358             short, long = self.responses[code]
    359         except KeyError:
    360             short, long = '???', '???'
    361         if message is None:
    362             message = short
    363         explain = long
    364         self.log_error("code %d, message %s", code, message)
    365         # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)

    366         content = (self.error_message_format %
    367                    {'code': code, 'message': _quote_html(message), 'explain': explain})
    368         self.send_response(code, message)
    369         self.send_header("Content-Type", self.error_content_type)
    370         self.send_header('Connection', 'close')
    371         self.end_headers()
    372         if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
    373             self.wfile.write(content)
    374 
    # Template and Content-Type used by send_error() for the error page.
    # Both default to the module-level constants; override them on a
    # subclass to change the page.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE
    377 
    378     def send_response(self, code, message=None):
    379         """Send the response header and log the response code.
    380 
    381         Also send two standard headers with the server software
    382         version and the current date.
    383 
    384         """
    385         self.log_request(code)
    386         if message is None:
    387             if code in self.responses:
    388                 message = self.responses[code][0]
    389             else:
    390                 message = ''
    391         if self.request_version != 'HTTP/0.9':
    392             self.wfile.write("%s %d %s\r\n" %
    393                              (self.protocol_version, code, message))
    394             # print (self.protocol_version, code, message)

    395         self.send_header('Server', self.version_string())
    396         self.send_header('Date', self.date_time_string())
    397 
    398     def send_header(self, keyword, value):
    399         """Send a MIME header."""
    400         if self.request_version != 'HTTP/0.9':
    401             self.wfile.write("%s: %s\r\n" % (keyword, value))
    402 
    403         if keyword.lower() == 'connection':
    404             if value.lower() == 'close':
    405                 self.close_connection = 1
    406             elif value.lower() == 'keep-alive':
    407                 self.close_connection = 0
    408 
    409     def end_headers(self):
    410         """Send the blank line ending the MIME headers."""
    411         if self.request_version != 'HTTP/0.9':
    412             self.wfile.write("\r\n")
    413 
    414     def log_request(self, code='-', size='-'):
    415         """Log an accepted request.
    416 
    417         This is called by send_response().
    418 
    419         """
    420 
    421         self.log_message('"%s" %s %s',
    422                          self.requestline, str(code), str(size))
    423 
    424     def log_error(self, format, *args):
    425         """Log an error.
    426 
    427         This is called when a request cannot be fulfilled.  By
    428         default it passes the message on to log_message().
    429 
    430         Arguments are the same as for log_message().
    431 
    432         XXX This should go to the separate error log.
    433 
    434         """
    435 
    436         self.log_message(format, *args)
    437 
    438     def log_message(self, format, *args):
    439         """Log an arbitrary message.
    440 
    441         This is used by all other logging functions.  Override
    442         it if you have specific logging wishes.
    443 
    444         The first argument, FORMAT, is a format string for the
    445         message to be logged.  If the format string contains
    446         any % escapes requiring parameters, they should be
    447         specified as subsequent arguments (it's just like
    448         printf!).
    449 
    450         The client ip address and current date/time are prefixed to every
    451         message.
    452 
    453         """
    454 
    455         sys.stderr.write("%s - - [%s] %s\n" %
    456                          (self.client_address[0],
    457                           self.log_date_time_string(),
    458                           format%args))
    459 
    460     def version_string(self):
    461         """Return the server software version string."""
    462         return self.server_version + ' ' + self.sys_version
    463 
    464     def date_time_string(self, timestamp=None):
    465         """Return the current date and time formatted for a message header."""
    466         if timestamp is None:
    467             timestamp = time.time()
    468         year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
    469         s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
    470                 self.weekdayname[wd],
    471                 day, self.monthname[month], year,
    472                 hh, mm, ss)
    473         return s
    474 
    475     def log_date_time_string(self):
    476         """Return the current time formatted for logging."""
    477         now = time.time()
    478         year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
    479         s = "%02d/%3s/%04d %02d:%02d:%02d" % (
    480                 day, self.monthname[month], year, hh, mm, ss)
    481         return s
    482 
    # Abbreviated day names, indexed by the weekday field of the time
    # tuple in date_time_string() (the list starts with Monday).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated month names, indexed directly by the month number of the
    # time tuple; slot 0 is a None placeholder so that index 1 is January.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    488 
    def address_string(self):
        """Return the client address formatted for logging.

        The host part of self.client_address is passed to
        socket.getfqdn() and the name it returns is the result; the port
        part is not used.

        """

        client_host, _client_port = self.client_address[:2]
        return socket.getfqdn(client_host)
    499 
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # (parse_request() only keeps a connection open when this string
    # compares >= "HTTP/1.1"; send_response() puts it in the status line).
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers.
    # parse_request() calls it as MessageClass(self.rfile, 0) and stores
    # the result in self.headers.
    MessageClass = mimetools.Message
    508 
    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    # send_response() uses the short message as the default reason phrase;
    # send_error() uses the short message as its default detail text and
    # the long message as the explanation in the error page.
    responses = {
        # 1xx
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        # 2xx
        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        # 3xx
        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        # 4xx
        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        # 5xx
        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
    577 
    578 
    579 def test(HandlerClass = BaseHTTPRequestHandler,
    580          ServerClass = HTTPServer, protocol="HTTP/1.0"):
    581     """Test the HTTP request handler class.
    582 
    583     This runs an HTTP server on port 8000 (or the first command line
    584     argument).
    585 
    586     """
    587 
    588     if sys.argv[1:]:
    589         port = int(sys.argv[1])
    590     else:
    591         port = 8000
    592     server_address = ('', port)
    593 
    594     HandlerClass.protocol_version = protocol
    595     httpd = ServerClass(server_address, HandlerClass)
    596 
    597     sa = httpd.socket.getsockname()
    598     print "Serving HTTP on", sa[0], "port", sa[1], "..."
    599     httpd.serve_forever()
    600 
    601 
# Run the demo server when the module is executed as a script.
if __name__ == '__main__':
    test()
    604