Home | History | Annotate | Download | only in http
      1 """HTTP server classes.
      2 
      3 Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
      4 SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
      5 and CGIHTTPRequestHandler for CGI scripts.
      6 
      7 It does, however, optionally implement HTTP/1.1 persistent connections,
      8 as of version 0.3.
      9 
     10 Notes on CGIHTTPRequestHandler
     11 ------------------------------
     12 
     13 This class implements GET and POST requests to cgi-bin scripts.
     14 
     15 If the os.fork() function is not present (e.g. on Windows),
     16 subprocess.Popen() is used as a fallback, with slightly altered semantics.
     17 
     18 In all cases, the implementation is intentionally naive -- all
     19 requests are executed synchronously.
     20 
     21 SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
     22 -- it may execute arbitrary Python code or external programs.
     23 
     24 Note that status code 200 is sent prior to execution of a CGI script, so
     25 scripts cannot send other status codes such as 302 (redirect).
     26 
     27 XXX To do:
     28 
     29 - log requests even later (to capture byte count)
     30 - log user-agent header and other interesting goodies
     31 - send error log to separate file
     32 """
     33 
     34 
     35 # See also:
     36 #
     37 # HTTP Working Group                                        T. Berners-Lee
     38 # INTERNET-DRAFT                                            R. T. Fielding
     39 # <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
     40 # Expires September 8, 1995                                  March 8, 1995
     41 #
     42 # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
     43 #
     44 # and
     45 #
     46 # Network Working Group                                      R. Fielding
     47 # Request for Comments: 2616                                       et al
     48 # Obsoletes: 2068                                              June 1999
     49 # Category: Standards Track
     50 #
     51 # URL: http://www.faqs.org/rfcs/rfc2616.html
     52 
     53 # Log files
     54 # ---------
     55 #
     56 # Here's a quote from the NCSA httpd docs about log file format.
     57 #
     58 # | The logfile format is as follows. Each line consists of:
     59 # |
     60 # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
     61 # |
     62 # |        host: Either the DNS name or the IP number of the remote client
     63 # |        rfc931: Any information returned by identd for this person,
     64 # |                - otherwise.
     65 # |        authuser: If user sent a userid for authentication, the user name,
     66 # |                  - otherwise.
     67 # |        DD: Day
     68 # |        Mon: Month (calendar name)
     69 # |        YYYY: Year
     70 # |        hh: hour (24-hour format, the machine's timezone)
     71 # |        mm: minutes
     72 # |        ss: seconds
     73 # |        request: The first line of the HTTP request as sent by the client.
     74 # |        ddd: the status code returned by the server, - if not available.
     75 # |        bbbb: the total number of bytes sent,
     76 # |              *not including the HTTP/1.0 header*, - if not available
     77 # |
     78 # | You can determine the name of the file accessed through request.
     79 #
     80 # (Actually, the latter is only true if you know the server configuration
     81 # at the time the request was made!)
     82 
     83 __version__ = "0.6"
     84 
     85 __all__ = [
     86     "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
     87     "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
     88 ]
     89 
     90 import copy
     91 import datetime
     92 import email.utils
     93 import html
     94 import http.client
     95 import io
     96 import mimetypes
     97 import os
     98 import posixpath
     99 import select
    100 import shutil
    101 import socket # For gethostbyaddr()
    102 import socketserver
    103 import sys
    104 import time
    105 import urllib.parse
    106 from functools import partial
    107 
    108 from http import HTTPStatus
    109 
    110 
# Default HTML template for error responses.  It is %-formatted with a
# mapping that supplies three keys: 'code' (used with both %d and %s),
# 'message' and 'explain'.  BaseHTTPRequestHandler.send_error() HTML-escapes
# the message and explanation before substituting them.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Content-Type header value sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
    130 
    131 class HTTPServer(socketserver.TCPServer):
    132 
    133     allow_reuse_address = 1    # Seems to make sense in testing environment
    134 
    135     def server_bind(self):
    136         """Override server_bind to store the server name."""
    137         socketserver.TCPServer.server_bind(self)
    138         host, port = self.server_address[:2]
    139         self.server_name = socket.getfqdn(host)
    140         self.server_port = port
    141 
    142 
    143 class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    144     daemon_threads = True
    145 
    146 
    147 class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
    148 
    149     """HTTP request handler base class.
    150 
    151     The following explanation of HTTP serves to guide you through the
    152     code as well as to expose any misunderstandings I may have about
    153     HTTP (so you don't need to read the code to figure out I'm wrong
    154     :-).
    155 
    156     HTTP (HyperText Transfer Protocol) is an extensible protocol on
    157     top of a reliable stream transport (e.g. TCP/IP).  The protocol
    158     recognizes three parts to a request:
    159 
    160     1. One line identifying the request type and path
    161     2. An optional set of RFC-822-style headers
    162     3. An optional data part
    163 
    164     The headers and data are separated by a blank line.
    165 
    166     The first line of the request has the form
    167 
    168     <command> <path> <version>
    169 
    170     where <command> is a (case-sensitive) keyword such as GET or POST,
    171     <path> is a string containing path information for the request,
    172     and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    173     <path> is encoded using the URL encoding scheme (using %xx to signify
    174     the ASCII character with hex code xx).
    175 
    176     The specification specifies that lines are separated by CRLF but
    177     for compatibility with the widest range of clients recommends
    178     servers also handle LF.  Similarly, whitespace in the request line
    179     is treated sensibly (allowing multiple spaces between components
    180     and allowing trailing whitespace).
    181 
    182     Similarly, for output, lines ought to be separated by CRLF pairs
    183     but most clients grok LF characters just fine.
    184 
    185     If the first line of the request has the form
    186 
    187     <command> <path>
    188 
    189     (i.e. <version> is left out) then this is assumed to be an HTTP
    190     0.9 request; this form has no optional headers and data part and
    191     the reply consists of just the data.
    192 
    193     The reply form of the HTTP 1.x protocol again has three parts:
    194 
    195     1. One line giving the response code
    196     2. An optional set of RFC-822-style headers
    197     3. The data
    198 
    199     Again, the headers and data are separated by a blank line.
    200 
    201     The response code line has the form
    202 
    203     <version> <responsecode> <responsestring>
    204 
    205     where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    206     <responsecode> is a 3-digit response code indicating success or
    207     failure of the request, and <responsestring> is an optional
    208     human-readable string explaining what the response code means.
    209 
    210     This server parses the request and the headers, and then calls a
    211     function specific to the request type (<command>).  Specifically,
    212     a request SPAM will be handled by a method do_SPAM().  If no
    213     such method exists the server sends an error response to the
    214     client.  If it exists, it is called with no arguments:
    215 
    216     do_SPAM()
    217 
    218     Note that the request name is case sensitive (i.e. SPAM and spam
    219     are different requests).
    220 
    221     The various request details are stored in instance variables:
    222 
    223     - client_address is the client IP address in the form (host,
    224     port);
    225 
    226     - command, path and version are the broken-down request line;
    227 
    228     - headers is an instance of email.message.Message (or a derived
    229     class) containing the header information;
    230 
    231     - rfile is a file object open for reading positioned at the
    232     start of the optional input data part;
    233 
    234     - wfile is a file object open for writing.
    235 
    236     IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
    237 
    238     The first thing to be written must be the response line.  Then
    239     follow 0 or more header lines, then a blank line, and then the
    240     actual data (if any).  The meaning of the header lines depends on
    241     the command executed by the server; in most cases, when data is
    242     returned, there should be at least one header line of the form
    243 
    244     Content-type: <type>/<subtype>
    245 
    246     where <type> and <subtype> should be registered MIME types,
    247     e.g. "text/html" or "text/plain".
    248 
    249     """
    250 
    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    # version_string() joins this with sys_version for the Server header.
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type used by send_error() for the error page.
    # The template is %-formatted with the keys 'code', 'message' and
    # 'explain'.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
    267 
    268     def parse_request(self):
    269         """Parse a request (internal).
    270 
    271         The request should be stored in self.raw_requestline; the results
    272         are in self.command, self.path, self.request_version and
    273         self.headers.
    274 
    275         Return True for success, False for failure; on failure, any relevant
    276         error response has already been sent back.
    277 
    278         """
    279         self.command = None  # set in case of error on the first line
    280         self.request_version = version = self.default_request_version
    281         self.close_connection = True
    282         requestline = str(self.raw_requestline, 'iso-8859-1')
    283         requestline = requestline.rstrip('\r\n')
    284         self.requestline = requestline
    285         words = requestline.split()
    286         if len(words) == 0:
    287             return False
    288 
    289         if len(words) >= 3:  # Enough to determine protocol version
    290             version = words[-1]
    291             try:
    292                 if not version.startswith('HTTP/'):
    293                     raise ValueError
    294                 base_version_number = version.split('/', 1)[1]
    295                 version_number = base_version_number.split(".")
    296                 # RFC 2145 section 3.1 says there can be only one "." and
    297                 #   - major and minor numbers MUST be treated as
    298                 #      separate integers;
    299                 #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
    300                 #      turn is lower than HTTP/12.3;
    301                 #   - Leading zeros MUST be ignored by recipients.
    302                 if len(version_number) != 2:
    303                     raise ValueError
    304                 version_number = int(version_number[0]), int(version_number[1])
    305             except (ValueError, IndexError):
    306                 self.send_error(
    307                     HTTPStatus.BAD_REQUEST,
    308                     "Bad request version (%r)" % version)
    309                 return False
    310             if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
    311                 self.close_connection = False
    312             if version_number >= (2, 0):
    313                 self.send_error(
    314                     HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
    315                     "Invalid HTTP version (%s)" % base_version_number)
    316                 return False
    317             self.request_version = version
    318 
    319         if not 2 <= len(words) <= 3:
    320             self.send_error(
    321                 HTTPStatus.BAD_REQUEST,
    322                 "Bad request syntax (%r)" % requestline)
    323             return False
    324         command, path = words[:2]
    325         if len(words) == 2:
    326             self.close_connection = True
    327             if command != 'GET':
    328                 self.send_error(
    329                     HTTPStatus.BAD_REQUEST,
    330                     "Bad HTTP/0.9 request type (%r)" % command)
    331                 return False
    332         self.command, self.path = command, path
    333 
    334         # Examine the headers and look for a Connection directive.
    335         try:
    336             self.headers = http.client.parse_headers(self.rfile,
    337                                                      _class=self.MessageClass)
    338         except http.client.LineTooLong as err:
    339             self.send_error(
    340                 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
    341                 "Line too long",
    342                 str(err))
    343             return False
    344         except http.client.HTTPException as err:
    345             self.send_error(
    346                 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
    347                 "Too many headers",
    348                 str(err)
    349             )
    350             return False
    351 
    352         conntype = self.headers.get('Connection', "")
    353         if conntype.lower() == 'close':
    354             self.close_connection = True
    355         elif (conntype.lower() == 'keep-alive' and
    356               self.protocol_version >= "HTTP/1.1"):
    357             self.close_connection = False
    358         # Examine the headers and look for an Expect directive
    359         expect = self.headers.get('Expect', "")
    360         if (expect.lower() == "100-continue" and
    361                 self.protocol_version >= "HTTP/1.1" and
    362                 self.request_version >= "HTTP/1.1"):
    363             if not self.handle_expect_100():
    364                 return False
    365         return True
    366 
    367     def handle_expect_100(self):
    368         """Decide what to do with an "Expect: 100-continue" header.
    369 
    370         If the client is expecting a 100 Continue response, we must
    371         respond with either a 100 Continue or a final response before
    372         waiting for the request body. The default is to always respond
    373         with a 100 Continue. You can behave differently (for example,
    374         reject unauthorized requests) by overriding this method.
    375 
    376         This method should either return True (possibly after sending
    377         a 100 Continue response) or send an error response and return
    378         False.
    379 
    380         """
    381         self.send_response_only(HTTPStatus.CONTINUE)
    382         self.end_headers()
    383         return True
    384 
    385     def handle_one_request(self):
    386         """Handle a single HTTP request.
    387 
    388         You normally don't need to override this method; see the class
    389         __doc__ string for information on how to handle specific HTTP
    390         commands such as GET and POST.
    391 
    392         """
    393         try:
    394             self.raw_requestline = self.rfile.readline(65537)
    395             if len(self.raw_requestline) > 65536:
    396                 self.requestline = ''
    397                 self.request_version = ''
    398                 self.command = ''
    399                 self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
    400                 return
    401             if not self.raw_requestline:
    402                 self.close_connection = True
    403                 return
    404             if not self.parse_request():
    405                 # An error code has been sent, just exit
    406                 return
    407             mname = 'do_' + self.command
    408             if not hasattr(self, mname):
    409                 self.send_error(
    410                     HTTPStatus.NOT_IMPLEMENTED,
    411                     "Unsupported method (%r)" % self.command)
    412                 return
    413             method = getattr(self, mname)
    414             method()
    415             self.wfile.flush() #actually send the response if not already done.
    416         except socket.timeout as e:
    417             #a read or a write timed out.  Discard this connection
    418             self.log_error("Request timed out: %r", e)
    419             self.close_connection = True
    420             return
    421 
    422     def handle(self):
    423         """Handle multiple requests if necessary."""
    424         self.close_connection = True
    425 
    426         self.handle_one_request()
    427         while not self.close_connection:
    428             self.handle_one_request()
    429 
    430     def send_error(self, code, message=None, explain=None):
    431         """Send and log an error reply.
    432 
    433         Arguments are
    434         * code:    an HTTP error code
    435                    3 digits
    436         * message: a simple optional 1 line reason phrase.
    437                    *( HTAB / SP / VCHAR / %x80-FF )
    438                    defaults to short entry matching the response code
    439         * explain: a detailed message defaults to the long entry
    440                    matching the response code.
    441 
    442         This sends an error response (so it must be called before any
    443         output has been generated), logs the error, and finally sends
    444         a piece of HTML explaining the error to the user.
    445 
    446         """
    447 
    448         try:
    449             shortmsg, longmsg = self.responses[code]
    450         except KeyError:
    451             shortmsg, longmsg = '???', '???'
    452         if message is None:
    453             message = shortmsg
    454         if explain is None:
    455             explain = longmsg
    456         self.log_error("code %d, message %s", code, message)
    457         self.send_response(code, message)
    458         self.send_header('Connection', 'close')
    459 
    460         # Message body is omitted for cases described in:
    461         #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
    462         #  - RFC7231: 6.3.6. 205(Reset Content)
    463         body = None
    464         if (code >= 200 and
    465             code not in (HTTPStatus.NO_CONTENT,
    466                          HTTPStatus.RESET_CONTENT,
    467                          HTTPStatus.NOT_MODIFIED)):
    468             # HTML encode to prevent Cross Site Scripting attacks
    469             # (see bug #1100201)
    470             content = (self.error_message_format % {
    471                 'code': code,
    472                 'message': html.escape(message, quote=False),
    473                 'explain': html.escape(explain, quote=False)
    474             })
    475             body = content.encode('UTF-8', 'replace')
    476             self.send_header("Content-Type", self.error_content_type)
    477             self.send_header('Content-Length', str(len(body)))
    478         self.end_headers()
    479 
    480         if self.command != 'HEAD' and body:
    481             self.wfile.write(body)
    482 
    483     def send_response(self, code, message=None):
    484         """Add the response header to the headers buffer and log the
    485         response code.
    486 
    487         Also send two standard headers with the server software
    488         version and the current date.
    489 
    490         """
    491         self.log_request(code)
    492         self.send_response_only(code, message)
    493         self.send_header('Server', self.version_string())
    494         self.send_header('Date', self.date_time_string())
    495 
    496     def send_response_only(self, code, message=None):
    497         """Send the response header only."""
    498         if self.request_version != 'HTTP/0.9':
    499             if message is None:
    500                 if code in self.responses:
    501                     message = self.responses[code][0]
    502                 else:
    503                     message = ''
    504             if not hasattr(self, '_headers_buffer'):
    505                 self._headers_buffer = []
    506             self._headers_buffer.append(("%s %d %s\r\n" %
    507                     (self.protocol_version, code, message)).encode(
    508                         'latin-1', 'strict'))
    509 
    510     def send_header(self, keyword, value):
    511         """Send a MIME header to the headers buffer."""
    512         if self.request_version != 'HTTP/0.9':
    513             if not hasattr(self, '_headers_buffer'):
    514                 self._headers_buffer = []
    515             self._headers_buffer.append(
    516                 ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
    517 
    518         if keyword.lower() == 'connection':
    519             if value.lower() == 'close':
    520                 self.close_connection = True
    521             elif value.lower() == 'keep-alive':
    522                 self.close_connection = False
    523 
    524     def end_headers(self):
    525         """Send the blank line ending the MIME headers."""
    526         if self.request_version != 'HTTP/0.9':
    527             self._headers_buffer.append(b"\r\n")
    528             self.flush_headers()
    529 
    530     def flush_headers(self):
    531         if hasattr(self, '_headers_buffer'):
    532             self.wfile.write(b"".join(self._headers_buffer))
    533             self._headers_buffer = []
    534 
    535     def log_request(self, code='-', size='-'):
    536         """Log an accepted request.
    537 
    538         This is called by send_response().
    539 
    540         """
    541         if isinstance(code, HTTPStatus):
    542             code = code.value
    543         self.log_message('"%s" %s %s',
    544                          self.requestline, str(code), str(size))
    545 
    546     def log_error(self, format, *args):
    547         """Log an error.
    548 
    549         This is called when a request cannot be fulfilled.  By
    550         default it passes the message on to log_message().
    551 
    552         Arguments are the same as for log_message().
    553 
    554         XXX This should go to the separate error log.
    555 
    556         """
    557 
    558         self.log_message(format, *args)
    559 
    560     def log_message(self, format, *args):
    561         """Log an arbitrary message.
    562 
    563         This is used by all other logging functions.  Override
    564         it if you have specific logging wishes.
    565 
    566         The first argument, FORMAT, is a format string for the
    567         message to be logged.  If the format string contains
    568         any % escapes requiring parameters, they should be
    569         specified as subsequent arguments (it's just like
    570         printf!).
    571 
    572         The client ip and current date/time are prefixed to
    573         every message.
    574 
    575         """
    576 
    577         sys.stderr.write("%s - - [%s] %s\n" %
    578                          (self.address_string(),
    579                           self.log_date_time_string(),
    580                           format%args))
    581 
    582     def version_string(self):
    583         """Return the server software version string."""
    584         return self.server_version + ' ' + self.sys_version
    585 
    586     def date_time_string(self, timestamp=None):
    587         """Return the current date and time formatted for a message header."""
    588         if timestamp is None:
    589             timestamp = time.time()
    590         return email.utils.formatdate(timestamp, usegmt=True)
    591 
    592     def log_date_time_string(self):
    593         """Return the current time formatted for logging."""
    594         now = time.time()
    595         year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
    596         s = "%02d/%3s/%04d %02d:%02d:%02d" % (
    597                 day, self.monthname[month], year, hh, mm, ss)
    598         return s
    599 
    # Three-letter English weekday abbreviations, Monday first.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Three-letter English month abbreviations.  Index 0 is a placeholder
    # so that a 1-based month number (as in time.localtime()) can be used
    # directly as the index; see log_date_time_string().
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    605 
    606     def address_string(self):
    607         """Return the client address."""
    608 
    609         return self.client_address[0]
    610 
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # (parse_request() only keeps a connection open when this is at least
    # "HTTP/1.1").  It is also the version written in the status line.
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    # Maps every HTTPStatus member to a (short phrase, long description)
    # pair; send_error() and send_response_only() look up their default
    # texts here.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
    625 
    626 
    627 class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    628 
    629     """Simple HTTP request handler with GET and HEAD commands.
    630 
    631     This serves files from the current directory and any of its
    632     subdirectories.  The MIME type for files is determined by
    633     calling the .guess_type() method.
    634 
    635     The GET and HEAD requests are identical except that the HEAD
    636     request omits the actual contents of the file.
    637 
    638     """
    639 
    640     server_version = "SimpleHTTP/" + __version__
    641 
    642     def __init__(self, *args, directory=None, **kwargs):
    643         if directory is None:
    644             directory = os.getcwd()
    645         self.directory = directory
    646         super().__init__(*args, **kwargs)
    647 
    648     def do_GET(self):
    649         """Serve a GET request."""
    650         f = self.send_head()
    651         if f:
    652             try:
    653                 self.copyfile(f, self.wfile)
    654             finally:
    655                 f.close()
    656 
    657     def do_HEAD(self):
    658         """Serve a HEAD request."""
    659         f = self.send_head()
    660         if f:
    661             f.close()
    662 
    663     def send_head(self):
    664         """Common code for GET and HEAD commands.
    665 
    666         This sends the response code and MIME headers.
    667 
    668         Return value is either a file object (which has to be copied
    669         to the outputfile by the caller unless the command was HEAD,
    670         and must be closed by the caller under all circumstances), or
    671         None, in which case the caller has nothing further to do.
    672 
    673         """
    674         path = self.translate_path(self.path)
    675         f = None
    676         if os.path.isdir(path):
    677             parts = urllib.parse.urlsplit(self.path)
    678             if not parts.path.endswith('/'):
    679                 # redirect browser - doing basically what apache does
    680                 self.send_response(HTTPStatus.MOVED_PERMANENTLY)
    681                 new_parts = (parts[0], parts[1], parts[2] + '/',
    682                              parts[3], parts[4])
    683                 new_url = urllib.parse.urlunsplit(new_parts)
    684                 self.send_header("Location", new_url)
    685                 self.end_headers()
    686                 return None
    687             for index in "index.html", "index.htm":
    688                 index = os.path.join(path, index)
    689                 if os.path.exists(index):
    690                     path = index
    691                     break
    692             else:
    693                 return self.list_directory(path)
    694         ctype = self.guess_type(path)
    695         try:
    696             f = open(path, 'rb')
    697         except OSError:
    698             self.send_error(HTTPStatus.NOT_FOUND, "File not found")
    699             return None
    700 
    701         try:
    702             fs = os.fstat(f.fileno())
    703             # Use browser cache if possible
    704             if ("If-Modified-Since" in self.headers
    705                     and "If-None-Match" not in self.headers):
    706                 # compare If-Modified-Since and time of last file modification
    707                 try:
    708                     ims = email.utils.parsedate_to_datetime(
    709                         self.headers["If-Modified-Since"])
    710                 except (TypeError, IndexError, OverflowError, ValueError):
    711                     # ignore ill-formed values
    712                     pass
    713                 else:
    714                     if ims.tzinfo is None:
    715                         # obsolete format with no timezone, cf.
    716                         # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
    717                         ims = ims.replace(tzinfo=datetime.timezone.utc)
    718                     if ims.tzinfo is datetime.timezone.utc:
    719                         # compare to UTC datetime of last modification
    720                         last_modif = datetime.datetime.fromtimestamp(
    721                             fs.st_mtime, datetime.timezone.utc)
    722                         # remove microseconds, like in If-Modified-Since
    723                         last_modif = last_modif.replace(microsecond=0)
    724 
    725                         if last_modif <= ims:
    726                             self.send_response(HTTPStatus.NOT_MODIFIED)
    727                             self.end_headers()
    728                             f.close()
    729                             return None
    730 
    731             self.send_response(HTTPStatus.OK)
    732             self.send_header("Content-type", ctype)
    733             self.send_header("Content-Length", str(fs[6]))
    734             self.send_header("Last-Modified",
    735                 self.date_time_string(fs.st_mtime))
    736             self.end_headers()
    737             return f
    738         except:
    739             f.close()
    740             raise
    741 
    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        PATH is the local directory whose entries are listed.  The page
        title is derived from the request path (self.path), never from
        the local PATH, so the server-side directory layout is not
        disclosed to the client.

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        # Case-insensitive ordering of the entries
        entries.sort(key=lambda a: a.lower())
        r = []
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            # Retry with the default error handler, still on the request
            # path: the local filesystem path must not leak into the page.
            displaypath = urllib.parse.unquote(self.path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            # The href is percent-encoded; the visible text is HTML-escaped.
            r.append('<li><a href="%s">%s</a></li>'
                    % (urllib.parse.quote(linkname,
                                          errors='surrogatepass'),
                       html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        # Hand the page back as an in-memory file positioned at its start
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f
    799 
    def translate_path(self, path):
        """Map a /-separated request PATH onto a local filename.

        The query string and fragment are discarded, percent-escapes are
        decoded and the result is normalized.  Each remaining component
        is then appended to self.directory; components that are not a
        plain file/directory name for the local file system (e.g. drive
        or directory names) are silently dropped.  (XXX They should
        probably be diagnosed.)

        """
        # Drop the query string first, then the fragment
        path = path.partition('?')[0].partition('#')[0]
        # Remember an explicit trailing slash; normpath strips it. Issue17324
        keep_slash = path.rstrip().endswith('/')
        try:
            decoded = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            decoded = urllib.parse.unquote(path)
        normalized = posixpath.normpath(decoded)
        local = self.directory
        for component in normalized.split('/'):
            if not component:
                continue
            if os.path.dirname(component) or component in (os.curdir,
                                                           os.pardir):
                # Not a simple file/directory name: skip it
                continue
            local = os.path.join(local, component)
        return local + '/' if keep_slash else local
    829 
    def copyfile(self, source, outputfile):
        """Copy all data from one file object to another.

        SOURCE is a file object open for reading (or anything with a
        read() method); OUTPUTFILE is a file object open for writing
        (or anything with a write() method).

        The only reason to override this would be to change the block
        size or perhaps to replace newlines by CRLF -- note however
        that the default server uses this method to copy binary data
        as well.

        """
        # Delegate the whole transfer to the standard library
        shutil.copyfileobj(source, outputfile)
    845 
    def guess_type(self, path):
        """Guess the MIME type of a file from its name.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype, usable for
        a MIME Content-type header.

        The extension is looked up in self.extensions_map, first
        exactly as written and then lower-cased; the entry stored under
        the empty key is the fallback.  It would be permissible (if
        slow) to look inside the data to make a better guess.

        """
        _, suffix = posixpath.splitext(path)
        table = self.extensions_map
        # Exact spelling wins over the lower-cased spelling
        for candidate in (suffix, suffix.lower()):
            if candidate in table:
                return table[candidate]
        return table['']
    869 
    # Load the system mime.types tables (once) before snapshotting them
    if not mimetypes.inited:
        mimetypes.init()
    # Private copy of the global table, with a default entry and with a
    # few source-file extensions forced to plain text
    extensions_map = {
        **mimetypes.types_map,
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
    }
    879 
    880 
    881 # Utilities for CGIHTTPRequestHandler
    882 
    883 def _url_collapse_path(path):
    884     """
    885     Given a URL path, remove extra '/'s and '.' path elements and collapse
    886     any '..' references and returns a collapsed path.
    887 
    888     Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    889     The utility of this function is limited to is_cgi method and helps
    890     preventing some security attacks.
    891 
    892     Returns: The reconstituted URL, which will always start with a '/'.
    893 
    894     Raises: IndexError if too many '..' occur within the path.
    895 
    896     """
    897     # Query component should not be involved.
    898     path, _, query = path.partition('?')
    899     path = urllib.parse.unquote(path)
    900 
    901     # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    902     # path semantics rather than local operating system semantics.
    903     path_parts = path.split('/')
    904     head_parts = []
    905     for part in path_parts[:-1]:
    906         if part == '..':
    907             head_parts.pop() # IndexError if more '..' than prior parts
    908         elif part and part != '.':
    909             head_parts.append( part )
    910     if path_parts:
    911         tail_part = path_parts.pop()
    912         if tail_part:
    913             if tail_part == '..':
    914                 head_parts.pop()
    915                 tail_part = ''
    916             elif tail_part == '.':
    917                 tail_part = ''
    918     else:
    919         tail_part = ''
    920 
    921     if query:
    922         tail_part = '?'.join((tail_part, query))
    923 
    924     splitpath = ('/' + '/'.join(head_parts), tail_part)
    925     collapsed_path = "/".join(splitpath)
    926 
    927     return collapsed_path
    928 
    929 
    930 
    931 nobody = None
    932 
    933 def nobody_uid():
    934     """Internal routine to get nobody's uid"""
    935     global nobody
    936     if nobody:
    937         return nobody
    938     try:
    939         import pwd
    940     except ImportError:
    941         return -1
    942     try:
    943         nobody = pwd.getpwnam('nobody')[2]
    944     except KeyError:
    945         nobody = 1 + max(x[2] for x in pwd.getpwall())
    946     return nobody
    947 
    948 
    949 def executable(path):
    950     """Test for executable file."""
    951     return os.access(path, os.X_OK)
    952 
    953 
    954 class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
    955 
    956     """Complete HTTP server with GET, HEAD and POST commands.
    957 
    958     GET and HEAD also support running CGI scripts.
    959 
    960     The POST command is *only* implemented for CGI scripts.
    961 
    962     """
    963 
    964     # Determine platform specifics
    965     have_fork = hasattr(os, 'fork')
    966 
    967     # Make rfile unbuffered -- we need to read one line and then pass
    968     # the rest to a subprocess, so we can't use buffered input.
    969     rbufsize = 0
    970 
    971     def do_POST(self):
    972         """Serve a POST request.
    973 
    974         This is only implemented for CGI scripts.
    975 
    976         """
    977 
    978         if self.is_cgi():
    979             self.run_cgi()
    980         else:
    981             self.send_error(
    982                 HTTPStatus.NOT_IMPLEMENTED,
    983                 "Can only POST to CGI scripts")
    984 
    985     def send_head(self):
    986         """Version of send_head that support CGI scripts"""
    987         if self.is_cgi():
    988             return self.run_cgi()
    989         else:
    990             return SimpleHTTPRequestHandler.send_head(self)
    991 
    992     def is_cgi(self):
    993         """Test whether self.path corresponds to a CGI script.
    994 
    995         Returns True and updates the cgi_info attribute to the tuple
    996         (dir, rest) if self.path requires running a CGI script.
    997         Returns False otherwise.
    998 
    999         If any exception is raised, the caller should assume that
   1000         self.path was rejected as invalid and act accordingly.
   1001 
   1002         The default implementation tests whether the normalized url
   1003         path begins with one of the strings in self.cgi_directories
   1004         (and the next character is a '/' or the end of the string).
   1005 
   1006         """
   1007         collapsed_path = _url_collapse_path(self.path)
   1008         dir_sep = collapsed_path.find('/', 1)
   1009         head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
   1010         if head in self.cgi_directories:
   1011             self.cgi_info = head, tail
   1012             return True
   1013         return False
   1014 
   1015 
   1016     cgi_directories = ['/cgi-bin', '/htbin']
   1017 
   1018     def is_executable(self, path):
   1019         """Test whether argument path is an executable file."""
   1020         return executable(path)
   1021 
   1022     def is_python(self, path):
   1023         """Test whether argument path is a Python script."""
   1024         head, tail = os.path.splitext(path)
   1025         return tail.lower() in (".py", ".pyw")
   1026 
   1027     def run_cgi(self):
   1028         """Execute a CGI script."""
   1029         dir, rest = self.cgi_info
   1030         path = dir + '/' + rest
   1031         i = path.find('/', len(dir)+1)
   1032         while i >= 0:
   1033             nextdir = path[:i]
   1034             nextrest = path[i+1:]
   1035 
   1036             scriptdir = self.translate_path(nextdir)
   1037             if os.path.isdir(scriptdir):
   1038                 dir, rest = nextdir, nextrest
   1039                 i = path.find('/', len(dir)+1)
   1040             else:
   1041                 break
   1042 
   1043         # find an explicit query string, if present.
   1044         rest, _, query = rest.partition('?')
   1045 
   1046         # dissect the part after the directory name into a script name &
   1047         # a possible additional path, to be stored in PATH_INFO.
   1048         i = rest.find('/')
   1049         if i >= 0:
   1050             script, rest = rest[:i], rest[i:]
   1051         else:
   1052             script, rest = rest, ''
   1053 
   1054         scriptname = dir + '/' + script
   1055         scriptfile = self.translate_path(scriptname)
   1056         if not os.path.exists(scriptfile):
   1057             self.send_error(
   1058                 HTTPStatus.NOT_FOUND,
   1059                 "No such CGI script (%r)" % scriptname)
   1060             return
   1061         if not os.path.isfile(scriptfile):
   1062             self.send_error(
   1063                 HTTPStatus.FORBIDDEN,
   1064                 "CGI script is not a plain file (%r)" % scriptname)
   1065             return
   1066         ispy = self.is_python(scriptname)
   1067         if self.have_fork or not ispy:
   1068             if not self.is_executable(scriptfile):
   1069                 self.send_error(
   1070                     HTTPStatus.FORBIDDEN,
   1071                     "CGI script is not executable (%r)" % scriptname)
   1072                 return
   1073 
   1074         # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
   1075         # XXX Much of the following could be prepared ahead of time!
   1076         env = copy.deepcopy(os.environ)
   1077         env['SERVER_SOFTWARE'] = self.version_string()
   1078         env['SERVER_NAME'] = self.server.server_name
   1079         env['GATEWAY_INTERFACE'] = 'CGI/1.1'
   1080         env['SERVER_PROTOCOL'] = self.protocol_version
   1081         env['SERVER_PORT'] = str(self.server.server_port)
   1082         env['REQUEST_METHOD'] = self.command
   1083         uqrest = urllib.parse.unquote(rest)
   1084         env['PATH_INFO'] = uqrest
   1085         env['PATH_TRANSLATED'] = self.translate_path(uqrest)
   1086         env['SCRIPT_NAME'] = scriptname
   1087         if query:
   1088             env['QUERY_STRING'] = query
   1089         env['REMOTE_ADDR'] = self.client_address[0]
   1090         authorization = self.headers.get("authorization")
   1091         if authorization:
   1092             authorization = authorization.split()
   1093             if len(authorization) == 2:
   1094                 import base64, binascii
   1095                 env['AUTH_TYPE'] = authorization[0]
   1096                 if authorization[0].lower() == "basic":
   1097                     try:
   1098                         authorization = authorization[1].encode('ascii')
   1099                         authorization = base64.decodebytes(authorization).\
   1100                                         decode('ascii')
   1101                     except (binascii.Error, UnicodeError):
   1102                         pass
   1103                     else:
   1104                         authorization = authorization.split(':')
   1105                         if len(authorization) == 2:
   1106                             env['REMOTE_USER'] = authorization[0]
   1107         # XXX REMOTE_IDENT
   1108         if self.headers.get('content-type') is None:
   1109             env['CONTENT_TYPE'] = self.headers.get_content_type()
   1110         else:
   1111             env['CONTENT_TYPE'] = self.headers['content-type']
   1112         length = self.headers.get('content-length')
   1113         if length:
   1114             env['CONTENT_LENGTH'] = length
   1115         referer = self.headers.get('referer')
   1116         if referer:
   1117             env['HTTP_REFERER'] = referer
   1118         accept = []
   1119         for line in self.headers.getallmatchingheaders('accept'):
   1120             if line[:1] in "\t\n\r ":
   1121                 accept.append(line.strip())
   1122             else:
   1123                 accept = accept + line[7:].split(',')
   1124         env['HTTP_ACCEPT'] = ','.join(accept)
   1125         ua = self.headers.get('user-agent')
   1126         if ua:
   1127             env['HTTP_USER_AGENT'] = ua
   1128         co = filter(None, self.headers.get_all('cookie', []))
   1129         cookie_str = ', '.join(co)
   1130         if cookie_str:
   1131             env['HTTP_COOKIE'] = cookie_str
   1132         # XXX Other HTTP_* headers
   1133         # Since we're setting the env in the parent, provide empty
   1134         # values to override previously set values
   1135         for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
   1136                   'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
   1137             env.setdefault(k, "")
   1138 
   1139         self.send_response(HTTPStatus.OK, "Script output follows")
   1140         self.flush_headers()
   1141 
   1142         decoded_query = query.replace('+', ' ')
   1143 
   1144         if self.have_fork:
   1145             # Unix -- fork as we should
   1146             args = [script]
   1147             if '=' not in decoded_query:
   1148                 args.append(decoded_query)
   1149             nobody = nobody_uid()
   1150             self.wfile.flush() # Always flush before forking
   1151             pid = os.fork()
   1152             if pid != 0:
   1153                 # Parent
   1154                 pid, sts = os.waitpid(pid, 0)
   1155                 # throw away additional data [see bug #427345]
   1156                 while select.select([self.rfile], [], [], 0)[0]:
   1157                     if not self.rfile.read(1):
   1158                         break
   1159                 if sts:
   1160                     self.log_error("CGI script exit status %#x", sts)
   1161                 return
   1162             # Child
   1163             try:
   1164                 try:
   1165                     os.setuid(nobody)
   1166                 except OSError:
   1167                     pass
   1168                 os.dup2(self.rfile.fileno(), 0)
   1169                 os.dup2(self.wfile.fileno(), 1)
   1170                 os.execve(scriptfile, args, env)
   1171             except:
   1172                 self.server.handle_error(self.request, self.client_address)
   1173                 os._exit(127)
   1174 
   1175         else:
   1176             # Non-Unix -- use subprocess
   1177             import subprocess
   1178             cmdline = [scriptfile]
   1179             if self.is_python(scriptfile):
   1180                 interp = sys.executable
   1181                 if interp.lower().endswith("w.exe"):
   1182                     # On Windows, use python.exe, not pythonw.exe
   1183                     interp = interp[:-5] + interp[-4:]
   1184                 cmdline = [interp, '-u'] + cmdline
   1185             if '=' not in query:
   1186                 cmdline.append(query)
   1187             self.log_message("command: %s", subprocess.list2cmdline(cmdline))
   1188             try:
   1189                 nbytes = int(length)
   1190             except (TypeError, ValueError):
   1191                 nbytes = 0
   1192             p = subprocess.Popen(cmdline,
   1193                                  stdin=subprocess.PIPE,
   1194                                  stdout=subprocess.PIPE,
   1195                                  stderr=subprocess.PIPE,
   1196                                  env = env
   1197                                  )
   1198             if self.command.lower() == "post" and nbytes > 0:
   1199                 data = self.rfile.read(nbytes)
   1200             else:
   1201                 data = None
   1202             # throw away additional data [see bug #427345]
   1203             while select.select([self.rfile._sock], [], [], 0)[0]:
   1204                 if not self.rfile._sock.recv(1):
   1205                     break
   1206             stdout, stderr = p.communicate(data)
   1207             self.wfile.write(stdout)
   1208             if stderr:
   1209                 self.log_error('%s', stderr)
   1210             p.stderr.close()
   1211             p.stdout.close()
   1212             status = p.returncode
   1213             if status:
   1214                 self.log_error("CGI script exit status %#x", status)
   1215             else:
   1216                 self.log_message("CGI script exited OK")
   1217 
   1218 
   1219 def test(HandlerClass=BaseHTTPRequestHandler,
   1220          ServerClass=ThreadingHTTPServer,
   1221          protocol="HTTP/1.0", port=8000, bind=""):
   1222     """Test the HTTP request handler class.
   1223 
   1224     This runs an HTTP server on port 8000 (or the port argument).
   1225 
   1226     """
   1227     server_address = (bind, port)
   1228 
   1229     HandlerClass.protocol_version = protocol
   1230     with ServerClass(server_address, HandlerClass) as httpd:
   1231         sa = httpd.socket.getsockname()
   1232         serve_message = "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..."
   1233         print(serve_message.format(host=sa[0], port=sa[1]))
   1234         try:
   1235             httpd.serve_forever()
   1236         except KeyboardInterrupt:
   1237             print("\nKeyboard interrupt received, exiting.")
   1238             sys.exit(0)
   1239 
   1240 if __name__ == '__main__':
   1241     import argparse
   1242 
   1243     parser = argparse.ArgumentParser()
   1244     parser.add_argument('--cgi', action='store_true',
   1245                        help='Run as CGI Server')
   1246     parser.add_argument('--bind', '-b', default='', metavar='ADDRESS',
   1247                         help='Specify alternate bind address '
   1248                              '[default: all interfaces]')
   1249     parser.add_argument('--directory', '-d', default=os.getcwd(),
   1250                         help='Specify alternative directory '
   1251                         '[default:current directory]')
   1252     parser.add_argument('port', action='store',
   1253                         default=8000, type=int,
   1254                         nargs='?',
   1255                         help='Specify alternate port [default: 8000]')
   1256     args = parser.parse_args()
   1257     if args.cgi:
   1258         handler_class = CGIHTTPRequestHandler
   1259     else:
   1260         handler_class = partial(SimpleHTTPRequestHandler,
   1261                                 directory=args.directory)
   1262     test(HandlerClass=handler_class, port=args.port, bind=args.bind)
   1263