Home | History | Annotate | Download | only in Lib
      1 """CGI-savvy HTTP Server.
      2 
      3 This module builds on SimpleHTTPServer by implementing GET and POST
      4 requests to cgi-bin scripts.
      5 
       6 If the os.fork() function is not present (e.g. on Windows),
       7 subprocess.Popen() is used as a fallback, with slightly altered
       8 semantics: Python scripts are run with the current interpreter and
       9 other scripts are executed directly.
     10 
     11 In all cases, the implementation is intentionally naive -- all
      12 requests are executed synchronously.
     13 
     14 SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
     15 -- it may execute arbitrary Python code or external programs.
     16 
     17 Note that status code 200 is sent prior to execution of a CGI script, so
     18 scripts cannot send other status codes such as 302 (redirect).
     19 """
     20 
     21 
# Version string of this module.
__version__ = "0.4"

# Names exported by ``from <this module> import *``: only the handler class.
__all__ = ["CGIHTTPRequestHandler"]
     25 
     26 import os
     27 import sys
     28 import urllib
     29 import BaseHTTPServer
     30 import SimpleHTTPServer
     31 import select
     32 import copy
     33 
     34 
     35 class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
     36 
     37     """Complete HTTP server with GET, HEAD and POST commands.
     38 
     39     GET and HEAD also support running CGI scripts.
     40 
     41     The POST command is *only* implemented for CGI scripts.
     42 
     43     """
     44 
     45     # Determine platform specifics

     46     have_fork = hasattr(os, 'fork')
     47     have_popen2 = hasattr(os, 'popen2')
     48     have_popen3 = hasattr(os, 'popen3')
     49 
     50     # Make rfile unbuffered -- we need to read one line and then pass

     51     # the rest to a subprocess, so we can't use buffered input.

     52     rbufsize = 0
     53 
     54     def do_POST(self):
     55         """Serve a POST request.
     56 
     57         This is only implemented for CGI scripts.
     58 
     59         """
     60 
     61         if self.is_cgi():
     62             self.run_cgi()
     63         else:
     64             self.send_error(501, "Can only POST to CGI scripts")
     65 
     66     def send_head(self):
     67         """Version of send_head that support CGI scripts"""
     68         if self.is_cgi():
     69             return self.run_cgi()
     70         else:
     71             return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
     72 
     73     def is_cgi(self):
     74         """Test whether self.path corresponds to a CGI script.
     75 
     76         Returns True and updates the cgi_info attribute to the tuple
     77         (dir, rest) if self.path requires running a CGI script.
     78         Returns False otherwise.
     79 
     80         If any exception is raised, the caller should assume that
     81         self.path was rejected as invalid and act accordingly.
     82 
     83         The default implementation tests whether the normalized url
     84         path begins with one of the strings in self.cgi_directories
     85         (and the next character is a '/' or the end of the string).
     86         """
     87         splitpath = _url_collapse_path_split(self.path)
     88         if splitpath[0] in self.cgi_directories:
     89             self.cgi_info = splitpath
     90             return True
     91         return False
     92 
     93     cgi_directories = ['/cgi-bin', '/htbin']
     94 
     95     def is_executable(self, path):
     96         """Test whether argument path is an executable file."""
     97         return executable(path)
     98 
     99     def is_python(self, path):
    100         """Test whether argument path is a Python script."""
    101         head, tail = os.path.splitext(path)
    102         return tail.lower() in (".py", ".pyw")
    103 
    104     def run_cgi(self):
    105         """Execute a CGI script."""
    106         path = self.path
    107         dir, rest = self.cgi_info
    108 
    109         i = path.find('/', len(dir) + 1)
    110         while i >= 0:
    111             nextdir = path[:i]
    112             nextrest = path[i+1:]
    113 
    114             scriptdir = self.translate_path(nextdir)
    115             if os.path.isdir(scriptdir):
    116                 dir, rest = nextdir, nextrest
    117                 i = path.find('/', len(dir) + 1)
    118             else:
    119                 break
    120 
    121         # find an explicit query string, if present.

    122         i = rest.rfind('?')
    123         if i >= 0:
    124             rest, query = rest[:i], rest[i+1:]
    125         else:
    126             query = ''
    127 
    128         # dissect the part after the directory name into a script name &

    129         # a possible additional path, to be stored in PATH_INFO.

    130         i = rest.find('/')
    131         if i >= 0:
    132             script, rest = rest[:i], rest[i:]
    133         else:
    134             script, rest = rest, ''
    135 
    136         scriptname = dir + '/' + script
    137         scriptfile = self.translate_path(scriptname)
    138         if not os.path.exists(scriptfile):
    139             self.send_error(404, "No such CGI script (%r)" % scriptname)
    140             return
    141         if not os.path.isfile(scriptfile):
    142             self.send_error(403, "CGI script is not a plain file (%r)" %
    143                             scriptname)
    144             return
    145         ispy = self.is_python(scriptname)
    146         if not ispy:
    147             if not (self.have_fork or self.have_popen2 or self.have_popen3):
    148                 self.send_error(403, "CGI script is not a Python script (%r)" %
    149                                 scriptname)
    150                 return
    151             if not self.is_executable(scriptfile):
    152                 self.send_error(403, "CGI script is not executable (%r)" %
    153                                 scriptname)
    154                 return
    155 
    156         # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html

    157         # XXX Much of the following could be prepared ahead of time!

    158         env = copy.deepcopy(os.environ)
    159         env['SERVER_SOFTWARE'] = self.version_string()
    160         env['SERVER_NAME'] = self.server.server_name
    161         env['GATEWAY_INTERFACE'] = 'CGI/1.1'
    162         env['SERVER_PROTOCOL'] = self.protocol_version
    163         env['SERVER_PORT'] = str(self.server.server_port)
    164         env['REQUEST_METHOD'] = self.command
    165         uqrest = urllib.unquote(rest)
    166         env['PATH_INFO'] = uqrest
    167         env['PATH_TRANSLATED'] = self.translate_path(uqrest)
    168         env['SCRIPT_NAME'] = scriptname
    169         if query:
    170             env['QUERY_STRING'] = query
    171         host = self.address_string()
    172         if host != self.client_address[0]:
    173             env['REMOTE_HOST'] = host
    174         env['REMOTE_ADDR'] = self.client_address[0]
    175         authorization = self.headers.getheader("authorization")
    176         if authorization:
    177             authorization = authorization.split()
    178             if len(authorization) == 2:
    179                 import base64, binascii
    180                 env['AUTH_TYPE'] = authorization[0]
    181                 if authorization[0].lower() == "basic":
    182                     try:
    183                         authorization = base64.decodestring(authorization[1])
    184                     except binascii.Error:
    185                         pass
    186                     else:
    187                         authorization = authorization.split(':')
    188                         if len(authorization) == 2:
    189                             env['REMOTE_USER'] = authorization[0]
    190         # XXX REMOTE_IDENT

    191         if self.headers.typeheader is None:
    192             env['CONTENT_TYPE'] = self.headers.type
    193         else:
    194             env['CONTENT_TYPE'] = self.headers.typeheader
    195         length = self.headers.getheader('content-length')
    196         if length:
    197             env['CONTENT_LENGTH'] = length
    198         referer = self.headers.getheader('referer')
    199         if referer:
    200             env['HTTP_REFERER'] = referer
    201         accept = []
    202         for line in self.headers.getallmatchingheaders('accept'):
    203             if line[:1] in "\t\n\r ":
    204                 accept.append(line.strip())
    205             else:
    206                 accept = accept + line[7:].split(',')
    207         env['HTTP_ACCEPT'] = ','.join(accept)
    208         ua = self.headers.getheader('user-agent')
    209         if ua:
    210             env['HTTP_USER_AGENT'] = ua
    211         co = filter(None, self.headers.getheaders('cookie'))
    212         if co:
    213             env['HTTP_COOKIE'] = ', '.join(co)
    214         # XXX Other HTTP_* headers

    215         # Since we're setting the env in the parent, provide empty

    216         # values to override previously set values

    217         for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
    218                   'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
    219             env.setdefault(k, "")
    220 
    221         self.send_response(200, "Script output follows")
    222 
    223         decoded_query = query.replace('+', ' ')
    224 
    225         if self.have_fork:
    226             # Unix -- fork as we should

    227             args = [script]
    228             if '=' not in decoded_query:
    229                 args.append(decoded_query)
    230             nobody = nobody_uid()
    231             self.wfile.flush() # Always flush before forking

    232             pid = os.fork()
    233             if pid != 0:
    234                 # Parent

    235                 pid, sts = os.waitpid(pid, 0)
    236                 # throw away additional data [see bug #427345]

    237                 while select.select([self.rfile], [], [], 0)[0]:
    238                     if not self.rfile.read(1):
    239                         break
    240                 if sts:
    241                     self.log_error("CGI script exit status %#x", sts)
    242                 return
    243             # Child

    244             try:
    245                 try:
    246                     os.setuid(nobody)
    247                 except os.error:
    248                     pass
    249                 os.dup2(self.rfile.fileno(), 0)
    250                 os.dup2(self.wfile.fileno(), 1)
    251                 os.execve(scriptfile, args, env)
    252             except:
    253                 self.server.handle_error(self.request, self.client_address)
    254                 os._exit(127)
    255 
    256         else:
    257             # Non Unix - use subprocess

    258             import subprocess
    259             cmdline = [scriptfile]
    260             if self.is_python(scriptfile):
    261                 interp = sys.executable
    262                 if interp.lower().endswith("w.exe"):
    263                     # On Windows, use python.exe, not pythonw.exe

    264                     interp = interp[:-5] + interp[-4:]
    265                 cmdline = [interp, '-u'] + cmdline
    266             if '=' not in query:
    267                 cmdline.append(query)
    268 
    269             self.log_message("command: %s", subprocess.list2cmdline(cmdline))
    270             try:
    271                 nbytes = int(length)
    272             except (TypeError, ValueError):
    273                 nbytes = 0
    274             p = subprocess.Popen(cmdline,
    275                                  stdin = subprocess.PIPE,
    276                                  stdout = subprocess.PIPE,
    277                                  stderr = subprocess.PIPE,
    278                                  env = env
    279                                 )
    280             if self.command.lower() == "post" and nbytes > 0:
    281                 data = self.rfile.read(nbytes)
    282             else:
    283                 data = None
    284             # throw away additional data [see bug #427345]

    285             while select.select([self.rfile._sock], [], [], 0)[0]:
    286                 if not self.rfile._sock.recv(1):
    287                     break
    288             stdout, stderr = p.communicate(data)
    289             self.wfile.write(stdout)
    290             if stderr:
    291                 self.log_error('%s', stderr)
    292             p.stderr.close()
    293             p.stdout.close()
    294             status = p.returncode
    295             if status:
    296                 self.log_error("CGI script exit status %#x", status)
    297             else:
    298                 self.log_message("CGI script exited OK")
    299 
    300 
    301 # TODO(gregory.p.smith): Move this into an appropriate library.

    302 def _url_collapse_path_split(path):
    303     """
    304     Given a URL path, remove extra '/'s and '.' path elements and collapse
    305     any '..' references.
    306 
    307     Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    308 
    309     Returns: A tuple of (head, tail) where tail is everything after the final /
    310     and head is everything before it.  Head will always start with a '/' and,
    311     if it contains anything else, never have a trailing '/'.
    312 
    313     Raises: IndexError if too many '..' occur within the path.
    314     """
    315     # Similar to os.path.split(os.path.normpath(path)) but specific to URL

    316     # path semantics rather than local operating system semantics.

    317     path_parts = []
    318     for part in path.split('/'):
    319         if part == '.':
    320             path_parts.append('')
    321         else:
    322             path_parts.append(part)
    323     # Filter out blank non trailing parts before consuming the '..'.

    324     path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
    325     if path_parts:
    326         tail_part = path_parts.pop()
    327     else:
    328         tail_part = ''
    329     head_parts = []
    330     for part in path_parts:
    331         if part == '..':
    332             head_parts.pop()
    333         else:
    334             head_parts.append(part)
    335     if tail_part and tail_part == '..':
    336         head_parts.pop()
    337         tail_part = ''
    338     return ('/' + '/'.join(head_parts), tail_part)
    339 
    340 
    341 nobody = None
    342 
    343 def nobody_uid():
    344     """Internal routine to get nobody's uid"""
    345     global nobody
    346     if nobody:
    347         return nobody
    348     try:
    349         import pwd
    350     except ImportError:
    351         return -1
    352     try:
    353         nobody = pwd.getpwnam('nobody')[2]
    354     except KeyError:
    355         nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
    356     return nobody
    357 
    358 
    359 def executable(path):
    360     """Test for executable file."""
    361     try:
    362         st = os.stat(path)
    363     except os.error:
    364         return False
    365     return st.st_mode & 0111 != 0
    366 
    367 
    368 def test(HandlerClass = CGIHTTPRequestHandler,
    369          ServerClass = BaseHTTPServer.HTTPServer):
    370     SimpleHTTPServer.test(HandlerClass, ServerClass)
    371 
    372 
    373 if __name__ == '__main__':
    374     test()
    375