Home | History | Annotate | Download | only in Lib
      1 """CGI-savvy HTTP Server.
      2 
      3 This module builds on SimpleHTTPServer by implementing GET and POST
      4 requests to cgi-bin scripts.
      5 
      6 If the os.fork() function is not present (e.g. on Windows), the
      7 script is run through subprocess.Popen() instead, with slightly
      8 altered semantics: the script's output is collected and sent to the
      9 client only after the script has exited.
     10 
     11 In all cases, the implementation is intentionally naive -- all
     12 requests are executed synchronously.
     13 
     14 SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
     15 -- it may execute arbitrary Python code or external programs.
     16 
     17 Note that status code 200 is sent prior to execution of a CGI script, so
     18 scripts cannot send other status codes such as 302 (redirect).
     19 """
     20 
     21 
# Version string of this module.
__version__ = "0.4"

# Public API: only the request handler class is exported by
# "from <module> import *".
__all__ = ["CGIHTTPRequestHandler"]
     25 
     26 import os
     27 import sys
     28 import urllib
     29 import BaseHTTPServer
     30 import SimpleHTTPServer
     31 import select
     32 import copy
     33 
     34 
     35 class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
     36 
     37     """Complete HTTP server with GET, HEAD and POST commands.
     38 
     39     GET and HEAD also support running CGI scripts.
     40 
     41     The POST command is *only* implemented for CGI scripts.
     42 
     43     """
     44 
     45     # Determine platform specifics
     46     have_fork = hasattr(os, 'fork')
     47     have_popen2 = hasattr(os, 'popen2')
     48     have_popen3 = hasattr(os, 'popen3')
     49 
     50     # Make rfile unbuffered -- we need to read one line and then pass
     51     # the rest to a subprocess, so we can't use buffered input.
     52     rbufsize = 0
     53 
     54     def do_POST(self):
     55         """Serve a POST request.
     56 
     57         This is only implemented for CGI scripts.
     58 
     59         """
     60 
     61         if self.is_cgi():
     62             self.run_cgi()
     63         else:
     64             self.send_error(501, "Can only POST to CGI scripts")
     65 
     66     def send_head(self):
     67         """Version of send_head that support CGI scripts"""
     68         if self.is_cgi():
     69             return self.run_cgi()
     70         else:
     71             return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
     72 
     73     def is_cgi(self):
     74         """Test whether self.path corresponds to a CGI script.
     75 
     76         Returns True and updates the cgi_info attribute to the tuple
     77         (dir, rest) if self.path requires running a CGI script.
     78         Returns False otherwise.
     79 
     80         If any exception is raised, the caller should assume that
     81         self.path was rejected as invalid and act accordingly.
     82 
     83         The default implementation tests whether the normalized url
     84         path begins with one of the strings in self.cgi_directories
     85         (and the next character is a '/' or the end of the string).
     86         """
     87         collapsed_path = _url_collapse_path(self.path)
     88         dir_sep = collapsed_path.find('/', 1)
     89         head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
     90         if head in self.cgi_directories:
     91             self.cgi_info = head, tail
     92             return True
     93         return False
     94 
     95     cgi_directories = ['/cgi-bin', '/htbin']
     96 
     97     def is_executable(self, path):
     98         """Test whether argument path is an executable file."""
     99         return executable(path)
    100 
    101     def is_python(self, path):
    102         """Test whether argument path is a Python script."""
    103         head, tail = os.path.splitext(path)
    104         return tail.lower() in (".py", ".pyw")
    105 
    106     def run_cgi(self):
    107         """Execute a CGI script."""
    108         dir, rest = self.cgi_info
    109         path = dir + '/' + rest
    110         i = path.find('/', len(dir)+1)
    111         while i >= 0:
    112             nextdir = path[:i]
    113             nextrest = path[i+1:]
    114 
    115             scriptdir = self.translate_path(nextdir)
    116             if os.path.isdir(scriptdir):
    117                 dir, rest = nextdir, nextrest
    118                 i = path.find('/', len(dir)+1)
    119             else:
    120                 break
    121 
    122         # find an explicit query string, if present.
    123         rest, _, query = rest.partition('?')
    124 
    125         # dissect the part after the directory name into a script name &
    126         # a possible additional path, to be stored in PATH_INFO.
    127         i = rest.find('/')
    128         if i >= 0:
    129             script, rest = rest[:i], rest[i:]
    130         else:
    131             script, rest = rest, ''
    132 
    133         scriptname = dir + '/' + script
    134         scriptfile = self.translate_path(scriptname)
    135         if not os.path.exists(scriptfile):
    136             self.send_error(404, "No such CGI script (%r)" % scriptname)
    137             return
    138         if not os.path.isfile(scriptfile):
    139             self.send_error(403, "CGI script is not a plain file (%r)" %
    140                             scriptname)
    141             return
    142         ispy = self.is_python(scriptname)
    143         if not ispy:
    144             if not (self.have_fork or self.have_popen2 or self.have_popen3):
    145                 self.send_error(403, "CGI script is not a Python script (%r)" %
    146                                 scriptname)
    147                 return
    148             if not self.is_executable(scriptfile):
    149                 self.send_error(403, "CGI script is not executable (%r)" %
    150                                 scriptname)
    151                 return
    152 
    153         # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
    154         # XXX Much of the following could be prepared ahead of time!
    155         env = copy.deepcopy(os.environ)
    156         env['SERVER_SOFTWARE'] = self.version_string()
    157         env['SERVER_NAME'] = self.server.server_name
    158         env['GATEWAY_INTERFACE'] = 'CGI/1.1'
    159         env['SERVER_PROTOCOL'] = self.protocol_version
    160         env['SERVER_PORT'] = str(self.server.server_port)
    161         env['REQUEST_METHOD'] = self.command
    162         uqrest = urllib.unquote(rest)
    163         env['PATH_INFO'] = uqrest
    164         env['PATH_TRANSLATED'] = self.translate_path(uqrest)
    165         env['SCRIPT_NAME'] = scriptname
    166         if query:
    167             env['QUERY_STRING'] = query
    168         host = self.address_string()
    169         if host != self.client_address[0]:
    170             env['REMOTE_HOST'] = host
    171         env['REMOTE_ADDR'] = self.client_address[0]
    172         authorization = self.headers.getheader("authorization")
    173         if authorization:
    174             authorization = authorization.split()
    175             if len(authorization) == 2:
    176                 import base64, binascii
    177                 env['AUTH_TYPE'] = authorization[0]
    178                 if authorization[0].lower() == "basic":
    179                     try:
    180                         authorization = base64.decodestring(authorization[1])
    181                     except binascii.Error:
    182                         pass
    183                     else:
    184                         authorization = authorization.split(':')
    185                         if len(authorization) == 2:
    186                             env['REMOTE_USER'] = authorization[0]
    187         # XXX REMOTE_IDENT
    188         if self.headers.typeheader is None:
    189             env['CONTENT_TYPE'] = self.headers.type
    190         else:
    191             env['CONTENT_TYPE'] = self.headers.typeheader
    192         length = self.headers.getheader('content-length')
    193         if length:
    194             env['CONTENT_LENGTH'] = length
    195         referer = self.headers.getheader('referer')
    196         if referer:
    197             env['HTTP_REFERER'] = referer
    198         accept = []
    199         for line in self.headers.getallmatchingheaders('accept'):
    200             if line[:1] in "\t\n\r ":
    201                 accept.append(line.strip())
    202             else:
    203                 accept = accept + line[7:].split(',')
    204         env['HTTP_ACCEPT'] = ','.join(accept)
    205         ua = self.headers.getheader('user-agent')
    206         if ua:
    207             env['HTTP_USER_AGENT'] = ua
    208         co = filter(None, self.headers.getheaders('cookie'))
    209         if co:
    210             env['HTTP_COOKIE'] = ', '.join(co)
    211         # XXX Other HTTP_* headers
    212         # Since we're setting the env in the parent, provide empty
    213         # values to override previously set values
    214         for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
    215                   'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
    216             env.setdefault(k, "")
    217 
    218         self.send_response(200, "Script output follows")
    219 
    220         decoded_query = query.replace('+', ' ')
    221 
    222         if self.have_fork:
    223             # Unix -- fork as we should
    224             args = [script]
    225             if '=' not in decoded_query:
    226                 args.append(decoded_query)
    227             nobody = nobody_uid()
    228             self.wfile.flush() # Always flush before forking
    229             pid = os.fork()
    230             if pid != 0:
    231                 # Parent
    232                 pid, sts = os.waitpid(pid, 0)
    233                 # throw away additional data [see bug #427345]
    234                 while select.select([self.rfile], [], [], 0)[0]:
    235                     if not self.rfile.read(1):
    236                         break
    237                 if sts:
    238                     self.log_error("CGI script exit status %#x", sts)
    239                 return
    240             # Child
    241             try:
    242                 try:
    243                     os.setuid(nobody)
    244                 except os.error:
    245                     pass
    246                 os.dup2(self.rfile.fileno(), 0)
    247                 os.dup2(self.wfile.fileno(), 1)
    248                 os.execve(scriptfile, args, env)
    249             except:
    250                 self.server.handle_error(self.request, self.client_address)
    251                 os._exit(127)
    252 
    253         else:
    254             # Non Unix - use subprocess
    255             import subprocess
    256             cmdline = [scriptfile]
    257             if self.is_python(scriptfile):
    258                 interp = sys.executable
    259                 if interp.lower().endswith("w.exe"):
    260                     # On Windows, use python.exe, not pythonw.exe
    261                     interp = interp[:-5] + interp[-4:]
    262                 cmdline = [interp, '-u'] + cmdline
    263             if '=' not in query:
    264                 cmdline.append(query)
    265 
    266             self.log_message("command: %s", subprocess.list2cmdline(cmdline))
    267             try:
    268                 nbytes = int(length)
    269             except (TypeError, ValueError):
    270                 nbytes = 0
    271             p = subprocess.Popen(cmdline,
    272                                  stdin = subprocess.PIPE,
    273                                  stdout = subprocess.PIPE,
    274                                  stderr = subprocess.PIPE,
    275                                  env = env
    276                                 )
    277             if self.command.lower() == "post" and nbytes > 0:
    278                 data = self.rfile.read(nbytes)
    279             else:
    280                 data = None
    281             # throw away additional data [see bug #427345]
    282             while select.select([self.rfile._sock], [], [], 0)[0]:
    283                 if not self.rfile._sock.recv(1):
    284                     break
    285             stdout, stderr = p.communicate(data)
    286             self.wfile.write(stdout)
    287             if stderr:
    288                 self.log_error('%s', stderr)
    289             p.stderr.close()
    290             p.stdout.close()
    291             status = p.returncode
    292             if status:
    293                 self.log_error("CGI script exit status %#x", status)
    294             else:
    295                 self.log_message("CGI script exited OK")
    296 
    297 
    298 def _url_collapse_path(path):
    299     """
    300     Given a URL path, remove extra '/'s and '.' path elements and collapse
    301     any '..' references and returns a colllapsed path.
    302 
    303     Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    304     The utility of this function is limited to is_cgi method and helps
    305     preventing some security attacks.
    306 
    307     Returns: The reconstituted URL, which will always start with a '/'.
    308 
    309     Raises: IndexError if too many '..' occur within the path.
    310 
    311     """
    312     # Query component should not be involved.
    313     path, _, query = path.partition('?')
    314     path = urllib.unquote(path)
    315 
    316     # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    317     # path semantics rather than local operating system semantics.
    318     path_parts = path.split('/')
    319     head_parts = []
    320     for part in path_parts[:-1]:
    321         if part == '..':
    322             head_parts.pop() # IndexError if more '..' than prior parts
    323         elif part and part != '.':
    324             head_parts.append( part )
    325     if path_parts:
    326         tail_part = path_parts.pop()
    327         if tail_part:
    328             if tail_part == '..':
    329                 head_parts.pop()
    330                 tail_part = ''
    331             elif tail_part == '.':
    332                 tail_part = ''
    333     else:
    334         tail_part = ''
    335 
    336     if query:
    337         tail_part = '?'.join((tail_part, query))
    338 
    339     splitpath = ('/' + '/'.join(head_parts), tail_part)
    340     collapsed_path = "/".join(splitpath)
    341 
    342     return collapsed_path
    343 
    344 
    345 nobody = None
    346 
    347 def nobody_uid():
    348     """Internal routine to get nobody's uid"""
    349     global nobody
    350     if nobody:
    351         return nobody
    352     try:
    353         import pwd
    354     except ImportError:
    355         return -1
    356     try:
    357         nobody = pwd.getpwnam('nobody')[2]
    358     except KeyError:
    359         nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
    360     return nobody
    361 
    362 
    363 def executable(path):
    364     """Test for executable file."""
    365     try:
    366         st = os.stat(path)
    367     except os.error:
    368         return False
    369     return st.st_mode & 0111 != 0
    370 
    371 
    372 def test(HandlerClass = CGIHTTPRequestHandler,
    373          ServerClass = BaseHTTPServer.HTTPServer):
    374     SimpleHTTPServer.test(HandlerClass, ServerClass)
    375 
    376 
# Running this file as a script calls test() with its default handler
# and server classes.
if __name__ == '__main__':
    test()
    379