Home | History | Annotate | Download | only in python2.7
      1 """CGI-savvy HTTP Server.
      2 
      3 This module builds on SimpleHTTPServer by implementing GET and POST
      4 requests to cgi-bin scripts.
      5 
      6 If the os.fork() function is not present (e.g. on Windows),
      7 os.popen2() is used as a fallback, with slightly altered semantics; if
      8 that function is not present either (e.g. on Macintosh), only Python
      9 scripts are supported, and they are executed by the current process.
     10 
     11 In all cases, the implementation is intentionally naive -- all
     12 requests are executed synchronously.
     13 
     14 SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
     15 -- it may execute arbitrary Python code or external programs.
     16 
     17 Note that status code 200 is sent prior to execution of a CGI script, so
     18 scripts cannot send other status codes such as 302 (redirect).
     19 """
     20 
     21 
# Version string of this module.
__version__ = "0.4"

# Names exported by "from <this module> import *".
__all__ = ["CGIHTTPRequestHandler"]
     25 
     26 import os
     27 import sys
     28 import urllib
     29 import BaseHTTPServer
     30 import SimpleHTTPServer
     31 import select
     32 import copy
     33 
     34 
     35 class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
     36 
     37     """Complete HTTP server with GET, HEAD and POST commands.
     38 
     39     GET and HEAD also support running CGI scripts.
     40 
     41     The POST command is *only* implemented for CGI scripts.
     42 
     43     """
     44 
     45     # Determine platform specifics
     46     have_fork = hasattr(os, 'fork')
     47     have_popen2 = hasattr(os, 'popen2')
     48     have_popen3 = hasattr(os, 'popen3')
     49 
     50     # Make rfile unbuffered -- we need to read one line and then pass
     51     # the rest to a subprocess, so we can't use buffered input.
     52     rbufsize = 0
     53 
     54     def do_POST(self):
     55         """Serve a POST request.
     56 
     57         This is only implemented for CGI scripts.
     58 
     59         """
     60 
     61         if self.is_cgi():
     62             self.run_cgi()
     63         else:
     64             self.send_error(501, "Can only POST to CGI scripts")
     65 
     66     def send_head(self):
     67         """Version of send_head that support CGI scripts"""
     68         if self.is_cgi():
     69             return self.run_cgi()
     70         else:
     71             return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
     72 
     73     def is_cgi(self):
     74         """Test whether self.path corresponds to a CGI script.
     75 
     76         Returns True and updates the cgi_info attribute to the tuple
     77         (dir, rest) if self.path requires running a CGI script.
     78         Returns False otherwise.
     79 
     80         If any exception is raised, the caller should assume that
     81         self.path was rejected as invalid and act accordingly.
     82 
     83         The default implementation tests whether the normalized url
     84         path begins with one of the strings in self.cgi_directories
     85         (and the next character is a '/' or the end of the string).
     86         """
     87         collapsed_path = _url_collapse_path(self.path)
     88         dir_sep = collapsed_path.find('/', 1)
     89         head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
     90         if head in self.cgi_directories:
     91             self.cgi_info = head, tail
     92             return True
     93         return False
     94 
     95     cgi_directories = ['/cgi-bin', '/htbin']
     96 
     97     def is_executable(self, path):
     98         """Test whether argument path is an executable file."""
     99         return executable(path)
    100 
    101     def is_python(self, path):
    102         """Test whether argument path is a Python script."""
    103         head, tail = os.path.splitext(path)
    104         return tail.lower() in (".py", ".pyw")
    105 
    106     def run_cgi(self):
    107         """Execute a CGI script."""
    108         path = self.path
    109         dir, rest = self.cgi_info
    110 
    111         i = path.find('/', len(dir) + 1)
    112         while i >= 0:
    113             nextdir = path[:i]
    114             nextrest = path[i+1:]
    115 
    116             scriptdir = self.translate_path(nextdir)
    117             if os.path.isdir(scriptdir):
    118                 dir, rest = nextdir, nextrest
    119                 i = path.find('/', len(dir) + 1)
    120             else:
    121                 break
    122 
    123         # find an explicit query string, if present.
    124         i = rest.rfind('?')
    125         if i >= 0:
    126             rest, query = rest[:i], rest[i+1:]
    127         else:
    128             query = ''
    129 
    130         # dissect the part after the directory name into a script name &
    131         # a possible additional path, to be stored in PATH_INFO.
    132         i = rest.find('/')
    133         if i >= 0:
    134             script, rest = rest[:i], rest[i:]
    135         else:
    136             script, rest = rest, ''
    137 
    138         scriptname = dir + '/' + script
    139         scriptfile = self.translate_path(scriptname)
    140         if not os.path.exists(scriptfile):
    141             self.send_error(404, "No such CGI script (%r)" % scriptname)
    142             return
    143         if not os.path.isfile(scriptfile):
    144             self.send_error(403, "CGI script is not a plain file (%r)" %
    145                             scriptname)
    146             return
    147         ispy = self.is_python(scriptname)
    148         if not ispy:
    149             if not (self.have_fork or self.have_popen2 or self.have_popen3):
    150                 self.send_error(403, "CGI script is not a Python script (%r)" %
    151                                 scriptname)
    152                 return
    153             if not self.is_executable(scriptfile):
    154                 self.send_error(403, "CGI script is not executable (%r)" %
    155                                 scriptname)
    156                 return
    157 
    158         # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
    159         # XXX Much of the following could be prepared ahead of time!
    160         env = copy.deepcopy(os.environ)
    161         env['SERVER_SOFTWARE'] = self.version_string()
    162         env['SERVER_NAME'] = self.server.server_name
    163         env['GATEWAY_INTERFACE'] = 'CGI/1.1'
    164         env['SERVER_PROTOCOL'] = self.protocol_version
    165         env['SERVER_PORT'] = str(self.server.server_port)
    166         env['REQUEST_METHOD'] = self.command
    167         uqrest = urllib.unquote(rest)
    168         env['PATH_INFO'] = uqrest
    169         env['PATH_TRANSLATED'] = self.translate_path(uqrest)
    170         env['SCRIPT_NAME'] = scriptname
    171         if query:
    172             env['QUERY_STRING'] = query
    173         host = self.address_string()
    174         if host != self.client_address[0]:
    175             env['REMOTE_HOST'] = host
    176         env['REMOTE_ADDR'] = self.client_address[0]
    177         authorization = self.headers.getheader("authorization")
    178         if authorization:
    179             authorization = authorization.split()
    180             if len(authorization) == 2:
    181                 import base64, binascii
    182                 env['AUTH_TYPE'] = authorization[0]
    183                 if authorization[0].lower() == "basic":
    184                     try:
    185                         authorization = base64.decodestring(authorization[1])
    186                     except binascii.Error:
    187                         pass
    188                     else:
    189                         authorization = authorization.split(':')
    190                         if len(authorization) == 2:
    191                             env['REMOTE_USER'] = authorization[0]
    192         # XXX REMOTE_IDENT
    193         if self.headers.typeheader is None:
    194             env['CONTENT_TYPE'] = self.headers.type
    195         else:
    196             env['CONTENT_TYPE'] = self.headers.typeheader
    197         length = self.headers.getheader('content-length')
    198         if length:
    199             env['CONTENT_LENGTH'] = length
    200         referer = self.headers.getheader('referer')
    201         if referer:
    202             env['HTTP_REFERER'] = referer
    203         accept = []
    204         for line in self.headers.getallmatchingheaders('accept'):
    205             if line[:1] in "\t\n\r ":
    206                 accept.append(line.strip())
    207             else:
    208                 accept = accept + line[7:].split(',')
    209         env['HTTP_ACCEPT'] = ','.join(accept)
    210         ua = self.headers.getheader('user-agent')
    211         if ua:
    212             env['HTTP_USER_AGENT'] = ua
    213         co = filter(None, self.headers.getheaders('cookie'))
    214         if co:
    215             env['HTTP_COOKIE'] = ', '.join(co)
    216         # XXX Other HTTP_* headers
    217         # Since we're setting the env in the parent, provide empty
    218         # values to override previously set values
    219         for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
    220                   'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
    221             env.setdefault(k, "")
    222 
    223         self.send_response(200, "Script output follows")
    224 
    225         decoded_query = query.replace('+', ' ')
    226 
    227         if self.have_fork:
    228             # Unix -- fork as we should
    229             args = [script]
    230             if '=' not in decoded_query:
    231                 args.append(decoded_query)
    232             nobody = nobody_uid()
    233             self.wfile.flush() # Always flush before forking
    234             pid = os.fork()
    235             if pid != 0:
    236                 # Parent
    237                 pid, sts = os.waitpid(pid, 0)
    238                 # throw away additional data [see bug #427345]
    239                 while select.select([self.rfile], [], [], 0)[0]:
    240                     if not self.rfile.read(1):
    241                         break
    242                 if sts:
    243                     self.log_error("CGI script exit status %#x", sts)
    244                 return
    245             # Child
    246             try:
    247                 try:
    248                     os.setuid(nobody)
    249                 except os.error:
    250                     pass
    251                 os.dup2(self.rfile.fileno(), 0)
    252                 os.dup2(self.wfile.fileno(), 1)
    253                 os.execve(scriptfile, args, env)
    254             except:
    255                 self.server.handle_error(self.request, self.client_address)
    256                 os._exit(127)
    257 
    258         else:
    259             # Non Unix - use subprocess
    260             import subprocess
    261             cmdline = [scriptfile]
    262             if self.is_python(scriptfile):
    263                 interp = sys.executable
    264                 if interp.lower().endswith("w.exe"):
    265                     # On Windows, use python.exe, not pythonw.exe
    266                     interp = interp[:-5] + interp[-4:]
    267                 cmdline = [interp, '-u'] + cmdline
    268             if '=' not in query:
    269                 cmdline.append(query)
    270 
    271             self.log_message("command: %s", subprocess.list2cmdline(cmdline))
    272             try:
    273                 nbytes = int(length)
    274             except (TypeError, ValueError):
    275                 nbytes = 0
    276             p = subprocess.Popen(cmdline,
    277                                  stdin = subprocess.PIPE,
    278                                  stdout = subprocess.PIPE,
    279                                  stderr = subprocess.PIPE,
    280                                  env = env
    281                                 )
    282             if self.command.lower() == "post" and nbytes > 0:
    283                 data = self.rfile.read(nbytes)
    284             else:
    285                 data = None
    286             # throw away additional data [see bug #427345]
    287             while select.select([self.rfile._sock], [], [], 0)[0]:
    288                 if not self.rfile._sock.recv(1):
    289                     break
    290             stdout, stderr = p.communicate(data)
    291             self.wfile.write(stdout)
    292             if stderr:
    293                 self.log_error('%s', stderr)
    294             p.stderr.close()
    295             p.stdout.close()
    296             status = p.returncode
    297             if status:
    298                 self.log_error("CGI script exit status %#x", status)
    299             else:
    300                 self.log_message("CGI script exited OK")
    301 
    302 
    303 def _url_collapse_path(path):
    304     """
    305     Given a URL path, remove extra '/'s and '.' path elements and collapse
    306     any '..' references and returns a colllapsed path.
    307 
    308     Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    309     The utility of this function is limited to is_cgi method and helps
    310     preventing some security attacks.
    311 
    312     Returns: A tuple of (head, tail) where tail is everything after the final /
    313     and head is everything before it.  Head will always start with a '/' and,
    314     if it contains anything else, never have a trailing '/'.
    315 
    316     Raises: IndexError if too many '..' occur within the path.
    317 
    318     """
    319     # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    320     # path semantics rather than local operating system semantics.
    321     path_parts = path.split('/')
    322     head_parts = []
    323     for part in path_parts[:-1]:
    324         if part == '..':
    325             head_parts.pop() # IndexError if more '..' than prior parts
    326         elif part and part != '.':
    327             head_parts.append( part )
    328     if path_parts:
    329         tail_part = path_parts.pop()
    330         if tail_part:
    331             if tail_part == '..':
    332                 head_parts.pop()
    333                 tail_part = ''
    334             elif tail_part == '.':
    335                 tail_part = ''
    336     else:
    337         tail_part = ''
    338 
    339     splitpath = ('/' + '/'.join(head_parts), tail_part)
    340     collapsed_path = "/".join(splitpath)
    341 
    342     return collapsed_path
    343 
    344 
    345 nobody = None
    346 
    347 def nobody_uid():
    348     """Internal routine to get nobody's uid"""
    349     global nobody
    350     if nobody:
    351         return nobody
    352     try:
    353         import pwd
    354     except ImportError:
    355         return -1
    356     try:
    357         nobody = pwd.getpwnam('nobody')[2]
    358     except KeyError:
    359         nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
    360     return nobody
    361 
    362 
    363 def executable(path):
    364     """Test for executable file."""
    365     try:
    366         st = os.stat(path)
    367     except os.error:
    368         return False
    369     return st.st_mode & 0111 != 0
    370 
    371 
    372 def test(HandlerClass = CGIHTTPRequestHandler,
    373          ServerClass = BaseHTTPServer.HTTPServer):
    374     SimpleHTTPServer.test(HandlerClass, ServerClass)
    375 
    376 
# When executed as a script, start the test server.
if __name__ == '__main__':
    test()
    379