1 """CGI-savvy HTTP Server. 2 3 This module builds on SimpleHTTPServer by implementing GET and POST 4 requests to cgi-bin scripts. 5 6 If the os.fork() function is not present (e.g. on Windows), 7 os.popen2() is used as a fallback, with slightly altered semantics; if 8 that function is not present either (e.g. on Macintosh), only Python 9 scripts are supported, and they are executed by the current process. 10 11 In all cases, the implementation is intentionally naive -- all 12 requests are executed sychronously. 13 14 SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 15 -- it may execute arbitrary Python code or external programs. 16 17 Note that status code 200 is sent prior to execution of a CGI script, so 18 scripts cannot send other status codes such as 302 (redirect). 19 """ 20 21 22 __version__ = "0.4" 23 24 __all__ = ["CGIHTTPRequestHandler"] 25 26 import os 27 import sys 28 import urllib 29 import BaseHTTPServer 30 import SimpleHTTPServer 31 import select 32 import copy 33 34 35 class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): 36 37 """Complete HTTP server with GET, HEAD and POST commands. 38 39 GET and HEAD also support running CGI scripts. 40 41 The POST command is *only* implemented for CGI scripts. 42 43 """ 44 45 # Determine platform specifics 46 have_fork = hasattr(os, 'fork') 47 have_popen2 = hasattr(os, 'popen2') 48 have_popen3 = hasattr(os, 'popen3') 49 50 # Make rfile unbuffered -- we need to read one line and then pass 51 # the rest to a subprocess, so we can't use buffered input. 52 rbufsize = 0 53 54 def do_POST(self): 55 """Serve a POST request. 56 57 This is only implemented for CGI scripts. 58 59 """ 60 61 if self.is_cgi(): 62 self.run_cgi() 63 else: 64 self.send_error(501, "Can only POST to CGI scripts") 65 66 def send_head(self): 67 """Version of send_head that support CGI scripts""" 68 if self.is_cgi(): 69 return self.run_cgi() 70 else: 71 return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self) 72 73 def is_cgi(self): 74 """Test whether self.path corresponds to a CGI script. 75 76 Returns True and updates the cgi_info attribute to the tuple 77 (dir, rest) if self.path requires running a CGI script. 78 Returns False otherwise. 79 80 If any exception is raised, the caller should assume that 81 self.path was rejected as invalid and act accordingly. 82 83 The default implementation tests whether the normalized url 84 path begins with one of the strings in self.cgi_directories 85 (and the next character is a '/' or the end of the string). 86 """ 87 collapsed_path = _url_collapse_path(self.path) 88 dir_sep = collapsed_path.find('/', 1) 89 head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] 90 if head in self.cgi_directories: 91 self.cgi_info = head, tail 92 return True 93 return False 94 95 cgi_directories = ['/cgi-bin', '/htbin'] 96 97 def is_executable(self, path): 98 """Test whether argument path is an executable file.""" 99 return executable(path) 100 101 def is_python(self, path): 102 """Test whether argument path is a Python script.""" 103 head, tail = os.path.splitext(path) 104 return tail.lower() in (".py", ".pyw") 105 106 def run_cgi(self): 107 """Execute a CGI script.""" 108 path = self.path 109 dir, rest = self.cgi_info 110 111 i = path.find('/', len(dir) + 1) 112 while i >= 0: 113 nextdir = path[:i] 114 nextrest = path[i+1:] 115 116 scriptdir = self.translate_path(nextdir) 117 if os.path.isdir(scriptdir): 118 dir, rest = nextdir, nextrest 119 i = path.find('/', len(dir) + 1) 120 else: 121 break 122 123 # find an explicit query string, if present. 124 i = rest.rfind('?') 125 if i >= 0: 126 rest, query = rest[:i], rest[i+1:] 127 else: 128 query = '' 129 130 # dissect the part after the directory name into a script name & 131 # a possible additional path, to be stored in PATH_INFO. 132 i = rest.find('/') 133 if i >= 0: 134 script, rest = rest[:i], rest[i:] 135 else: 136 script, rest = rest, '' 137 138 scriptname = dir + '/' + script 139 scriptfile = self.translate_path(scriptname) 140 if not os.path.exists(scriptfile): 141 self.send_error(404, "No such CGI script (%r)" % scriptname) 142 return 143 if not os.path.isfile(scriptfile): 144 self.send_error(403, "CGI script is not a plain file (%r)" % 145 scriptname) 146 return 147 ispy = self.is_python(scriptname) 148 if not ispy: 149 if not (self.have_fork or self.have_popen2 or self.have_popen3): 150 self.send_error(403, "CGI script is not a Python script (%r)" % 151 scriptname) 152 return 153 if not self.is_executable(scriptfile): 154 self.send_error(403, "CGI script is not executable (%r)" % 155 scriptname) 156 return 157 158 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 159 # XXX Much of the following could be prepared ahead of time! 160 env = copy.deepcopy(os.environ) 161 env['SERVER_SOFTWARE'] = self.version_string() 162 env['SERVER_NAME'] = self.server.server_name 163 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 164 env['SERVER_PROTOCOL'] = self.protocol_version 165 env['SERVER_PORT'] = str(self.server.server_port) 166 env['REQUEST_METHOD'] = self.command 167 uqrest = urllib.unquote(rest) 168 env['PATH_INFO'] = uqrest 169 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 170 env['SCRIPT_NAME'] = scriptname 171 if query: 172 env['QUERY_STRING'] = query 173 host = self.address_string() 174 if host != self.client_address[0]: 175 env['REMOTE_HOST'] = host 176 env['REMOTE_ADDR'] = self.client_address[0] 177 authorization = self.headers.getheader("authorization") 178 if authorization: 179 authorization = authorization.split() 180 if len(authorization) == 2: 181 import base64, binascii 182 env['AUTH_TYPE'] = authorization[0] 183 if authorization[0].lower() == "basic": 184 try: 185 authorization = base64.decodestring(authorization[1]) 186 except binascii.Error: 187 pass 188 else: 189 authorization = authorization.split(':') 190 if len(authorization) == 2: 191 env['REMOTE_USER'] = authorization[0] 192 # XXX REMOTE_IDENT 193 if self.headers.typeheader is None: 194 env['CONTENT_TYPE'] = self.headers.type 195 else: 196 env['CONTENT_TYPE'] = self.headers.typeheader 197 length = self.headers.getheader('content-length') 198 if length: 199 env['CONTENT_LENGTH'] = length 200 referer = self.headers.getheader('referer') 201 if referer: 202 env['HTTP_REFERER'] = referer 203 accept = [] 204 for line in self.headers.getallmatchingheaders('accept'): 205 if line[:1] in "\t\n\r ": 206 accept.append(line.strip()) 207 else: 208 accept = accept + line[7:].split(',') 209 env['HTTP_ACCEPT'] = ','.join(accept) 210 ua = self.headers.getheader('user-agent') 211 if ua: 212 env['HTTP_USER_AGENT'] = ua 213 co = filter(None, self.headers.getheaders('cookie')) 214 if co: 215 env['HTTP_COOKIE'] = ', '.join(co) 216 # XXX Other HTTP_* headers 217 # Since we're setting the env in the parent, provide empty 218 # values to override previously set values 219 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 220 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 221 env.setdefault(k, "") 222 223 self.send_response(200, "Script output follows") 224 225 decoded_query = query.replace('+', ' ') 226 227 if self.have_fork: 228 # Unix -- fork as we should 229 args = [script] 230 if '=' not in decoded_query: 231 args.append(decoded_query) 232 nobody = nobody_uid() 233 self.wfile.flush() # Always flush before forking 234 pid = os.fork() 235 if pid != 0: 236 # Parent 237 pid, sts = os.waitpid(pid, 0) 238 # throw away additional data [see bug #427345] 239 while select.select([self.rfile], [], [], 0)[0]: 240 if not self.rfile.read(1): 241 break 242 if sts: 243 self.log_error("CGI script exit status %#x", sts) 244 return 245 # Child 246 try: 247 try: 248 os.setuid(nobody) 249 except os.error: 250 pass 251 os.dup2(self.rfile.fileno(), 0) 252 os.dup2(self.wfile.fileno(), 1) 253 os.execve(scriptfile, args, env) 254 except: 255 self.server.handle_error(self.request, self.client_address) 256 os._exit(127) 257 258 else: 259 # Non Unix - use subprocess 260 import subprocess 261 cmdline = [scriptfile] 262 if self.is_python(scriptfile): 263 interp = sys.executable 264 if interp.lower().endswith("w.exe"): 265 # On Windows, use python.exe, not pythonw.exe 266 interp = interp[:-5] + interp[-4:] 267 cmdline = [interp, '-u'] + cmdline 268 if '=' not in query: 269 cmdline.append(query) 270 271 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 272 try: 273 nbytes = int(length) 274 except (TypeError, ValueError): 275 nbytes = 0 276 p = subprocess.Popen(cmdline, 277 stdin = subprocess.PIPE, 278 stdout = subprocess.PIPE, 279 stderr = subprocess.PIPE, 280 env = env 281 ) 282 if self.command.lower() == "post" and nbytes > 0: 283 data = self.rfile.read(nbytes) 284 else: 285 data = None 286 # throw away additional data [see bug #427345] 287 while select.select([self.rfile._sock], [], [], 0)[0]: 288 if not self.rfile._sock.recv(1): 289 break 290 stdout, stderr = p.communicate(data) 291 self.wfile.write(stdout) 292 if stderr: 293 self.log_error('%s', stderr) 294 p.stderr.close() 295 p.stdout.close() 296 status = p.returncode 297 if status: 298 self.log_error("CGI script exit status %#x", status) 299 else: 300 self.log_message("CGI script exited OK") 301 302 303 def _url_collapse_path(path): 304 """ 305 Given a URL path, remove extra '/'s and '.' path elements and collapse 306 any '..' references and returns a colllapsed path. 307 308 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. 309 The utility of this function is limited to is_cgi method and helps 310 preventing some security attacks. 311 312 Returns: A tuple of (head, tail) where tail is everything after the final / 313 and head is everything before it. Head will always start with a '/' and, 314 if it contains anything else, never have a trailing '/'. 315 316 Raises: IndexError if too many '..' occur within the path. 317 318 """ 319 # Similar to os.path.split(os.path.normpath(path)) but specific to URL 320 # path semantics rather than local operating system semantics. 321 path_parts = path.split('/') 322 head_parts = [] 323 for part in path_parts[:-1]: 324 if part == '..': 325 head_parts.pop() # IndexError if more '..' than prior parts 326 elif part and part != '.': 327 head_parts.append( part ) 328 if path_parts: 329 tail_part = path_parts.pop() 330 if tail_part: 331 if tail_part == '..': 332 head_parts.pop() 333 tail_part = '' 334 elif tail_part == '.': 335 tail_part = '' 336 else: 337 tail_part = '' 338 339 splitpath = ('/' + '/'.join(head_parts), tail_part) 340 collapsed_path = "/".join(splitpath) 341 342 return collapsed_path 343 344 345 nobody = None 346 347 def nobody_uid(): 348 """Internal routine to get nobody's uid""" 349 global nobody 350 if nobody: 351 return nobody 352 try: 353 import pwd 354 except ImportError: 355 return -1 356 try: 357 nobody = pwd.getpwnam('nobody')[2] 358 except KeyError: 359 nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) 360 return nobody 361 362 363 def executable(path): 364 """Test for executable file.""" 365 try: 366 st = os.stat(path) 367 except os.error: 368 return False 369 return st.st_mode & 0111 != 0 370 371 372 def test(HandlerClass = CGIHTTPRequestHandler, 373 ServerClass = BaseHTTPServer.HTTPServer): 374 SimpleHTTPServer.test(HandlerClass, ServerClass) 375 376 377 if __name__ == '__main__': 378 test() 379