# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A simple HTTP proxy server."""

import BaseHTTPServer
import copy
import os
import socket
import SocketServer
import threading
import urllib
import urllib2
from urlparse import urlparse

_HOST = '127.0.0.1'


class _ProxyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
  """Request handler for the proxy server."""

  # Disables buffering, which causes problems with certain sites.
  rbufsize = 0

  def __init__(self, request, client_addr, server):
    BaseHTTPServer.BaseHTTPRequestHandler.__init__(
        self, request, client_addr, server)

  def _GetHandler(self):
    """Fetches |self.path| and relays the body to the client.

    If the fetch fails with an IOError, the error text is written to the
    client instead of a body.
    """
    try:
      # NOTE(review): the proxies key is 'http:' (with a trailing colon); it is
      # preserved exactly as found -- confirm whether that is intentional.
      url = urllib.urlopen(self.path, proxies={'http:' : '127.0.0.1'})
    except IOError as err:
      # Write the error text explicitly rather than handing the exception
      # object to the file object.
      self.wfile.write(str(err))
      return
    try:
      self.wfile.write(url.read())
    finally:
      # Always release the upstream handle, even if reading from it or writing
      # to the client fails.
      url.close()

  def do_GET(self):
    """Handles GET requests.

    Requests selected by _ShouldHandleRequest() are logged and fetched; all
    other requests receive the generic dummy response.
    """
    if self._ShouldHandleRequest():
      self._LogRequest()
      self._GetHandler()
    else:
      self._GenericResponseHandler()

  def do_CONNECT(self):
    """Handles CONNECT requests by sending the generic dummy response."""
    self._GenericResponseHandler()

  def do_HEAD(self):
    """Handles HEAD requests the same way as GET requests."""
    self.do_GET()

  def do_POST(self):
    """Handles POST requests the same way as GET requests."""
    self.do_GET()

  def do_PUT(self):
    """Handles PUT requests the same way as GET requests."""
    self.do_GET()

  def _GenericResponseHandler(self):
    """Sends a dummy response for HTTP requests not handled by the server."""
    # Handle dropped connections.
    try:
      self.send_response(200)
    except (socket.error, socket.gaierror):
      return
    contents = 'Default response given for path: %s' % self.path
    self.send_header('Content-Type', 'text/html')
    self.send_header('Content-Length', len(contents))
    self.end_headers()
    # A HEAD response carries headers only.
    if self.command != 'HEAD':
      self.wfile.write(contents)

  def _ShouldHandleRequest(self):
    """Determines if a request should be processed by the server.

    Returns:
      True if the server handles all requests, or if any configured path is a
      substring of either the request's network location or the full request
      path; False otherwise.
    """
    if self.server.ShouldHandleAllRequests():
      return True
    netloc = urlparse(self.path, 'http').netloc
    return any(url in netloc or url in self.path
               for url in self.server.GetPaths())

  def _LogRequest(self):
    """Logs requests handled by the server to a buffer."""
    self.server.AddHandledRequest(self.requestline)

  def log_request(self, *args, **kwargs):
    """Overridden base class method that disables request logging."""
    pass


class ProxyServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
  """Creates a threaded proxy server."""

  def __init__(self, port=0, paths=None, handle_all=True):
    """Initializes proxy server settings and starts serving.

    Args:
      port: Server port number. If zero, the server will select a free port.
      paths: A list containing urls the server will process. If |handle_all| is
          False, the server will only process urls in this list. URLs should be
          passed as follows: ['http://www.google.com', '...',]. Defaults to an
          empty list.
      handle_all: Flag that determines if the server will process all requests.
    """
    BaseHTTPServer.HTTPServer.__init__(
        self, (_HOST, port), _ProxyRequestHandler, True)
    self._stopped = False
    self._serving = False
    self._lock = threading.RLock()
    # Copy so later changes to the caller's list do not affect the server.
    self._paths = list(paths) if paths is not None else []
    self._handle_all = handle_all
    self._handled_requests = []
    self.StartServer()

  def GetPort(self):
    """Returns the port number the server is serving on."""
    return self.server_port

  def StartServer(self):
    """Starts the proxy server in a new thread.

    Does nothing if the server is already serving.

    Raises:
      RuntimeError: If the server has already been shut down.
    """
    if self._stopped:
      raise RuntimeError('Cannot restart server.')
    if not self._serving:
      self._serving = True
      thread = WorkerThread(self)
      thread.start()

  def Shutdown(self):
    """Shuts down the server.

    Raises:
      RuntimeError: If the server is not currently serving.
    """
    if not self._serving:
      raise RuntimeError('Server is currently inactive.')
    self._serving = False
    self._stopped = True
    try:
      # The serving loop re-checks |_serving| only between requests, so send
      # one request to ourselves to let it observe the cleared flag.
      urllib2.urlopen('http://%s:%s' % (self.server_name, self.server_port))
    except urllib2.URLError:
      pass
    finally:
      # Release the listening socket even if the wake-up request fails with an
      # unexpected error.
      self.server_close()

  def handle_request(self):
    """Handles requests while the |_serving| flag is True."""
    while self._serving:
      BaseHTTPServer.HTTPServer.handle_request(self)

  def ShouldHandleAllRequests(self):
    """Determines if server should handle all requests."""
    return self._handle_all

  def AddHandledRequest(self, request):
    """Appends requests handled by the server to |_handled_requests|."""
    with self._lock:
      self._handled_requests.append(request)

  def GetHandledRequests(self):
    """Returns a copy of the requests handled by the server."""
    with self._lock:
      return copy.deepcopy(self._handled_requests)

  def GetPaths(self):
    """Returns list of urls that will be handled by the server."""
    return self._paths


class WorkerThread(threading.Thread):
  """Thread that runs a server's request-handling loop."""

  def __init__(self, server):
    threading.Thread.__init__(self)
    self._server = server

  def run(self):
    """Overridden base class method.

    Prints the serving port, marks the server's handler threads as daemon
    threads and then runs the server's handle_request() loop.
    """
    print('Serving on port: %s' % self._server.server_port)
    self._server.daemon_threads = True
    self._server.handle_request()