1 #! /usr/bin/env python 2 # 3 # Class for profiling python code. rev 1.0 6/2/94 4 # 5 # Written by James Roskind 6 # Based on prior profile module by Sjoerd Mullender... 7 # which was hacked somewhat by: Guido van Rossum 8 9 """Class for profiling Python code.""" 10 11 # Copyright Disney Enterprises, Inc. All Rights Reserved. 12 # Licensed to PSF under a Contributor Agreement 13 # 14 # Licensed under the Apache License, Version 2.0 (the "License"); 15 # you may not use this file except in compliance with the License. 16 # You may obtain a copy of the License at 17 # 18 # http://www.apache.org/licenses/LICENSE-2.0 19 # 20 # Unless required by applicable law or agreed to in writing, software 21 # distributed under the License is distributed on an "AS IS" BASIS, 22 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 23 # either express or implied. See the License for the specific language 24 # governing permissions and limitations under the License. 25 26 27 import sys 28 import os 29 import time 30 import marshal 31 from optparse import OptionParser 32 33 __all__ = ["run", "runctx", "help", "Profile"] 34 35 # Sample timer for use with 36 #i_count = 0 37 #def integer_timer(): 38 # global i_count 39 # i_count = i_count + 1 40 # return i_count 41 #itimes = integer_timer # replace with C coded timer returning integers 42 43 #************************************************************************** 44 # The following are the static member functions for the profiler class 45 # Note that an instance of Profile() is *not* needed to call them. 46 #************************************************************************** 47 48 def run(statement, filename=None, sort=-1): 49 """Run statement under profiler optionally saving results in filename 50 51 This function takes a single argument that can be passed to the 52 "exec" statement, and an optional file name. In all cases this 53 routine attempts to "exec" its first argument and gather profiling 54 statistics from the execution. If no file name is present, then this 55 function automatically prints a simple profiling report, sorted by the 56 standard name string (file/line/function-name) that is presented in 57 each line. 58 """ 59 prof = Profile() 60 try: 61 prof = prof.run(statement) 62 except SystemExit: 63 pass 64 if filename is not None: 65 prof.dump_stats(filename) 66 else: 67 return prof.print_stats(sort) 68 69 def runctx(statement, globals, locals, filename=None, sort=-1): 70 """Run statement under profiler, supplying your own globals and locals, 71 optionally saving results in filename. 72 73 statement and filename have the same semantics as profile.run 74 """ 75 prof = Profile() 76 try: 77 prof = prof.runctx(statement, globals, locals) 78 except SystemExit: 79 pass 80 81 if filename is not None: 82 prof.dump_stats(filename) 83 else: 84 return prof.print_stats(sort) 85 86 # Backwards compatibility. 87 def help(): 88 print "Documentation for the profile module can be found " 89 print "in the Python Library Reference, section 'The Python Profiler'." 90 91 if hasattr(os, "times"): 92 def _get_time_times(timer=os.times): 93 t = timer() 94 return t[0] + t[1] 95 96 # Using getrusage(3) is better than clock(3) if available: 97 # on some systems (e.g. FreeBSD), getrusage has a higher resolution 98 # Furthermore, on a POSIX system, returns microseconds, which 99 # wrap around after 36min. 100 _has_res = 0 101 try: 102 import resource 103 resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF) 104 def _get_time_resource(timer=resgetrusage): 105 t = timer() 106 return t[0] + t[1] 107 _has_res = 1 108 except ImportError: 109 pass 110 111 class Profile: 112 """Profiler class. 113 114 self.cur is always a tuple. Each such tuple corresponds to a stack 115 frame that is currently active (self.cur[-2]). The following are the 116 definitions of its members. We use this external "parallel stack" to 117 avoid contaminating the program that we are profiling. (old profiler 118 used to write into the frames local dictionary!!) Derived classes 119 can change the definition of some entries, as long as they leave 120 [-2:] intact (frame and previous tuple). In case an internal error is 121 detected, the -3 element is used as the function name. 122 123 [ 0] = Time that needs to be charged to the parent frame's function. 124 It is used so that a function call will not have to access the 125 timing data for the parent frame. 126 [ 1] = Total time spent in this frame's function, excluding time in 127 subfunctions (this latter is tallied in cur[2]). 128 [ 2] = Total time spent in subfunctions, excluding time executing the 129 frame's function (this latter is tallied in cur[1]). 130 [-3] = Name of the function that corresponds to this frame. 131 [-2] = Actual frame that we correspond to (used to sync exception handling). 132 [-1] = Our parent 6-tuple (corresponds to frame.f_back). 133 134 Timing data for each function is stored as a 5-tuple in the dictionary 135 self.timings[]. The index is always the name stored in self.cur[-3]. 136 The following are the definitions of the members: 137 138 [0] = The number of times this function was called, not counting direct 139 or indirect recursion, 140 [1] = Number of times this function appears on the stack, minus one 141 [2] = Total time spent internal to this function 142 [3] = Cumulative time that this function was present on the stack. In 143 non-recursive functions, this is the total execution time from start 144 to finish of each invocation of a function, including time spent in 145 all subfunctions. 146 [4] = A dictionary indicating for each function name, the number of times 147 it was called by us. 148 """ 149 150 bias = 0 # calibration constant 151 152 def __init__(self, timer=None, bias=None): 153 self.timings = {} 154 self.cur = None 155 self.cmd = "" 156 self.c_func_name = "" 157 158 if bias is None: 159 bias = self.bias 160 self.bias = bias # Materialize in local dict for lookup speed. 161 162 if not timer: 163 if _has_res: 164 self.timer = resgetrusage 165 self.dispatcher = self.trace_dispatch 166 self.get_time = _get_time_resource 167 elif hasattr(time, 'clock'): 168 self.timer = self.get_time = time.clock 169 self.dispatcher = self.trace_dispatch_i 170 elif hasattr(os, 'times'): 171 self.timer = os.times 172 self.dispatcher = self.trace_dispatch 173 self.get_time = _get_time_times 174 else: 175 self.timer = self.get_time = time.time 176 self.dispatcher = self.trace_dispatch_i 177 else: 178 self.timer = timer 179 t = self.timer() # test out timer function 180 try: 181 length = len(t) 182 except TypeError: 183 self.get_time = timer 184 self.dispatcher = self.trace_dispatch_i 185 else: 186 if length == 2: 187 self.dispatcher = self.trace_dispatch 188 else: 189 self.dispatcher = self.trace_dispatch_l 190 # This get_time() implementation needs to be defined 191 # here to capture the passed-in timer in the parameter 192 # list (for performance). Note that we can't assume 193 # the timer() result contains two values in all 194 # cases. 195 def get_time_timer(timer=timer, sum=sum): 196 return sum(timer()) 197 self.get_time = get_time_timer 198 self.t = self.get_time() 199 self.simulate_call('profiler') 200 201 # Heavily optimized dispatch routine for os.times() timer 202 203 def trace_dispatch(self, frame, event, arg): 204 timer = self.timer 205 t = timer() 206 t = t[0] + t[1] - self.t - self.bias 207 208 if event == "c_call": 209 self.c_func_name = arg.__name__ 210 211 if self.dispatch[event](self, frame,t): 212 t = timer() 213 self.t = t[0] + t[1] 214 else: 215 r = timer() 216 self.t = r[0] + r[1] - t # put back unrecorded delta 217 218 # Dispatch routine for best timer program (return = scalar, fastest if 219 # an integer but float works too -- and time.clock() relies on that). 220 221 def trace_dispatch_i(self, frame, event, arg): 222 timer = self.timer 223 t = timer() - self.t - self.bias 224 225 if event == "c_call": 226 self.c_func_name = arg.__name__ 227 228 if self.dispatch[event](self, frame, t): 229 self.t = timer() 230 else: 231 self.t = timer() - t # put back unrecorded delta 232 233 # Dispatch routine for macintosh (timer returns time in ticks of 234 # 1/60th second) 235 236 def trace_dispatch_mac(self, frame, event, arg): 237 timer = self.timer 238 t = timer()/60.0 - self.t - self.bias 239 240 if event == "c_call": 241 self.c_func_name = arg.__name__ 242 243 if self.dispatch[event](self, frame, t): 244 self.t = timer()/60.0 245 else: 246 self.t = timer()/60.0 - t # put back unrecorded delta 247 248 # SLOW generic dispatch routine for timer returning lists of numbers 249 250 def trace_dispatch_l(self, frame, event, arg): 251 get_time = self.get_time 252 t = get_time() - self.t - self.bias 253 254 if event == "c_call": 255 self.c_func_name = arg.__name__ 256 257 if self.dispatch[event](self, frame, t): 258 self.t = get_time() 259 else: 260 self.t = get_time() - t # put back unrecorded delta 261 262 # In the event handlers, the first 3 elements of self.cur are unpacked 263 # into vrbls w/ 3-letter names. The last two characters are meant to be 264 # mnemonic: 265 # _pt self.cur[0] "parent time" time to be charged to parent frame 266 # _it self.cur[1] "internal time" time spent directly in the function 267 # _et self.cur[2] "external time" time spent in subfunctions 268 269 def trace_dispatch_exception(self, frame, t): 270 rpt, rit, ret, rfn, rframe, rcur = self.cur 271 if (rframe is not frame) and rcur: 272 return self.trace_dispatch_return(rframe, t) 273 self.cur = rpt, rit+t, ret, rfn, rframe, rcur 274 return 1 275 276 277 def trace_dispatch_call(self, frame, t): 278 if self.cur and frame.f_back is not self.cur[-2]: 279 rpt, rit, ret, rfn, rframe, rcur = self.cur 280 if not isinstance(rframe, Profile.fake_frame): 281 assert rframe.f_back is frame.f_back, ("Bad call", rfn, 282 rframe, rframe.f_back, 283 frame, frame.f_back) 284 self.trace_dispatch_return(rframe, 0) 285 assert (self.cur is None or \ 286 frame.f_back is self.cur[-2]), ("Bad call", 287 self.cur[-3]) 288 fcode = frame.f_code 289 fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name) 290 self.cur = (t, 0, 0, fn, frame, self.cur) 291 timings = self.timings 292 if fn in timings: 293 cc, ns, tt, ct, callers = timings[fn] 294 timings[fn] = cc, ns + 1, tt, ct, callers 295 else: 296 timings[fn] = 0, 0, 0, 0, {} 297 return 1 298 299 def trace_dispatch_c_call (self, frame, t): 300 fn = ("", 0, self.c_func_name) 301 self.cur = (t, 0, 0, fn, frame, self.cur) 302 timings = self.timings 303 if fn in timings: 304 cc, ns, tt, ct, callers = timings[fn] 305 timings[fn] = cc, ns+1, tt, ct, callers 306 else: 307 timings[fn] = 0, 0, 0, 0, {} 308 return 1 309 310 def trace_dispatch_return(self, frame, t): 311 if frame is not self.cur[-2]: 312 assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3]) 313 self.trace_dispatch_return(self.cur[-2], 0) 314 315 # Prefix "r" means part of the Returning or exiting frame. 316 # Prefix "p" means part of the Previous or Parent or older frame. 317 318 rpt, rit, ret, rfn, frame, rcur = self.cur 319 rit = rit + t 320 frame_total = rit + ret 321 322 ppt, pit, pet, pfn, pframe, pcur = rcur 323 self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur 324 325 timings = self.timings 326 cc, ns, tt, ct, callers = timings[rfn] 327 if not ns: 328 # This is the only occurrence of the function on the stack. 329 # Else this is a (directly or indirectly) recursive call, and 330 # its cumulative time will get updated when the topmost call to 331 # it returns. 332 ct = ct + frame_total 333 cc = cc + 1 334 335 if pfn in callers: 336 callers[pfn] = callers[pfn] + 1 # hack: gather more 337 # stats such as the amount of time added to ct courtesy 338 # of this specific call, and the contribution to cc 339 # courtesy of this call. 340 else: 341 callers[pfn] = 1 342 343 timings[rfn] = cc, ns - 1, tt + rit, ct, callers 344 345 return 1 346 347 348 dispatch = { 349 "call": trace_dispatch_call, 350 "exception": trace_dispatch_exception, 351 "return": trace_dispatch_return, 352 "c_call": trace_dispatch_c_call, 353 "c_exception": trace_dispatch_return, # the C function returned 354 "c_return": trace_dispatch_return, 355 } 356 357 358 # The next few functions play with self.cmd. By carefully preloading 359 # our parallel stack, we can force the profiled result to include 360 # an arbitrary string as the name of the calling function. 361 # We use self.cmd as that string, and the resulting stats look 362 # very nice :-). 363 364 def set_cmd(self, cmd): 365 if self.cur[-1]: return # already set 366 self.cmd = cmd 367 self.simulate_call(cmd) 368 369 class fake_code: 370 def __init__(self, filename, line, name): 371 self.co_filename = filename 372 self.co_line = line 373 self.co_name = name 374 self.co_firstlineno = 0 375 376 def __repr__(self): 377 return repr((self.co_filename, self.co_line, self.co_name)) 378 379 class fake_frame: 380 def __init__(self, code, prior): 381 self.f_code = code 382 self.f_back = prior 383 384 def simulate_call(self, name): 385 code = self.fake_code('profile', 0, name) 386 if self.cur: 387 pframe = self.cur[-2] 388 else: 389 pframe = None 390 frame = self.fake_frame(code, pframe) 391 self.dispatch['call'](self, frame, 0) 392 393 # collect stats from pending stack, including getting final 394 # timings for self.cmd frame. 395 396 def simulate_cmd_complete(self): 397 get_time = self.get_time 398 t = get_time() - self.t 399 while self.cur[-1]: 400 # We *can* cause assertion errors here if 401 # dispatch_trace_return checks for a frame match! 402 self.dispatch['return'](self, self.cur[-2], t) 403 t = 0 404 self.t = get_time() - t 405 406 407 def print_stats(self, sort=-1): 408 import pstats 409 pstats.Stats(self).strip_dirs().sort_stats(sort). \ 410 print_stats() 411 412 def dump_stats(self, file): 413 f = open(file, 'wb') 414 self.create_stats() 415 marshal.dump(self.stats, f) 416 f.close() 417 418 def create_stats(self): 419 self.simulate_cmd_complete() 420 self.snapshot_stats() 421 422 def snapshot_stats(self): 423 self.stats = {} 424 for func, (cc, ns, tt, ct, callers) in self.timings.iteritems(): 425 callers = callers.copy() 426 nc = 0 427 for callcnt in callers.itervalues(): 428 nc += callcnt 429 self.stats[func] = cc, nc, tt, ct, callers 430 431 432 # The following two methods can be called by clients to use 433 # a profiler to profile a statement, given as a string. 434 435 def run(self, cmd): 436 import __main__ 437 dict = __main__.__dict__ 438 return self.runctx(cmd, dict, dict) 439 440 def runctx(self, cmd, globals, locals): 441 self.set_cmd(cmd) 442 sys.setprofile(self.dispatcher) 443 try: 444 exec cmd in globals, locals 445 finally: 446 sys.setprofile(None) 447 return self 448 449 # This method is more useful to profile a single function call. 450 def runcall(self, func, *args, **kw): 451 self.set_cmd(repr(func)) 452 sys.setprofile(self.dispatcher) 453 try: 454 return func(*args, **kw) 455 finally: 456 sys.setprofile(None) 457 458 459 #****************************************************************** 460 # The following calculates the overhead for using a profiler. The 461 # problem is that it takes a fair amount of time for the profiler 462 # to stop the stopwatch (from the time it receives an event). 463 # Similarly, there is a delay from the time that the profiler 464 # re-starts the stopwatch before the user's code really gets to 465 # continue. The following code tries to measure the difference on 466 # a per-event basis. 467 # 468 # Note that this difference is only significant if there are a lot of 469 # events, and relatively little user code per event. For example, 470 # code with small functions will typically benefit from having the 471 # profiler calibrated for the current platform. This *could* be 472 # done on the fly during init() time, but it is not worth the 473 # effort. Also note that if too large a value specified, then 474 # execution time on some functions will actually appear as a 475 # negative number. It is *normal* for some functions (with very 476 # low call counts) to have such negative stats, even if the 477 # calibration figure is "correct." 478 # 479 # One alternative to profile-time calibration adjustments (i.e., 480 # adding in the magic little delta during each event) is to track 481 # more carefully the number of events (and cumulatively, the number 482 # of events during sub functions) that are seen. If this were 483 # done, then the arithmetic could be done after the fact (i.e., at 484 # display time). Currently, we track only call/return events. 485 # These values can be deduced by examining the callees and callers 486 # vectors for each functions. Hence we *can* almost correct the 487 # internal time figure at print time (note that we currently don't 488 # track exception event processing counts). Unfortunately, there 489 # is currently no similar information for cumulative sub-function 490 # time. It would not be hard to "get all this info" at profiler 491 # time. Specifically, we would have to extend the tuples to keep 492 # counts of this in each frame, and then extend the defs of timing 493 # tuples to include the significant two figures. I'm a bit fearful 494 # that this additional feature will slow the heavily optimized 495 # event/time ratio (i.e., the profiler would run slower, fur a very 496 # low "value added" feature.) 497 #************************************************************** 498 499 def calibrate(self, m, verbose=0): 500 if self.__class__ is not Profile: 501 raise TypeError("Subclasses must override .calibrate().") 502 503 saved_bias = self.bias 504 self.bias = 0 505 try: 506 return self._calibrate_inner(m, verbose) 507 finally: 508 self.bias = saved_bias 509 510 def _calibrate_inner(self, m, verbose): 511 get_time = self.get_time 512 513 # Set up a test case to be run with and without profiling. Include 514 # lots of calls, because we're trying to quantify stopwatch overhead. 515 # Do not raise any exceptions, though, because we want to know 516 # exactly how many profile events are generated (one call event, + 517 # one return event, per Python-level call). 518 519 def f1(n): 520 for i in range(n): 521 x = 1 522 523 def f(m, f1=f1): 524 for i in range(m): 525 f1(100) 526 527 f(m) # warm up the cache 528 529 # elapsed_noprofile <- time f(m) takes without profiling. 530 t0 = get_time() 531 f(m) 532 t1 = get_time() 533 elapsed_noprofile = t1 - t0 534 if verbose: 535 print "elapsed time without profiling =", elapsed_noprofile 536 537 # elapsed_profile <- time f(m) takes with profiling. The difference 538 # is profiling overhead, only some of which the profiler subtracts 539 # out on its own. 540 p = Profile() 541 t0 = get_time() 542 p.runctx('f(m)', globals(), locals()) 543 t1 = get_time() 544 elapsed_profile = t1 - t0 545 if verbose: 546 print "elapsed time with profiling =", elapsed_profile 547 548 # reported_time <- "CPU seconds" the profiler charged to f and f1. 549 total_calls = 0.0 550 reported_time = 0.0 551 for (filename, line, funcname), (cc, ns, tt, ct, callers) in \ 552 p.timings.items(): 553 if funcname in ("f", "f1"): 554 total_calls += cc 555 reported_time += tt 556 557 if verbose: 558 print "'CPU seconds' profiler reported =", reported_time 559 print "total # calls =", total_calls 560 if total_calls != m + 1: 561 raise ValueError("internal error: total calls = %d" % total_calls) 562 563 # reported_time - elapsed_noprofile = overhead the profiler wasn't 564 # able to measure. Divide by twice the number of calls (since there 565 # are two profiler events per call in this test) to get the hidden 566 # overhead per event. 567 mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls 568 if verbose: 569 print "mean stopwatch overhead per profile event =", mean 570 return mean 571 572 #**************************************************************************** 573 def Stats(*args): 574 print 'Report generating functions are in the "pstats" module\a' 575 576 def main(): 577 usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." 578 parser = OptionParser(usage=usage) 579 parser.allow_interspersed_args = False 580 parser.add_option('-o', '--outfile', dest="outfile", 581 help="Save stats to <outfile>", default=None) 582 parser.add_option('-s', '--sort', dest="sort", 583 help="Sort order when printing to stdout, based on pstats.Stats class", 584 default=-1) 585 586 if not sys.argv[1:]: 587 parser.print_usage() 588 sys.exit(2) 589 590 (options, args) = parser.parse_args() 591 sys.argv[:] = args 592 593 if len(args) > 0: 594 progname = args[0] 595 sys.path.insert(0, os.path.dirname(progname)) 596 with open(progname, 'rb') as fp: 597 code = compile(fp.read(), progname, 'exec') 598 globs = { 599 '__file__': progname, 600 '__name__': '__main__', 601 '__package__': None, 602 } 603 runctx(code, globs, None, options.outfile, options.sort) 604 else: 605 parser.print_usage() 606 return parser 607 608 # When invoked as main program, invoke the profiler on a script 609 if __name__ == '__main__': 610 main() 611