1 #! /usr/bin/env python 2 # 3 # Class for profiling python code. rev 1.0 6/2/94 4 # 5 # Based on prior profile module by Sjoerd Mullender... 6 # which was hacked somewhat by: Guido van Rossum 7 8 """Class for profiling Python code.""" 9 10 # Copyright 1994, by InfoSeek Corporation, all rights reserved. 11 # Written by James Roskind 12 # 13 # Permission to use, copy, modify, and distribute this Python software 14 # and its associated documentation for any purpose (subject to the 15 # restriction in the following sentence) without fee is hereby granted, 16 # provided that the above copyright notice appears in all copies, and 17 # that both that copyright notice and this permission notice appear in 18 # supporting documentation, and that the name of InfoSeek not be used in 19 # advertising or publicity pertaining to distribution of the software 20 # without specific, written prior permission. This permission is 21 # explicitly restricted to the copying and modification of the software 22 # to remain in Python, compiled Python, or other languages (such as C) 23 # wherein the modified or derived code is exclusively imported into a 24 # Python module. 25 # 26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS 27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY 29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER 30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF 31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 33 34 35 36 import sys 37 import os 38 import time 39 import marshal 40 from optparse import OptionParser 41 42 __all__ = ["run", "runctx", "help", "Profile"] 43 44 # Sample timer for use with 45 #i_count = 0 46 #def integer_timer(): 47 # global i_count 48 # i_count = i_count + 1 49 # return i_count 50 #itimes = integer_timer # replace with C coded timer returning integers 51 52 #************************************************************************** 53 # The following are the static member functions for the profiler class 54 # Note that an instance of Profile() is *not* needed to call them. 55 #************************************************************************** 56 57 def run(statement, filename=None, sort=-1): 58 """Run statement under profiler optionally saving results in filename 59 60 This function takes a single argument that can be passed to the 61 "exec" statement, and an optional file name. In all cases this 62 routine attempts to "exec" its first argument and gather profiling 63 statistics from the execution. If no file name is present, then this 64 function automatically prints a simple profiling report, sorted by the 65 standard name string (file/line/function-name) that is presented in 66 each line. 67 """ 68 prof = Profile() 69 try: 70 prof = prof.run(statement) 71 except SystemExit: 72 pass 73 if filename is not None: 74 prof.dump_stats(filename) 75 else: 76 return prof.print_stats(sort) 77 78 def runctx(statement, globals, locals, filename=None, sort=-1): 79 """Run statement under profiler, supplying your own globals and locals, 80 optionally saving results in filename. 81 82 statement and filename have the same semantics as profile.run 83 """ 84 prof = Profile() 85 try: 86 prof = prof.runctx(statement, globals, locals) 87 except SystemExit: 88 pass 89 90 if filename is not None: 91 prof.dump_stats(filename) 92 else: 93 return prof.print_stats(sort) 94 95 # Backwards compatibility. 96 def help(): 97 print "Documentation for the profile module can be found " 98 print "in the Python Library Reference, section 'The Python Profiler'." 99 100 if hasattr(os, "times"): 101 def _get_time_times(timer=os.times): 102 t = timer() 103 return t[0] + t[1] 104 105 # Using getrusage(3) is better than clock(3) if available: 106 # on some systems (e.g. FreeBSD), getrusage has a higher resolution 107 # Furthermore, on a POSIX system, returns microseconds, which 108 # wrap around after 36min. 109 _has_res = 0 110 try: 111 import resource 112 resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF) 113 def _get_time_resource(timer=resgetrusage): 114 t = timer() 115 return t[0] + t[1] 116 _has_res = 1 117 except ImportError: 118 pass 119 120 class Profile: 121 """Profiler class. 122 123 self.cur is always a tuple. Each such tuple corresponds to a stack 124 frame that is currently active (self.cur[-2]). The following are the 125 definitions of its members. We use this external "parallel stack" to 126 avoid contaminating the program that we are profiling. (old profiler 127 used to write into the frames local dictionary!!) Derived classes 128 can change the definition of some entries, as long as they leave 129 [-2:] intact (frame and previous tuple). In case an internal error is 130 detected, the -3 element is used as the function name. 131 132 [ 0] = Time that needs to be charged to the parent frame's function. 133 It is used so that a function call will not have to access the 134 timing data for the parent frame. 135 [ 1] = Total time spent in this frame's function, excluding time in 136 subfunctions (this latter is tallied in cur[2]). 137 [ 2] = Total time spent in subfunctions, excluding time executing the 138 frame's function (this latter is tallied in cur[1]). 139 [-3] = Name of the function that corresponds to this frame. 140 [-2] = Actual frame that we correspond to (used to sync exception handling). 141 [-1] = Our parent 6-tuple (corresponds to frame.f_back). 142 143 Timing data for each function is stored as a 5-tuple in the dictionary 144 self.timings[]. The index is always the name stored in self.cur[-3]. 145 The following are the definitions of the members: 146 147 [0] = The number of times this function was called, not counting direct 148 or indirect recursion, 149 [1] = Number of times this function appears on the stack, minus one 150 [2] = Total time spent internal to this function 151 [3] = Cumulative time that this function was present on the stack. In 152 non-recursive functions, this is the total execution time from start 153 to finish of each invocation of a function, including time spent in 154 all subfunctions. 155 [4] = A dictionary indicating for each function name, the number of times 156 it was called by us. 157 """ 158 159 bias = 0 # calibration constant 160 161 def __init__(self, timer=None, bias=None): 162 self.timings = {} 163 self.cur = None 164 self.cmd = "" 165 self.c_func_name = "" 166 167 if bias is None: 168 bias = self.bias 169 self.bias = bias # Materialize in local dict for lookup speed. 170 171 if not timer: 172 if _has_res: 173 self.timer = resgetrusage 174 self.dispatcher = self.trace_dispatch 175 self.get_time = _get_time_resource 176 elif hasattr(time, 'clock'): 177 self.timer = self.get_time = time.clock 178 self.dispatcher = self.trace_dispatch_i 179 elif hasattr(os, 'times'): 180 self.timer = os.times 181 self.dispatcher = self.trace_dispatch 182 self.get_time = _get_time_times 183 else: 184 self.timer = self.get_time = time.time 185 self.dispatcher = self.trace_dispatch_i 186 else: 187 self.timer = timer 188 t = self.timer() # test out timer function 189 try: 190 length = len(t) 191 except TypeError: 192 self.get_time = timer 193 self.dispatcher = self.trace_dispatch_i 194 else: 195 if length == 2: 196 self.dispatcher = self.trace_dispatch 197 else: 198 self.dispatcher = self.trace_dispatch_l 199 # This get_time() implementation needs to be defined 200 # here to capture the passed-in timer in the parameter 201 # list (for performance). Note that we can't assume 202 # the timer() result contains two values in all 203 # cases. 204 def get_time_timer(timer=timer, sum=sum): 205 return sum(timer()) 206 self.get_time = get_time_timer 207 self.t = self.get_time() 208 self.simulate_call('profiler') 209 210 # Heavily optimized dispatch routine for os.times() timer 211 212 def trace_dispatch(self, frame, event, arg): 213 timer = self.timer 214 t = timer() 215 t = t[0] + t[1] - self.t - self.bias 216 217 if event == "c_call": 218 self.c_func_name = arg.__name__ 219 220 if self.dispatch[event](self, frame,t): 221 t = timer() 222 self.t = t[0] + t[1] 223 else: 224 r = timer() 225 self.t = r[0] + r[1] - t # put back unrecorded delta 226 227 # Dispatch routine for best timer program (return = scalar, fastest if 228 # an integer but float works too -- and time.clock() relies on that). 229 230 def trace_dispatch_i(self, frame, event, arg): 231 timer = self.timer 232 t = timer() - self.t - self.bias 233 234 if event == "c_call": 235 self.c_func_name = arg.__name__ 236 237 if self.dispatch[event](self, frame, t): 238 self.t = timer() 239 else: 240 self.t = timer() - t # put back unrecorded delta 241 242 # Dispatch routine for macintosh (timer returns time in ticks of 243 # 1/60th second) 244 245 def trace_dispatch_mac(self, frame, event, arg): 246 timer = self.timer 247 t = timer()/60.0 - self.t - self.bias 248 249 if event == "c_call": 250 self.c_func_name = arg.__name__ 251 252 if self.dispatch[event](self, frame, t): 253 self.t = timer()/60.0 254 else: 255 self.t = timer()/60.0 - t # put back unrecorded delta 256 257 # SLOW generic dispatch routine for timer returning lists of numbers 258 259 def trace_dispatch_l(self, frame, event, arg): 260 get_time = self.get_time 261 t = get_time() - self.t - self.bias 262 263 if event == "c_call": 264 self.c_func_name = arg.__name__ 265 266 if self.dispatch[event](self, frame, t): 267 self.t = get_time() 268 else: 269 self.t = get_time() - t # put back unrecorded delta 270 271 # In the event handlers, the first 3 elements of self.cur are unpacked 272 # into vrbls w/ 3-letter names. The last two characters are meant to be 273 # mnemonic: 274 # _pt self.cur[0] "parent time" time to be charged to parent frame 275 # _it self.cur[1] "internal time" time spent directly in the function 276 # _et self.cur[2] "external time" time spent in subfunctions 277 278 def trace_dispatch_exception(self, frame, t): 279 rpt, rit, ret, rfn, rframe, rcur = self.cur 280 if (rframe is not frame) and rcur: 281 return self.trace_dispatch_return(rframe, t) 282 self.cur = rpt, rit+t, ret, rfn, rframe, rcur 283 return 1 284 285 286 def trace_dispatch_call(self, frame, t): 287 if self.cur and frame.f_back is not self.cur[-2]: 288 rpt, rit, ret, rfn, rframe, rcur = self.cur 289 if not isinstance(rframe, Profile.fake_frame): 290 assert rframe.f_back is frame.f_back, ("Bad call", rfn, 291 rframe, rframe.f_back, 292 frame, frame.f_back) 293 self.trace_dispatch_return(rframe, 0) 294 assert (self.cur is None or \ 295 frame.f_back is self.cur[-2]), ("Bad call", 296 self.cur[-3]) 297 fcode = frame.f_code 298 fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name) 299 self.cur = (t, 0, 0, fn, frame, self.cur) 300 timings = self.timings 301 if fn in timings: 302 cc, ns, tt, ct, callers = timings[fn] 303 timings[fn] = cc, ns + 1, tt, ct, callers 304 else: 305 timings[fn] = 0, 0, 0, 0, {} 306 return 1 307 308 def trace_dispatch_c_call (self, frame, t): 309 fn = ("", 0, self.c_func_name) 310 self.cur = (t, 0, 0, fn, frame, self.cur) 311 timings = self.timings 312 if fn in timings: 313 cc, ns, tt, ct, callers = timings[fn] 314 timings[fn] = cc, ns+1, tt, ct, callers 315 else: 316 timings[fn] = 0, 0, 0, 0, {} 317 return 1 318 319 def trace_dispatch_return(self, frame, t): 320 if frame is not self.cur[-2]: 321 assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3]) 322 self.trace_dispatch_return(self.cur[-2], 0) 323 324 # Prefix "r" means part of the Returning or exiting frame. 325 # Prefix "p" means part of the Previous or Parent or older frame. 326 327 rpt, rit, ret, rfn, frame, rcur = self.cur 328 rit = rit + t 329 frame_total = rit + ret 330 331 ppt, pit, pet, pfn, pframe, pcur = rcur 332 self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur 333 334 timings = self.timings 335 cc, ns, tt, ct, callers = timings[rfn] 336 if not ns: 337 # This is the only occurrence of the function on the stack. 338 # Else this is a (directly or indirectly) recursive call, and 339 # its cumulative time will get updated when the topmost call to 340 # it returns. 341 ct = ct + frame_total 342 cc = cc + 1 343 344 if pfn in callers: 345 callers[pfn] = callers[pfn] + 1 # hack: gather more 346 # stats such as the amount of time added to ct courtesy 347 # of this specific call, and the contribution to cc 348 # courtesy of this call. 349 else: 350 callers[pfn] = 1 351 352 timings[rfn] = cc, ns - 1, tt + rit, ct, callers 353 354 return 1 355 356 357 dispatch = { 358 "call": trace_dispatch_call, 359 "exception": trace_dispatch_exception, 360 "return": trace_dispatch_return, 361 "c_call": trace_dispatch_c_call, 362 "c_exception": trace_dispatch_return, # the C function returned 363 "c_return": trace_dispatch_return, 364 } 365 366 367 # The next few functions play with self.cmd. By carefully preloading 368 # our parallel stack, we can force the profiled result to include 369 # an arbitrary string as the name of the calling function. 370 # We use self.cmd as that string, and the resulting stats look 371 # very nice :-). 372 373 def set_cmd(self, cmd): 374 if self.cur[-1]: return # already set 375 self.cmd = cmd 376 self.simulate_call(cmd) 377 378 class fake_code: 379 def __init__(self, filename, line, name): 380 self.co_filename = filename 381 self.co_line = line 382 self.co_name = name 383 self.co_firstlineno = 0 384 385 def __repr__(self): 386 return repr((self.co_filename, self.co_line, self.co_name)) 387 388 class fake_frame: 389 def __init__(self, code, prior): 390 self.f_code = code 391 self.f_back = prior 392 393 def simulate_call(self, name): 394 code = self.fake_code('profile', 0, name) 395 if self.cur: 396 pframe = self.cur[-2] 397 else: 398 pframe = None 399 frame = self.fake_frame(code, pframe) 400 self.dispatch['call'](self, frame, 0) 401 402 # collect stats from pending stack, including getting final 403 # timings for self.cmd frame. 404 405 def simulate_cmd_complete(self): 406 get_time = self.get_time 407 t = get_time() - self.t 408 while self.cur[-1]: 409 # We *can* cause assertion errors here if 410 # dispatch_trace_return checks for a frame match! 411 self.dispatch['return'](self, self.cur[-2], t) 412 t = 0 413 self.t = get_time() - t 414 415 416 def print_stats(self, sort=-1): 417 import pstats 418 pstats.Stats(self).strip_dirs().sort_stats(sort). \ 419 print_stats() 420 421 def dump_stats(self, file): 422 f = open(file, 'wb') 423 self.create_stats() 424 marshal.dump(self.stats, f) 425 f.close() 426 427 def create_stats(self): 428 self.simulate_cmd_complete() 429 self.snapshot_stats() 430 431 def snapshot_stats(self): 432 self.stats = {} 433 for func, (cc, ns, tt, ct, callers) in self.timings.iteritems(): 434 callers = callers.copy() 435 nc = 0 436 for callcnt in callers.itervalues(): 437 nc += callcnt 438 self.stats[func] = cc, nc, tt, ct, callers 439 440 441 # The following two methods can be called by clients to use 442 # a profiler to profile a statement, given as a string. 443 444 def run(self, cmd): 445 import __main__ 446 dict = __main__.__dict__ 447 return self.runctx(cmd, dict, dict) 448 449 def runctx(self, cmd, globals, locals): 450 self.set_cmd(cmd) 451 sys.setprofile(self.dispatcher) 452 try: 453 exec cmd in globals, locals 454 finally: 455 sys.setprofile(None) 456 return self 457 458 # This method is more useful to profile a single function call. 459 def runcall(self, func, *args, **kw): 460 self.set_cmd(repr(func)) 461 sys.setprofile(self.dispatcher) 462 try: 463 return func(*args, **kw) 464 finally: 465 sys.setprofile(None) 466 467 468 #****************************************************************** 469 # The following calculates the overhead for using a profiler. The 470 # problem is that it takes a fair amount of time for the profiler 471 # to stop the stopwatch (from the time it receives an event). 472 # Similarly, there is a delay from the time that the profiler 473 # re-starts the stopwatch before the user's code really gets to 474 # continue. The following code tries to measure the difference on 475 # a per-event basis. 476 # 477 # Note that this difference is only significant if there are a lot of 478 # events, and relatively little user code per event. For example, 479 # code with small functions will typically benefit from having the 480 # profiler calibrated for the current platform. This *could* be 481 # done on the fly during init() time, but it is not worth the 482 # effort. Also note that if too large a value specified, then 483 # execution time on some functions will actually appear as a 484 # negative number. It is *normal* for some functions (with very 485 # low call counts) to have such negative stats, even if the 486 # calibration figure is "correct." 487 # 488 # One alternative to profile-time calibration adjustments (i.e., 489 # adding in the magic little delta during each event) is to track 490 # more carefully the number of events (and cumulatively, the number 491 # of events during sub functions) that are seen. If this were 492 # done, then the arithmetic could be done after the fact (i.e., at 493 # display time). Currently, we track only call/return events. 494 # These values can be deduced by examining the callees and callers 495 # vectors for each functions. Hence we *can* almost correct the 496 # internal time figure at print time (note that we currently don't 497 # track exception event processing counts). Unfortunately, there 498 # is currently no similar information for cumulative sub-function 499 # time. It would not be hard to "get all this info" at profiler 500 # time. Specifically, we would have to extend the tuples to keep 501 # counts of this in each frame, and then extend the defs of timing 502 # tuples to include the significant two figures. I'm a bit fearful 503 # that this additional feature will slow the heavily optimized 504 # event/time ratio (i.e., the profiler would run slower, fur a very 505 # low "value added" feature.) 506 #************************************************************** 507 508 def calibrate(self, m, verbose=0): 509 if self.__class__ is not Profile: 510 raise TypeError("Subclasses must override .calibrate().") 511 512 saved_bias = self.bias 513 self.bias = 0 514 try: 515 return self._calibrate_inner(m, verbose) 516 finally: 517 self.bias = saved_bias 518 519 def _calibrate_inner(self, m, verbose): 520 get_time = self.get_time 521 522 # Set up a test case to be run with and without profiling. Include 523 # lots of calls, because we're trying to quantify stopwatch overhead. 524 # Do not raise any exceptions, though, because we want to know 525 # exactly how many profile events are generated (one call event, + 526 # one return event, per Python-level call). 527 528 def f1(n): 529 for i in range(n): 530 x = 1 531 532 def f(m, f1=f1): 533 for i in range(m): 534 f1(100) 535 536 f(m) # warm up the cache 537 538 # elapsed_noprofile <- time f(m) takes without profiling. 539 t0 = get_time() 540 f(m) 541 t1 = get_time() 542 elapsed_noprofile = t1 - t0 543 if verbose: 544 print "elapsed time without profiling =", elapsed_noprofile 545 546 # elapsed_profile <- time f(m) takes with profiling. The difference 547 # is profiling overhead, only some of which the profiler subtracts 548 # out on its own. 549 p = Profile() 550 t0 = get_time() 551 p.runctx('f(m)', globals(), locals()) 552 t1 = get_time() 553 elapsed_profile = t1 - t0 554 if verbose: 555 print "elapsed time with profiling =", elapsed_profile 556 557 # reported_time <- "CPU seconds" the profiler charged to f and f1. 558 total_calls = 0.0 559 reported_time = 0.0 560 for (filename, line, funcname), (cc, ns, tt, ct, callers) in \ 561 p.timings.items(): 562 if funcname in ("f", "f1"): 563 total_calls += cc 564 reported_time += tt 565 566 if verbose: 567 print "'CPU seconds' profiler reported =", reported_time 568 print "total # calls =", total_calls 569 if total_calls != m + 1: 570 raise ValueError("internal error: total calls = %d" % total_calls) 571 572 # reported_time - elapsed_noprofile = overhead the profiler wasn't 573 # able to measure. Divide by twice the number of calls (since there 574 # are two profiler events per call in this test) to get the hidden 575 # overhead per event. 576 mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls 577 if verbose: 578 print "mean stopwatch overhead per profile event =", mean 579 return mean 580 581 #**************************************************************************** 582 def Stats(*args): 583 print 'Report generating functions are in the "pstats" module\a' 584 585 def main(): 586 usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." 587 parser = OptionParser(usage=usage) 588 parser.allow_interspersed_args = False 589 parser.add_option('-o', '--outfile', dest="outfile", 590 help="Save stats to <outfile>", default=None) 591 parser.add_option('-s', '--sort', dest="sort", 592 help="Sort order when printing to stdout, based on pstats.Stats class", 593 default=-1) 594 595 if not sys.argv[1:]: 596 parser.print_usage() 597 sys.exit(2) 598 599 (options, args) = parser.parse_args() 600 sys.argv[:] = args 601 602 if len(args) > 0: 603 progname = args[0] 604 sys.path.insert(0, os.path.dirname(progname)) 605 with open(progname, 'rb') as fp: 606 code = compile(fp.read(), progname, 'exec') 607 globs = { 608 '__file__': progname, 609 '__name__': '__main__', 610 '__package__': None, 611 } 612 runctx(code, globs, None, options.outfile, options.sort) 613 else: 614 parser.print_usage() 615 return parser 616 617 # When invoked as main program, invoke the profiler on a script 618 if __name__ == '__main__': 619 main() 620