Home | History | Annotate | Download | only in Lib
      1 #! /usr/bin/env python

      2 #

      3 # Class for profiling python code. rev 1.0  6/2/94

      4 #

      5 # Based on prior profile module by Sjoerd Mullender...

      6 #   which was hacked somewhat by: Guido van Rossum

      7 
      8 """Class for profiling Python code."""
      9 
     10 # Copyright 1994, by InfoSeek Corporation, all rights reserved.

     11 # Written by James Roskind

     12 #

     13 # Permission to use, copy, modify, and distribute this Python software

     14 # and its associated documentation for any purpose (subject to the

     15 # restriction in the following sentence) without fee is hereby granted,

     16 # provided that the above copyright notice appears in all copies, and

     17 # that both that copyright notice and this permission notice appear in

     18 # supporting documentation, and that the name of InfoSeek not be used in

     19 # advertising or publicity pertaining to distribution of the software

     20 # without specific, written prior permission.  This permission is

     21 # explicitly restricted to the copying and modification of the software

     22 # to remain in Python, compiled Python, or other languages (such as C)

     23 # wherein the modified or derived code is exclusively imported into a

     24 # Python module.

     25 #

     26 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS

     27 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND

     28 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY

     29 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER

     30 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF

     31 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN

     32 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

     33 
     34 
     35 
     36 import sys
     37 import os
     38 import time
     39 import marshal
     40 from optparse import OptionParser
     41 
     42 __all__ = ["run", "runctx", "help", "Profile"]
     43 
     44 # Sample timer for use with

     45 #i_count = 0

     46 #def integer_timer():

     47 #       global i_count

     48 #       i_count = i_count + 1

     49 #       return i_count

     50 #itimes = integer_timer # replace with C coded timer returning integers

     51 
     52 #**************************************************************************

     53 # The following are the static member functions for the profiler class

     54 # Note that an instance of Profile() is *not* needed to call them.

     55 #**************************************************************************

     56 
     57 def run(statement, filename=None, sort=-1):
     58     """Run statement under profiler optionally saving results in filename
     59 
     60     This function takes a single argument that can be passed to the
     61     "exec" statement, and an optional file name.  In all cases this
     62     routine attempts to "exec" its first argument and gather profiling
     63     statistics from the execution. If no file name is present, then this
     64     function automatically prints a simple profiling report, sorted by the
     65     standard name string (file/line/function-name) that is presented in
     66     each line.
     67     """
     68     prof = Profile()
     69     try:
     70         prof = prof.run(statement)
     71     except SystemExit:
     72         pass
     73     if filename is not None:
     74         prof.dump_stats(filename)
     75     else:
     76         return prof.print_stats(sort)
     77 
     78 def runctx(statement, globals, locals, filename=None, sort=-1):
     79     """Run statement under profiler, supplying your own globals and locals,
     80     optionally saving results in filename.
     81 
     82     statement and filename have the same semantics as profile.run
     83     """
     84     prof = Profile()
     85     try:
     86         prof = prof.runctx(statement, globals, locals)
     87     except SystemExit:
     88         pass
     89 
     90     if filename is not None:
     91         prof.dump_stats(filename)
     92     else:
     93         return prof.print_stats(sort)
     94 
     95 # Backwards compatibility.

     96 def help():
     97     print "Documentation for the profile module can be found "
     98     print "in the Python Library Reference, section 'The Python Profiler'."
     99 
    100 if hasattr(os, "times"):
    101     def _get_time_times(timer=os.times):
    102         t = timer()
    103         return t[0] + t[1]
    104 
    105 # Using getrusage(3) is better than clock(3) if available:

    106 # on some systems (e.g. FreeBSD), getrusage has a higher resolution

    107 # Furthermore, on a POSIX system, returns microseconds, which

    108 # wrap around after 36min.

    109 _has_res = 0
    110 try:
    111     import resource
    112     resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
    113     def _get_time_resource(timer=resgetrusage):
    114         t = timer()
    115         return t[0] + t[1]
    116     _has_res = 1
    117 except ImportError:
    118     pass
    119 
    120 class Profile:
    121     """Profiler class.
    122 
    123     self.cur is always a tuple.  Each such tuple corresponds to a stack
    124     frame that is currently active (self.cur[-2]).  The following are the
    125     definitions of its members.  We use this external "parallel stack" to
    126     avoid contaminating the program that we are profiling. (old profiler
    127     used to write into the frames local dictionary!!) Derived classes
    128     can change the definition of some entries, as long as they leave
    129     [-2:] intact (frame and previous tuple).  In case an internal error is
    130     detected, the -3 element is used as the function name.
    131 
    132     [ 0] = Time that needs to be charged to the parent frame's function.
    133            It is used so that a function call will not have to access the
    134            timing data for the parent frame.
    135     [ 1] = Total time spent in this frame's function, excluding time in
    136            subfunctions (this latter is tallied in cur[2]).
    137     [ 2] = Total time spent in subfunctions, excluding time executing the
    138            frame's function (this latter is tallied in cur[1]).
    139     [-3] = Name of the function that corresponds to this frame.
    140     [-2] = Actual frame that we correspond to (used to sync exception handling).
    141     [-1] = Our parent 6-tuple (corresponds to frame.f_back).
    142 
    143     Timing data for each function is stored as a 5-tuple in the dictionary
    144     self.timings[].  The index is always the name stored in self.cur[-3].
    145     The following are the definitions of the members:
    146 
    147     [0] = The number of times this function was called, not counting direct
    148           or indirect recursion,
    149     [1] = Number of times this function appears on the stack, minus one
    150     [2] = Total time spent internal to this function
    151     [3] = Cumulative time that this function was present on the stack.  In
    152           non-recursive functions, this is the total execution time from start
    153           to finish of each invocation of a function, including time spent in
    154           all subfunctions.
    155     [4] = A dictionary indicating for each function name, the number of times
    156           it was called by us.
    157     """
    158 
    159     bias = 0  # calibration constant

    160 
    161     def __init__(self, timer=None, bias=None):
    162         self.timings = {}
    163         self.cur = None
    164         self.cmd = ""
    165         self.c_func_name = ""
    166 
    167         if bias is None:
    168             bias = self.bias
    169         self.bias = bias     # Materialize in local dict for lookup speed.

    170 
    171         if not timer:
    172             if _has_res:
    173                 self.timer = resgetrusage
    174                 self.dispatcher = self.trace_dispatch
    175                 self.get_time = _get_time_resource
    176             elif hasattr(time, 'clock'):
    177                 self.timer = self.get_time = time.clock
    178                 self.dispatcher = self.trace_dispatch_i
    179             elif hasattr(os, 'times'):
    180                 self.timer = os.times
    181                 self.dispatcher = self.trace_dispatch
    182                 self.get_time = _get_time_times
    183             else:
    184                 self.timer = self.get_time = time.time
    185                 self.dispatcher = self.trace_dispatch_i
    186         else:
    187             self.timer = timer
    188             t = self.timer() # test out timer function

    189             try:
    190                 length = len(t)
    191             except TypeError:
    192                 self.get_time = timer
    193                 self.dispatcher = self.trace_dispatch_i
    194             else:
    195                 if length == 2:
    196                     self.dispatcher = self.trace_dispatch
    197                 else:
    198                     self.dispatcher = self.trace_dispatch_l
    199                 # This get_time() implementation needs to be defined

    200                 # here to capture the passed-in timer in the parameter

    201                 # list (for performance).  Note that we can't assume

    202                 # the timer() result contains two values in all

    203                 # cases.

    204                 def get_time_timer(timer=timer, sum=sum):
    205                     return sum(timer())
    206                 self.get_time = get_time_timer
    207         self.t = self.get_time()
    208         self.simulate_call('profiler')
    209 
    210     # Heavily optimized dispatch routine for os.times() timer

    211 
    212     def trace_dispatch(self, frame, event, arg):
    213         timer = self.timer
    214         t = timer()
    215         t = t[0] + t[1] - self.t - self.bias
    216 
    217         if event == "c_call":
    218             self.c_func_name = arg.__name__
    219 
    220         if self.dispatch[event](self, frame,t):
    221             t = timer()
    222             self.t = t[0] + t[1]
    223         else:
    224             r = timer()
    225             self.t = r[0] + r[1] - t # put back unrecorded delta

    226 
    227     # Dispatch routine for best timer program (return = scalar, fastest if

    228     # an integer but float works too -- and time.clock() relies on that).

    229 
    230     def trace_dispatch_i(self, frame, event, arg):
    231         timer = self.timer
    232         t = timer() - self.t - self.bias
    233 
    234         if event == "c_call":
    235             self.c_func_name = arg.__name__
    236 
    237         if self.dispatch[event](self, frame, t):
    238             self.t = timer()
    239         else:
    240             self.t = timer() - t  # put back unrecorded delta

    241 
    242     # Dispatch routine for macintosh (timer returns time in ticks of

    243     # 1/60th second)

    244 
    245     def trace_dispatch_mac(self, frame, event, arg):
    246         timer = self.timer
    247         t = timer()/60.0 - self.t - self.bias
    248 
    249         if event == "c_call":
    250             self.c_func_name = arg.__name__
    251 
    252         if self.dispatch[event](self, frame, t):
    253             self.t = timer()/60.0
    254         else:
    255             self.t = timer()/60.0 - t  # put back unrecorded delta

    256 
    257     # SLOW generic dispatch routine for timer returning lists of numbers

    258 
    259     def trace_dispatch_l(self, frame, event, arg):
    260         get_time = self.get_time
    261         t = get_time() - self.t - self.bias
    262 
    263         if event == "c_call":
    264             self.c_func_name = arg.__name__
    265 
    266         if self.dispatch[event](self, frame, t):
    267             self.t = get_time()
    268         else:
    269             self.t = get_time() - t # put back unrecorded delta

    270 
    271     # In the event handlers, the first 3 elements of self.cur are unpacked

    272     # into vrbls w/ 3-letter names.  The last two characters are meant to be

    273     # mnemonic:

    274     #     _pt  self.cur[0] "parent time"   time to be charged to parent frame

    275     #     _it  self.cur[1] "internal time" time spent directly in the function

    276     #     _et  self.cur[2] "external time" time spent in subfunctions

    277 
    278     def trace_dispatch_exception(self, frame, t):
    279         rpt, rit, ret, rfn, rframe, rcur = self.cur
    280         if (rframe is not frame) and rcur:
    281             return self.trace_dispatch_return(rframe, t)
    282         self.cur = rpt, rit+t, ret, rfn, rframe, rcur
    283         return 1
    284 
    285 
    286     def trace_dispatch_call(self, frame, t):
    287         if self.cur and frame.f_back is not self.cur[-2]:
    288             rpt, rit, ret, rfn, rframe, rcur = self.cur
    289             if not isinstance(rframe, Profile.fake_frame):
    290                 assert rframe.f_back is frame.f_back, ("Bad call", rfn,
    291                                                        rframe, rframe.f_back,
    292                                                        frame, frame.f_back)
    293                 self.trace_dispatch_return(rframe, 0)
    294                 assert (self.cur is None or \
    295                         frame.f_back is self.cur[-2]), ("Bad call",
    296                                                         self.cur[-3])
    297         fcode = frame.f_code
    298         fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
    299         self.cur = (t, 0, 0, fn, frame, self.cur)
    300         timings = self.timings
    301         if fn in timings:
    302             cc, ns, tt, ct, callers = timings[fn]
    303             timings[fn] = cc, ns + 1, tt, ct, callers
    304         else:
    305             timings[fn] = 0, 0, 0, 0, {}
    306         return 1
    307 
    308     def trace_dispatch_c_call (self, frame, t):
    309         fn = ("", 0, self.c_func_name)
    310         self.cur = (t, 0, 0, fn, frame, self.cur)
    311         timings = self.timings
    312         if fn in timings:
    313             cc, ns, tt, ct, callers = timings[fn]
    314             timings[fn] = cc, ns+1, tt, ct, callers
    315         else:
    316             timings[fn] = 0, 0, 0, 0, {}
    317         return 1
    318 
    319     def trace_dispatch_return(self, frame, t):
    320         if frame is not self.cur[-2]:
    321             assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
    322             self.trace_dispatch_return(self.cur[-2], 0)
    323 
    324         # Prefix "r" means part of the Returning or exiting frame.

    325         # Prefix "p" means part of the Previous or Parent or older frame.

    326 
    327         rpt, rit, ret, rfn, frame, rcur = self.cur
    328         rit = rit + t
    329         frame_total = rit + ret
    330 
    331         ppt, pit, pet, pfn, pframe, pcur = rcur
    332         self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
    333 
    334         timings = self.timings
    335         cc, ns, tt, ct, callers = timings[rfn]
    336         if not ns:
    337             # This is the only occurrence of the function on the stack.

    338             # Else this is a (directly or indirectly) recursive call, and

    339             # its cumulative time will get updated when the topmost call to

    340             # it returns.

    341             ct = ct + frame_total
    342             cc = cc + 1
    343 
    344         if pfn in callers:
    345             callers[pfn] = callers[pfn] + 1  # hack: gather more

    346             # stats such as the amount of time added to ct courtesy

    347             # of this specific call, and the contribution to cc

    348             # courtesy of this call.

    349         else:
    350             callers[pfn] = 1
    351 
    352         timings[rfn] = cc, ns - 1, tt + rit, ct, callers
    353 
    354         return 1
    355 
    356 
    357     dispatch = {
    358         "call": trace_dispatch_call,
    359         "exception": trace_dispatch_exception,
    360         "return": trace_dispatch_return,
    361         "c_call": trace_dispatch_c_call,
    362         "c_exception": trace_dispatch_return,  # the C function returned

    363         "c_return": trace_dispatch_return,
    364         }
    365 
    366 
    367     # The next few functions play with self.cmd. By carefully preloading

    368     # our parallel stack, we can force the profiled result to include

    369     # an arbitrary string as the name of the calling function.

    370     # We use self.cmd as that string, and the resulting stats look

    371     # very nice :-).

    372 
    373     def set_cmd(self, cmd):
    374         if self.cur[-1]: return   # already set

    375         self.cmd = cmd
    376         self.simulate_call(cmd)
    377 
    378     class fake_code:
    379         def __init__(self, filename, line, name):
    380             self.co_filename = filename
    381             self.co_line = line
    382             self.co_name = name
    383             self.co_firstlineno = 0
    384 
    385         def __repr__(self):
    386             return repr((self.co_filename, self.co_line, self.co_name))
    387 
    388     class fake_frame:
    389         def __init__(self, code, prior):
    390             self.f_code = code
    391             self.f_back = prior
    392 
    393     def simulate_call(self, name):
    394         code = self.fake_code('profile', 0, name)
    395         if self.cur:
    396             pframe = self.cur[-2]
    397         else:
    398             pframe = None
    399         frame = self.fake_frame(code, pframe)
    400         self.dispatch['call'](self, frame, 0)
    401 
    402     # collect stats from pending stack, including getting final

    403     # timings for self.cmd frame.

    404 
    405     def simulate_cmd_complete(self):
    406         get_time = self.get_time
    407         t = get_time() - self.t
    408         while self.cur[-1]:
    409             # We *can* cause assertion errors here if

    410             # dispatch_trace_return checks for a frame match!

    411             self.dispatch['return'](self, self.cur[-2], t)
    412             t = 0
    413         self.t = get_time() - t
    414 
    415 
    416     def print_stats(self, sort=-1):
    417         import pstats
    418         pstats.Stats(self).strip_dirs().sort_stats(sort). \
    419                   print_stats()
    420 
    421     def dump_stats(self, file):
    422         f = open(file, 'wb')
    423         self.create_stats()
    424         marshal.dump(self.stats, f)
    425         f.close()
    426 
    427     def create_stats(self):
    428         self.simulate_cmd_complete()
    429         self.snapshot_stats()
    430 
    431     def snapshot_stats(self):
    432         self.stats = {}
    433         for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
    434             callers = callers.copy()
    435             nc = 0
    436             for callcnt in callers.itervalues():
    437                 nc += callcnt
    438             self.stats[func] = cc, nc, tt, ct, callers
    439 
    440 
    441     # The following two methods can be called by clients to use

    442     # a profiler to profile a statement, given as a string.

    443 
    444     def run(self, cmd):
    445         import __main__
    446         dict = __main__.__dict__
    447         return self.runctx(cmd, dict, dict)
    448 
    449     def runctx(self, cmd, globals, locals):
    450         self.set_cmd(cmd)
    451         sys.setprofile(self.dispatcher)
    452         try:
    453             exec cmd in globals, locals
    454         finally:
    455             sys.setprofile(None)
    456         return self
    457 
    458     # This method is more useful to profile a single function call.

    459     def runcall(self, func, *args, **kw):
    460         self.set_cmd(repr(func))
    461         sys.setprofile(self.dispatcher)
    462         try:
    463             return func(*args, **kw)
    464         finally:
    465             sys.setprofile(None)
    466 
    467 
    468     #******************************************************************

    469     # The following calculates the overhead for using a profiler.  The

    470     # problem is that it takes a fair amount of time for the profiler

    471     # to stop the stopwatch (from the time it receives an event).

    472     # Similarly, there is a delay from the time that the profiler

    473     # re-starts the stopwatch before the user's code really gets to

    474     # continue.  The following code tries to measure the difference on

    475     # a per-event basis.

    476     #

    477     # Note that this difference is only significant if there are a lot of

    478     # events, and relatively little user code per event.  For example,

    479     # code with small functions will typically benefit from having the

    480     # profiler calibrated for the current platform.  This *could* be

    481     # done on the fly during init() time, but it is not worth the

    482     # effort.  Also note that if too large a value specified, then

    483     # execution time on some functions will actually appear as a

    484     # negative number.  It is *normal* for some functions (with very

    485     # low call counts) to have such negative stats, even if the

    486     # calibration figure is "correct."

    487     #

    488     # One alternative to profile-time calibration adjustments (i.e.,

    489     # adding in the magic little delta during each event) is to track

    490     # more carefully the number of events (and cumulatively, the number

    491     # of events during sub functions) that are seen.  If this were

    492     # done, then the arithmetic could be done after the fact (i.e., at

    493     # display time).  Currently, we track only call/return events.

    494     # These values can be deduced by examining the callees and callers

    495     # vectors for each functions.  Hence we *can* almost correct the

    496     # internal time figure at print time (note that we currently don't

    497     # track exception event processing counts).  Unfortunately, there

    498     # is currently no similar information for cumulative sub-function

    499     # time.  It would not be hard to "get all this info" at profiler

    500     # time.  Specifically, we would have to extend the tuples to keep

    501     # counts of this in each frame, and then extend the defs of timing

    502     # tuples to include the significant two figures. I'm a bit fearful

    503     # that this additional feature will slow the heavily optimized

    504     # event/time ratio (i.e., the profiler would run slower, fur a very

    505     # low "value added" feature.)

    506     #**************************************************************

    507 
    508     def calibrate(self, m, verbose=0):
    509         if self.__class__ is not Profile:
    510             raise TypeError("Subclasses must override .calibrate().")
    511 
    512         saved_bias = self.bias
    513         self.bias = 0
    514         try:
    515             return self._calibrate_inner(m, verbose)
    516         finally:
    517             self.bias = saved_bias
    518 
    519     def _calibrate_inner(self, m, verbose):
    520         get_time = self.get_time
    521 
    522         # Set up a test case to be run with and without profiling.  Include

    523         # lots of calls, because we're trying to quantify stopwatch overhead.

    524         # Do not raise any exceptions, though, because we want to know

    525         # exactly how many profile events are generated (one call event, +

    526         # one return event, per Python-level call).

    527 
    528         def f1(n):
    529             for i in range(n):
    530                 x = 1
    531 
    532         def f(m, f1=f1):
    533             for i in range(m):
    534                 f1(100)
    535 
    536         f(m)    # warm up the cache

    537 
    538         # elapsed_noprofile <- time f(m) takes without profiling.

    539         t0 = get_time()
    540         f(m)
    541         t1 = get_time()
    542         elapsed_noprofile = t1 - t0
    543         if verbose:
    544             print "elapsed time without profiling =", elapsed_noprofile
    545 
    546         # elapsed_profile <- time f(m) takes with profiling.  The difference

    547         # is profiling overhead, only some of which the profiler subtracts

    548         # out on its own.

    549         p = Profile()
    550         t0 = get_time()
    551         p.runctx('f(m)', globals(), locals())
    552         t1 = get_time()
    553         elapsed_profile = t1 - t0
    554         if verbose:
    555             print "elapsed time with profiling =", elapsed_profile
    556 
    557         # reported_time <- "CPU seconds" the profiler charged to f and f1.

    558         total_calls = 0.0
    559         reported_time = 0.0
    560         for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
    561                 p.timings.items():
    562             if funcname in ("f", "f1"):
    563                 total_calls += cc
    564                 reported_time += tt
    565 
    566         if verbose:
    567             print "'CPU seconds' profiler reported =", reported_time
    568             print "total # calls =", total_calls
    569         if total_calls != m + 1:
    570             raise ValueError("internal error: total calls = %d" % total_calls)
    571 
    572         # reported_time - elapsed_noprofile = overhead the profiler wasn't

    573         # able to measure.  Divide by twice the number of calls (since there

    574         # are two profiler events per call in this test) to get the hidden

    575         # overhead per event.

    576         mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
    577         if verbose:
    578             print "mean stopwatch overhead per profile event =", mean
    579         return mean
    580 
    581 #****************************************************************************

    582 def Stats(*args):
    583     print 'Report generating functions are in the "pstats" module\a'
    584 
    585 def main():
    586     usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
    587     parser = OptionParser(usage=usage)
    588     parser.allow_interspersed_args = False
    589     parser.add_option('-o', '--outfile', dest="outfile",
    590         help="Save stats to <outfile>", default=None)
    591     parser.add_option('-s', '--sort', dest="sort",
    592         help="Sort order when printing to stdout, based on pstats.Stats class",
    593         default=-1)
    594 
    595     if not sys.argv[1:]:
    596         parser.print_usage()
    597         sys.exit(2)
    598 
    599     (options, args) = parser.parse_args()
    600     sys.argv[:] = args
    601 
    602     if len(args) > 0:
    603         progname = args[0]
    604         sys.path.insert(0, os.path.dirname(progname))
    605         with open(progname, 'rb') as fp:
    606             code = compile(fp.read(), progname, 'exec')
    607         globs = {
    608             '__file__': progname,
    609             '__name__': '__main__',
    610             '__package__': None,
    611         }
    612         runctx(code, globs, None, options.outfile, options.sort)
    613     else:
    614         parser.print_usage()
    615     return parser
    616 
    617 # When invoked as main program, invoke the profiler on a script

    618 if __name__ == '__main__':
    619     main()
    620