Home | History | Annotate | Download | only in python2.7
      1 #! /usr/bin/env python
      2 #
      3 # Class for profiling python code. rev 1.0  6/2/94
      4 #
      5 # Written by James Roskind
      6 # Based on prior profile module by Sjoerd Mullender...
      7 #   which was hacked somewhat by: Guido van Rossum
      8 
      9 """Class for profiling Python code."""
     10 
     11 # Copyright Disney Enterprises, Inc.  All Rights Reserved.
     12 # Licensed to PSF under a Contributor Agreement
     13 #
     14 # Licensed under the Apache License, Version 2.0 (the "License");
     15 # you may not use this file except in compliance with the License.
     16 # You may obtain a copy of the License at
     17 #
     18 # http://www.apache.org/licenses/LICENSE-2.0
     19 #
     20 # Unless required by applicable law or agreed to in writing, software
     21 # distributed under the License is distributed on an "AS IS" BASIS,
     22 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
     23 # either express or implied.  See the License for the specific language
     24 # governing permissions and limitations under the License.
     25 
     26 
     27 import sys
     28 import os
     29 import time
     30 import marshal
     31 from optparse import OptionParser
     32 
     33 __all__ = ["run", "runctx", "help", "Profile"]
     34 
     35 # Sample timer for use with
     36 #i_count = 0
     37 #def integer_timer():
     38 #       global i_count
     39 #       i_count = i_count + 1
     40 #       return i_count
     41 #itimes = integer_timer # replace with C coded timer returning integers
     42 
     43 #**************************************************************************
     44 # The following are the static member functions for the profiler class
     45 # Note that an instance of Profile() is *not* needed to call them.
     46 #**************************************************************************
     47 
     48 def run(statement, filename=None, sort=-1):
     49     """Run statement under profiler optionally saving results in filename
     50 
     51     This function takes a single argument that can be passed to the
     52     "exec" statement, and an optional file name.  In all cases this
     53     routine attempts to "exec" its first argument and gather profiling
     54     statistics from the execution. If no file name is present, then this
     55     function automatically prints a simple profiling report, sorted by the
     56     standard name string (file/line/function-name) that is presented in
     57     each line.
     58     """
     59     prof = Profile()
     60     try:
     61         prof = prof.run(statement)
     62     except SystemExit:
     63         pass
     64     if filename is not None:
     65         prof.dump_stats(filename)
     66     else:
     67         return prof.print_stats(sort)
     68 
     69 def runctx(statement, globals, locals, filename=None, sort=-1):
     70     """Run statement under profiler, supplying your own globals and locals,
     71     optionally saving results in filename.
     72 
     73     statement and filename have the same semantics as profile.run
     74     """
     75     prof = Profile()
     76     try:
     77         prof = prof.runctx(statement, globals, locals)
     78     except SystemExit:
     79         pass
     80 
     81     if filename is not None:
     82         prof.dump_stats(filename)
     83     else:
     84         return prof.print_stats(sort)
     85 
     86 # Backwards compatibility.
     87 def help():
     88     print "Documentation for the profile module can be found "
     89     print "in the Python Library Reference, section 'The Python Profiler'."
     90 
     91 if hasattr(os, "times"):
     92     def _get_time_times(timer=os.times):
     93         t = timer()
     94         return t[0] + t[1]
     95 
     96 # Using getrusage(3) is better than clock(3) if available:
     97 # on some systems (e.g. FreeBSD), getrusage has a higher resolution
     98 # Furthermore, on a POSIX system, returns microseconds, which
     99 # wrap around after 36min.
    100 _has_res = 0
    101 try:
    102     import resource
    103     resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
    104     def _get_time_resource(timer=resgetrusage):
    105         t = timer()
    106         return t[0] + t[1]
    107     _has_res = 1
    108 except ImportError:
    109     pass
    110 
    111 class Profile:
    112     """Profiler class.
    113 
    114     self.cur is always a tuple.  Each such tuple corresponds to a stack
    115     frame that is currently active (self.cur[-2]).  The following are the
    116     definitions of its members.  We use this external "parallel stack" to
    117     avoid contaminating the program that we are profiling. (old profiler
    118     used to write into the frames local dictionary!!) Derived classes
    119     can change the definition of some entries, as long as they leave
    120     [-2:] intact (frame and previous tuple).  In case an internal error is
    121     detected, the -3 element is used as the function name.
    122 
    123     [ 0] = Time that needs to be charged to the parent frame's function.
    124            It is used so that a function call will not have to access the
    125            timing data for the parent frame.
    126     [ 1] = Total time spent in this frame's function, excluding time in
    127            subfunctions (this latter is tallied in cur[2]).
    128     [ 2] = Total time spent in subfunctions, excluding time executing the
    129            frame's function (this latter is tallied in cur[1]).
    130     [-3] = Name of the function that corresponds to this frame.
    131     [-2] = Actual frame that we correspond to (used to sync exception handling).
    132     [-1] = Our parent 6-tuple (corresponds to frame.f_back).
    133 
    134     Timing data for each function is stored as a 5-tuple in the dictionary
    135     self.timings[].  The index is always the name stored in self.cur[-3].
    136     The following are the definitions of the members:
    137 
    138     [0] = The number of times this function was called, not counting direct
    139           or indirect recursion,
    140     [1] = Number of times this function appears on the stack, minus one
    141     [2] = Total time spent internal to this function
    142     [3] = Cumulative time that this function was present on the stack.  In
    143           non-recursive functions, this is the total execution time from start
    144           to finish of each invocation of a function, including time spent in
    145           all subfunctions.
    146     [4] = A dictionary indicating for each function name, the number of times
    147           it was called by us.
    148     """
    149 
    150     bias = 0  # calibration constant
    151 
    152     def __init__(self, timer=None, bias=None):
    153         self.timings = {}
    154         self.cur = None
    155         self.cmd = ""
    156         self.c_func_name = ""
    157 
    158         if bias is None:
    159             bias = self.bias
    160         self.bias = bias     # Materialize in local dict for lookup speed.
    161 
    162         if not timer:
    163             if _has_res:
    164                 self.timer = resgetrusage
    165                 self.dispatcher = self.trace_dispatch
    166                 self.get_time = _get_time_resource
    167             elif hasattr(time, 'clock'):
    168                 self.timer = self.get_time = time.clock
    169                 self.dispatcher = self.trace_dispatch_i
    170             elif hasattr(os, 'times'):
    171                 self.timer = os.times
    172                 self.dispatcher = self.trace_dispatch
    173                 self.get_time = _get_time_times
    174             else:
    175                 self.timer = self.get_time = time.time
    176                 self.dispatcher = self.trace_dispatch_i
    177         else:
    178             self.timer = timer
    179             t = self.timer() # test out timer function
    180             try:
    181                 length = len(t)
    182             except TypeError:
    183                 self.get_time = timer
    184                 self.dispatcher = self.trace_dispatch_i
    185             else:
    186                 if length == 2:
    187                     self.dispatcher = self.trace_dispatch
    188                 else:
    189                     self.dispatcher = self.trace_dispatch_l
    190                 # This get_time() implementation needs to be defined
    191                 # here to capture the passed-in timer in the parameter
    192                 # list (for performance).  Note that we can't assume
    193                 # the timer() result contains two values in all
    194                 # cases.
    195                 def get_time_timer(timer=timer, sum=sum):
    196                     return sum(timer())
    197                 self.get_time = get_time_timer
    198         self.t = self.get_time()
    199         self.simulate_call('profiler')
    200 
    201     # Heavily optimized dispatch routine for os.times() timer
    202 
    203     def trace_dispatch(self, frame, event, arg):
    204         timer = self.timer
    205         t = timer()
    206         t = t[0] + t[1] - self.t - self.bias
    207 
    208         if event == "c_call":
    209             self.c_func_name = arg.__name__
    210 
    211         if self.dispatch[event](self, frame,t):
    212             t = timer()
    213             self.t = t[0] + t[1]
    214         else:
    215             r = timer()
    216             self.t = r[0] + r[1] - t # put back unrecorded delta
    217 
    218     # Dispatch routine for best timer program (return = scalar, fastest if
    219     # an integer but float works too -- and time.clock() relies on that).
    220 
    221     def trace_dispatch_i(self, frame, event, arg):
    222         timer = self.timer
    223         t = timer() - self.t - self.bias
    224 
    225         if event == "c_call":
    226             self.c_func_name = arg.__name__
    227 
    228         if self.dispatch[event](self, frame, t):
    229             self.t = timer()
    230         else:
    231             self.t = timer() - t  # put back unrecorded delta
    232 
    233     # Dispatch routine for macintosh (timer returns time in ticks of
    234     # 1/60th second)
    235 
    236     def trace_dispatch_mac(self, frame, event, arg):
    237         timer = self.timer
    238         t = timer()/60.0 - self.t - self.bias
    239 
    240         if event == "c_call":
    241             self.c_func_name = arg.__name__
    242 
    243         if self.dispatch[event](self, frame, t):
    244             self.t = timer()/60.0
    245         else:
    246             self.t = timer()/60.0 - t  # put back unrecorded delta
    247 
    248     # SLOW generic dispatch routine for timer returning lists of numbers
    249 
    250     def trace_dispatch_l(self, frame, event, arg):
    251         get_time = self.get_time
    252         t = get_time() - self.t - self.bias
    253 
    254         if event == "c_call":
    255             self.c_func_name = arg.__name__
    256 
    257         if self.dispatch[event](self, frame, t):
    258             self.t = get_time()
    259         else:
    260             self.t = get_time() - t # put back unrecorded delta
    261 
    262     # In the event handlers, the first 3 elements of self.cur are unpacked
    263     # into vrbls w/ 3-letter names.  The last two characters are meant to be
    264     # mnemonic:
    265     #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
    266     #     _it  self.cur[1] "internal time" time spent directly in the function
    267     #     _et  self.cur[2] "external time" time spent in subfunctions
    268 
    269     def trace_dispatch_exception(self, frame, t):
    270         rpt, rit, ret, rfn, rframe, rcur = self.cur
    271         if (rframe is not frame) and rcur:
    272             return self.trace_dispatch_return(rframe, t)
    273         self.cur = rpt, rit+t, ret, rfn, rframe, rcur
    274         return 1
    275 
    276 
    277     def trace_dispatch_call(self, frame, t):
    278         if self.cur and frame.f_back is not self.cur[-2]:
    279             rpt, rit, ret, rfn, rframe, rcur = self.cur
    280             if not isinstance(rframe, Profile.fake_frame):
    281                 assert rframe.f_back is frame.f_back, ("Bad call", rfn,
    282                                                        rframe, rframe.f_back,
    283                                                        frame, frame.f_back)
    284                 self.trace_dispatch_return(rframe, 0)
    285                 assert (self.cur is None or \
    286                         frame.f_back is self.cur[-2]), ("Bad call",
    287                                                         self.cur[-3])
    288         fcode = frame.f_code
    289         fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
    290         self.cur = (t, 0, 0, fn, frame, self.cur)
    291         timings = self.timings
    292         if fn in timings:
    293             cc, ns, tt, ct, callers = timings[fn]
    294             timings[fn] = cc, ns + 1, tt, ct, callers
    295         else:
    296             timings[fn] = 0, 0, 0, 0, {}
    297         return 1
    298 
    299     def trace_dispatch_c_call (self, frame, t):
    300         fn = ("", 0, self.c_func_name)
    301         self.cur = (t, 0, 0, fn, frame, self.cur)
    302         timings = self.timings
    303         if fn in timings:
    304             cc, ns, tt, ct, callers = timings[fn]
    305             timings[fn] = cc, ns+1, tt, ct, callers
    306         else:
    307             timings[fn] = 0, 0, 0, 0, {}
    308         return 1
    309 
    310     def trace_dispatch_return(self, frame, t):
    311         if frame is not self.cur[-2]:
    312             assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
    313             self.trace_dispatch_return(self.cur[-2], 0)
    314 
    315         # Prefix "r" means part of the Returning or exiting frame.
    316         # Prefix "p" means part of the Previous or Parent or older frame.
    317 
    318         rpt, rit, ret, rfn, frame, rcur = self.cur
    319         rit = rit + t
    320         frame_total = rit + ret
    321 
    322         ppt, pit, pet, pfn, pframe, pcur = rcur
    323         self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
    324 
    325         timings = self.timings
    326         cc, ns, tt, ct, callers = timings[rfn]
    327         if not ns:
    328             # This is the only occurrence of the function on the stack.
    329             # Else this is a (directly or indirectly) recursive call, and
    330             # its cumulative time will get updated when the topmost call to
    331             # it returns.
    332             ct = ct + frame_total
    333             cc = cc + 1
    334 
    335         if pfn in callers:
    336             callers[pfn] = callers[pfn] + 1  # hack: gather more
    337             # stats such as the amount of time added to ct courtesy
    338             # of this specific call, and the contribution to cc
    339             # courtesy of this call.
    340         else:
    341             callers[pfn] = 1
    342 
    343         timings[rfn] = cc, ns - 1, tt + rit, ct, callers
    344 
    345         return 1
    346 
    347 
    348     dispatch = {
    349         "call": trace_dispatch_call,
    350         "exception": trace_dispatch_exception,
    351         "return": trace_dispatch_return,
    352         "c_call": trace_dispatch_c_call,
    353         "c_exception": trace_dispatch_return,  # the C function returned
    354         "c_return": trace_dispatch_return,
    355         }
    356 
    357 
    358     # The next few functions play with self.cmd. By carefully preloading
    359     # our parallel stack, we can force the profiled result to include
    360     # an arbitrary string as the name of the calling function.
    361     # We use self.cmd as that string, and the resulting stats look
    362     # very nice :-).
    363 
    364     def set_cmd(self, cmd):
    365         if self.cur[-1]: return   # already set
    366         self.cmd = cmd
    367         self.simulate_call(cmd)
    368 
    369     class fake_code:
    370         def __init__(self, filename, line, name):
    371             self.co_filename = filename
    372             self.co_line = line
    373             self.co_name = name
    374             self.co_firstlineno = 0
    375 
    376         def __repr__(self):
    377             return repr((self.co_filename, self.co_line, self.co_name))
    378 
    379     class fake_frame:
    380         def __init__(self, code, prior):
    381             self.f_code = code
    382             self.f_back = prior
    383 
    384     def simulate_call(self, name):
    385         code = self.fake_code('profile', 0, name)
    386         if self.cur:
    387             pframe = self.cur[-2]
    388         else:
    389             pframe = None
    390         frame = self.fake_frame(code, pframe)
    391         self.dispatch['call'](self, frame, 0)
    392 
    393     # collect stats from pending stack, including getting final
    394     # timings for self.cmd frame.
    395 
    396     def simulate_cmd_complete(self):
    397         get_time = self.get_time
    398         t = get_time() - self.t
    399         while self.cur[-1]:
    400             # We *can* cause assertion errors here if
    401             # dispatch_trace_return checks for a frame match!
    402             self.dispatch['return'](self, self.cur[-2], t)
    403             t = 0
    404         self.t = get_time() - t
    405 
    406 
    407     def print_stats(self, sort=-1):
    408         import pstats
    409         pstats.Stats(self).strip_dirs().sort_stats(sort). \
    410                   print_stats()
    411 
    412     def dump_stats(self, file):
    413         f = open(file, 'wb')
    414         self.create_stats()
    415         marshal.dump(self.stats, f)
    416         f.close()
    417 
    418     def create_stats(self):
    419         self.simulate_cmd_complete()
    420         self.snapshot_stats()
    421 
    422     def snapshot_stats(self):
    423         self.stats = {}
    424         for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
    425             callers = callers.copy()
    426             nc = 0
    427             for callcnt in callers.itervalues():
    428                 nc += callcnt
    429             self.stats[func] = cc, nc, tt, ct, callers
    430 
    431 
    432     # The following two methods can be called by clients to use
    433     # a profiler to profile a statement, given as a string.
    434 
    435     def run(self, cmd):
    436         import __main__
    437         dict = __main__.__dict__
    438         return self.runctx(cmd, dict, dict)
    439 
    440     def runctx(self, cmd, globals, locals):
    441         self.set_cmd(cmd)
    442         sys.setprofile(self.dispatcher)
    443         try:
    444             exec cmd in globals, locals
    445         finally:
    446             sys.setprofile(None)
    447         return self
    448 
    449     # This method is more useful to profile a single function call.
    450     def runcall(self, func, *args, **kw):
    451         self.set_cmd(repr(func))
    452         sys.setprofile(self.dispatcher)
    453         try:
    454             return func(*args, **kw)
    455         finally:
    456             sys.setprofile(None)
    457 
    458 
    459     #******************************************************************
    460     # The following calculates the overhead for using a profiler.  The
    461     # problem is that it takes a fair amount of time for the profiler
    462     # to stop the stopwatch (from the time it receives an event).
    463     # Similarly, there is a delay from the time that the profiler
    464     # re-starts the stopwatch before the user's code really gets to
    465     # continue.  The following code tries to measure the difference on
    466     # a per-event basis.
    467     #
    468     # Note that this difference is only significant if there are a lot of
    469     # events, and relatively little user code per event.  For example,
    470     # code with small functions will typically benefit from having the
    471     # profiler calibrated for the current platform.  This *could* be
    472     # done on the fly during init() time, but it is not worth the
    473     # effort.  Also note that if too large a value specified, then
    474     # execution time on some functions will actually appear as a
    475     # negative number.  It is *normal* for some functions (with very
    476     # low call counts) to have such negative stats, even if the
    477     # calibration figure is "correct."
    478     #
    479     # One alternative to profile-time calibration adjustments (i.e.,
    480     # adding in the magic little delta during each event) is to track
    481     # more carefully the number of events (and cumulatively, the number
    482     # of events during sub functions) that are seen.  If this were
    483     # done, then the arithmetic could be done after the fact (i.e., at
    484     # display time).  Currently, we track only call/return events.
    485     # These values can be deduced by examining the callees and callers
    486     # vectors for each functions.  Hence we *can* almost correct the
    487     # internal time figure at print time (note that we currently don't
    488     # track exception event processing counts).  Unfortunately, there
    489     # is currently no similar information for cumulative sub-function
    490     # time.  It would not be hard to "get all this info" at profiler
    491     # time.  Specifically, we would have to extend the tuples to keep
    492     # counts of this in each frame, and then extend the defs of timing
    493     # tuples to include the significant two figures. I'm a bit fearful
    494     # that this additional feature will slow the heavily optimized
    495     # event/time ratio (i.e., the profiler would run slower, fur a very
    496     # low "value added" feature.)
    497     #**************************************************************
    498 
    499     def calibrate(self, m, verbose=0):
    500         if self.__class__ is not Profile:
    501             raise TypeError("Subclasses must override .calibrate().")
    502 
    503         saved_bias = self.bias
    504         self.bias = 0
    505         try:
    506             return self._calibrate_inner(m, verbose)
    507         finally:
    508             self.bias = saved_bias
    509 
    510     def _calibrate_inner(self, m, verbose):
    511         get_time = self.get_time
    512 
    513         # Set up a test case to be run with and without profiling.  Include
    514         # lots of calls, because we're trying to quantify stopwatch overhead.
    515         # Do not raise any exceptions, though, because we want to know
    516         # exactly how many profile events are generated (one call event, +
    517         # one return event, per Python-level call).
    518 
    519         def f1(n):
    520             for i in range(n):
    521                 x = 1
    522 
    523         def f(m, f1=f1):
    524             for i in range(m):
    525                 f1(100)
    526 
    527         f(m)    # warm up the cache
    528 
    529         # elapsed_noprofile <- time f(m) takes without profiling.
    530         t0 = get_time()
    531         f(m)
    532         t1 = get_time()
    533         elapsed_noprofile = t1 - t0
    534         if verbose:
    535             print "elapsed time without profiling =", elapsed_noprofile
    536 
    537         # elapsed_profile <- time f(m) takes with profiling.  The difference
    538         # is profiling overhead, only some of which the profiler subtracts
    539         # out on its own.
    540         p = Profile()
    541         t0 = get_time()
    542         p.runctx('f(m)', globals(), locals())
    543         t1 = get_time()
    544         elapsed_profile = t1 - t0
    545         if verbose:
    546             print "elapsed time with profiling =", elapsed_profile
    547 
    548         # reported_time <- "CPU seconds" the profiler charged to f and f1.
    549         total_calls = 0.0
    550         reported_time = 0.0
    551         for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
    552                 p.timings.items():
    553             if funcname in ("f", "f1"):
    554                 total_calls += cc
    555                 reported_time += tt
    556 
    557         if verbose:
    558             print "'CPU seconds' profiler reported =", reported_time
    559             print "total # calls =", total_calls
    560         if total_calls != m + 1:
    561             raise ValueError("internal error: total calls = %d" % total_calls)
    562 
    563         # reported_time - elapsed_noprofile = overhead the profiler wasn't
    564         # able to measure.  Divide by twice the number of calls (since there
    565         # are two profiler events per call in this test) to get the hidden
    566         # overhead per event.
    567         mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
    568         if verbose:
    569             print "mean stopwatch overhead per profile event =", mean
    570         return mean
    571 
    572 #****************************************************************************
    573 def Stats(*args):
    574     print 'Report generating functions are in the "pstats" module\a'
    575 
    576 def main():
    577     usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
    578     parser = OptionParser(usage=usage)
    579     parser.allow_interspersed_args = False
    580     parser.add_option('-o', '--outfile', dest="outfile",
    581         help="Save stats to <outfile>", default=None)
    582     parser.add_option('-s', '--sort', dest="sort",
    583         help="Sort order when printing to stdout, based on pstats.Stats class",
    584         default=-1)
    585 
    586     if not sys.argv[1:]:
    587         parser.print_usage()
    588         sys.exit(2)
    589 
    590     (options, args) = parser.parse_args()
    591     sys.argv[:] = args
    592 
    593     if len(args) > 0:
    594         progname = args[0]
    595         sys.path.insert(0, os.path.dirname(progname))
    596         with open(progname, 'rb') as fp:
    597             code = compile(fp.read(), progname, 'exec')
    598         globs = {
    599             '__file__': progname,
    600             '__name__': '__main__',
    601             '__package__': None,
    602         }
    603         runctx(code, globs, None, options.outfile, options.sort)
    604     else:
    605         parser.print_usage()
    606     return parser
    607 
    608 # When invoked as main program, invoke the profiler on a script
    609 if __name__ == '__main__':
    610     main()
    611