"""Class for printing reports on profiled python code."""

# Written by James Roskind
# Based on prior profile module by Sjoerd Mullender...
#   which was hacked somewhat by: Guido van Rossum

# Copyright Disney Enterprises, Inc.  All Rights Reserved.
# Licensed to PSF under a Contributor Agreement
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied.  See the License for the specific language
# governing permissions and limitations under the License.


import sys
import os
import time
import marshal
import re
from functools import cmp_to_key

__all__ = ["Stats"]

class Stats:
    """This class is used for creating reports from data generated by the
    Profile class.  It is a "friend" of that class, and imports data either
    by direct access to members of Profile class, or by reading in a dictionary
    that was emitted (via marshal) from the Profile class.

    The big change from the previous Profiler (in terms of raw functionality)
    is that an "add()" method has been provided to combine Stats from
    several distinct profile runs.  Both the constructor and the add()
    method now take arbitrarily many file names as arguments.

    All the print methods now take an argument that indicates how many lines
    to print.  If the arg is a floating point number between 0 and 1.0, then
    it is taken as a decimal percentage of the available lines to be printed
    (e.g., .1 means print 10% of all available lines).  If it is an integer,
    it is taken to mean the number of lines of data that you wish to have
    printed.

    The sort_stats() method now processes some additional options (i.e., in
    addition to the old -1, 0, 1, or 2).  It takes an arbitrary number of
    quoted strings to select the sort order.  For example sort_stats('time',
    'name') sorts on the major key of 'internal function time', and on the
    minor key of 'the name of the function'.  Look at the two tables in
    sort_stats() and get_sort_arg_defs(self) for more examples.

    All methods return self, so you can string together commands like:
        Stats('foo', 'goo').strip_dirs().sort_stats('calls').\
                            print_stats(5).print_callers(5)
    """

    def __init__(self, *args, **kwds):
        """Build a Stats object from zero or more profile sources.

        The first positional argument initialises the object (see
        load_stats()); any further ones are merged in with add().  The
        only accepted keyword is ``stream``, the file-like object reports
        are written to (default: sys.stdout).

        Raises ValueError for any other keyword argument.
        """
        # Python 2 cannot explicitly declare a keyword argument after
        # *args (def __init__(self, *args, stream=sys.stdout)), so take
        # **kwds and squawk if something unexpected is passed in.
        self.stream = kwds.pop("stream", sys.stdout)
        if kwds:
            extras = ", ".join(["%s=%s" % (k, kwds[k]) for k in sorted(kwds)])
            raise ValueError("unrecognized keyword args: %s" % extras)
        if args:
            arg, args = args[0], args[1:]
        else:
            arg = None
        self.init(arg)
        self.add(*args)

    def init(self, arg):
        """Reset all state, load ``arg`` and compute the summary totals."""
        self.all_callees = None  # calc only if needed
        self.files = []
        self.fcn_list = None
        self.total_tt = 0
        self.total_calls = 0
        self.prim_calls = 0
        self.max_name_len = 0
        self.top_level = {}
        self.stats = {}
        self.sort_arg_dict = {}
        self.load_stats(arg)
        trouble = 1
        try:
            self.get_top_level_stats()
            trouble = 0
        finally:
            # Report which input was malformed; the exception itself still
            # propagates to the caller.
            if trouble:
                print >> self.stream, "Invalid timing data",
                if self.files: print >> self.stream, self.files[-1],
                print >> self.stream

    def load_stats(self, arg):
        """Fill self.stats from ``arg``.

        ``arg`` may be None (start with empty statistics), the name of a
        file written with marshal, or an object providing create_stats()
        and a ``stats`` attribute (whose ``stats`` is taken over and reset
        to an empty dict).

        Raises TypeError when no statistics could be obtained from a
        non-None ``arg``.
        """
        if arg is None:
            # An empty object is a valid starting point for add(); before
            # this early return the check at the bottom rejected it.
            self.stats = {}
            return
        if not arg:
            # Other falsy values (e.g. "") are not usable sources; fall
            # through to the TypeError below.
            self.stats = {}
        elif isinstance(arg, basestring):
            # Close the file even when the marshal data is corrupt.
            with open(arg, 'rb') as f:
                self.stats = marshal.load(f)
            try:
                file_stats = os.stat(arg)
                arg = time.ctime(file_stats.st_mtime) + " " + arg
            except Exception:  # in case this is not unix
                pass
            self.files = [arg]
        elif hasattr(arg, 'create_stats'):
            arg.create_stats()
            self.stats = arg.stats
            arg.stats = {}
        if not self.stats:
            raise TypeError("Cannot create or construct a %r object from %r"
                            % (self.__class__, arg))
        return

    def get_top_level_stats(self):
        """Accumulate call counts, total time and the longest name length."""
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            self.total_calls += nc
            self.prim_calls += cc
            self.total_tt += tt
            if ("jprofile", 0, "profiler") in callers:
                self.top_level[func] = None
            name_len = len(func_std_string(func))
            if name_len > self.max_name_len:
                self.max_name_len = name_len

    def add(self, *arg_list):
        """Merge further profile sources into this object and return self.

        Each argument is either another Stats instance or anything the
        Stats constructor accepts.  Arguments are merged last-to-first.
        """
        # Iterating in reverse keeps the merge order of the former
        # recursive implementation without one stack frame per argument.
        for other in reversed(arg_list):
            if type(self) != type(other) or self.__class__ != other.__class__:
                other = Stats(other)
            self.files += other.files
            self.total_calls += other.total_calls
            self.prim_calls += other.prim_calls
            self.total_tt += other.total_tt
            for func in other.top_level:
                self.top_level[func] = None

            if self.max_name_len < other.max_name_len:
                self.max_name_len = other.max_name_len

            self.fcn_list = None  # any previous sort order is now stale

            for func, stat in other.stats.items():
                if func in self.stats:
                    old_func_stat = self.stats[func]
                else:
                    old_func_stat = (0, 0, 0, 0, {},)
                self.stats[func] = add_func_stats(old_func_stat, stat)
        return self

    def dump_stats(self, filename):
        """Write the profile data to a file we know how to load back."""
        with open(filename, 'wb') as f:
            marshal.dump(self.stats, f)

    # list the tuple indices and directions for sorting,
    # along with some printable description
    sort_arg_dict_default = {
        "calls"     : (((1,-1), ), "call count"),
        "ncalls"    : (((1,-1), ), "call count"),
        "cumtime"   : (((3,-1), ), "cumulative time"),
        "cumulative": (((3,-1), ), "cumulative time"),
        "file"      : (((4, 1), ), "file name"),
        "filename"  : (((4, 1), ), "file name"),
        "line"      : (((5, 1), ), "line number"),
        "module"    : (((4, 1), ), "file name"),
        "name"      : (((6, 1), ), "function name"),
        "nfl"       : (((6, 1),(4, 1),(5, 1),), "name/file/line"),
        "pcalls"    : (((0,-1), ), "primitive call count"),
        "stdname"   : (((7, 1), ), "standard name"),
        "time"      : (((2,-1), ), "internal time"),
        "tottime"   : (((2,-1), ), "internal time"),
        }

    def get_sort_arg_defs(self):
        """Expand all abbreviations that are unique.

        Returns a dict mapping every accepted sort keyword or abbreviation
        to its (sort tuple, description) entry.  The result is cached in
        self.sort_arg_dict.
        """
        if not self.sort_arg_dict:
            self.sort_arg_dict = abbrevs = {}
            ambiguous = set()
            defaults = self.sort_arg_dict_default
            for word, tup in defaults.items():
                fragment = word
                while fragment:
                    if fragment in abbrevs:
                        # Already claimed by another keyword.
                        ambiguous.add(fragment)
                        break
                    abbrevs[fragment] = tup
                    fragment = fragment[:-1]
            for fragment in ambiguous:
                del abbrevs[fragment]
            # A full keyword that is also a prefix of another keyword
            # ("file" / "filename") was just removed as ambiguous; exact
            # keywords must always be accepted.
            abbrevs.update(defaults)
        return self.sort_arg_dict

    def sort_stats(self, *field):
        """Order the function list by the given sort keys and return self.

        Keys are looked up in get_sort_arg_defs(); a single integer
        (-1, 0, 1 or 2) is accepted for compatibility with the old
        profiler.  With no keys the ordering is cleared.

        Raises KeyError for an unknown key.
        """
        if not field:
            self.fcn_list = 0
            return self
        if len(field) == 1 and isinstance(field[0], (int, long)):
            # Be compatible with old profiler
            field = [ {-1: "stdname",
                       0: "calls",
                       1: "time",
                       2: "cumulative"}[field[0]] ]

        sort_arg_defs = self.get_sort_arg_defs()
        sort_tuple = ()
        self.sort_type = ""
        connector = ""
        for word in field:
            sort_tuple = sort_tuple + sort_arg_defs[word][0]
            self.sort_type += connector + sort_arg_defs[word][1]
            connector = ", "

        # Row layout matches the indices used in sort_arg_dict_default:
        # (cc, nc, tt, ct, file, line, name, stdname, func)
        stats_list = []
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            stats_list.append((cc, nc, tt, ct) + func +
                              (func_std_string(func), func))

        stats_list.sort(key=cmp_to_key(TupleComp(sort_tuple).compare))

        self.fcn_list = [row[-1] for row in stats_list]
        return self

    def reverse_order(self):
        """Reverse the current sort order (if any) and return self."""
        if self.fcn_list:
            self.fcn_list.reverse()
        return self

    def strip_dirs(self):
        """Drop directory components from all file names and return self.

        Entries that become identical after stripping are merged.  Any
        previous sort order and callee table are discarded.
        """
        oldstats = self.stats
        self.stats = newstats = {}
        max_name_len = 0
        for func, (cc, nc, tt, ct, callers) in oldstats.items():
            newfunc = func_strip_path(func)
            name_len = len(func_std_string(newfunc))
            if name_len > max_name_len:
                max_name_len = name_len
            newcallers = {}
            for func2, caller in callers.items():
                newcallers[func_strip_path(func2)] = caller

            if newfunc in newstats:
                newstats[newfunc] = add_func_stats(
                                        newstats[newfunc],
                                        (cc, nc, tt, ct, newcallers))
            else:
                newstats[newfunc] = (cc, nc, tt, ct, newcallers)
        old_top = self.top_level
        self.top_level = new_top = {}
        for func in old_top:
            new_top[func_strip_path(func)] = None

        self.max_name_len = max_name_len

        self.fcn_list = None
        self.all_callees = None
        return self

    def calc_callees(self):
        """Build self.all_callees (caller -> {callee: data}) if not cached."""
        if self.all_callees: return
        self.all_callees = all_callees = {}
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            if func not in all_callees:
                all_callees[func] = {}
            for func2, caller in callers.items():
                if func2 not in all_callees:
                    all_callees[func2] = {}
                all_callees[func2][func] = caller
        return

    #******************************************************************
    # The following functions support actual printing of reports
    #******************************************************************

    # Optional "amount" is either a line count, or a percentage of lines.

    def eval_print_amount(self, sel, list, msg):
        """Apply one restriction ``sel`` to ``list``.

        ``sel`` is a regular expression string (keep matching entries), a
        float in [0.0, 1.0) (keep that fraction) or a non-negative integer
        smaller than the list length (keep that many).  Returns the
        (possibly shortened) list and ``msg`` extended with a note when
        the list was reduced or the regular expression was invalid.
        """
        new_list = list
        if isinstance(sel, basestring):
            try:
                rex = re.compile(sel)
            except re.error:
                msg += " <Invalid regular expression %r>\n" % sel
                return new_list, msg
            new_list = [func for func in list
                        if rex.search(func_std_string(func))]
        else:
            count = len(list)
            if isinstance(sel, float) and 0.0 <= sel < 1.0:
                count = int(count * sel + .5)
                new_list = list[:count]
            elif isinstance(sel, (int, long)) and 0 <= sel < count:
                count = sel
                new_list = list[:count]
        if len(list) != len(new_list):
            msg += " List reduced from %r to %r due to restriction <%r>\n" % (
                len(list), len(new_list), sel)

        return new_list, msg

    def get_print_list(self, sel_list):
        """Return (name column width, functions to print) for ``sel_list``.

        Prints the ordering/restriction message when anything is left to
        print; returns (0, []) otherwise.
        """
        width = self.max_name_len
        if self.fcn_list:
            stat_list = self.fcn_list[:]
            msg = " Ordered by: " + self.sort_type + '\n'
        else:
            stat_list = list(self.stats.keys())
            msg = " Random listing order was used\n"

        for selection in sel_list:
            stat_list, msg = self.eval_print_amount(selection, stat_list, msg)

        count = len(stat_list)

        if not stat_list:
            return 0, stat_list
        print >> self.stream, msg
        if count < len(self.stats):
            # Only a subset is shown: size the column for that subset.
            width = max(len(func_std_string(func)) for func in stat_list)
        return width+2, stat_list

    def print_stats(self, *amount):
        """Print the summary header and one line per selected function."""
        for filename in self.files:
            print >> self.stream, filename
        if self.files: print >> self.stream
        indent = ' ' * 8
        for func in self.top_level:
            print >> self.stream, indent, func_get_function_name(func)

        print >> self.stream, indent, self.total_calls, "function calls",
        if self.total_calls != self.prim_calls:
            print >> self.stream, "(%d primitive calls)" % self.prim_calls,
        print >> self.stream, "in %.3f seconds" % self.total_tt
        print >> self.stream
        width, funcs = self.get_print_list(amount)
        if funcs:
            self.print_title()
            for func in funcs:
                self.print_line(func)
            print >> self.stream
            print >> self.stream
        return self

    def print_callees(self, *amount):
        """Print, for each selected function, the functions it called."""
        width, funcs = self.get_print_list(amount)
        if funcs:
            self.calc_callees()

            self.print_call_heading(width, "called...")
            for func in funcs:
                if func in self.all_callees:
                    self.print_call_line(width, func, self.all_callees[func])
                else:
                    self.print_call_line(width, func, {})
            print >> self.stream
            print >> self.stream
        return self

    def print_callers(self, *amount):
        """Print, for each selected function, the functions that called it."""
        width, funcs = self.get_print_list(amount)
        if funcs:
            self.print_call_heading(width, "was called by...")
            for func in funcs:
                cc, nc, tt, ct, callers = self.stats[func]
                self.print_call_line(width, func, callers, "<-")
            print >> self.stream
            print >> self.stream
        return self

    def print_call_heading(self, name_size, column_title):
        """Print the heading of a callers/callees table."""
        print >> self.stream, "Function ".ljust(name_size) + column_title
        # print sub-header only if we have new-style callers
        subheader = False
        for cc, nc, tt, ct, callers in self.stats.values():
            if callers:
                value = next(iter(callers.values()))
                subheader = isinstance(value, tuple)
                break
        if subheader:
            print >> self.stream, " "*name_size + " ncalls tottime cumtime"

    def print_call_line(self, name_size, source, call_dict, arrow="->"):
        """Print ``source`` followed by one line per entry of ``call_dict``."""
        print >> self.stream, func_std_string(source).ljust(name_size) + arrow,
        if not call_dict:
            print >> self.stream
            return
        indent = ""  # first entry shares the line with the source name
        for func in sorted(call_dict):
            name = func_std_string(func)
            value = call_dict[func]
            if isinstance(value, tuple):
                # tuple-valued entry: per-edge (nc, cc, tt, ct)
                nc, cc, tt, ct = value
                if nc != cc:
                    substats = '%d/%d' % (nc, cc)
                else:
                    substats = '%d' % (nc,)
                substats = '%s %s %s %s' % (substats.rjust(7+2*len(indent)),
                                            f8(tt), f8(ct), name)
                left_width = name_size + 1
            else:
                # plain entry: the value is printed with the callee's
                # cumulative time taken from self.stats
                substats = '%s(%r) %s' % (name, value, f8(self.stats[func][3]))
                left_width = name_size + 3
            print >> self.stream, indent*left_width + substats
            indent = " "

    def print_title(self):
        """Print the column titles for print_stats()."""
        # NOTE(review): these titles are not padded to the 9/8-character
        # field widths used by print_line() -- confirm intended alignment.
        print >> self.stream, ' ncalls tottime percall cumtime percall',
        print >> self.stream, 'filename:lineno(function)'

    def print_line(self, func):  # hack : should print percentages
        """Print the statistics line for one function."""
        cc, nc, tt, ct, callers = self.stats[func]
        c = str(nc)
        if nc != cc:
            c = c + '/' + str(cc)
        print >> self.stream, c.rjust(9),
        print >> self.stream, f8(tt),
        if nc == 0:
            print >> self.stream, ' '*8,
        else:
            print >> self.stream, f8(float(tt)/nc),
        print >> self.stream, f8(ct),
        if cc == 0:
            print >> self.stream, ' '*8,
        else:
            print >> self.stream, f8(float(ct)/cc),
        print >> self.stream, func_std_string(func)

class TupleComp:
    """This class provides a generic function for comparing any two tuples.
    Each instance records a list of tuple-indices (from most significant
    to least significant), and sort direction (ascending or descending) for
    each tuple-index.  The compare functions can then be used as the function
    argument to the system sort() function when a list of tuples need to be
    sorted in the instance's order."""

    def __init__(self, comp_select_list):
        self.comp_select_list = comp_select_list

    def compare (self, left, right):
        """Return a negative, zero or positive number, cmp()-style."""
        for index, direction in self.comp_select_list:
            l = left[index]
            r = right[index]
            if l < r:
                return -direction
            if l > r:
                return direction
        return 0

#**************************************************************************
# func_name is a triple (file:string, line:int, name:string)

def func_strip_path(func_name):
    """Return ``func_name`` with the directory part of its file removed."""
    filename, line, name = func_name
    return os.path.basename(filename), line, name

def func_get_function_name(func):
    """Return the function-name component of a (file, line, name) triple."""
    return func[2]

def func_std_string(func_name): # match what old profile produced
    """Return the printable "file:line(name)" form of ``func_name``."""
    if func_name[:2] == ('~', 0):
        # special case for built-in functions
        name = func_name[2]
        if name.startswith('<') and name.endswith('>'):
            return '{%s}' % name[1:-1]
        else:
            return name
    else:
        return "%s:%d(%s)" % func_name

#**************************************************************************
# The following functions combine statistics for pairs of functions.
# The bulk of the processing involves correctly handling "call" lists,
# such as callers and callees.
#**************************************************************************

def add_func_stats(target, source):
    """Add together all the stats for two profile entries.

    Both arguments are (cc, nc, tt, ct, callers) tuples; the result is a
    new tuple with the numbers summed and the caller dicts merged.
    """
    cc, nc, tt, ct, callers = source
    t_cc, t_nc, t_tt, t_ct, t_callers = target
    return (cc+t_cc, nc+t_nc, tt+t_tt, ct+t_ct,
            add_callers(t_callers, callers))

def add_callers(target, source):
    """Combine two caller lists in a single list.

    Returns a new dict; neither argument is modified.
    """
    new_callers = dict(target)
    for func, caller in source.items():
        if func in new_callers:
            if isinstance(caller, tuple):
                # format used by cProfile
                new_callers[func] = tuple(
                    a + b for a, b in zip(caller, new_callers[func]))
            else:
                # format used by profile
                new_callers[func] += caller
        else:
            new_callers[func] = caller
    return new_callers

def count_calls(callers):
    """Sum the caller statistics to get total number of calls received."""
    return sum(callers.values())

#**************************************************************************
# The following functions support printing of reports
#**************************************************************************

def f8(x):
    """Format ``x`` as a fixed-point number, 8 wide with 3 decimals."""
    return "%8.3f" % x

#**************************************************************************
# Statistics browser added by ESR, April 2001
#**************************************************************************

if __name__ == '__main__':
    import cmd
    import sys
    try:
        import readline
    except ImportError:
        pass

    class ProfileBrowser(cmd.Cmd):
        """Interactive command loop for inspecting a profile statistics file."""

        def __init__(self, profile=None):
            cmd.Cmd.__init__(self)
            self.prompt = "% "
            self.stats = None
            self.stream = sys.stdout
            if profile is not None:
                self.do_read(profile)

        def generic(self, fn, line):
            """Parse ``line`` into ints, fractions and strings and pass them
            to the Stats method named ``fn``."""
            args = line.split()
            processed = []
            for term in args:
                try:
                    processed.append(int(term))
                    continue
                except ValueError:
                    pass
                try:
                    frac = float(term)
                    if frac > 1 or frac < 0:
                        print >> self.stream, "Fraction argument must be in [0, 1]"
                        continue
                    processed.append(frac)
                    continue
                except ValueError:
                    pass
                # neither int nor float: passed through as a string
                processed.append(term)
            if self.stats:
                getattr(self.stats, fn)(*processed)
            else:
                print >> self.stream, "No statistics object is loaded."
            return 0
        def generic_help(self):
            print >> self.stream, "Arguments may be:"
            print >> self.stream, "* An integer maximum number of entries to print."
            print >> self.stream, "* A decimal fractional number between 0 and 1, controlling"
            print >> self.stream, " what fraction of selected entries to print."
            print >> self.stream, "* A regular expression; only entries with function names"
            print >> self.stream, " that match it are printed."

        def do_add(self, line):
            if self.stats:
                self.stats.add(line)
            else:
                print >> self.stream, "No statistics object is loaded."
            return 0
        def help_add(self):
            print >> self.stream, "Add profile info from given file to current statistics object."

        def do_callees(self, line):
            return self.generic('print_callees', line)
        def help_callees(self):
            print >> self.stream, "Print callees statistics from the current stat object."
            self.generic_help()

        def do_callers(self, line):
            return self.generic('print_callers', line)
        def help_callers(self):
            print >> self.stream, "Print callers statistics from the current stat object."
            self.generic_help()

        def do_EOF(self, line):
            print >> self.stream, ""
            return 1
        def help_EOF(self):
            print >> self.stream, "Leave the profile browser."

        def do_quit(self, line):
            return 1
        def help_quit(self):
            print >> self.stream, "Leave the profile browser."

        def do_read(self, line):
            """Load statistics from the file named by ``line``; with no
            argument, reload the file named in the prompt."""
            if line:
                try:
                    self.stats = Stats(line)
                except IOError as err:
                    # Prefer the strerror slot, but a bare IOError("msg")
                    # carries a single argument -- do not index past it.
                    if len(err.args) > 1:
                        print >> self.stream, err.args[1]
                    else:
                        print >> self.stream, err
                    return
                except Exception as err:
                    print >> self.stream, err.__class__.__name__ + ':', err
                    return
                self.prompt = line + "% "
            elif len(self.prompt) > 2:
                # the prompt is "<file>% ": strip the suffix to get the name
                line = self.prompt[:-2]
                self.do_read(line)
            else:
                print >> self.stream, "No statistics object is current -- cannot reload."
            return 0
        def help_read(self):
            print >> self.stream, "Read in profile data from a specified file."
            print >> self.stream, "Without argument, reload the current file."

        def do_reverse(self, line):
            if self.stats:
                self.stats.reverse_order()
            else:
                print >> self.stream, "No statistics object is loaded."
            return 0
        def help_reverse(self):
            print >> self.stream, "Reverse the sort order of the profiling report."

        def do_sort(self, line):
            if not self.stats:
                print >> self.stream, "No statistics object is loaded."
                return
            abbrevs = self.stats.get_sort_arg_defs()
            if line and all((x in abbrevs) for x in line.split()):
                self.stats.sort_stats(*line.split())
            else:
                print >> self.stream, "Valid sort keys (unique prefixes are accepted):"
                for (key, value) in Stats.sort_arg_dict_default.items():
                    print >> self.stream, "%s -- %s" % (key, value[1])
            return 0
        def help_sort(self):
            print >> self.stream, "Sort profile data according to specified keys."
            print >> self.stream, "(Typing `sort' without arguments lists valid keys.)"
        def complete_sort(self, text, *args):
            return [a for a in Stats.sort_arg_dict_default if a.startswith(text)]

        def do_stats(self, line):
            return self.generic('print_stats', line)
        def help_stats(self):
            print >> self.stream, "Print statistics from the current stat object."
            self.generic_help()

        def do_strip(self, line):
            if self.stats:
                self.stats.strip_dirs()
            else:
                print >> self.stream, "No statistics object is loaded."
        def help_strip(self):
            print >> self.stream, "Strip leading path information from filenames in the report."

        def help_help(self):
            print >> self.stream, "Show help for a given command."

        def postcmd(self, stop, line):
            # Normalise any falsy "stop" value to None.
            if stop:
                return stop
            return None

    if len(sys.argv) > 1:
        initprofile = sys.argv[1]
    else:
        initprofile = None
    try:
        browser = ProfileBrowser(initprofile)
        print >> browser.stream, "Welcome to the profile statistics browser."
        browser.cmdloop()
        print >> browser.stream, "Goodbye."
    except KeyboardInterrupt:
        pass

# That's all, folks.