1 2 /*--------------------------------------------------------------------*/ 3 /*--- An xtree, tree of stacktraces with data m_xtree.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2016-2017 Philippe Waroquiers 11 12 This code generalises the XTree idea that was implemented in 13 the massif tool in Valgrind versions <= 3.12, which is 14 Copyright (C) 2005-2017 Nicholas Nethercote 15 njn (at) valgrind.org 16 17 The XTree implementation in this file is however implemented completely 18 differently. Some code has been re-used for the production of the 19 massif file header (e.g. FP_cmd function). 20 21 This program is free software; you can redistribute it and/or 22 modify it under the terms of the GNU General Public License as 23 published by the Free Software Foundation; either version 2 of the 24 License, or (at your option) any later version. 25 26 This program is distributed in the hope that it will be useful, but 27 WITHOUT ANY WARRANTY; without even the implied warranty of 28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 29 General Public License for more details. 30 31 You should have received a copy of the GNU General Public License 32 along with this program; if not, write to the Free Software 33 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 34 02111-1307, USA. 35 36 The GNU General Public License is contained in the file COPYING. 
37 */ 38 39 #include "pub_core_basics.h" 40 #include "pub_core_debuglog.h" 41 #include "pub_core_clientstate.h" 42 #include "pub_core_stacktrace.h" 43 #include "pub_core_execontext.h" 44 #include "pub_core_libcbase.h" 45 #include "pub_core_libcassert.h" 46 #include "pub_core_libcfile.h" 47 #include "pub_core_libcprint.h" 48 #include "pub_core_libcproc.h" 49 #include "pub_core_hashtable.h" 50 #include "pub_core_mallocfree.h" 51 #include "pub_core_options.h" 52 #include "pub_core_debuginfo.h" 53 #include "pub_core_deduppoolalloc.h" 54 #include "pub_core_xtree.h" /* self */ 55 56 #define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ? \ 57 VG_(dmsg)(__VA_ARGS__) \ 58 : 0) 59 60 /* Defines the relevant part of an ec. This is shared between an xt 61 and its snapshots (see XT_shared XArray of xec). */ 62 typedef 63 struct _xec { 64 ExeContext* ec; 65 UShort top; // The first ips of ec to take into account. 66 UShort n_ips_sel; // The nr of ips from top to take into account. 67 } 68 xec; 69 70 /* XT_shared maintains the information shared between an XT and all 71 its snapshots. */ 72 typedef 73 struct _XT_shared { 74 UWord nrRef; /* nr of XTrees referencing this shared memory. */ 75 76 Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */ 77 const HChar* cc; /* cost centre for alloc */ 78 Free_Fn_t free_fn; /* free fn */ 79 80 /* The data associated to each ec is stored in 2 arrays: 81 an xec array, shared between an xt and all its snapshots. 82 a data array, private to each XTree. 83 For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in 84 xec and data arrays where the ec information is located (this 85 indirection is used to avoid huge xec and data arrays, in 86 case an XTree contains data only for a small number of ec. 87 The offset in the xec and data array is used as xtree ec unique 88 id i.e. an xecu. */ 89 90 UInt d4ecu2xecu_sz; /* size of d4ecu2xecu (in nr of elements). */ 91 UInt* d4ecu2xecu; 92 93 /* ec information common to an xt and its snapshots. 
*/ 94 XArray* xec; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */ 95 96 /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1]. 97 See ips_order_cmp. */ 98 XArray* ips_order_xecu; 99 } XT_shared; 100 101 /* NO_OFFSET indicates in d4ecu2xecu there is no data (yet) for this ec 102 (with the index ecu/4). */ 103 #define NO_OFFSET 0xffffffff 104 105 static XT_shared* new_XT_shared (Alloc_Fn_t alloc_fn, 106 const HChar* cc, 107 void (*free_fn)(void*)) 108 { 109 XT_shared* shared; 110 111 vg_assert(alloc_fn); 112 vg_assert(cc); 113 vg_assert(free_fn); 114 shared = alloc_fn(cc, sizeof(*shared)); 115 shared->nrRef = 0; 116 shared->alloc_fn = alloc_fn; 117 shared->cc = cc; 118 shared->free_fn = free_fn; 119 120 shared->d4ecu2xecu_sz = 0; 121 shared->d4ecu2xecu = NULL; 122 shared->xec = VG_(newXA)(alloc_fn, cc, free_fn, sizeof(xec)); 123 shared->ips_order_xecu = NULL; // Allocated when needed. 124 125 return shared; 126 } 127 128 static void delete_XT_shared (XT_shared* shared) 129 { 130 vg_assert(shared->nrRef == 0); 131 shared->free_fn(shared->d4ecu2xecu); 132 VG_(deleteXA)(shared->xec); 133 if (shared->ips_order_xecu != NULL) 134 VG_(deleteXA)(shared->ips_order_xecu); 135 shared->free_fn(shared); 136 } 137 138 /* Compare 2 entries in ips_order_xecu by StackTrace elements. 139 In case stack traces are of different length, an 'absent' ips is 140 considered smaller than any other address. 
 */
// File-global used only during VG_(sortXA) of ips_order_xecu: the XArray
// comparison callback has no user-data argument, so the xec array being
// sorted against must be passed via this global. Not reentrant.
static XArray* xec_data_for_sort; // Needed to translate an xecu into an xec
static Int ips_order_cmp(const void* vleft, const void* vright)
{
   const Xecu left_xecu  = *(const Xecu*)vleft;
   const Xecu right_xecu = *(const Xecu*)vright;
   const xec* left  = VG_(indexXA)(xec_data_for_sort, left_xecu);
   const xec* right = VG_(indexXA)(xec_data_for_sort, right_xecu);
   // Each xec selects the sub-trace ips[top .. top+n_ips_sel-1] of its ec.
   const StackTrace left_ips  = VG_(get_ExeContext_StackTrace)(left->ec)
      + left->top;
   const StackTrace right_ips = VG_(get_ExeContext_StackTrace)(right->ec)
      + right->top;
   UInt i;

   const UInt c_n_ips_sel = left->n_ips_sel < right->n_ips_sel
      ? left->n_ips_sel : right->n_ips_sel;

   // First see if we have a difference on the common nr of ips selected
   for (i = 0; i < c_n_ips_sel; i++) {
      if (left_ips[i] == right_ips[i]) continue;
      if (left_ips[i] < right_ips[i]) return -1;
      return 1;
   }
   // Common part is equal => compare lengths.
   if (left->n_ips_sel < right->n_ips_sel) return -1;
   if (left->n_ips_sel > right->n_ips_sel) return 1;
   return 0;
}

// If needed, build or refresh shared->ips_order_xecu
// (lazily allocated; appended-to and re-sorted whenever shared->xec has
// grown since the last call).
static void ensure_ips_order_xecu_valid(XT_shared* shared)
{
   UInt i;
   UInt n_xecu;

   if (shared->ips_order_xecu == NULL) {
      shared->ips_order_xecu = VG_(newXA)(shared->alloc_fn, shared->cc,
                                          shared->free_fn, sizeof(Xecu));
      VG_(hintSizeXA)(shared->ips_order_xecu, VG_(sizeXA)(shared->xec));
      VG_(setCmpFnXA)(shared->ips_order_xecu, ips_order_cmp);
   }

   // Already up to date: one ordering entry per xec.
   if (VG_(sizeXA)(shared->xec) == VG_(sizeXA)(shared->ips_order_xecu))
      return;

   // Append the xecu of the newly arrived xec entries ...
   n_xecu = VG_(sizeXA)(shared->xec);
   for (i = VG_(sizeXA)(shared->ips_order_xecu); i < n_xecu; i++)
      VG_(addToXA)(shared->ips_order_xecu, &i);

   // ... and re-sort the whole array (cmp fn reads xec_data_for_sort).
   xec_data_for_sort = shared->xec;
   VG_(sortXA)(shared->ips_order_xecu);
}

/* Take a reference on shared. */
static void addRef_XT_shared (XT_shared* shared)
{
   shared->nrRef++;
}

/* Drop a reference on shared; deletes it when the count reaches 0.
   Returns the remaining reference count. */
static UWord release_XT_shared(XT_shared* shared)
{
   UWord nrRef;

   vg_assert(shared->nrRef > 0);
   nrRef = --shared->nrRef;
   if (nrRef == 0)
      delete_XT_shared(shared);
   return nrRef;
}


/* An XTree: per-tree data values plus a (possibly shared) description of
   the execution contexts the values are attached to. */
struct _XTree {
   Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */
   const HChar* cc;     /* cost centre for alloc */
   Free_Fn_t free_fn;   /* free fn */
   Word  dataSzB;       /* data size in bytes */
   XT_init_data_t init_data_fn;
   XT_add_data_t add_data_fn;
   XT_sub_data_t sub_data_fn;
   XT_filter_IPs_t filter_IPs_fn;

   XT_shared* shared;   /* ec information, shared with snapshots. */

   HChar* tmp_data; /* temporary buffer, to insert new elements. */
   XArray* data;    /* of elements of size dataSzB, indexed by xecu;
                       private to this XTree. */
};


XTree* VG_(XT_create) ( Alloc_Fn_t alloc_fn,
                        const HChar* cc,
                        Free_Fn_t free_fn,
                        Word dataSzB,
                        XT_init_data_t init_data_fn,
                        XT_add_data_t add_data_fn,
                        XT_sub_data_t sub_data_fn,
                        XT_filter_IPs_t filter_IPs_fn)
{
   XTree* xt;

   /* check user-supplied info .. */
   vg_assert(alloc_fn);
   vg_assert(free_fn);
   vg_assert(dataSzB >= 0);
   vg_assert(init_data_fn);
   vg_assert(add_data_fn);
   vg_assert(sub_data_fn);
   // Note: filter_IPs_fn may legitimately be NULL (no filtering).

   xt = alloc_fn(cc, sizeof(struct _XTree) );
   xt->alloc_fn = alloc_fn;
   xt->cc = cc;
   xt->free_fn = free_fn;
   xt->dataSzB = dataSzB;
   xt->init_data_fn = init_data_fn;
   xt->add_data_fn = add_data_fn;
   xt->sub_data_fn = sub_data_fn;
   xt->filter_IPs_fn = filter_IPs_fn;

   xt->shared = new_XT_shared(alloc_fn, cc, free_fn);
   addRef_XT_shared(xt->shared);
   xt->tmp_data = alloc_fn(cc, xt->dataSzB);
   xt->data = VG_(newXA)(alloc_fn, cc, free_fn, dataSzB);

   return xt;
}

/* Snapshot: shallow-copies the XTree descriptor, takes an extra reference
   on the shared ec information, and deep-copies the private data array. */
XTree* VG_(XT_snapshot)(XTree* xt)
{
   XTree* nxt;

   vg_assert(xt);

   nxt = xt->alloc_fn(xt->cc, sizeof(struct _XTree) );

   *nxt = *xt;
   addRef_XT_shared(nxt->shared);
   nxt->tmp_data = nxt->alloc_fn(nxt->cc, nxt->dataSzB);
   nxt->data = VG_(cloneXA)(nxt->cc, xt->data);

   return nxt;
}

/* Delete an XTree (original or snapshot); the shared part is freed only
   when the last referencing tree is deleted. */
void VG_(XT_delete) ( XTree* xt )
{
   vg_assert(xt);

   release_XT_shared(xt->shared);
   xt->free_fn(xt->tmp_data);
   VG_(deleteXA)(xt->data);
   xt->free_fn(xt);
}

/* Return the xecu for ec, creating the xec/data entries on first sight.
   The new data element is initialised via init_data_fn; the selected
   sub-trace is computed once here via filter_IPs_fn (if any). */
static Xecu find_or_insert (XTree* xt, ExeContext* ec)
{

   const UInt d4ecu = VG_(get_ECU_from_ExeContext)(ec) / 4;
   XT_shared* shared = xt->shared;

   /* First grow the d4ecu2xecu array if needed. */
   if (d4ecu >= shared->d4ecu2xecu_sz) {
      UInt old_sz = shared->d4ecu2xecu_sz;
      UInt new_sz = (3 * d4ecu) / 2;   // grow by 1.5x of the needed index

      if (new_sz < 1000)
         new_sz = 1000;
      shared->d4ecu2xecu = VG_(realloc)(xt->cc, shared->d4ecu2xecu,
                                        new_sz * sizeof(UInt));
      shared->d4ecu2xecu_sz = new_sz;
      // Mark all the newly created slots as 'no data yet'.
      for (UInt i = old_sz; i < new_sz; i++)
         shared->d4ecu2xecu[i] = NO_OFFSET;
   }

   if (shared->d4ecu2xecu[d4ecu] == NO_OFFSET) {
      xec xe;

      xe.ec = ec;
      if (xt->filter_IPs_fn == NULL) {
         xe.top = 0;
         xe.n_ips_sel = (UShort)VG_(get_ExeContext_n_ips)(xe.ec);
      } else {
         UInt top;
         UInt n_ips_sel = VG_(get_ExeContext_n_ips)(xe.ec);
         xt->filter_IPs_fn(VG_(get_ExeContext_StackTrace)(xe.ec), n_ips_sel,
                           &top, &n_ips_sel);
         xe.top = (UShort)top;
         xe.n_ips_sel = (UShort)n_ips_sel;
      }
      // New data element starts from the tool-defined 'zero' value.
      xt->init_data_fn(xt->tmp_data);
      // Both arrays grow in lockstep: the index returned by addToXA on
      // xt->data is the xecu for this ec.
      VG_(addToXA)(shared->xec, &xe);
      shared->d4ecu2xecu[d4ecu] = (UInt)VG_(addToXA)(xt->data, xt->tmp_data);
   }

   return shared->d4ecu2xecu[d4ecu];
}

/* Add value to the data attached to ec (created if needed); returns its
   xecu for faster subsequent updates via VG_(XT_add_to_xecu). */
Xecu VG_(XT_add_to_ec) (XTree* xt, ExeContext* ec, const void* value)
{
   Xecu xecu = find_or_insert(xt, ec);
   void* data = VG_(indexXA)(xt->data, xecu);

   xt->add_data_fn(data, value);
   return xecu;
}

/* Subtract value from the data attached to ec (created if needed). */
Xecu VG_(XT_sub_from_ec) (XTree* xt, ExeContext* ec, const void* value)
{
   Xecu xecu = find_or_insert(xt, ec);
   void* data = VG_(indexXA)(xt->data, xecu);

   xt->sub_data_fn(data, value);
   return xecu;
}

/* Add value to the data identified by an already-known xecu. */
void VG_(XT_add_to_xecu) (XTree* xt, Xecu xecu, const void* value)
{
   void* data = VG_(indexXA)(xt->data, xecu);
   xt->add_data_fn(data, value);
}

/* Subtract value from the data identified by an already-known xecu. */
void VG_(XT_sub_from_xecu) (XTree* xt, Xecu xecu, const void* value)
{
   void* data = VG_(indexXA)(xt->data, xecu);
   xt->sub_data_fn(data, value);
}

/* Nr of ips selected (after filtering) for the ec of this xecu. */
UInt VG_(XT_n_ips_sel) (XTree* xt, Xecu xecu)
{
   xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
   return (UInt)xe->n_ips_sel;
}

/* Return the full (unfiltered) ExeContext attached to this xecu. */
ExeContext* VG_(XT_get_ec_from_xecu) (XTree* xt, Xecu xecu)
{
   xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
   return xe->ec;
}

/* Open (create/truncate) the output file; reports an error and returns
   NULL on failure. */
static VgFile* xt_open (const HChar* outfilename)
{
   VgFile* fp;

   fp = VG_(fopen)(outfilename, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
                   VKI_S_IRUSR|VKI_S_IWUSR);
   if (fp == NULL) {
      VG_(message)(Vg_UserMsg,
                   "Error: can not open xtree output file `%s'\n",
                   outfilename);
   }
   return fp;
}

// Print to the local variable 'fp' (captured from the enclosing scope).
#define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })

// Print "cmd:" line.
static void FP_cmd(VgFile* fp)
{
   UInt i;

   FP("cmd: ");
   FP("%s", VG_(args_the_exename));
   for (i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
      HChar* arg = * (HChar**) VG_(indexXA)(VG_(args_for_client), i);
      FP(" %s", arg);
   }
   FP("\n");
}

/* ----------- Callgrind output ------------------------------------------- */

/* Output a callgrind format element in compressed format:
     "name=(pos)" or "name=(pos) value" (if value_new)
   or not compressed format: "name=value"
   VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
   name is the format element (e.g. fl, fn, cfi, cfn, ...).
   pos is the value dictionary position, used for compressed format.
   value_new is True if this is the first usage of value. */
static void FP_pos_str(VgFile* fp, const HChar* name, UInt pos,
                       const HChar* value, Bool value_new)
{
   if (!VG_(clo_xtree_compress_strings))
      FP("%s=%s\n", name, value);
   else if (value_new)
      FP("%s=(%d) %s\n", name, pos, value);
   else
      FP("%s=(%d)\n", name, pos);
}

/* Write the whole xt in callgrind format to outfilename.
   events : comma-separated event names for the file header.
   img_value : renders a data element as the cost string for one line;
   returning NULL skips the entry. */
void VG_(XT_callgrind_print)
     (XTree* xt,
      const HChar* outfilename,
      const HChar* events,
      const HChar* (*img_value)(const void* value))
{
   UInt n_xecu;
   XT_shared* shared = xt->shared;
   VgFile* fp = xt_open(outfilename);
   DedupPoolAlloc* fnname_ddpa;
   DedupPoolAlloc* filename_ddpa;
   HChar* filename_buf = NULL;    // grown on demand by CALLED_FLF
   UInt filename_buf_size = 0;
   const HChar* filename_dir;
   const HChar* filename_name;

   if (fp == NULL)
      return;

   // Dedup pools give each distinct fn/file string a stable number,
   // used as the dictionary position for the compressed format.
   fnname_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
                                 "XT_callgrind_print.fn", xt->free_fn);
   filename_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
                                   "XT_callgrind_print.fl", xt->free_fn);

   FP("# callgrind format\n");
   FP("version: 1\n");
   FP("creator: xtree-1\n");
   FP("pid: %d\n", VG_(getpid)());
   FP_cmd(fp);

   /* Currently, we only need/support line positions. */
   FP("\npositions:%s\n", " line");

   /* Produce one "event:" line for each event, and the "events:" line. */
   {
      HChar strtok_events[VG_(strlen)(events)+1];
      HChar* e;
      HChar* ssaveptr;
      HChar* p;

      VG_(strcpy)(strtok_events, events);
      for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
           e != NULL;
           e = VG_(strtok_r)(NULL, ",", &ssaveptr))
         FP("event: %s\n", e);
      FP("events:");
      // strtok_r consumed the buffer; re-copy before the second pass.
      VG_(strcpy)(strtok_events, events);
      for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
           e != NULL;
           e = VG_(strtok_r)(NULL, ",", &ssaveptr)) {
         // Keep only the short name: cut each event at its first ':'.
         p = e;
         while (*p) {
            if (*p == ':')
               *p = 0;
            p++;
         }
         FP(" %s", e);
      }
      FP("\n");
   }
   xt->init_data_fn(xt->tmp_data); // to compute totals

   n_xecu = VG_(sizeXA)(xt->data);
   vg_assert (n_xecu <= VG_(sizeXA)(shared->xec));
   for (Xecu xecu = 0; xecu < n_xecu; xecu++) {
      xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
      if (xe->n_ips_sel == 0)
         continue;

      const HChar* img = img_value(VG_(indexXA)(xt->data, xecu));

      // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
      // in the variables called_filename/called_linenum/called_fnname.
      // The booleans called_filename_new/called_fnname_new are set to True
      // the first time the called_filename/called_fnname are encountered.
      // The called_filename_nr/called_fnname_nr are numbers identifying
      // the strings called_filename/called_fnname.
      // NB: statement macro, not a function: it reads/writes locals of the
      // enclosing 'if (img)' block (ips, called_*, filename_buf, ...).
#define CALLED_FLF(n)                                                   \
      if ((n) < 0                                                       \
          || !VG_(get_filename_linenum)(ips[(n)],                       \
                                        &filename_name,                 \
                                        &filename_dir,                  \
                                        &called_linenum)) {             \
         filename_name = "UnknownFile???";                              \
         called_linenum = 0;                                            \
      }                                                                 \
      if ((n) < 0                                                       \
          || !VG_(get_fnname)(ips[(n)], &called_fnname)) {              \
         called_fnname = "UnknownFn???";                                \
      }                                                                 \
      {                                                                 \
         UInt needed_size = VG_(strlen)(filename_dir) + 1               \
            + VG_(strlen)(filename_name) + 1;                           \
         if (filename_buf_size < needed_size) {                         \
            filename_buf_size = needed_size;                            \
            filename_buf = VG_(realloc)(xt->cc, filename_buf,           \
                                        filename_buf_size);             \
         }                                                              \
      }                                                                 \
      VG_(strcpy)(filename_buf, filename_dir);                          \
      if (filename_buf[0] != '\0') {                                    \
         VG_(strcat)(filename_buf, "/");                                \
      }                                                                 \
      VG_(strcat)(filename_buf, filename_name);                         \
      called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa,          \
                                                filename_buf,           \
                                                &called_filename_new);  \
      called_filename = filename_buf;                                   \
      called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa,              \
                                              called_fnname,            \
                                              &called_fnname_new);

      /* Instead of unknown fnname ???, CALLED_FLF could use instead:
         VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
         but that creates a lot of (useless) nodes at least for
         valgrind self-hosting. */

      if (img) {
         const HChar* called_filename;
         UInt called_filename_nr;
         Bool called_filename_new; // True the first time we see this filename.
         const HChar* called_fnname;
         UInt called_fnname_nr;
         Bool called_fnname_new; // True the first time we see this fnname.
         UInt called_linenum;
         UInt prev_linenum;

         const Addr* ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
         // Walk the selected sub-trace from outermost caller (deepest
         // index) down to the innermost frame (index 0).
         Int ips_idx = xe->n_ips_sel - 1;

         if (0) {
            VG_(printf)("entry img %s\n", img);
            VG_(pp_ExeContext)(xe->ec);
            VG_(printf)("\n");
         }
         // Accumulate this entry into the running totals.
         xt->add_data_fn(xt->tmp_data, VG_(indexXA)(xt->data, xecu));
         CALLED_FLF(ips_idx);
         for (;
              ips_idx >= 0;
              ips_idx--) {
            FP_pos_str(fp, "fl", called_filename_nr,
                       called_filename, called_filename_new);
            FP_pos_str(fp, "fn", called_fnname_nr,
                       called_fnname, called_fnname_new);
            if (ips_idx == 0)
               FP("%d %s\n", called_linenum, img);
            else
               FP("%d\n", called_linenum); //no self cost.
            prev_linenum = called_linenum;
            if (ips_idx >= 1) {
               // Emit the call arc to the next (called) frame.
               CALLED_FLF(ips_idx-1);
               FP_pos_str(fp, "cfi", called_filename_nr,
                          called_filename, called_filename_new);
               FP_pos_str(fp, "cfn", called_fnname_nr,
                          called_fnname, called_fnname_new);
               // The cfi/cfn lines above already introduced the strings;
               // the fl/fn lines of the next iteration must not re-emit.
               called_filename_new = False;
               called_fnname_new = False;
               /* Giving a call count of 0 allows kcachegrind to hide the calls
                  column. A call count of 1 means kcachegrind would show in the
                  calls column the nr of stacktrace containing this arc, which
                  is very confusing. So, the less bad is to give a 0 call
                  count. */
               FP("calls=0 %d\n", called_linenum);
               FP("%d %s\n", prev_linenum, img);
            }
         }
         FP("\n");
      }
   }
   /* callgrind format is not really fully supporting (yet?) execution trees:
      in an execution tree, self and inclusive costs are identical, and
      cannot be added together.
      If no totals: line is given, callgrind_annotate calculates the addition
      of all costs, and so gives a wrong totals.
      Giving a totals: line solves this, but the user must give the option
      --inclusive=yes (kind of hack) to have all the functions given
      in the output file. */
   FP("totals: %s\n", img_value(xt->tmp_data));
   VG_(fclose)(fp);
   VG_(deleteDedupPA)(fnname_ddpa);
   VG_(deleteDedupPA)(filename_ddpa);
   VG_(free)(filename_buf);
}


/* ----------- Massif output ---------------------------------------------- */

/* For Massif output, some functions from the execontext are not output, a.o.
   the allocation functions at the top of the stack and the functions below
   main. So, the StackTrace of the execontexts in the xtree must be filtered.
   Ms_Ec defines the subset of the stacktrace relevant for the report. */
typedef
   struct {
      StackTrace ips; // ips and n_ips provides the subset of the xtree ec
      UInt n_ips;     // to use for a massif report.

      SizeT report_value; // The value to report for this stack trace.
   } Ms_Ec;

/* Ms_Group defines (at a certain depth) a group of ec context that
   have the same IPs at the given depth, and have the same 'parent'.
   total is the sum of the values of all group elements.
   A Ms_Group can also represent a set of ec contexts that do not
   have the same IP, but that have each a total which is below the
   significant size. Such a group has a NULL ms_ec, a zero group_ip.
   n_ec is the nr of insignificant ec that have been collected inside this
   insignificant group, and total is the sum of all non significant ec
   at the given depth. */
typedef
   struct {
      Ms_Ec* ms_ec; // The group consists in ms_ec[0 .. n_ec-1]
      Addr group_ip;
      UInt n_ec;
      SizeT total;
   } Ms_Group;

/* Compare 2 groups by total, to have bigger total first. */
static Int ms_group_revcmp_total(const void* vleft, const void* vright)
{
   const Ms_Group* left = (const Ms_Group*)vleft;
   const Ms_Group* right = (const Ms_Group*)vright;

   // First reverse compare total
   if (left->total > right->total) return -1;
   if (left->total < right->total) return 1;

   /* Equal size => compare IPs.
652 This (somewhat?) helps to have deterministic test results. 653 If this would change between platforms, then we should compare 654 function names/filename/linenr */ 655 if (left->group_ip < right->group_ip) return -1; 656 if (left->group_ip > right->group_ip) return 1; 657 return 0; 658 } 659 660 /* Scan the addresses in ms_ec at the given depth. 661 On return, 662 *groups points to an array of Ms_Group sorted by total. 663 *n_groups is the nr of groups 664 The caller is responsible to free the allocated group array. */ 665 static void ms_make_groups (UInt depth, Ms_Ec* ms_ec, UInt n_ec, SizeT sig_sz, 666 UInt* n_groups, Ms_Group** groups) 667 { 668 UInt i, g; 669 Addr cur_group_ip = 0; 670 671 *n_groups = 0; 672 673 /* Handle special case somewhat more efficiently */ 674 if (n_ec == 0) { 675 *groups = NULL; 676 return; 677 } 678 679 /* Compute how many groups we have. */ 680 for (i = 0; i < n_ec; i++) { 681 if (ms_ec[i].n_ips > depth 682 && (*n_groups == 0 || cur_group_ip != ms_ec[i].ips[depth])) { 683 (*n_groups)++; 684 cur_group_ip = ms_ec[i].ips[depth]; 685 } 686 } 687 688 /* make the group array. */ 689 *groups = VG_(malloc)("ms_make_groups", *n_groups * sizeof(Ms_Group)); 690 i = 0; 691 for (g = 0; g < *n_groups; g++) { 692 while (ms_ec[i].n_ips <= depth) 693 i++; 694 cur_group_ip = ms_ec[i].ips[depth]; 695 (*groups)[g].group_ip = cur_group_ip; 696 (*groups)[g].ms_ec = &ms_ec[i]; 697 (*groups)[g].n_ec = 1; 698 (*groups)[g].total = ms_ec[i].report_value; 699 i++; 700 while (i < n_ec 701 && ms_ec[i].n_ips > depth 702 && cur_group_ip == ms_ec[i].ips[depth]) { 703 (*groups)[g].total += ms_ec[i].report_value; 704 i++; 705 (*groups)[g].n_ec++; 706 } 707 } 708 709 /* Search for insignificant groups, collect them all together 710 in the first insignificant group, and compact the group array. */ 711 { 712 UInt insig1; // Position of first insignificant group. 713 UInt n_insig = 0; // Nr of insignificant groups found. 
714 715 for (g = 0; g < *n_groups; g++) { 716 if ((*groups)[g].total < sig_sz) { 717 if (n_insig == 0) { 718 // First insig group => transform it into the special group 719 (*groups)[g].ms_ec = NULL; 720 (*groups)[g].group_ip = 0; 721 (*groups)[g].n_ec = 0; 722 // start the sum of insig total as total 723 insig1 = g; 724 } else { 725 // Add this insig group total into insig1 first group 726 (*groups)[insig1].total += (*groups)[g].total; 727 } 728 n_insig++; 729 } else { 730 if (n_insig > 1) 731 (*groups)[g - n_insig + 1] = (*groups)[g]; 732 } 733 } 734 if (n_insig > 0) { 735 (*groups)[insig1].n_ec = n_insig; 736 *n_groups -= n_insig - 1; 737 } 738 DMSG(1, "depth %u n_groups %u n_insig %u\n", depth, *n_groups, n_insig); 739 } 740 741 /* Sort on total size, bigger size first. */ 742 VG_(ssort)(*groups, *n_groups, sizeof(Ms_Group), ms_group_revcmp_total); 743 } 744 745 static void ms_output_group (VgFile* fp, UInt depth, Ms_Group* group, 746 SizeT sig_sz, double sig_pct_threshold) 747 { 748 UInt i; 749 Ms_Group* groups; 750 UInt n_groups; 751 752 // If this is an insignificant group, handle it specially 753 if (group->ms_ec == NULL) { 754 const HChar* s = ( 1 == group->n_ec? "," : "s, all" ); 755 vg_assert(group->group_ip == 0); 756 FP("%*sn0: %lu in %d place%s below massif's threshold (%.2f%%)\n", 757 depth+1, "", group->total, group->n_ec, s, sig_pct_threshold); 758 return; 759 } 760 761 // Normal group => output the group and its subgroups. 762 ms_make_groups(depth+1, group->ms_ec, group->n_ec, sig_sz, 763 &n_groups, &groups); 764 765 FP("%*s" "n%u: %ld %s\n", 766 depth + 1, "", 767 n_groups, 768 group->total, 769 VG_(describe_IP)(group->ms_ec->ips[depth] - 1, NULL)); 770 /* XTREE??? Massif original code removes 1 to get the IP description. 
I am 771 wondering if this is not something that predates revision r8818, 772 which introduced a -1 in the stack unwind (see m_stacktrace.c) 773 Kept for the moment to allow exact comparison with massif output, but 774 probably we should remove this, as we very probably end up 2 bytes before 775 the RA Return Address. */ 776 777 /* Output sub groups of this group. */ 778 for (i = 0; i < n_groups; i++) 779 ms_output_group(fp, depth+1, &groups[i], sig_sz, sig_pct_threshold); 780 781 VG_(free)(groups); 782 } 783 784 /* Allocate and build an array of Ms_Ec sorted by addresses in the 785 Ms_Ec StackTrace. */ 786 static void prepare_ms_ec (XTree* xt, 787 ULong (*report_value)(const void* value), 788 ULong* top_total, Ms_Ec** vms_ec, UInt* vn_ec) 789 { 790 XT_shared* shared = xt->shared; 791 const UInt n_xecu = VG_(sizeXA)(shared->xec); 792 const UInt n_data_xecu = VG_(sizeXA)(xt->data); 793 Ms_Ec* ms_ec = VG_(malloc)("XT_massif_print.ms_ec", n_xecu * sizeof(Ms_Ec)); 794 UInt n_xecu_sel = 0; // Nr of xecu that are selected for output. 795 796 vg_assert(n_data_xecu <= n_xecu); 797 798 // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace. 799 ensure_ips_order_xecu_valid(shared); 800 801 *top_total = 0; 802 DMSG(1, "iteration %u\n", n_xecu); 803 for (UInt i = 0; i < n_xecu; i++) { 804 Xecu xecu = *(Xecu*)VG_(indexXA)(shared->ips_order_xecu, i); 805 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu); 806 807 if (xecu >= n_data_xecu) 808 continue; // No data for this xecu in xt->data. 
809 ms_ec[n_xecu_sel].n_ips = xe->n_ips_sel; 810 if (ms_ec[n_xecu_sel].n_ips == 0) 811 continue; 812 813 ms_ec[n_xecu_sel].ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top; 814 ms_ec[n_xecu_sel].report_value 815 = (*report_value)(VG_(indexXA)(xt->data, xecu)); 816 *top_total += ms_ec[n_xecu_sel].report_value; 817 818 n_xecu_sel++; 819 } 820 vg_assert(n_xecu_sel <= n_xecu); 821 822 *vms_ec = ms_ec; 823 *vn_ec = n_xecu_sel; 824 } 825 826 MsFile* VG_(XT_massif_open) 827 (const HChar* outfilename, 828 const HChar* desc, 829 const XArray* desc_args, 830 const HChar* time_unit) 831 { 832 UInt i; 833 VgFile* fp = xt_open(outfilename); 834 835 if (fp == NULL) 836 return NULL; // xt_open reported the error. 837 838 /* ------ file header ------------------------------- */ 839 FP("desc:"); 840 if (desc) 841 FP(" %s", desc); 842 i = 0; 843 if (desc_args) { 844 for (i = 0; i < VG_(sizeXA)(desc_args); i++) { 845 HChar* arg = *(HChar**)VG_(indexXA)(desc_args, i); 846 FP(" %s", arg); 847 } 848 } 849 if (0 == i && desc == NULL) FP(" (none)"); 850 FP("\n"); 851 852 FP_cmd(fp); 853 854 FP("time_unit: %s\n", time_unit); 855 856 return fp; 857 } 858 859 void VG_(XT_massif_close)(MsFile* fp) 860 { 861 if (fp == NULL) 862 return; // Error should have been reported by VG_(XT_massif_open) 863 864 VG_(fclose)(fp); 865 } 866 867 void VG_(XT_massif_print) 868 (MsFile* fp, 869 XTree* xt, 870 const Massif_Header* header, 871 ULong (*report_value)(const void* value)) 872 { 873 UInt i; 874 875 if (fp == NULL) 876 return; // Normally VG_(XT_massif_open) already reported an error. 877 878 /* Compute/prepare Snapshot totals/data/... */ 879 ULong top_total; 880 881 /* Following variables only used for detailed snapshot. */ 882 UInt n_ec = 0; 883 Ms_Ec* ms_ec = NULL; 884 const HChar* kind = 885 header->detailed ? (header->peak ? 
"peak" : "detailed") : "empty"; 886 887 DMSG(1, "XT_massif_print %s\n", kind); 888 if (header->detailed) { 889 /* Prepare the Ms_Ec sorted array of stacktraces and the groups 890 at level 0. */ 891 prepare_ms_ec(xt, report_value, &top_total, &ms_ec, &n_ec); 892 DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec); 893 } else if (xt == NULL) { 894 /* Non detailed, no xt => use the sz provided in the header. */ 895 top_total = header->sz_B; 896 } else { 897 /* For non detailed snapshot, compute total directly from the xec. */ 898 const XT_shared* shared = xt->shared; 899 const UInt n_xecu = VG_(sizeXA)(xt->data); 900 top_total = 0; 901 902 for (UInt xecu = 0; xecu < n_xecu; xecu++) { 903 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu); 904 if (xe->n_ips_sel == 0) 905 continue; 906 top_total += (*report_value)(VG_(indexXA)(xt->data, xecu)); 907 } 908 } 909 910 /* ------ snapshot header --------------------------- */ 911 FP("#-----------\n"); 912 FP("snapshot=%d\n", header->snapshot_n); 913 FP("#-----------\n"); 914 FP("time=%lld\n", header->time); 915 916 FP("mem_heap_B=%llu\n", top_total); // without extra_B and without stacks_B 917 FP("mem_heap_extra_B=%llu\n", header->extra_B); 918 FP("mem_stacks_B=%llu\n", header->stacks_B); 919 FP("heap_tree=%s\n", kind); 920 921 /* ------ detailed snapshot data ----------------------------- */ 922 if (header->detailed) { 923 UInt n_groups; 924 Ms_Group* groups; 925 926 ULong sig_sz; 927 // Work out how big a child must be to be significant. If the current 928 // top_total is zero, then we set it to 1, which means everything will be 929 // judged insignificant -- this is sensible, as there's no point showing 930 // any detail for this case. Unless they used threshold=0, in which 931 // case we show them everything because that's what they asked for. 932 // 933 // Nb: We do this once now, rather than once per child, because if we do 934 // that the cost of all the divisions adds up to something significant. 
935 if (0 == top_total && 0 != header->sig_threshold) 936 sig_sz = 1; 937 else 938 sig_sz = ((top_total + header->extra_B + header->stacks_B) 939 * header->sig_threshold) / 100; 940 941 /* Produce the groups at depth 0 */ 942 DMSG(1, "XT_massif_print producing depth 0 groups\n"); 943 ms_make_groups(0, ms_ec, n_ec, sig_sz, &n_groups, &groups); 944 945 /* Output the top node. */ 946 FP("n%u: %llu %s\n", n_groups, top_total, header->top_node_desc); 947 948 /* Output depth 0 groups. */ 949 DMSG(1, "XT_massif_print outputing %u depth 0 groups\n", n_groups); 950 for (i = 0; i < n_groups; i++) 951 ms_output_group(fp, 0, &groups[i], sig_sz, header->sig_threshold); 952 953 VG_(free)(groups); 954 VG_(free)(ms_ec); 955 } 956 } 957 958 Int VG_(XT_offset_main_or_below_main)(Addr* ips, Int n_ips) 959 { 960 /* Search for main or below main function. 961 To limit the nr of ips to examine, we maintain the deepest 962 offset where main was found, and we first search main 963 from there. 964 If no main is found, we will then do a search for main or 965 below main function till the top. */ 966 static Int deepest_main = 0; 967 Vg_FnNameKind kind = Vg_FnNameNormal; 968 Int mbm = n_ips - 1; // Position of deepest main or below main. 969 Vg_FnNameKind mbmkind = Vg_FnNameNormal; 970 Int i; 971 972 for (i = n_ips - 1 - deepest_main; 973 i < n_ips; 974 i++) { 975 mbmkind = VG_(get_fnname_kind_from_IP)(ips[i]); 976 if (mbmkind != Vg_FnNameNormal) { 977 mbm = i; 978 break; 979 } 980 } 981 982 /* Search for main or below main function till top. 
*/ 983 for (i = mbm - 1; 984 i >= 0 && mbmkind != Vg_FnNameMain; 985 i--) { 986 kind = VG_(get_fnname_kind_from_IP)(ips[i]); 987 if (kind != Vg_FnNameNormal) { 988 mbm = i; 989 mbmkind = kind; 990 } 991 } 992 if (Vg_FnNameMain == mbmkind || Vg_FnNameBelowMain == mbmkind) { 993 if (mbmkind == Vg_FnNameMain && (n_ips - 1 - mbm) > deepest_main) 994 deepest_main = n_ips - 1 - mbm; 995 return mbm; 996 } else 997 return n_ips-1; 998 } 999 1000 void VG_(XT_filter_1top_and_maybe_below_main) 1001 (Addr* ips, Int n_ips, 1002 UInt* top, UInt* n_ips_sel) 1003 { 1004 Int mbm; 1005 1006 *n_ips_sel = n_ips; 1007 if (n_ips == 0) { 1008 *top = 0; 1009 return; 1010 } 1011 1012 /* Filter top function. */ 1013 *top = 1; 1014 1015 if (VG_(clo_show_below_main)) 1016 mbm = n_ips - 1; 1017 else 1018 mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips); 1019 1020 *n_ips_sel = mbm - *top + 1; 1021 } 1022 1023 void VG_(XT_filter_maybe_below_main) 1024 (Addr* ips, Int n_ips, 1025 UInt* top, UInt* n_ips_sel) 1026 { 1027 Int mbm; 1028 1029 *n_ips_sel = n_ips; 1030 *top = 0; 1031 if (n_ips == 0) 1032 return; 1033 1034 if (VG_(clo_show_below_main)) 1035 mbm = n_ips - 1; 1036 else 1037 mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips); 1038 1039 *n_ips_sel = mbm - *top + 1; 1040 } 1041 1042 /*--------------------------------------------------------------------*/ 1043 /*--- end m_xtree.c ---*/ 1044 /*--------------------------------------------------------------------*/ 1045