/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                       bbcc.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call tracing.

   Copyright (C) 2002-2010, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "global.h"
#include "costs.h"

#include <pub_tool_threadstate.h>

/*------------------------------------------------------------*/
/*--- BBCC operations                                      ---*/
/*------------------------------------------------------------*/

#define N_BBCC_INITIAL_ENTRIES  10437

/* BBCC table (key is BB/Context), per thread, resizable */
bbcc_hash current_bbccs;

void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
{
  Int i;

  CLG_ASSERT(bbccs != 0);

  bbccs->size    = N_BBCC_INITIAL_ENTRIES;
  bbccs->entries = 0;
  bbccs->table   = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1",
                                       bbccs->size * sizeof(BBCC*));

  for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
}

void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
{
  CLG_ASSERT(dst != 0);

  dst->size    = current_bbccs.size;
  dst->entries = current_bbccs.entries;
  dst->table   = current_bbccs.table;
}

bbcc_hash* CLG_(get_current_bbcc_hash)()
{
  return &current_bbccs;
}

void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
{
  CLG_ASSERT(h != 0);

  current_bbccs.size    = h->size;
  current_bbccs.entries = h->entries;
  current_bbccs.table   = h->table;
}

/*
 * Zero all costs of a BBCC
 */
void CLG_(zero_bbcc)(BBCC* bbcc)
{
  Int i;
  jCC* jcc;

  CLG_ASSERT(bbcc->cxt != 0);
  CLG_DEBUG(1, "  zero_bbcc: BB %#lx, Cxt %d "
            "(fn '%s', rec %d)\n",
            bb_addr(bbcc->bb),
            bbcc->cxt->base_number + bbcc->rec_index,
            bbcc->cxt->fn[0]->name,
            bbcc->rec_index);

  if ((bbcc->ecounter_sum == 0) &&
      (bbcc->ret_counter == 0)) return;

  for (i = 0; i < bbcc->bb->cost_count; i++)
    bbcc->cost[i] = 0;
  for (i = 0; i <= bbcc->bb->cjmp_count; i++) {
    bbcc->jmp[i].ecounter = 0;
    for (jcc = bbcc->jmp[i].jcc_list; jcc; jcc = jcc->next_from)
      CLG_(init_cost)( CLG_(sets).full, jcc->cost );
  }
  bbcc->ecounter_sum = 0;
  bbcc->ret_counter  = 0;
}


void CLG_(forall_bbccs)(void (*func)(BBCC*))
{
  BBCC *bbcc, *bbcc2;
  int i, j;

  for (i = 0; i < current_bbccs.size; i++) {
    if ((bbcc = current_bbccs.table[i]) == NULL) continue;
    while (bbcc) {
      /* every bbcc should have a rec_array */
      CLG_ASSERT(bbcc->rec_array != 0);

      for (j = 0; j < bbcc->cxt->fn[0]->separate_recursions; j++) {
        if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;

        (*func)(bbcc2);
      }
      bbcc = bbcc->next;
    }
  }
}
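/* Illustrative use of the iterator above (not part of this file): the
 * dump/reset code can zero all cost counters by walking every BBCC at
 * every recursion level, e.g.
 *
 *   CLG_(forall_bbccs)( CLG_(zero_bbcc) );
 *
 * Any function taking a BBCC* works as the callback. */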
/* All BBCCs for recursion level 0 are inserted into a
 * thread-specific hash table with key
 * - address of BB structure (unique, as never freed)
 * - current context (includes caller chain)
 * BBCCs for other recursion levels are in bbcc->rec_array.
 *
 * The hash is used in setup_bbcc(), i.e. to find the cost
 * counters to be changed in the execution of a BB.
 */

static __inline__
UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
{
  CLG_ASSERT(bb != 0);
  CLG_ASSERT(cxt != 0);

  return ((Addr)bb + (Addr)cxt) % size;
}


/* Look up a BBCC in the hash table.
 */
static
BBCC* lookup_bbcc(BB* bb, Context* cxt)
{
  BBCC* bbcc = bb->last_bbcc;
  UInt  idx;

  /* check LRU */
  if (bbcc->cxt == cxt) {
    if (!CLG_(clo).separate_threads) {
      /* if we do not dump threads separately, tid does not have to match */
      return bbcc;
    }
    if (bbcc->tid == CLG_(current_tid)) return bbcc;
  }

  CLG_(stat).bbcc_lru_misses++;

  idx  = bbcc_hash_idx(bb, cxt, current_bbccs.size);
  bbcc = current_bbccs.table[idx];
  while (bbcc &&
         (bb  != bbcc->bb ||
          cxt != bbcc->cxt)) {
    bbcc = bbcc->next;
  }

  CLG_DEBUG(2, "  lookup_bbcc(BB %#lx, Cxt %d, fn '%s'): %p (tid %d)\n",
            bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
            bbcc, bbcc ? bbcc->tid : 0);

  CLG_DEBUGIF(2)
    if (bbcc) CLG_(print_bbcc)(-2, bbcc);

  return bbcc;
}


/* double size of hash table 1 (addr->BBCC) */
static void resize_bbcc_hash(void)
{
  Int    i, new_size, conflicts1 = 0, conflicts2 = 0;
  BBCC** new_table;
  UInt   new_idx;
  BBCC  *curr_BBCC, *next_BBCC;

  new_size  = 2 * current_bbccs.size + 3;
  new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1",
                                  new_size * sizeof(BBCC*));

  if (!new_table) return;

  for (i = 0; i < new_size; i++)
    new_table[i] = NULL;

  for (i = 0; i < current_bbccs.size; i++) {
    if (current_bbccs.table[i] == NULL) continue;

    curr_BBCC = current_bbccs.table[i];
    while (NULL != curr_BBCC) {
      next_BBCC = curr_BBCC->next;

      new_idx = bbcc_hash_idx(curr_BBCC->bb,
                              curr_BBCC->cxt,
                              new_size);

      curr_BBCC->next    = new_table[new_idx];
      new_table[new_idx] = curr_BBCC;
      if (curr_BBCC->next) {
        conflicts1++;
        if (curr_BBCC->next->next)
          conflicts2++;
      }

      curr_BBCC = next_BBCC;
    }
  }

  VG_(free)(current_bbccs.table);

  CLG_DEBUG(0, "Resize BBCC Hash: %d => %d (entries %d, conflicts %d/%d)\n",
            current_bbccs.size, new_size,
            current_bbccs.entries, conflicts1, conflicts2);

  current_bbccs.size  = new_size;
  current_bbccs.table = new_table;
  CLG_(stat).bbcc_hash_resizes++;
}
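/* A note on the growth policy (the author's rationale is not spelled out;
 * this is our reading): new_size = 2*size+3 keeps the table size odd,
 * which matters because bbcc_hash_idx() reduces a sum of two pointers
 * modulo the size. BB and Context structures are word-aligned, so an
 * even modulus would pin the low index bit(s) and cluster entries.
 * Starting from N_BBCC_INITIAL_ENTRIES the sizes run
 * 10437 -> 20877 -> 41757 -> ... */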
static __inline__
BBCC** new_recursion(int size)
{
  BBCC** bbccs;
  int i;

  bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size);
  for (i = 0; i < size; i++)
    bbccs[i] = 0;

  CLG_DEBUG(3, "  new_recursion(size %d): %p\n", size, bbccs);

  return bbccs;
}


/*
 * Allocate a new BBCC
 *
 * Uninitialized:
 * cxt, rec_index, rec_array, next_bbcc, next1, next2
 */
static __inline__
BBCC* new_bbcc(BB* bb)
{
  BBCC* bbcc;
  Int   i;

  /* We need cjmp_count+1 JmpData structs:
   * the last is for the unconditional jump/call/ret at end of BB
   */
  bbcc = (BBCC*) CLG_MALLOC("cl.bbcc.nb.1",
                            sizeof(BBCC) +
                            (bb->cjmp_count+1) * sizeof(JmpData));
  bbcc->bb  = bb;
  bbcc->tid = CLG_(current_tid);

  bbcc->ret_counter = 0;
  bbcc->skipped     = 0;
  bbcc->cost = CLG_(get_costarray)(bb->cost_count);
  for (i = 0; i < bb->cost_count; i++)
    bbcc->cost[i] = 0;
  for (i = 0; i <= bb->cjmp_count; i++) {
    bbcc->jmp[i].ecounter = 0;
    bbcc->jmp[i].jcc_list = 0;
  }
  bbcc->ecounter_sum = 0;

  /* Init pointer caches (LRU) */
  bbcc->lru_next_bbcc = 0;
  bbcc->lru_from_jcc  = 0;
  bbcc->lru_to_jcc    = 0;

  CLG_(stat).distinct_bbccs++;

  CLG_DEBUG(3, "  new_bbcc(BB %#lx): %p (now %d)\n",
            bb_addr(bb), bbcc, CLG_(stat).distinct_bbccs);

  return bbcc;
}


/**
 * Inserts a new BBCC into the hash table.
 * The BBCC items used for the hash key must already be set:
 *   fn   : current function
 *   tid  : current thread ID
 *   from : position where current function is called from
 *
 * The recursion level does not need to be set, as it is not part of
 * the hash key: only BBCCs with recursion level 0 are in the hash.
 */
static
void insert_bbcc_into_hash(BBCC* bbcc)
{
  UInt idx;

  CLG_ASSERT(bbcc->cxt != 0);

  CLG_DEBUG(3, "+ insert_bbcc_into_hash(BB %#lx, fn '%s')\n",
            bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);

  /* check fill degree of hash and resize if needed (>90%) */
  current_bbccs.entries++;
  if (100 * current_bbccs.entries / current_bbccs.size > 90)
    resize_bbcc_hash();

  idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
  bbcc->next = current_bbccs.table[idx];
  current_bbccs.table[idx] = bbcc;

  CLG_DEBUG(3, "- insert_bbcc_into_hash: %d entries\n",
            current_bbccs.entries);
}

static Char* mangled_cxt(Context* cxt, int rec_index)
{
  static Char mangled[FN_NAME_LEN];
  int i, p;

  if (!cxt) return "(no context)";

  p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
  if (rec_index > 0)
    p += VG_(sprintf)(mangled+p, "'%d", rec_index + 1);
  for (i = 1; i < cxt->size; i++)
    p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);

  return mangled;
}
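/* Example of the mangling above (illustrative): for a context with call
 * chain fn[] = { "foo", "main" } and rec_index 1, mangled_cxt() yields
 * "foo'2'main" - the recursion level is appended as 'N (printed 1-based,
 * so rec_index 1 becomes '2), followed by the callers as 'name. */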
/* Create a new BBCC as a copy of an existing one,
 * but with costs set to 0 and jcc chains empty.
 *
 * This is needed when a BB is executed in a different context
 * from the one seen at instrumentation time of the BB.
 *
 * Use cases:
 *  rec_index == 0: clone from a BBCC with differing tid/cxt
 *                  and insert into hashes
 *  rec_index > 0 : clone from a BBCC with same tid/cxt and rec_index 0;
 *                  do not insert into hashes
 */
static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
{
  BBCC* bbcc;

  CLG_DEBUG(3, "+ clone_bbcc(BB %#lx, rec %d, fn %s)\n",
            bb_addr(orig->bb), rec_index, cxt->fn[0]->name);

  bbcc = new_bbcc(orig->bb);

  if (rec_index == 0) {

    /* hash insertion is only allowed if tid or cxt is different */
    CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
               (orig->cxt != cxt));

    bbcc->rec_index = 0;
    bbcc->cxt = cxt;
    bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
    bbcc->rec_array[0] = bbcc;

    insert_bbcc_into_hash(bbcc);
  }
  else {
    if (CLG_(clo).separate_threads)
      CLG_ASSERT(orig->tid == CLG_(current_tid));

    CLG_ASSERT(orig->cxt == cxt);
    CLG_ASSERT(orig->rec_array);
    CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
    CLG_ASSERT(orig->rec_array[rec_index] == 0);

    /* new BBCC will only have differing recursion level */
    bbcc->rec_index = rec_index;
    bbcc->cxt = cxt;
    bbcc->rec_array = orig->rec_array;
    bbcc->rec_array[rec_index] = bbcc;
  }

  /* update list of BBCCs for same BB */
  bbcc->next_bbcc = orig->bb->bbcc_list;
  orig->bb->bbcc_list = bbcc;

  CLG_DEBUGIF(3)
    CLG_(print_bbcc)(-2, bbcc);

  CLG_DEBUG(2, "- clone_BBCC(%p, %d) for BB %#lx\n"
            "   orig %s\n"
            "   new  %s\n",
            orig, rec_index, bb_addr(orig->bb),
            mangled_cxt(orig->cxt, orig->rec_index),
            mangled_cxt(bbcc->cxt, bbcc->rec_index));

  CLG_(stat).bbcc_clones++;

  return bbcc;
}


/* Get a pointer to the cost centre structure for a given basic block.
 * If no BBCC exists yet, a new one is allocated and put at the head of
 * the BB's bbcc_list; its context is still unset at this point, and it
 * is only inserted into the BBCC hash once setup_bbcc() assigns one.
 */
BBCC* CLG_(get_bbcc)(BB* bb)
{
  BBCC* bbcc;

  CLG_DEBUG(3, "+ get_bbcc(BB %#lx)\n", bb_addr(bb));

  bbcc = bb->bbcc_list;

  if (!bbcc) {
    bbcc = new_bbcc(bb);

    /* initialize BBCC */
    bbcc->cxt       = 0;
    bbcc->rec_array = 0;
    bbcc->rec_index = 0;

    bbcc->next_bbcc = bb->bbcc_list;
    bb->bbcc_list   = bbcc;
    bb->last_bbcc   = bbcc;

    CLG_DEBUGIF(3)
      CLG_(print_bbcc)(-2, bbcc);
  }

  CLG_DEBUG(3, "- get_bbcc(BB %#lx): BBCC %p\n",
            bb_addr(bb), bbcc);

  return bbcc;
}
/* Callgrind manages its own call stack for each thread.
 * When leaving a function, an underflow can happen if
 * Callgrind's tracing was switched on in the middle of
 * a run, i.e. when Callgrind was not able to trace the
 * call instruction.
 * This function tries to reconstruct the original call.
 * As we know the return address (the address following
 * the CALL instruction), we can detect the function
 * we return to, but the original call site is unknown.
 * We assume a call site at return address - 1.
 * (TODO: other heuristic: lookup info of instrumented BBs).
 */
static void handleUnderflow(BB* bb)
{
  /* RET at top of call stack */
  BBCC* source_bbcc;
  BB* source_bb;
  Bool seen_before;
  fn_node* caller;
  int fn_number, *pactive;
  call_entry* call_entry_up;

  CLG_DEBUG(1, "  Callstack underflow !\n");

  /* we emulate an old call from the function we return to
   * by using (<return address> - 1) */
  source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
  source_bbcc = CLG_(get_bbcc)(source_bb);

  /* seen_before can be true if RET from a signal handler */
  if (!seen_before) {
    source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
  }
  else if (CLG_(current_state).collect)
    source_bbcc->ecounter_sum++;

  /* Force a new top context, will be set active by push_cxt() */
  CLG_(current_fn_stack).top--;
  CLG_(current_state).cxt = 0;
  caller = CLG_(get_fn_node)(bb);
  CLG_(push_cxt)( caller );

  if (!seen_before) {
    /* set rec array for source BBCC: this is at rec level 1 */
    source_bbcc->rec_array = new_recursion(caller->separate_recursions);
    source_bbcc->rec_array[0] = source_bbcc;

    CLG_ASSERT(source_bbcc->cxt == 0);
    source_bbcc->cxt = CLG_(current_state).cxt;
    insert_bbcc_into_hash(source_bbcc);
  }
  CLG_ASSERT(CLG_(current_state).bbcc);

  /* correct active counts */
  fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
  pactive = CLG_(get_fn_entry)(fn_number);
  (*pactive)--;

  /* This assertion is not correct for reentrant
   * signal handlers */
  /* CLG_ASSERT(*pactive == 0); */

  CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
  /* back to current context */
  CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
  CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
                        (Addr)-1, False);
  call_entry_up =
    &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp - 1]);
  /* assume this call has been lasting since the last dump, or, for a
   * signal handler, since its invocation */
  if (CLG_(current_state).sig == 0)
    CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
                     CLG_(get_current_thread)()->lastdump_cost );
  else
    CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
}
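/* A concrete way to hit the underflow path (illustrative): start with
 * "valgrind --tool=callgrind --instr-atstart=no prog", then switch
 * instrumentation on with "callgrind_control -i on" while prog is deep
 * inside some function. The eventual RET from that function has no
 * matching CALL on Callgrind's stack and ends up in handleUnderflow(). */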
/*
 * Helper function called at the start of each instrumented BB to set up
 * the pointer to the costs for the current thread/context/recursion level
 */

VG_REGPARM(1)
void CLG_(setup_bbcc)(BB* bb)
{
  BBCC *bbcc, *last_bbcc;
  Bool  call_emulation = False, delayed_push = False, skip = False;
  Addr sp;
  BB* last_bb;
  ThreadId tid;
  Int jmpkind, passed = 0, csp;
  Bool ret_without_call = False;
  Int popcount_on_return = 1;

  CLG_DEBUG(3, "+ setup_bbcc(BB %#lx)\n", bb_addr(bb));

  /* This is needed because thread switches cannot reliably be tracked
   * with the callback CLG_(run_thread) alone: we otherwise have no way
   * to get the thread ID after a signal handler returns.
   * This could be removed again if that bug is fixed in Valgrind.
   * This is in the hot path but hopefully not too costly.
   */
  tid = VG_(get_running_tid)();
#if 1
  CLG_(switch_thread)(tid);
#else
  CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
#endif

  sp = VG_(get_SP)(tid);
  last_bbcc = CLG_(current_state).bbcc;
  last_bb = last_bbcc ? last_bbcc->bb : 0;

  if (last_bb) {
    passed = CLG_(current_state).jmps_passed;
    CLG_ASSERT(passed <= last_bb->cjmp_count);
    if (passed == last_bb->cjmp_count) {
      jmpkind = last_bb->jmpkind;

      /* VEX gives a Boring jump kind even when execution simply
       * falls through to the next BB */
      if ((jmpkind == Ijk_Boring) &&
          (last_bb->offset + last_bb->instr_len == bb->offset))
        jmpkind = JmpNone;
    }
    else
      jmpkind = JmpCond;

    /* If we are in a function skipped in the call graph, do not
     * increment the execution counter: that would produce cost
     * (with simulation off), which would lead to this BB being
     * dumped even though it should be skipped.
     */
    if (CLG_(current_state).collect && !CLG_(current_state).nonskipped) {
      last_bbcc->ecounter_sum++;
      last_bbcc->jmp[passed].ecounter++;
      if (!CLG_(clo).simulate_cache) {
        /* update Ir cost */
        UInt instr_count = last_bb->jmp[passed].instr+1;
        CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
      }
    }

    CLG_DEBUGIF(4) {
      CLG_(print_execstate)(-2, &CLG_(current_state) );
      CLG_(print_bbcc_cost)(-2, last_bbcc);
    }
  }
  else {
    jmpkind = JmpNone;
  }
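  /* At this point jmpkind classifies how control left the last BB
   * (a summary, not from the original source):
   *   JmpNone - fall-through to the directly following address
   *   JmpCond - a conditional jump inside the last BB was taken
   *   Ijk_*   - the VEX jump kind of the BB end (Call, Ret, Boring, ...)
   * The code below may still rewrite this classification, e.g. turning
   * a RET without a matching call into Boring, or a cross-object jump
   * into a Call. */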
  /* Manipulate JmpKind if needed, only using BB specific info */

  csp = CLG_(current_call_stack).sp;

  /* A return not matching the top call in our callstack is a jump */
  if ( (jmpkind == Ijk_Ret) && (csp > 0)) {
    Int csp_up = csp-1;
    call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);

    /* We have a real return if
     * - the stack pointer (SP) left the current stack frame, or
     * - SP has the same value as when reaching the current function
     *   and the address of this BB is the return address of the last call
     *   (we even allow leaving multiple frames if the SP stays the
     *    same and we find a matching return address)
     * The latter condition is needed because on PPC, SP can stay
     * the same over CALL=b(c)l / RET=b(c)lr boundaries
     */
    if (sp < top_ce->sp) popcount_on_return = 0;
    else if (top_ce->sp == sp) {
      while(1) {
        if (top_ce->ret_addr == bb_addr(bb)) break;
        if (csp_up > 0) {
          csp_up--;
          top_ce = &(CLG_(current_call_stack).entry[csp_up]);
          if (top_ce->sp == sp) {
            popcount_on_return++;
            continue;
          }
        }
        popcount_on_return = 0;
        break;
      }
    }
    if (popcount_on_return == 0) {
      jmpkind = Ijk_Boring;
      ret_without_call = True;
    }
  }

  /* Should this jump be converted to call or pop/call ? */
  if (( jmpkind != Ijk_Ret) &&
      ( jmpkind != Ijk_Call) && last_bb) {

    /* We simulate a JMP/Cont to be a CALL if
     * - the jump is into another ELF object or section kind, or
     * - the jump is to the first instruction of a function (tail recursion)
     */
    if (ret_without_call ||
        /* This is for detection of optimized tail recursion.
         * On PPC, this is only detected as a call when going to another
         * function. The problem is that on PPC it can go wrong
         * more easily (no stack frame setup needed)
         */
#if defined(VGA_ppc32)
        (bb->is_entry && (last_bb->fn != bb->fn)) ||
#else
        bb->is_entry ||
#endif
        (last_bb->sect_kind != bb->sect_kind) ||
        (last_bb->obj->number != bb->obj->number)) {

      CLG_DEBUG(1, "  JMP: %s[%s] to %s[%s]%s!\n",
                last_bb->fn->name, last_bb->obj->name,
                bb->fn->name, bb->obj->name,
                ret_without_call ? " (RET w/o CALL)" : "");

      if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp > 0)) {

        call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);

        if (top_ce->jcc) {

          CLG_DEBUG(1, "  Pop on Jump!\n");

          /* change source for delayed push */
          CLG_(current_state).bbcc = top_ce->jcc->from;
          sp = top_ce->sp;
          CLG_(pop_call_stack)();
        }
        else {
          CLG_ASSERT(CLG_(current_state).nonskipped != 0);
        }
      }

      jmpkind = Ijk_Call;
      call_emulation = True;
    }
  }

  if (jmpkind == Ijk_Call)
    skip = CLG_(get_fn_node)(bb)->skip;

  CLG_DEBUGIF(1) {
    if (jmpkind == JmpCond)
      VG_(printf)("Conditional");
    else if (jmpkind == JmpNone)
      VG_(printf)("None");
    else
      ppIRJumpKind( jmpkind );

    VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
                last_bb ? bb_jmpaddr(last_bb) : 0,
                bb_addr(bb), sp);
  }

  /* Handle CALL/RET and update context to get correct BBCC */

  if (jmpkind == Ijk_Ret) {

    if ((csp == 0) ||
        ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
         ( *(CLG_(current_fn_stack).top-1) == 0)) ) {

      /* On an empty call stack or at a signal separation marker,
       * a RETURN generates a call stack underflow.
       */
      handleUnderflow(bb);
      CLG_(pop_call_stack)();
    }
    else {
      CLG_ASSERT(popcount_on_return > 0);
      CLG_(unwind_call_stack)(sp, popcount_on_return);
    }
  }
  else {
    Int unwind_count = CLG_(unwind_call_stack)(sp, 0);
    if (unwind_count > 0) {
      /* if unwinding was done, this actually is a return */
      jmpkind = Ijk_Ret;
    }

    if (jmpkind == Ijk_Call) {
      delayed_push = True;

      csp = CLG_(current_call_stack).sp;
      if (call_emulation && csp > 0)
        sp = CLG_(current_call_stack).entry[csp-1].sp;
    }
  }

  /* Change to new context if needed, taking delayed_push into account */
  if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
    CLG_(push_cxt)(CLG_(get_fn_node)(bb));
  }
  CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);

  /* If there is a freshly instrumented BBCC, assign current context */
  bbcc = CLG_(get_bbcc)(bb);
  if (bbcc->cxt == 0) {
    CLG_ASSERT(bbcc->rec_array == 0);

    bbcc->cxt = CLG_(current_state).cxt;
    bbcc->rec_array =
      new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
    bbcc->rec_array[0] = bbcc;

    insert_bbcc_into_hash(bbcc);
  }
  else {
    /* get BBCC with current context */

    /* first check LRU of last bbcc executed */

    if (last_bbcc) {
      bbcc = last_bbcc->lru_next_bbcc;
      if (bbcc &&
          ((bbcc->bb != bb) ||
           (bbcc->cxt != CLG_(current_state).cxt)))
        bbcc = 0;
    }
    else
      bbcc = 0;

    if (!bbcc)
      bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
    if (!bbcc)
      bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);

    bb->last_bbcc = bbcc;
  }

  /* save for fast lookup */
  if (last_bbcc)
    last_bbcc->lru_next_bbcc = bbcc;
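  /* The block below separates cost centres per recursion level when
   * requested for this function (--separate-recs, i.e.
   * separate_recursions > 1). Our reading of the scheme: the
   * per-function counter from CLG_(get_fn_entry)() tracks the number of
   * active invocations; recursion level N is stored at rec_array[N-1],
   * capped at separate_recursions. */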
  if ((*CLG_(current_fn_stack).top)->separate_recursions > 1) {
    UInt level, idx;
    fn_node* top = *(CLG_(current_fn_stack).top);

    level = *CLG_(get_fn_entry)(top->number);

    if (delayed_push && !skip) {
      if (CLG_(clo).skip_direct_recursion) {
        /* do not increment the recursion level if called from
         * the same function */
        if (!CLG_(current_state).bbcc ||
            (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0]))
          level++;
      }
      else level++;
    }
    if (level > top->separate_recursions)
      level = top->separate_recursions;

    if (level == 0) {
      /* can only happen if instrumentation was just switched on */
      level = 1;
      *CLG_(get_fn_entry)(top->number) = 1;
    }

    idx = level - 1;
    if (bbcc->rec_array[idx])
      bbcc = bbcc->rec_array[idx];
    else
      bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);

    CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
  }

  if (delayed_push) {
    if (!skip && CLG_(current_state).nonskipped) {
      /* a call from skipped to nonskipped */
      CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
    }
    CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
                          bbcc, sp, skip);
  }

  if (CLG_(clo).collect_jumps &&
      ((jmpkind == JmpCond) || (jmpkind == Ijk_Boring))) {

    /* Handle followed conditional jumps, i.e. trace arcs.
     * This uses JCC structures, too */

    jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
    CLG_ASSERT(jcc != 0);
    // Change from default, and check if already changed
    if (jcc->jmpkind == Ijk_Call)
      jcc->jmpkind = jmpkind;
    else {
      // FIXME: Why can this fail?
      // CLG_ASSERT(jcc->jmpkind == jmpkind);
    }

    jcc->call_counter++;
    if (jmpkind == JmpCond)
      CLG_(stat).jcnd_counter++;
    else
      CLG_(stat).jump_counter++;
  }

  CLG_(current_state).bbcc = bbcc;
  // needed for log_* handlers called in this BB
  CLG_(bb_base)   = bb->obj->offset + bb->offset;
  CLG_(cost_base) = bbcc->cost;

  CLG_DEBUGIF(1) {
    VG_(printf)("   ");
    CLG_(print_bbcc_fn)(bbcc);
    VG_(printf)("\n");
  }

  CLG_DEBUG(3, "- setup_bbcc (BB %#lx): Cost %p (Len %d), Instrs %d (Len %d)\n",
            bb_addr(bb), bbcc->cost, bb->cost_count,
            bb->instr_count, bb->instr_len);
  CLG_DEBUGIF(3)
    CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
  CLG_DEBUG(3, "\n");

  CLG_(stat).bb_executions++;
}