1 /*--------------------------------------------------------------------*/ 2 /*--- Callgrind ---*/ 3 /*--- bbcc.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Callgrind, a Valgrind tool for call tracing. 8 9 Copyright (C) 2002-2013, Josef Weidendorfer (Josef.Weidendorfer (at) gmx.de) 10 11 This program is free software; you can redistribute it and/or 12 modify it under the terms of the GNU General Public License as 13 published by the Free Software Foundation; either version 2 of the 14 License, or (at your option) any later version. 15 16 This program is distributed in the hope that it will be useful, but 17 WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 General Public License for more details. 20 21 You should have received a copy of the GNU General Public License 22 along with this program; if not, write to the Free Software 23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 24 02111-1307, USA. 25 26 The GNU General Public License is contained in the file COPYING. 27 */ 28 29 #include "global.h" 30 #include "costs.h" 31 32 #include "pub_tool_threadstate.h" 33 34 /*------------------------------------------------------------*/ 35 /*--- BBCC operations ---*/ 36 /*------------------------------------------------------------*/ 37 38 #define N_BBCC_INITIAL_ENTRIES 10437 39 40 /* BBCC table (key is BB/Context), per thread, resizable */ 41 bbcc_hash current_bbccs; 42 43 void CLG_(init_bbcc_hash)(bbcc_hash* bbccs) 44 { 45 Int i; 46 47 CLG_ASSERT(bbccs != 0); 48 49 bbccs->size = N_BBCC_INITIAL_ENTRIES; 50 bbccs->entries = 0; 51 bbccs->table = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1", 52 bbccs->size * sizeof(BBCC*)); 53 54 for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL; 55 } 56 57 void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst) 58 { 59 CLG_ASSERT(dst != 0); 60 61 dst->size = current_bbccs.size; 62 dst->entries = current_bbccs.entries; 63 dst->table = current_bbccs.table; 64 } 65 66 bbcc_hash* CLG_(get_current_bbcc_hash)() 67 { 68 return ¤t_bbccs; 69 } 70 71 void CLG_(set_current_bbcc_hash)(bbcc_hash* h) 72 { 73 CLG_ASSERT(h != 0); 74 75 current_bbccs.size = h->size; 76 current_bbccs.entries = h->entries; 77 current_bbccs.table = h->table; 78 } 79 80 /* 81 * Zero all costs of a BBCC 82 */ 83 void CLG_(zero_bbcc)(BBCC* bbcc) 84 { 85 Int i; 86 jCC* jcc; 87 88 CLG_ASSERT(bbcc->cxt != 0); 89 CLG_DEBUG(1, " zero_bbcc: BB %#lx, Cxt %d " 90 "(fn '%s', rec %d)\n", 91 bb_addr(bbcc->bb), 92 bbcc->cxt->base_number + bbcc->rec_index, 93 bbcc->cxt->fn[0]->name, 94 bbcc->rec_index); 95 96 if ((bbcc->ecounter_sum ==0) && 97 (bbcc->ret_counter ==0)) return; 98 99 for(i=0;i<bbcc->bb->cost_count;i++) 100 bbcc->cost[i] = 0; 101 for(i=0;i <= bbcc->bb->cjmp_count;i++) { 102 bbcc->jmp[i].ecounter = 0; 103 for(jcc=bbcc->jmp[i].jcc_list; jcc; jcc=jcc->next_from) 104 CLG_(init_cost)( CLG_(sets).full, jcc->cost ); 105 } 106 bbcc->ecounter_sum = 0; 107 bbcc->ret_counter = 0; 108 } 109 110 111 112 void CLG_(forall_bbccs)(void (*func)(BBCC*)) 113 { 114 BBCC *bbcc, *bbcc2; 115 int i, j; 116 117 for (i = 0; i < current_bbccs.size; i++) { 118 if ((bbcc=current_bbccs.table[i]) == NULL) continue; 119 while (bbcc) { 120 /* every bbcc should have a rec_array */ 121 CLG_ASSERT(bbcc->rec_array != 0); 122 123 for(j=0;j<bbcc->cxt->fn[0]->separate_recursions;j++) { 124 if ((bbcc2 = bbcc->rec_array[j]) == 0) continue; 125 126 (*func)(bbcc2); 127 } 128 bbcc = bbcc->next; 129 } 130 } 131 } 132 133 134 /* All BBCCs for recursion level 0 are inserted into a 135 * thread specific hash table with key 136 * - address of BB structure (unique, as never freed) 137 * - current context (includes caller chain) 138 * BBCCs for other recursion levels are in bbcc->rec_array. 139 * 140 * The hash is used in setup_bb(), i.e. to find the cost 141 * counters to be changed in the execution of a BB. 142 */ 143 144 static __inline__ 145 UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size) 146 { 147 CLG_ASSERT(bb != 0); 148 CLG_ASSERT(cxt != 0); 149 150 return ((Addr)bb + (Addr)cxt) % size; 151 } 152 153 154 /* Lookup for a BBCC in hash. 155 */ 156 static 157 BBCC* lookup_bbcc(BB* bb, Context* cxt) 158 { 159 BBCC* bbcc = bb->last_bbcc; 160 UInt idx; 161 162 /* check LRU */ 163 if (bbcc->cxt == cxt) { 164 if (!CLG_(clo).separate_threads) { 165 /* if we don't dump threads separate, tid doesn't have to match */ 166 return bbcc; 167 } 168 if (bbcc->tid == CLG_(current_tid)) return bbcc; 169 } 170 171 CLG_(stat).bbcc_lru_misses++; 172 173 idx = bbcc_hash_idx(bb, cxt, current_bbccs.size); 174 bbcc = current_bbccs.table[idx]; 175 while (bbcc && 176 (bb != bbcc->bb || 177 cxt != bbcc->cxt)) { 178 bbcc = bbcc->next; 179 } 180 181 CLG_DEBUG(2," lookup_bbcc(BB %#lx, Cxt %d, fn '%s'): %p (tid %d)\n", 182 bb_addr(bb), cxt->base_number, cxt->fn[0]->name, 183 bbcc, bbcc ? bbcc->tid : 0); 184 185 CLG_DEBUGIF(2) 186 if (bbcc) CLG_(print_bbcc)(-2,bbcc); 187 188 return bbcc; 189 } 190 191 192 /* double size of hash table 1 (addr->BBCC) */ 193 static void resize_bbcc_hash(void) 194 { 195 Int i, new_size, conflicts1 = 0, conflicts2 = 0; 196 BBCC** new_table; 197 UInt new_idx; 198 BBCC *curr_BBCC, *next_BBCC; 199 200 new_size = 2*current_bbccs.size+3; 201 new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1", 202 new_size * sizeof(BBCC*)); 203 204 if (!new_table) return; 205 206 for (i = 0; i < new_size; i++) 207 new_table[i] = NULL; 208 209 for (i = 0; i < current_bbccs.size; i++) { 210 if (current_bbccs.table[i] == NULL) continue; 211 212 curr_BBCC = current_bbccs.table[i]; 213 while (NULL != curr_BBCC) { 214 next_BBCC = curr_BBCC->next; 215 216 new_idx = bbcc_hash_idx(curr_BBCC->bb, 217 curr_BBCC->cxt, 218 new_size); 219 220 curr_BBCC->next = new_table[new_idx]; 221 new_table[new_idx] = curr_BBCC; 222 if (curr_BBCC->next) { 223 conflicts1++; 224 if (curr_BBCC->next->next) 225 conflicts2++; 226 } 227 228 curr_BBCC = next_BBCC; 229 } 230 } 231 232 VG_(free)(current_bbccs.table); 233 234 235 CLG_DEBUG(0,"Resize BBCC Hash: %d => %d (entries %d, conflicts %d/%d)\n", 236 current_bbccs.size, new_size, 237 current_bbccs.entries, conflicts1, conflicts2); 238 239 current_bbccs.size = new_size; 240 current_bbccs.table = new_table; 241 CLG_(stat).bbcc_hash_resizes++; 242 } 243 244 245 static __inline 246 BBCC** new_recursion(int size) 247 { 248 BBCC** bbccs; 249 int i; 250 251 bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size); 252 for(i=0;i<size;i++) 253 bbccs[i] = 0; 254 255 CLG_DEBUG(3," new_recursion(size %d): %p\n", size, bbccs); 256 257 return bbccs; 258 } 259 260 261 /* 262 * Allocate a new BBCC 263 * 264 * Uninitialized: 265 * cxt, rec_index, rec_array, next_bbcc, next1, next2 266 */ 267 static __inline__ 268 BBCC* new_bbcc(BB* bb) 269 { 270 BBCC* bbcc; 271 Int i; 272 273 /* We need cjmp_count+1 JmpData structs: 274 * the last is for the unconditional jump/call/ret at end of BB 275 */ 276 bbcc = (BBCC*)CLG_MALLOC("cl.bbcc.nb.1", 277 sizeof(BBCC) + 278 (bb->cjmp_count+1) * sizeof(JmpData)); 279 bbcc->bb = bb; 280 bbcc->tid = CLG_(current_tid); 281 282 bbcc->ret_counter = 0; 283 bbcc->skipped = 0; 284 bbcc->cost = CLG_(get_costarray)(bb->cost_count); 285 for(i=0;i<bb->cost_count;i++) 286 bbcc->cost[i] = 0; 287 for(i=0; i<=bb->cjmp_count; i++) { 288 bbcc->jmp[i].ecounter = 0; 289 bbcc->jmp[i].jcc_list = 0; 290 } 291 bbcc->ecounter_sum = 0; 292 293 /* Init pointer caches (LRU) */ 294 bbcc->lru_next_bbcc = 0; 295 bbcc->lru_from_jcc = 0; 296 bbcc->lru_to_jcc = 0; 297 298 CLG_(stat).distinct_bbccs++; 299 300 CLG_DEBUG(3, " new_bbcc(BB %#lx): %p (now %d)\n", 301 bb_addr(bb), bbcc, CLG_(stat).distinct_bbccs); 302 303 return bbcc; 304 } 305 306 307 /** 308 * Inserts a new BBCC into hashes. 309 * BBCC specific items must be set as this is used for the hash 310 * keys: 311 * fn : current function 312 * tid : current thread ID 313 * from : position where current function is called from 314 * 315 * Recursion level doesn't need to be set as this is not included 316 * in the hash key: Only BBCCs with rec level 0 are in hashes. 317 */ 318 static 319 void insert_bbcc_into_hash(BBCC* bbcc) 320 { 321 UInt idx; 322 323 CLG_ASSERT(bbcc->cxt != 0); 324 325 CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %#lx, fn '%s')\n", 326 bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name); 327 328 /* check fill degree of hash and resize if needed (>90%) */ 329 current_bbccs.entries++; 330 if (100 * current_bbccs.entries / current_bbccs.size > 90) 331 resize_bbcc_hash(); 332 333 idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size); 334 bbcc->next = current_bbccs.table[idx]; 335 current_bbccs.table[idx] = bbcc; 336 337 CLG_DEBUG(3,"- insert_bbcc_into_hash: %d entries\n", 338 current_bbccs.entries); 339 } 340 341 static const HChar* mangled_cxt(Context* cxt, int rec_index) 342 { 343 static HChar mangled[FN_NAME_LEN]; 344 int i, p; 345 346 if (!cxt) return "(no context)"; 347 348 p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name); 349 if (rec_index >0) 350 p += VG_(sprintf)(mangled+p, "'%d", rec_index +1); 351 for(i=1;i<cxt->size;i++) 352 p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name); 353 354 return mangled; 355 } 356 357 358 /* Create a new BBCC as a copy of an existing one, 359 * but with costs set to 0 and jcc chains empty. 360 * 361 * This is needed when a BB is executed in another context than 362 * the one at instrumentation time of the BB. 363 * 364 * Use cases: 365 * rec_index == 0: clone from a BBCC with differing tid/cxt 366 * and insert into hashes 367 * rec_index >0 : clone from a BBCC with same tid/cxt and rec_index 0 368 * don't insert into hashes 369 */ 370 static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index) 371 { 372 BBCC* bbcc; 373 374 CLG_DEBUG(3,"+ clone_bbcc(BB %#lx, rec %d, fn %s)\n", 375 bb_addr(orig->bb), rec_index, cxt->fn[0]->name); 376 377 bbcc = new_bbcc(orig->bb); 378 379 if (rec_index == 0) { 380 381 /* hash insertion is only allowed if tid or cxt is different */ 382 CLG_ASSERT((orig->tid != CLG_(current_tid)) || 383 (orig->cxt != cxt)); 384 385 bbcc->rec_index = 0; 386 bbcc->cxt = cxt; 387 bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions); 388 bbcc->rec_array[0] = bbcc; 389 390 insert_bbcc_into_hash(bbcc); 391 } 392 else { 393 if (CLG_(clo).separate_threads) 394 CLG_ASSERT(orig->tid == CLG_(current_tid)); 395 396 CLG_ASSERT(orig->cxt == cxt); 397 CLG_ASSERT(orig->rec_array); 398 CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index); 399 CLG_ASSERT(orig->rec_array[rec_index] ==0); 400 401 /* new BBCC will only have differing recursion level */ 402 bbcc->rec_index = rec_index; 403 bbcc->cxt = cxt; 404 bbcc->rec_array = orig->rec_array; 405 bbcc->rec_array[rec_index] = bbcc; 406 } 407 408 /* update list of BBCCs for same BB */ 409 bbcc->next_bbcc = orig->bb->bbcc_list; 410 orig->bb->bbcc_list = bbcc; 411 412 413 CLG_DEBUGIF(3) 414 CLG_(print_bbcc)(-2, bbcc); 415 416 // FIXME: mangled_cxt returns a pointer to a static buffer that 417 // gets overwritten with each invocation. 418 CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %#lx\n" 419 " orig %s\n" 420 " new %s\n", 421 orig, rec_index, bb_addr(orig->bb), 422 mangled_cxt(orig->cxt, orig->rec_index), 423 mangled_cxt(bbcc->cxt, bbcc->rec_index)); 424 425 CLG_(stat).bbcc_clones++; 426 427 return bbcc; 428 }; 429 430 431 432 /* Get a pointer to the cost centre structure for given basic block 433 * address. If created, the BBCC is inserted into the BBCC hash. 434 * Also sets BB_seen_before by reference. 435 * 436 */ 437 BBCC* CLG_(get_bbcc)(BB* bb) 438 { 439 BBCC* bbcc; 440 441 CLG_DEBUG(3, "+ get_bbcc(BB %#lx)\n", bb_addr(bb)); 442 443 bbcc = bb->bbcc_list; 444 445 if (!bbcc) { 446 bbcc = new_bbcc(bb); 447 448 /* initialize BBCC */ 449 bbcc->cxt = 0; 450 bbcc->rec_array = 0; 451 bbcc->rec_index = 0; 452 453 bbcc->next_bbcc = bb->bbcc_list; 454 bb->bbcc_list = bbcc; 455 bb->last_bbcc = bbcc; 456 457 CLG_DEBUGIF(3) 458 CLG_(print_bbcc)(-2, bbcc); 459 } 460 461 CLG_DEBUG(3, "- get_bbcc(BB %#lx): BBCC %p\n", 462 bb_addr(bb), bbcc); 463 464 return bbcc; 465 } 466 467 468 /* Callgrind manages its own call stack for each thread. 469 * When leaving a function, a underflow can happen when 470 * Callgrind's tracing was switched on in the middle of 471 * a run, i.e. when Callgrind was not able to trace the 472 * call instruction. 473 * This function tries to reconstruct the original call. 474 * As we know the return address (the address following 475 * the CALL instruction), we can detect the function 476 * we return back to, but the original call site is unknown. 477 * We suppose a call site at return address - 1. 478 * (TODO: other heuristic: lookup info of instrumented BBs). 479 */ 480 static void handleUnderflow(BB* bb) 481 { 482 /* RET at top of call stack */ 483 BBCC* source_bbcc; 484 BB* source_bb; 485 Bool seen_before; 486 fn_node* caller; 487 int fn_number; 488 unsigned *pactive; 489 call_entry* call_entry_up; 490 491 CLG_DEBUG(1," Callstack underflow !\n"); 492 493 /* we emulate an old call from the function we return to 494 * by using (<return address> -1) */ 495 source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before); 496 source_bbcc = CLG_(get_bbcc)(source_bb); 497 498 /* seen_before can be true if RET from a signal handler */ 499 if (!seen_before) { 500 source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0; 501 } 502 else if (CLG_(current_state).collect) 503 source_bbcc->ecounter_sum++; 504 505 /* Force a new top context, will be set active by push_cxt() */ 506 CLG_(current_fn_stack).top--; 507 CLG_(current_state).cxt = 0; 508 caller = CLG_(get_fn_node)(bb); 509 CLG_(push_cxt)( caller ); 510 511 if (!seen_before) { 512 /* set rec array for source BBCC: this is at rec level 1 */ 513 source_bbcc->rec_array = new_recursion(caller->separate_recursions); 514 source_bbcc->rec_array[0] = source_bbcc; 515 516 CLG_ASSERT(source_bbcc->cxt == 0); 517 source_bbcc->cxt = CLG_(current_state).cxt; 518 insert_bbcc_into_hash(source_bbcc); 519 } 520 CLG_ASSERT(CLG_(current_state).bbcc); 521 522 /* correct active counts */ 523 fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number; 524 pactive = CLG_(get_fn_entry)(fn_number); 525 (*pactive)--; 526 527 /* This assertion is not correct for reentrant 528 * signal handlers */ 529 /* CLG_ASSERT(*pactive == 0); */ 530 531 CLG_(current_state).nonskipped = 0; /* we didn't skip this function */ 532 /* back to current context */ 533 CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] ); 534 CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc, 535 (Addr)-1, False); 536 call_entry_up = 537 &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]); 538 /* assume this call is lasting since last dump or 539 * for a signal handler since it's call */ 540 if (CLG_(current_state).sig == 0) 541 CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost, 542 CLG_(get_current_thread)()->lastdump_cost ); 543 else 544 CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost ); 545 } 546 547 548 /* 549 * Helper function called at start of each instrumented BB to setup 550 * pointer to costs for current thread/context/recursion level 551 */ 552 553 VG_REGPARM(1) 554 void CLG_(setup_bbcc)(BB* bb) 555 { 556 BBCC *bbcc, *last_bbcc; 557 Bool call_emulation = False, delayed_push = False, skip = False; 558 Addr sp; 559 BB* last_bb; 560 ThreadId tid; 561 ClgJumpKind jmpkind; 562 Bool isConditionalJump; 563 Int passed = 0, csp; 564 Bool ret_without_call = False; 565 Int popcount_on_return = 1; 566 567 CLG_DEBUG(3,"+ setup_bbcc(BB %#lx)\n", bb_addr(bb)); 568 569 /* This is needed because thread switches can not reliable be tracked 570 * with callback CLG_(run_thread) only: we have otherwise no way to get 571 * the thread ID after a signal handler returns. 572 * This could be removed again if that bug is fixed in Valgrind. 573 * This is in the hot path but hopefully not to costly. 574 */ 575 tid = VG_(get_running_tid)(); 576 #if 1 577 /* CLG_(switch_thread) is a no-op when tid is equal to CLG_(current_tid). 578 * As this is on the hot path, we only call CLG_(switch_thread)(tid) 579 * if tid differs from the CLG_(current_tid). 580 */ 581 if (UNLIKELY(tid != CLG_(current_tid))) 582 CLG_(switch_thread)(tid); 583 #else 584 CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid)); 585 #endif 586 587 sp = VG_(get_SP)(tid); 588 last_bbcc = CLG_(current_state).bbcc; 589 last_bb = last_bbcc ? last_bbcc->bb : 0; 590 591 if (last_bb) { 592 passed = CLG_(current_state).jmps_passed; 593 CLG_ASSERT(passed <= last_bb->cjmp_count); 594 jmpkind = last_bb->jmp[passed].jmpkind; 595 isConditionalJump = (passed < last_bb->cjmp_count); 596 597 if (CLG_(current_state).collect) { 598 if (!CLG_(current_state).nonskipped) { 599 last_bbcc->ecounter_sum++; 600 last_bbcc->jmp[passed].ecounter++; 601 if (!CLG_(clo).simulate_cache) { 602 /* update Ir cost */ 603 UInt instr_count = last_bb->jmp[passed].instr+1; 604 CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count; 605 } 606 } 607 else { 608 /* do not increment exe counter of BBs in skipped functions, as it 609 * would fool dumping code */ 610 if (!CLG_(clo).simulate_cache) { 611 /* update Ir cost */ 612 UInt instr_count = last_bb->jmp[passed].instr+1; 613 CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count; 614 CLG_(current_state).nonskipped->skipped[ fullOffset(EG_IR) ] 615 += instr_count; 616 } 617 } 618 } 619 620 CLG_DEBUGIF(4) { 621 CLG_(print_execstate)(-2, &CLG_(current_state) ); 622 CLG_(print_bbcc_cost)(-2, last_bbcc); 623 } 624 } 625 else { 626 jmpkind = jk_None; 627 isConditionalJump = False; 628 } 629 630 /* Manipulate JmpKind if needed, only using BB specific info */ 631 632 csp = CLG_(current_call_stack).sp; 633 634 /* A return not matching the top call in our callstack is a jump */ 635 if ( (jmpkind == jk_Return) && (csp >0)) { 636 Int csp_up = csp-1; 637 call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]); 638 639 /* We have a real return if 640 * - the stack pointer (SP) left the current stack frame, or 641 * - SP has the same value as when reaching the current function 642 * and the address of this BB is the return address of last call 643 * (we even allow to leave multiple frames if the SP stays the 644 * same and we find a matching return address) 645 * The latter condition is needed because on PPC, SP can stay 646 * the same over CALL=b(c)l / RET=b(c)lr boundaries 647 */ 648 if (sp < top_ce->sp) popcount_on_return = 0; 649 else if (top_ce->sp == sp) { 650 while(1) { 651 if (top_ce->ret_addr == bb_addr(bb)) break; 652 if (csp_up>0) { 653 csp_up--; 654 top_ce = &(CLG_(current_call_stack).entry[csp_up]); 655 if (top_ce->sp == sp) { 656 popcount_on_return++; 657 continue; 658 } 659 } 660 popcount_on_return = 0; 661 break; 662 } 663 } 664 if (popcount_on_return == 0) { 665 jmpkind = jk_Jump; 666 ret_without_call = True; 667 } 668 } 669 670 /* Should this jump be converted to call or pop/call ? */ 671 if (( jmpkind != jk_Return) && 672 ( jmpkind != jk_Call) && last_bb) { 673 674 /* We simulate a JMP/Cont to be a CALL if 675 * - jump is in another ELF object or section kind 676 * - jump is to first instruction of a function (tail recursion) 677 */ 678 if (ret_without_call || 679 /* This is for detection of optimized tail recursion. 680 * On PPC, this is only detected as call when going to another 681 * function. The problem is that on PPC it can go wrong 682 * more easily (no stack frame setup needed) 683 */ 684 #if defined(VGA_ppc32) 685 (bb->is_entry && (last_bb->fn != bb->fn)) || 686 #else 687 bb->is_entry || 688 #endif 689 (last_bb->sect_kind != bb->sect_kind) || 690 (last_bb->obj->number != bb->obj->number)) { 691 692 CLG_DEBUG(1," JMP: %s[%s] to %s[%s]%s!\n", 693 last_bb->fn->name, last_bb->obj->name, 694 bb->fn->name, bb->obj->name, 695 ret_without_call?" (RET w/o CALL)":""); 696 697 if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp>0)) { 698 699 call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]); 700 701 if (top_ce->jcc) { 702 703 CLG_DEBUG(1," Pop on Jump!\n"); 704 705 /* change source for delayed push */ 706 CLG_(current_state).bbcc = top_ce->jcc->from; 707 sp = top_ce->sp; 708 passed = top_ce->jcc->jmp; 709 CLG_(pop_call_stack)(); 710 } 711 else { 712 CLG_ASSERT(CLG_(current_state).nonskipped != 0); 713 } 714 } 715 716 jmpkind = jk_Call; 717 call_emulation = True; 718 } 719 } 720 721 if (jmpkind == jk_Call) 722 skip = CLG_(get_fn_node)(bb)->skip; 723 724 CLG_DEBUGIF(1) { 725 if (isConditionalJump) 726 VG_(printf)("Cond-"); 727 switch(jmpkind) { 728 case jk_None: VG_(printf)("Fall-through"); break; 729 case jk_Jump: VG_(printf)("Jump"); break; 730 case jk_Call: VG_(printf)("Call"); break; 731 case jk_Return: VG_(printf)("Return"); break; 732 default: tl_assert(0); 733 } 734 VG_(printf)(" %08lx -> %08lx, SP %08lx\n", 735 last_bb ? bb_jmpaddr(last_bb) : 0, 736 bb_addr(bb), sp); 737 } 738 739 /* Handle CALL/RET and update context to get correct BBCC */ 740 741 if (jmpkind == jk_Return) { 742 743 if ((csp == 0) || 744 ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) && 745 ( *(CLG_(current_fn_stack).top-1)==0)) ) { 746 747 /* On an empty call stack or at a signal separation marker, 748 * a RETURN generates an call stack underflow. 749 */ 750 handleUnderflow(bb); 751 CLG_(pop_call_stack)(); 752 } 753 else { 754 CLG_ASSERT(popcount_on_return >0); 755 CLG_(unwind_call_stack)(sp, popcount_on_return); 756 } 757 } 758 else { 759 Int unwind_count = CLG_(unwind_call_stack)(sp, 0); 760 if (unwind_count > 0) { 761 /* if unwinding was done, this actually is a return */ 762 jmpkind = jk_Return; 763 } 764 765 if (jmpkind == jk_Call) { 766 delayed_push = True; 767 768 csp = CLG_(current_call_stack).sp; 769 if (call_emulation && csp>0) 770 sp = CLG_(current_call_stack).entry[csp-1].sp; 771 772 } 773 } 774 775 /* Change new context if needed, taking delayed_push into account */ 776 if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) { 777 CLG_(push_cxt)(CLG_(get_fn_node)(bb)); 778 } 779 CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom); 780 781 /* If there is a fresh instrumented BBCC, assign current context */ 782 bbcc = CLG_(get_bbcc)(bb); 783 if (bbcc->cxt == 0) { 784 CLG_ASSERT(bbcc->rec_array == 0); 785 786 bbcc->cxt = CLG_(current_state).cxt; 787 bbcc->rec_array = 788 new_recursion((*CLG_(current_fn_stack).top)->separate_recursions); 789 bbcc->rec_array[0] = bbcc; 790 791 insert_bbcc_into_hash(bbcc); 792 } 793 else { 794 /* get BBCC with current context */ 795 796 /* first check LRU of last bbcc executed */ 797 798 if (last_bbcc) { 799 bbcc = last_bbcc->lru_next_bbcc; 800 if (bbcc && 801 ((bbcc->bb != bb) || 802 (bbcc->cxt != CLG_(current_state).cxt))) 803 bbcc = 0; 804 } 805 else 806 bbcc = 0; 807 808 if (!bbcc) 809 bbcc = lookup_bbcc(bb, CLG_(current_state).cxt); 810 if (!bbcc) 811 bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0); 812 813 bb->last_bbcc = bbcc; 814 } 815 816 /* save for fast lookup */ 817 if (last_bbcc) 818 last_bbcc->lru_next_bbcc = bbcc; 819 820 if ((*CLG_(current_fn_stack).top)->separate_recursions >1) { 821 UInt level, idx; 822 fn_node* top = *(CLG_(current_fn_stack).top); 823 824 level = *CLG_(get_fn_entry)(top->number); 825 826 if (delayed_push && !skip) { 827 if (CLG_(clo).skip_direct_recursion) { 828 /* a call was detected, which means that the source BB != 0 */ 829 CLG_ASSERT(CLG_(current_state).bbcc != 0); 830 /* only increment rec. level if called from different function */ 831 if (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0]) 832 level++; 833 } 834 else level++; 835 } 836 if (level> top->separate_recursions) 837 level = top->separate_recursions; 838 839 if (level == 0) { 840 /* can only happen if instrumentation just was switched on */ 841 level = 1; 842 *CLG_(get_fn_entry)(top->number) = 1; 843 } 844 845 idx = level -1; 846 if (bbcc->rec_array[idx]) 847 bbcc = bbcc->rec_array[idx]; 848 else 849 bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx); 850 851 CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc); 852 } 853 854 if (delayed_push) { 855 if (!skip && CLG_(current_state).nonskipped) { 856 /* a call from skipped to nonskipped */ 857 CLG_(current_state).bbcc = CLG_(current_state).nonskipped; 858 /* FIXME: take the real passed count from shadow stack */ 859 passed = CLG_(current_state).bbcc->bb->cjmp_count; 860 } 861 CLG_(push_call_stack)(CLG_(current_state).bbcc, passed, 862 bbcc, sp, skip); 863 } 864 865 if (CLG_(clo).collect_jumps && (jmpkind == jk_Jump)) { 866 867 /* Handle conditional jumps followed, i.e. trace arcs 868 * This uses JCC structures, too */ 869 870 jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc); 871 CLG_ASSERT(jcc != 0); 872 // Change from default, and check if already changed 873 if (jcc->jmpkind == jk_Call) 874 jcc->jmpkind = isConditionalJump ? jk_CondJump : jk_Jump; 875 else { 876 // FIXME: Why can this fail? 877 // CLG_ASSERT(jcc->jmpkind == jmpkind); 878 } 879 880 jcc->call_counter++; 881 if (isConditionalJump) 882 CLG_(stat).jcnd_counter++; 883 else 884 CLG_(stat).jump_counter++; 885 } 886 887 CLG_(current_state).bbcc = bbcc; 888 /* Even though this will be set in instrumented code directly before 889 * side exits, it needs to be set to 0 here in case an exception 890 * happens in first instructions of the BB */ 891 CLG_(current_state).jmps_passed = 0; 892 // needed for log_* handlers called in this BB 893 CLG_(bb_base) = bb->obj->offset + bb->offset; 894 CLG_(cost_base) = bbcc->cost; 895 896 CLG_DEBUGIF(1) { 897 VG_(printf)(" "); 898 CLG_(print_bbcc_fn)(bbcc); 899 VG_(printf)("\n"); 900 } 901 902 CLG_DEBUG(3,"- setup_bbcc (BB %#lx): Cost %p (Len %d), Instrs %d (Len %d)\n", 903 bb_addr(bb), bbcc->cost, bb->cost_count, 904 bb->instr_count, bb->instr_len); 905 CLG_DEBUGIF(3) 906 CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index); 907 CLG_DEBUG(3,"\n"); 908 909 CLG_(stat).bb_executions++; 910 } 911