/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                       bbcc.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call tracing.

   Copyright (C) 2002-2012, Josef Weidendorfer (Josef.Weidendorfer (at) gmx.de)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "global.h"
#include "costs.h"

#include "pub_tool_threadstate.h"

/*------------------------------------------------------------*/
/*--- BBCC operations                                      ---*/
/*------------------------------------------------------------*/

#define N_BBCC_INITIAL_ENTRIES  10437

/* BBCC table (key is BB/Context), per thread, resizable */
bbcc_hash current_bbccs;

void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
{
  Int i;

  CLG_ASSERT(bbccs != 0);

  bbccs->size    = N_BBCC_INITIAL_ENTRIES;
  bbccs->entries = 0;
  bbccs->table   = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1",
                                       bbccs->size * sizeof(BBCC*));

  for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
}

void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
{
  CLG_ASSERT(dst != 0);

  dst->size    = current_bbccs.size;
  dst->entries = current_bbccs.entries;
  dst->table   = current_bbccs.table;
}

bbcc_hash* CLG_(get_current_bbcc_hash)()
{
  return &current_bbccs;
}

void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
{
  CLG_ASSERT(h != 0);

  current_bbccs.size    = h->size;
  current_bbccs.entries = h->entries;
  current_bbccs.table   = h->table;
}

/*
 * Zero all costs of a BBCC
 */
void CLG_(zero_bbcc)(BBCC* bbcc)
{
  Int i;
  jCC* jcc;

  CLG_ASSERT(bbcc->cxt != 0);
  CLG_DEBUG(1, " zero_bbcc: BB %#lx, Cxt %d "
               "(fn '%s', rec %d)\n",
            bb_addr(bbcc->bb),
            bbcc->cxt->base_number + bbcc->rec_index,
            bbcc->cxt->fn[0]->name,
            bbcc->rec_index);

  if ((bbcc->ecounter_sum == 0) &&
      (bbcc->ret_counter == 0)) return;

  for (i = 0; i < bbcc->bb->cost_count; i++)
    bbcc->cost[i] = 0;
  for (i = 0; i <= bbcc->bb->cjmp_count; i++) {
    bbcc->jmp[i].ecounter = 0;
    for (jcc = bbcc->jmp[i].jcc_list; jcc; jcc = jcc->next_from)
      CLG_(init_cost)( CLG_(sets).full, jcc->cost );
  }
  bbcc->ecounter_sum = 0;
  bbcc->ret_counter = 0;
}


void CLG_(forall_bbccs)(void (*func)(BBCC*))
{
  BBCC *bbcc, *bbcc2;
  int i, j;

  for (i = 0; i < current_bbccs.size; i++) {
    if ((bbcc = current_bbccs.table[i]) == NULL) continue;
    while (bbcc) {
      /* every bbcc should have a rec_array */
      CLG_ASSERT(bbcc->rec_array != 0);

      for (j = 0; j < bbcc->cxt->fn[0]->separate_recursions; j++) {
        if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;

        (*func)(bbcc2);
      }
      bbcc = bbcc->next;
    }
  }
}


/* All BBCCs for recursion level 0 are inserted into a
 * thread specific hash table with key
 * - address of BB structure (unique, as never freed)
 * - current context (includes caller chain)
 * BBCCs for other recursion levels are in bbcc->rec_array.
 *
 * The hash is used in setup_bbcc(), i.e. to find the cost
 * counters to be changed in the execution of a BB.
 */

static __inline__
UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
{
  CLG_ASSERT(bb != 0);
  CLG_ASSERT(cxt != 0);

  return ((Addr)bb + (Addr)cxt) % size;
}


/* Look up a BBCC in the hash.
 */
static
BBCC* lookup_bbcc(BB* bb, Context* cxt)
{
  BBCC* bbcc = bb->last_bbcc;
  UInt  idx;

  /* check LRU */
  if (bbcc->cxt == cxt) {
    if (!CLG_(clo).separate_threads) {
      /* if we don't dump threads separately, tid doesn't have to match */
      return bbcc;
    }
    if (bbcc->tid == CLG_(current_tid)) return bbcc;
  }

  CLG_(stat).bbcc_lru_misses++;

  idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
  bbcc = current_bbccs.table[idx];
  while (bbcc &&
         (bb  != bbcc->bb ||
          cxt != bbcc->cxt)) {
    bbcc = bbcc->next;
  }

  CLG_DEBUG(2, " lookup_bbcc(BB %#lx, Cxt %d, fn '%s'): %p (tid %d)\n",
            bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
            bbcc, bbcc ? bbcc->tid : 0);

  CLG_DEBUGIF(2)
    if (bbcc) CLG_(print_bbcc)(-2, bbcc);

  return bbcc;
}


/* double size of hash table 1 (addr->BBCC) */
static void resize_bbcc_hash(void)
{
  Int i, new_size, conflicts1 = 0, conflicts2 = 0;
  BBCC** new_table;
  UInt new_idx;
  BBCC *curr_BBCC, *next_BBCC;

  new_size  = 2 * current_bbccs.size + 3;
  new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1",
                                  new_size * sizeof(BBCC*));

  if (!new_table) return;

  for (i = 0; i < new_size; i++)
    new_table[i] = NULL;

  for (i = 0; i < current_bbccs.size; i++) {
    if (current_bbccs.table[i] == NULL) continue;

    curr_BBCC = current_bbccs.table[i];
    while (NULL != curr_BBCC) {
      next_BBCC = curr_BBCC->next;

      new_idx = bbcc_hash_idx(curr_BBCC->bb,
                              curr_BBCC->cxt,
                              new_size);

      curr_BBCC->next = new_table[new_idx];
      new_table[new_idx] = curr_BBCC;
      if (curr_BBCC->next) {
        conflicts1++;
        if (curr_BBCC->next->next)
          conflicts2++;
      }

      curr_BBCC = next_BBCC;
    }
  }

  VG_(free)(current_bbccs.table);


  CLG_DEBUG(0, "Resize BBCC Hash: %d => %d (entries %d, conflicts %d/%d)\n",
            current_bbccs.size, new_size,
            current_bbccs.entries, conflicts1, conflicts2);

  current_bbccs.size  = new_size;
  current_bbccs.table = new_table;
  CLG_(stat).bbcc_hash_resizes++;
}


static __inline__
BBCC** new_recursion(int size)
{
  BBCC** bbccs;
  int i;

  bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size);
  for (i = 0; i < size; i++)
    bbccs[i] = 0;

  CLG_DEBUG(3, " new_recursion(size %d): %p\n", size, bbccs);

  return bbccs;
}


/*
 * Allocate a new BBCC
 *
 * Uninitialized:
 * cxt, rec_index, rec_array, next_bbcc, next1, next2
 */
static __inline__
BBCC* new_bbcc(BB* bb)
{
  BBCC* bbcc;
  Int i;

  /* We need cjmp_count+1 JmpData structs:
   * the last is for the unconditional jump/call/ret at end of BB
   */
  bbcc = (BBCC*)CLG_MALLOC("cl.bbcc.nb.1",
                           sizeof(BBCC) +
                           (bb->cjmp_count+1) * sizeof(JmpData));

  bbcc->bb  = bb;
  bbcc->tid = CLG_(current_tid);

  bbcc->ret_counter = 0;
  bbcc->skipped = 0;
  bbcc->cost = CLG_(get_costarray)(bb->cost_count);
  for (i = 0; i < bb->cost_count; i++)
    bbcc->cost[i] = 0;
  for (i = 0; i <= bb->cjmp_count; i++) {
    bbcc->jmp[i].ecounter = 0;
    bbcc->jmp[i].jcc_list = 0;
  }
  bbcc->ecounter_sum = 0;

  /* Init pointer caches (LRU) */
  bbcc->lru_next_bbcc = 0;
  bbcc->lru_from_jcc  = 0;
  bbcc->lru_to_jcc    = 0;

  CLG_(stat).distinct_bbccs++;

  CLG_DEBUG(3, " new_bbcc(BB %#lx): %p (now %d)\n",
            bb_addr(bb), bbcc, CLG_(stat).distinct_bbccs);

  return bbcc;
}


/**
 * Inserts a new BBCC into hashes.
 * BBCC specific items must be set as this is used for the hash
 * keys:
 *  fn     : current function
 *  tid    : current thread ID
 *  from   : position where current function is called from
 *
 * Recursion level doesn't need to be set as this is not included
 * in the hash key: Only BBCCs with rec level 0 are in hashes.
 */
static
void insert_bbcc_into_hash(BBCC* bbcc)
{
  UInt idx;

  CLG_ASSERT(bbcc->cxt != 0);

  CLG_DEBUG(3, "+ insert_bbcc_into_hash(BB %#lx, fn '%s')\n",
            bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);

  /* check fill degree of hash and resize if needed (>90%) */
  current_bbccs.entries++;
  if (100 * current_bbccs.entries / current_bbccs.size > 90)
    resize_bbcc_hash();

  idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
  bbcc->next = current_bbccs.table[idx];
  current_bbccs.table[idx] = bbcc;

  CLG_DEBUG(3, "- insert_bbcc_into_hash: %d entries\n",
            current_bbccs.entries);
}

static Char* mangled_cxt(Context* cxt, int rec_index)
{
  static Char mangled[FN_NAME_LEN];
  int i, p;

  if (!cxt) return "(no context)";

  p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
  if (rec_index > 0)
    p += VG_(sprintf)(mangled+p, "'%d", rec_index + 1);
  for (i = 1; i < cxt->size; i++)
    p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);

  return mangled;
}


/* Create a new BBCC as a copy of an existing one,
 * but with costs set to 0 and jcc chains empty.
 *
 * This is needed when a BB is executed in another context than
 * the one at instrumentation time of the BB.
 *
 * Use cases:
 *  rec_index == 0: clone from a BBCC with differing tid/cxt
 *                  and insert into hashes
 *  rec_index > 0 : clone from a BBCC with same tid/cxt and rec_index 0,
 *                  don't insert into hashes
 */
static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
{
  BBCC* bbcc;

  CLG_DEBUG(3, "+ clone_bbcc(BB %#lx, rec %d, fn %s)\n",
            bb_addr(orig->bb), rec_index, cxt->fn[0]->name);

  bbcc = new_bbcc(orig->bb);

  if (rec_index == 0) {

    /* hash insertion is only allowed if tid or cxt is different */
    CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
               (orig->cxt != cxt));

    bbcc->rec_index = 0;
    bbcc->cxt = cxt;
    bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
    bbcc->rec_array[0] = bbcc;

    insert_bbcc_into_hash(bbcc);
  }
  else {
    if (CLG_(clo).separate_threads)
      CLG_ASSERT(orig->tid == CLG_(current_tid));

    CLG_ASSERT(orig->cxt == cxt);
    CLG_ASSERT(orig->rec_array);
    CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
    CLG_ASSERT(orig->rec_array[rec_index] == 0);

    /* new BBCC will only have differing recursion level */
    bbcc->rec_index = rec_index;
    bbcc->cxt = cxt;
    bbcc->rec_array = orig->rec_array;
    bbcc->rec_array[rec_index] = bbcc;
  }

  /* update list of BBCCs for same BB */
  bbcc->next_bbcc = orig->bb->bbcc_list;
  orig->bb->bbcc_list = bbcc;


  CLG_DEBUGIF(3)
    CLG_(print_bbcc)(-2, bbcc);

  CLG_DEBUG(2, "- clone_BBCC(%p, %d) for BB %#lx\n"
               "   orig %s\n"
               "   new  %s\n",
            orig, rec_index, bb_addr(orig->bb),
            mangled_cxt(orig->cxt, orig->rec_index),
            mangled_cxt(bbcc->cxt, bbcc->rec_index));

  CLG_(stat).bbcc_clones++;

  return bbcc;
}


/* Get a pointer to the cost centre structure for given basic block
 * address. If created, the BBCC is inserted into the BBCC hash.
 */
BBCC* CLG_(get_bbcc)(BB* bb)
{
  BBCC* bbcc;

  CLG_DEBUG(3, "+ get_bbcc(BB %#lx)\n", bb_addr(bb));

  bbcc = bb->bbcc_list;

  if (!bbcc) {
    bbcc = new_bbcc(bb);

    /* initialize BBCC */
    bbcc->cxt = 0;
    bbcc->rec_array = 0;
    bbcc->rec_index = 0;

    bbcc->next_bbcc = bb->bbcc_list;
    bb->bbcc_list = bbcc;
    bb->last_bbcc = bbcc;

    CLG_DEBUGIF(3)
      CLG_(print_bbcc)(-2, bbcc);
  }

  CLG_DEBUG(3, "- get_bbcc(BB %#lx): BBCC %p\n",
            bb_addr(bb), bbcc);

  return bbcc;
}


/* Callgrind manages its own call stack for each thread.
 * When leaving a function, an underflow can happen when
 * Callgrind's tracing was switched on in the middle of
 * a run, i.e. when Callgrind was not able to trace the
 * call instruction.
 * This function tries to reconstruct the original call.
 * As we know the return address (the address following
 * the CALL instruction), we can detect the function
 * we return back to, but the original call site is unknown.
 * We suppose a call site at return address - 1.
 * (TODO: other heuristic: lookup info of instrumented BBs).
 */
static void handleUnderflow(BB* bb)
{
  /* RET at top of call stack */
  BBCC* source_bbcc;
  BB* source_bb;
  Bool seen_before;
  fn_node* caller;
  int fn_number, *pactive;
  call_entry* call_entry_up;

  CLG_DEBUG(1, " Callstack underflow !\n");

  /* we emulate an old call from the function we return to
   * by using (<return address> - 1) */
  source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
  source_bbcc = CLG_(get_bbcc)(source_bb);

  /* seen_before can be true if RET from a signal handler */
  if (!seen_before) {
    source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
  }
  else if (CLG_(current_state).collect)
    source_bbcc->ecounter_sum++;

  /* Force a new top context, will be set active by push_cxt() */
  CLG_(current_fn_stack).top--;
  CLG_(current_state).cxt = 0;
  caller = CLG_(get_fn_node)(bb);
  CLG_(push_cxt)( caller );

  if (!seen_before) {
    /* set rec array for source BBCC: this is at rec level 1 */
    source_bbcc->rec_array = new_recursion(caller->separate_recursions);
    source_bbcc->rec_array[0] = source_bbcc;

    CLG_ASSERT(source_bbcc->cxt == 0);
    source_bbcc->cxt = CLG_(current_state).cxt;
    insert_bbcc_into_hash(source_bbcc);
  }
  CLG_ASSERT(CLG_(current_state).bbcc);

  /* correct active counts */
  fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
  pactive = CLG_(get_fn_entry)(fn_number);
  (*pactive)--;

  /* This assertion is not correct for reentrant
   * signal handlers */
  /* CLG_ASSERT(*pactive == 0); */

  CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
  /* back to current context */
  CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
  CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
                        (Addr)-1, False);
  call_entry_up =
    &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp - 1]);
  /* assume this call has lasted since the last dump, or,
   * for a signal handler, since its call */
  if (CLG_(current_state).sig == 0)
    CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
                     CLG_(get_current_thread)()->lastdump_cost );
  else
    CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
}


/*
 * Helper function called at the start of each instrumented BB to set up
 * the pointer to costs for the current thread/context/recursion level
 */

VG_REGPARM(1)
void CLG_(setup_bbcc)(BB* bb)
{
  BBCC *bbcc, *last_bbcc;
  Bool call_emulation = False, delayed_push = False, skip = False;
  Addr sp;
  BB* last_bb;
  ThreadId tid;
  ClgJumpKind jmpkind;
  Bool isConditionalJump;
  Int passed = 0, csp;
  Bool ret_without_call = False;
  Int popcount_on_return = 1;

  CLG_DEBUG(3, "+ setup_bbcc(BB %#lx)\n", bb_addr(bb));

  /* This is needed because thread switches cannot reliably be tracked
   * with callback CLG_(run_thread) only: we otherwise have no way to get
   * the thread ID after a signal handler returns.
   * This could be removed again if that bug is fixed in Valgrind.
   * This is in the hot path but hopefully not too costly.
   */
  tid = VG_(get_running_tid)();
#if 1
  CLG_(switch_thread)(tid);
#else
  CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
#endif

  sp = VG_(get_SP)(tid);
  last_bbcc = CLG_(current_state).bbcc;
  last_bb = last_bbcc ? last_bbcc->bb : 0;

  if (last_bb) {
    passed = CLG_(current_state).jmps_passed;
    CLG_ASSERT(passed <= last_bb->cjmp_count);
    jmpkind = last_bb->jmp[passed].jmpkind;
    isConditionalJump = (passed < last_bb->cjmp_count);

    /* if we are in a function which is skipped in the call graph, do not
     * increment the exe counter: it would produce cost (with simulation
     * off) and lead to this BB being dumped even though it should be
     * skipped
     */
    if (CLG_(current_state).collect && !CLG_(current_state).nonskipped) {
      last_bbcc->ecounter_sum++;
      last_bbcc->jmp[passed].ecounter++;
      if (!CLG_(clo).simulate_cache) {
        /* update Ir cost */
        UInt instr_count = last_bb->jmp[passed].instr+1;
        CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
      }
    }

    CLG_DEBUGIF(4) {
      CLG_(print_execstate)(-2, &CLG_(current_state) );
      CLG_(print_bbcc_cost)(-2, last_bbcc);
    }
  }
  else {
    jmpkind = jk_None;
    isConditionalJump = False;
  }

  /* Manipulate JmpKind if needed, only using BB specific info */

  csp = CLG_(current_call_stack).sp;

  /* A return not matching the top call in our callstack is a jump */
  if ( (jmpkind == jk_Return) && (csp > 0)) {
    Int csp_up = csp - 1;
    call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);

    /* We have a real return if
     * - the stack pointer (SP) left the current stack frame, or
     * - SP has the same value as when reaching the current function
     *   and the address of this BB is the return address of the last call
     *   (we even allow leaving multiple frames if the SP stays the
     *    same and we find a matching return address)
     * The latter condition is needed because on PPC, SP can stay
     * the same over CALL=b(c)l / RET=b(c)lr boundaries
     */
    if (sp < top_ce->sp) popcount_on_return = 0;
    else if (top_ce->sp == sp) {
      while(1) {
        if (top_ce->ret_addr == bb_addr(bb)) break;
        if (csp_up > 0) {
          csp_up--;
          top_ce = &(CLG_(current_call_stack).entry[csp_up]);
          if (top_ce->sp == sp) {
            popcount_on_return++;
            continue;
          }
        }
        popcount_on_return = 0;
        break;
      }
    }
    if (popcount_on_return == 0) {
      jmpkind = jk_Jump;
      ret_without_call = True;
    }
  }

  /* Should this jump be converted to call or pop/call ? */
  if (( jmpkind != jk_Return) &&
      ( jmpkind != jk_Call) && last_bb) {

    /* We simulate a JMP/Cont to be a CALL if
     * - jump is in another ELF object or section kind
     * - jump is to first instruction of a function (tail recursion)
     */
    if (ret_without_call ||
        /* This is for detection of optimized tail recursion.
         * On PPC, this is only detected as call when going to another
         * function. The problem is that on PPC it can go wrong
         * more easily (no stack frame setup needed)
         */
#if defined(VGA_ppc32)
        (bb->is_entry && (last_bb->fn != bb->fn)) ||
#else
        bb->is_entry ||
#endif
        (last_bb->sect_kind != bb->sect_kind) ||
        (last_bb->obj->number != bb->obj->number)) {

      CLG_DEBUG(1, " JMP: %s[%s] to %s[%s]%s!\n",
                last_bb->fn->name, last_bb->obj->name,
                bb->fn->name, bb->obj->name,
                ret_without_call ?
                " (RET w/o CALL)" : "");

      if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp > 0)) {

        call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);

        if (top_ce->jcc) {

          CLG_DEBUG(1, " Pop on Jump!\n");

          /* change source for delayed push */
          CLG_(current_state).bbcc = top_ce->jcc->from;
          sp = top_ce->sp;
          passed = top_ce->jcc->jmp;
          CLG_(pop_call_stack)();
        }
        else {
          CLG_ASSERT(CLG_(current_state).nonskipped != 0);
        }
      }

      jmpkind = jk_Call;
      call_emulation = True;
    }
  }

  if (jmpkind == jk_Call)
    skip = CLG_(get_fn_node)(bb)->skip;

  CLG_DEBUGIF(1) {
    if (isConditionalJump)
      VG_(printf)("Cond-");
    switch(jmpkind) {
    case jk_None:   VG_(printf)("Fall-through"); break;
    case jk_Jump:   VG_(printf)("Jump"); break;
    case jk_Call:   VG_(printf)("Call"); break;
    case jk_Return: VG_(printf)("Return"); break;
    default:        tl_assert(0);
    }
    VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
                last_bb ? bb_jmpaddr(last_bb) : 0,
                bb_addr(bb), sp);
  }

  /* Handle CALL/RET and update context to get correct BBCC */

  if (jmpkind == jk_Return) {

    if ((csp == 0) ||
        ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
         ( *(CLG_(current_fn_stack).top-1) == 0)) ) {

      /* On an empty call stack or at a signal separation marker,
       * a RETURN generates a call stack underflow.
       */
      handleUnderflow(bb);
      CLG_(pop_call_stack)();
    }
    else {
      CLG_ASSERT(popcount_on_return > 0);
      CLG_(unwind_call_stack)(sp, popcount_on_return);
    }
  }
  else {
    Int unwind_count = CLG_(unwind_call_stack)(sp, 0);
    if (unwind_count > 0) {
      /* if unwinding was done, this actually is a return */
      jmpkind = jk_Return;
    }

    if (jmpkind == jk_Call) {
      delayed_push = True;

      csp = CLG_(current_call_stack).sp;
      if (call_emulation && csp > 0)
        sp = CLG_(current_call_stack).entry[csp-1].sp;
    }
  }

  /* Change new context if needed, taking delayed_push into account */
  if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
    CLG_(push_cxt)(CLG_(get_fn_node)(bb));
  }
  CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);

  /* If there is a freshly instrumented BBCC, assign current context */
  bbcc = CLG_(get_bbcc)(bb);
  if (bbcc->cxt == 0) {
    CLG_ASSERT(bbcc->rec_array == 0);

    bbcc->cxt = CLG_(current_state).cxt;
    bbcc->rec_array =
      new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
    bbcc->rec_array[0] = bbcc;

    insert_bbcc_into_hash(bbcc);
  }
  else {
    /* get BBCC with current context */

    /* first check LRU of last bbcc executed */

    if (last_bbcc) {
      bbcc = last_bbcc->lru_next_bbcc;
      if (bbcc &&
          ((bbcc->bb != bb) ||
           (bbcc->cxt != CLG_(current_state).cxt)))
        bbcc = 0;
    }
    else
      bbcc = 0;

    if (!bbcc)
      bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
    if (!bbcc)
      bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);

    bb->last_bbcc = bbcc;
  }

  /* save for fast lookup */
  if (last_bbcc)
    last_bbcc->lru_next_bbcc = bbcc;

  if ((*CLG_(current_fn_stack).top)->separate_recursions > 1) {
    UInt level, idx;
    fn_node* top = *(CLG_(current_fn_stack).top);

    level = *CLG_(get_fn_entry)(top->number);

    if (delayed_push && !skip) {
      if (CLG_(clo).skip_direct_recursion) {
        /* a call was detected, which means that the source BB != 0 */
        CLG_ASSERT(CLG_(current_state).bbcc != 0);
        /* only increment rec. level if called from different function */
        if (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0])
          level++;
      }
      else level++;
    }
    if (level > top->separate_recursions)
      level = top->separate_recursions;

    if (level == 0) {
      /* can only happen if instrumentation was just switched on */
      level = 1;
      *CLG_(get_fn_entry)(top->number) = 1;
    }

    idx = level - 1;
    if (bbcc->rec_array[idx])
      bbcc = bbcc->rec_array[idx];
    else
      bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);

    CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
  }

  if (delayed_push) {
    if (!skip && CLG_(current_state).nonskipped) {
      /* a call from skipped to nonskipped */
      CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
      /* FIXME: take the real passed count from shadow stack */
      passed = CLG_(current_state).bbcc->bb->cjmp_count;
    }
    CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
                          bbcc, sp, skip);
  }

  if (CLG_(clo).collect_jumps && (jmpkind == jk_Jump)) {

    /* Handle conditional jumps followed, i.e. trace arcs.
     * This uses JCC structures, too */

    jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
    CLG_ASSERT(jcc != 0);
    // Change from default, and check if already changed
    if (jcc->jmpkind == jk_Call)
      jcc->jmpkind = isConditionalJump ? jk_CondJump : jk_Jump;
    else {
      // FIXME: Why can this fail?
      // CLG_ASSERT(jcc->jmpkind == jmpkind);
    }

    jcc->call_counter++;
    if (isConditionalJump)
      CLG_(stat).jcnd_counter++;
    else
      CLG_(stat).jump_counter++;
  }

  CLG_(current_state).bbcc = bbcc;
  // needed for log_* handlers called in this BB
  CLG_(bb_base)   = bb->obj->offset + bb->offset;
  CLG_(cost_base) = bbcc->cost;

  CLG_DEBUGIF(1) {
    VG_(printf)(" ");
    CLG_(print_bbcc_fn)(bbcc);
    VG_(printf)("\n");
  }

  CLG_DEBUG(3, "- setup_bbcc (BB %#lx): Cost %p (Len %d), Instrs %d (Len %d)\n",
            bb_addr(bb), bbcc->cost, bb->cost_count,
            bb->instr_count, bb->instr_len);
  CLG_DEBUGIF(3)
    CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
  CLG_DEBUG(3, "\n");

  CLG_(stat).bb_executions++;
}
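/* Closing banner added to match the file's own header convention;
 * Valgrind source files conventionally end with such a marker. */
/*--------------------------------------------------------------------*/
/*--- end                                                   bbcc.c ---*/
/*--------------------------------------------------------------------*/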