/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                       bbcc.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call tracing.

   Copyright (C) 2002-2015, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "global.h"
#include "costs.h"

#include "pub_tool_threadstate.h"

/*------------------------------------------------------------*/
/*--- BBCC operations                                      ---*/
/*------------------------------------------------------------*/

#define N_BBCC_INITIAL_ENTRIES  10437

/* BBCC table (key is BB/Context), per thread, resizable */
bbcc_hash current_bbccs;

void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
{
   Int i;

   CLG_ASSERT(bbccs != 0);

   bbccs->size    = N_BBCC_INITIAL_ENTRIES;
   bbccs->entries = 0;
   bbccs->table   = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1",
                                        bbccs->size * sizeof(BBCC*));

   for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
}

void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
{
   CLG_ASSERT(dst != 0);

   dst->size    = current_bbccs.size;
   dst->entries = current_bbccs.entries;
   dst->table   = current_bbccs.table;
}

bbcc_hash* CLG_(get_current_bbcc_hash)()
{
   return &current_bbccs;
}

void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
{
   CLG_ASSERT(h != 0);

   current_bbccs.size    = h->size;
   current_bbccs.entries = h->entries;
   current_bbccs.table   = h->table;
}

/*
 * Zero all costs of a BBCC
 */
void CLG_(zero_bbcc)(BBCC* bbcc)
{
   Int i;
   jCC* jcc;

   CLG_ASSERT(bbcc->cxt != 0);
   CLG_DEBUG(1, "  zero_bbcc: BB %#lx, Cxt %u "
                "(fn '%s', rec %u)\n",
             bb_addr(bbcc->bb),
             bbcc->cxt->base_number + bbcc->rec_index,
             bbcc->cxt->fn[0]->name,
             bbcc->rec_index);

   if ((bbcc->ecounter_sum == 0) &&
       (bbcc->ret_counter == 0)) return;

   for (i = 0; i < bbcc->bb->cost_count; i++)
      bbcc->cost[i] = 0;
   for (i = 0; i <= bbcc->bb->cjmp_count; i++) {
      bbcc->jmp[i].ecounter = 0;
      for (jcc = bbcc->jmp[i].jcc_list; jcc; jcc = jcc->next_from)
         CLG_(init_cost)( CLG_(sets).full, jcc->cost );
   }
   bbcc->ecounter_sum = 0;
   bbcc->ret_counter = 0;
}


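/* Apply 'func' to every BBCC in the current (per-thread) hash.
 * This includes the clones for higher recursion levels, which are
 * reached through each BBCC's rec_array rather than the hash itself. */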
void CLG_(forall_bbccs)(void (*func)(BBCC*))
{
   BBCC *bbcc, *bbcc2;
   int i, j;

   for (i = 0; i < current_bbccs.size; i++) {
      if ((bbcc = current_bbccs.table[i]) == NULL) continue;
      while (bbcc) {
         /* every bbcc should have a rec_array */
         CLG_ASSERT(bbcc->rec_array != 0);

         for (j = 0; j < bbcc->cxt->fn[0]->separate_recursions; j++) {
            if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;

            (*func)(bbcc2);
         }
         bbcc = bbcc->next;
      }
   }
}


/* All BBCCs for recursion level 0 are inserted into a
 * thread specific hash table with key
 * - address of BB structure (unique, as never freed)
 * - current context (includes caller chain)
 * BBCCs for other recursion levels are in bbcc->rec_array.
 *
 * The hash is used in setup_bbcc(), i.e. to find the cost
 * counters to be changed in the execution of a BB.
 */

static __inline__
UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
{
   CLG_ASSERT(bb != 0);
   CLG_ASSERT(cxt != 0);

   return ((Addr)bb + (Addr)cxt) % size;
}


/* Look up a BBCC in the hash.
 */
static
BBCC* lookup_bbcc(BB* bb, Context* cxt)
{
   BBCC* bbcc = bb->last_bbcc;
   UInt idx;

   /* check LRU */
   if (bbcc->cxt == cxt) {
      if (!CLG_(clo).separate_threads) {
         /* if we don't dump threads separately, the tid doesn't have to match */
         return bbcc;
      }
      if (bbcc->tid == CLG_(current_tid)) return bbcc;
   }

   CLG_(stat).bbcc_lru_misses++;

   idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
   bbcc = current_bbccs.table[idx];
   while (bbcc &&
          (bb  != bbcc->bb ||
           cxt != bbcc->cxt)) {
      bbcc = bbcc->next;
   }

   CLG_DEBUG(2, "  lookup_bbcc(BB %#lx, Cxt %u, fn '%s'): %p (tid %u)\n",
             bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
             bbcc, bbcc ? bbcc->tid : 0);

   CLG_DEBUGIF(2)
      if (bbcc) CLG_(print_bbcc)(-2, bbcc);

   return bbcc;
}


/* Double the size of the BBCC hash table (key is BB/Context). */
static void resize_bbcc_hash(void)
{
   Int i, new_size, conflicts1 = 0, conflicts2 = 0;
   BBCC** new_table;
   UInt new_idx;
   BBCC *curr_BBCC, *next_BBCC;

   new_size  = 2 * current_bbccs.size + 3;
   new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1",
                                   new_size * sizeof(BBCC*));

   for (i = 0; i < new_size; i++)
      new_table[i] = NULL;

   for (i = 0; i < current_bbccs.size; i++) {
      if (current_bbccs.table[i] == NULL) continue;

      curr_BBCC = current_bbccs.table[i];
      while (NULL != curr_BBCC) {
         next_BBCC = curr_BBCC->next;

         new_idx = bbcc_hash_idx(curr_BBCC->bb,
                                 curr_BBCC->cxt,
                                 new_size);

         curr_BBCC->next    = new_table[new_idx];
         new_table[new_idx] = curr_BBCC;
         if (curr_BBCC->next) {
            conflicts1++;
            if (curr_BBCC->next->next)
               conflicts2++;
         }

         curr_BBCC = next_BBCC;
      }
   }

   VG_(free)(current_bbccs.table);

   CLG_DEBUG(0, "Resize BBCC Hash: %u => %d (entries %u, conflicts %d/%d)\n",
             current_bbccs.size, new_size,
             current_bbccs.entries, conflicts1, conflicts2);

   current_bbccs.size  = new_size;
   current_bbccs.table = new_table;
   CLG_(stat).bbcc_hash_resizes++;
}


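/* Allocate a zero-initialized array of BBCC pointers with one slot per
 * recursion level. Slot 0 gets the base BBCC; slots for deeper levels
 * are filled lazily by clone_bbcc() when that level is first reached. */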
static __inline__
BBCC** new_recursion(int size)
{
   BBCC** bbccs;
   int i;

   bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size);
   for (i = 0; i < size; i++)
      bbccs[i] = 0;

   CLG_DEBUG(3, "  new_recursion(size %d): %p\n", size, bbccs);

   return bbccs;
}


/*
 * Allocate a new BBCC
 *
 * Uninitialized:
 * cxt, rec_index, rec_array, next_bbcc, next1, next2
 */
static __inline__
BBCC* new_bbcc(BB* bb)
{
   BBCC* bbcc;
   Int i;

   /* We need cjmp_count+1 JmpData structs:
    * the last is for the unconditional jump/call/ret at end of BB
    */
   bbcc = (BBCC*) CLG_MALLOC("cl.bbcc.nb.1",
                             sizeof(BBCC) +
                             (bb->cjmp_count+1) * sizeof(JmpData));
   bbcc->bb  = bb;
   bbcc->tid = CLG_(current_tid);

   bbcc->ret_counter = 0;
   bbcc->skipped = 0;
   bbcc->cost = CLG_(get_costarray)(bb->cost_count);
   for (i = 0; i < bb->cost_count; i++)
      bbcc->cost[i] = 0;
   for (i = 0; i <= bb->cjmp_count; i++) {
      bbcc->jmp[i].ecounter = 0;
      bbcc->jmp[i].jcc_list = 0;
   }
   bbcc->ecounter_sum = 0;

   /* Init pointer caches (LRU) */
   bbcc->lru_next_bbcc = 0;
   bbcc->lru_from_jcc  = 0;
   bbcc->lru_to_jcc    = 0;

   CLG_(stat).distinct_bbccs++;

   CLG_DEBUG(3, "  new_bbcc(BB %#lx): %p (now %d)\n",
             bb_addr(bb), bbcc, CLG_(stat).distinct_bbccs);

   return bbcc;
}


/**
 * Inserts a new BBCC into the hash.
 * The items used for the hash key must already be set:
 *   fn   : current function
 *   tid  : current thread ID
 *   from : position where the current function is called from
 *
 * The recursion level doesn't need to be set, as it is not part of
 * the hash key: only BBCCs with recursion level 0 are in the hash.
 */
static
void insert_bbcc_into_hash(BBCC* bbcc)
{
   UInt idx;

   CLG_ASSERT(bbcc->cxt != 0);

   CLG_DEBUG(3, "+ insert_bbcc_into_hash(BB %#lx, fn '%s')\n",
             bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);

   /* check fill degree of hash and resize if needed (>90%) */
   current_bbccs.entries++;
   if (100 * current_bbccs.entries / current_bbccs.size > 90)
      resize_bbcc_hash();

   idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
   bbcc->next = current_bbccs.table[idx];
   current_bbccs.table[idx] = bbcc;

   CLG_DEBUG(3, "- insert_bbcc_into_hash: %u entries\n",
             current_bbccs.entries);
}

/* String is returned in a dynamically allocated buffer. Caller is
   responsible for freeing it. */
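/* The format is the function name, optionally followed by "'<rec+1>" if
   the recursion index is > 0, and then "'<caller>" for each caller in the
   context. E.g. a context [foo, main] at recursion index 1 would give
   "foo'2'main" (illustrative example). */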
static HChar* mangled_cxt(const Context* cxt, Int rec_index)
{
   Int i, p;

   if (!cxt) return VG_(strdup)("cl.bbcc.mcxt", "(no context)");

   /* Overestimate the number of bytes we need to hold the string. */
   SizeT need = 20;   // rec_index + nul-terminator
   for (i = 0; i < cxt->size; ++i)
      need += VG_(strlen)(cxt->fn[i]->name) + 1;   // 1 for leading '

   HChar *mangled = CLG_MALLOC("cl.bbcc.mcxt", need);
   p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
   if (rec_index > 0)
      p += VG_(sprintf)(mangled+p, "'%d", rec_index + 1);
   for (i = 1; i < cxt->size; i++)
      p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);

   return mangled;
}


/* Create a new BBCC as a copy of an existing one,
 * but with costs set to 0 and jcc chains empty.
 *
 * This is needed when a BB is executed in another context than
 * the one at instrumentation time of the BB.
 *
 * Use cases:
 *  rec_index == 0: clone from a BBCC with differing tid/cxt
 *                  and insert into the hash
 *  rec_index  > 0: clone from a BBCC with same tid/cxt and rec_index 0;
 *                  don't insert into the hash
 */
static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
{
   BBCC* bbcc;

   CLG_DEBUG(3, "+ clone_bbcc(BB %#lx, rec %d, fn %s)\n",
             bb_addr(orig->bb), rec_index, cxt->fn[0]->name);

   bbcc = new_bbcc(orig->bb);

   if (rec_index == 0) {

      /* hash insertion is only allowed if tid or cxt is different */
      CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
                 (orig->cxt != cxt));

      bbcc->rec_index = 0;
      bbcc->cxt = cxt;
      bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
      bbcc->rec_array[0] = bbcc;

      insert_bbcc_into_hash(bbcc);
   }
   else {
      if (CLG_(clo).separate_threads)
         CLG_ASSERT(orig->tid == CLG_(current_tid));

      CLG_ASSERT(orig->cxt == cxt);
      CLG_ASSERT(orig->rec_array);
      CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
      CLG_ASSERT(orig->rec_array[rec_index] == 0);

      /* new BBCC will only have differing recursion level */
      bbcc->rec_index = rec_index;
      bbcc->cxt = cxt;
      bbcc->rec_array = orig->rec_array;
      bbcc->rec_array[rec_index] = bbcc;
   }

   /* update list of BBCCs for same BB */
   bbcc->next_bbcc = orig->bb->bbcc_list;
   orig->bb->bbcc_list = bbcc;

   CLG_DEBUGIF(3)
      CLG_(print_bbcc)(-2, bbcc);

   HChar *mangled_orig = mangled_cxt(orig->cxt, orig->rec_index);
   HChar *mangled_bbcc = mangled_cxt(bbcc->cxt, bbcc->rec_index);
   CLG_DEBUG(2, "- clone_BBCC(%p, %d) for BB %#lx\n"
                "   orig %s\n"
                "   new  %s\n",
             orig, rec_index, bb_addr(orig->bb),
             mangled_orig,
             mangled_bbcc);
   CLG_FREE(mangled_orig);
   CLG_FREE(mangled_bbcc);

   CLG_(stat).bbcc_clones++;

   return bbcc;
}


/* Get a pointer to the cost centre structure for a given basic block.
 * If none exists yet, a new, uninitialized BBCC is created and
 * prepended to the BB's bbcc_list; context assignment and hash
 * insertion happen later (see CLG_(setup_bbcc) and handleUnderflow).
 */
BBCC* CLG_(get_bbcc)(BB* bb)
{
   BBCC* bbcc;

   CLG_DEBUG(3, "+ get_bbcc(BB %#lx)\n", bb_addr(bb));

   bbcc = bb->bbcc_list;

   if (!bbcc) {
      bbcc = new_bbcc(bb);

      /* initialize BBCC */
      bbcc->cxt = 0;
      bbcc->rec_array = 0;
      bbcc->rec_index = 0;

      bbcc->next_bbcc = bb->bbcc_list;
      bb->bbcc_list = bbcc;
      bb->last_bbcc = bbcc;

      CLG_DEBUGIF(3)
         CLG_(print_bbcc)(-2, bbcc);
   }

   CLG_DEBUG(3, "- get_bbcc(BB %#lx): BBCC %p\n",
             bb_addr(bb), bbcc);

   return bbcc;
}


/* Callgrind manages its own call stack for each thread.
 * When leaving a function, an underflow can happen when
 * Callgrind's tracing was switched on in the middle of
 * a run, i.e. when Callgrind was not able to trace the
 * call instruction.
 * This function tries to reconstruct the original call.
 * As we know the return address (the address following
 * the CALL instruction), we can detect the function
 * we return to, but the original call site is unknown.
 * We suppose a call site at return address - 1.
 * (TODO: other heuristic: lookup info of instrumented BBs.)
 */
static void handleUnderflow(BB* bb)
{
   /* RET at top of call stack */
   BBCC* source_bbcc;
   BB* source_bb;
   Bool seen_before;
   fn_node* caller;
   int fn_number;
   unsigned *pactive;
   call_entry* call_entry_up;

   CLG_DEBUG(1, "  Callstack underflow !\n");

   /* we emulate an old call from the function we return to
    * by using (<return address> - 1) */
   source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
   source_bbcc = CLG_(get_bbcc)(source_bb);

   /* seen_before can be true if RET from a signal handler */
   if (!seen_before) {
      source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
   }
   else if (CLG_(current_state).collect)
      source_bbcc->ecounter_sum++;

   /* Force a new top context, will be set active by push_cxt() */
   CLG_(current_fn_stack).top--;
   CLG_(current_state).cxt = 0;
   caller = CLG_(get_fn_node)(bb);
   CLG_(push_cxt)( caller );

   if (!seen_before) {
      /* set rec array for source BBCC: this is at rec level 1 */
      source_bbcc->rec_array = new_recursion(caller->separate_recursions);
      source_bbcc->rec_array[0] = source_bbcc;

      CLG_ASSERT(source_bbcc->cxt == 0);
      source_bbcc->cxt = CLG_(current_state).cxt;
      insert_bbcc_into_hash(source_bbcc);
   }
   CLG_ASSERT(CLG_(current_state).bbcc);

   /* correct active counts */
   fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
   pactive = CLG_(get_fn_entry)(fn_number);
   (*pactive)--;

   /* This assertion is not correct for reentrant
    * signal handlers */
   /* CLG_ASSERT(*pactive == 0); */

   CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
   /* back to current context */
   CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
   CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
                         (Addr)-1, False);
   call_entry_up =
      &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp - 1]);
   /* assume this call has been lasting since the last dump, or,
    * for a signal handler, since its invocation */
   if (CLG_(current_state).sig == 0)
      CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
                       CLG_(get_current_thread)()->lastdump_cost );
   else
      CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
}


/*
 * Helper function called at the start of each instrumented BB to set up
 * the pointer to the costs for the current thread/context/recursion level.
 */

VG_REGPARM(1)
void CLG_(setup_bbcc)(BB* bb)
{
   BBCC *bbcc, *last_bbcc;
   Bool call_emulation = False, delayed_push = False, skip = False;
   Addr sp;
   BB* last_bb;
   ThreadId tid;
   ClgJumpKind jmpkind;
   Bool isConditionalJump;
   Int passed = 0, csp;
   Bool ret_without_call = False;
   Int popcount_on_return = 1;

   CLG_DEBUG(3, "+ setup_bbcc(BB %#lx)\n", bb_addr(bb));

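   /* Rough outline of what follows:
    *  (1) attribute the costs of the BB we just left (last_bbcc),
    *  (2) classify the control transfer that brought us here
    *      (fall-through, jump, call, return), correcting the guess
    *      with stack pointer checks,
    *  (3) update Callgrind's call stack and function context, and
    *  (4) find or create the BBCC for this BB under the current
    *      context and recursion level, and make it current. */
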
   /* This is needed because thread switches cannot reliably be tracked
    * with callback CLG_(run_thread) only: we have otherwise no way to get
    * the thread ID after a signal handler returns.
    * This could be removed again if that bug is fixed in Valgrind.
    * This is in the hot path but hopefully not too costly.
    */
   tid = VG_(get_running_tid)();
#if 1
   /* CLG_(switch_thread) is a no-op when tid is equal to CLG_(current_tid).
    * As this is on the hot path, we only call CLG_(switch_thread)(tid)
    * if tid differs from CLG_(current_tid).
    */
   if (UNLIKELY(tid != CLG_(current_tid)))
      CLG_(switch_thread)(tid);
#else
   CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
#endif

   sp = VG_(get_SP)(tid);
   last_bbcc = CLG_(current_state).bbcc;
   last_bb = last_bbcc ? last_bbcc->bb : 0;

   if (last_bb) {
      passed = CLG_(current_state).jmps_passed;
      CLG_ASSERT(passed <= last_bb->cjmp_count);
      jmpkind = last_bb->jmp[passed].jmpkind;
      isConditionalJump = (passed < last_bb->cjmp_count);

      if (CLG_(current_state).collect) {
         if (!CLG_(current_state).nonskipped) {
            last_bbcc->ecounter_sum++;
            last_bbcc->jmp[passed].ecounter++;
            if (!CLG_(clo).simulate_cache) {
               /* update Ir cost */
               UInt instr_count = last_bb->jmp[passed].instr + 1;
               CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
            }
         }
         else {
            /* do not increment the exe counter of BBs in skipped functions,
             * as it would fool the dumping code */
            if (!CLG_(clo).simulate_cache) {
               /* update Ir cost */
               UInt instr_count = last_bb->jmp[passed].instr + 1;
               CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
               CLG_(current_state).nonskipped->skipped[ fullOffset(EG_IR) ]
                  += instr_count;
            }
         }
      }

      CLG_DEBUGIF(4) {
         CLG_(print_execstate)(-2, &CLG_(current_state) );
         CLG_(print_bbcc_cost)(-2, last_bbcc);
      }
   }
   else {
      jmpkind = jk_None;
      isConditionalJump = False;
   }

   /* Manipulate JmpKind if needed, only using BB specific info */

   csp = CLG_(current_call_stack).sp;

   /* A return not matching the top call in our callstack is a jump */
   if ((jmpkind == jk_Return) && (csp > 0)) {
      Int csp_up = csp - 1;
      call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);

      /* We have a real return if
       * - the stack pointer (SP) left the current stack frame, or
       * - SP has the same value as when reaching the current function
       *   and the address of this BB is the return address of the last call
       *   (we even allow leaving multiple frames if the SP stays the
       *   same and we find a matching return address)
       * The latter condition is needed because on PPC, SP can stay
       * the same over CALL=b(c)l / RET=b(c)lr boundaries
       */
      if (sp < top_ce->sp) popcount_on_return = 0;
      else if (top_ce->sp == sp) {
         while(1) {
            if (top_ce->ret_addr == bb_addr(bb)) break;
            if (csp_up > 0) {
               csp_up--;
               top_ce = &(CLG_(current_call_stack).entry[csp_up]);
               if (top_ce->sp == sp) {
                  popcount_on_return++;
                  continue;
               }
            }
            popcount_on_return = 0;
            break;
         }
      }
      if (popcount_on_return == 0) {
         jmpkind = jk_Jump;
         ret_without_call = True;
      }
   }

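   /* For example: if the two topmost frames were entered at the same SP
    * (possible on PPC) and only the older one's return address matches
    * this BB, the loop above leaves popcount_on_return == 2, so both
    * frames get popped by the unwind further down. */
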
   /* Should this jump be converted to a call or pop/call ? */
   if ((jmpkind != jk_Return) &&
       (jmpkind != jk_Call) && last_bb) {

      /* We simulate a JMP/Cont to be a CALL if
       * - the jump is into another ELF object or section kind, or
       * - the jump is to the first instruction of a function (tail recursion)
       */
      if (ret_without_call ||
          /* This is for detection of optimized tail recursion.
           * On PPC, this is only detected as a call when going to another
           * function. The problem is that on PPC it can go wrong
           * more easily (no stack frame setup needed)
           */
#if defined(VGA_ppc32)
          (bb->is_entry && (last_bb->fn != bb->fn)) ||
#else
          bb->is_entry ||
#endif
          (last_bb->sect_kind != bb->sect_kind) ||
          (last_bb->obj->number != bb->obj->number)) {

         CLG_DEBUG(1, "  JMP: %s[%s] to %s[%s]%s!\n",
                   last_bb->fn->name, last_bb->obj->name,
                   bb->fn->name, bb->obj->name,
                   ret_without_call ? " (RET w/o CALL)" : "");

         if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp > 0)) {

            call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);

            if (top_ce->jcc) {

               CLG_DEBUG(1, "  Pop on Jump!\n");

               /* change source for delayed push */
               CLG_(current_state).bbcc = top_ce->jcc->from;
               sp = top_ce->sp;
               passed = top_ce->jcc->jmp;
               CLG_(pop_call_stack)();
            }
            else {
               CLG_ASSERT(CLG_(current_state).nonskipped != 0);
            }
         }

         jmpkind = jk_Call;
         call_emulation = True;
      }
   }

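   /* For calls into a function marked as skipped (cf. --fn-skip), no
    * separate call graph node is wanted: the call is pushed with the
    * skip flag set below, and its costs get attributed to the
    * surrounding non-skipped function via the 'nonskipped' mechanism. */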
   if (jmpkind == jk_Call)
      skip = CLG_(get_fn_node)(bb)->skip;

   CLG_DEBUGIF(1) {
      if (isConditionalJump)
         VG_(printf)("Cond-");
      switch(jmpkind) {
      case jk_None:   VG_(printf)("Fall-through"); break;
      case jk_Jump:   VG_(printf)("Jump"); break;
      case jk_Call:   VG_(printf)("Call"); break;
      case jk_Return: VG_(printf)("Return"); break;
      default:        tl_assert(0);
      }
      VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
                  last_bb ? bb_jmpaddr(last_bb) : 0,
                  bb_addr(bb), sp);
   }

   /* Handle CALL/RET and update the context to get the correct BBCC */

   if (jmpkind == jk_Return) {

      if ((csp == 0) ||
          ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
           ( *(CLG_(current_fn_stack).top-1) == 0)) ) {

         /* On an empty call stack or at a signal separation marker,
          * a RETURN generates a call stack underflow.
          */
         handleUnderflow(bb);
         CLG_(pop_call_stack)();
      }
      else {
         CLG_ASSERT(popcount_on_return > 0);
         CLG_(unwind_call_stack)(sp, popcount_on_return);
      }
   }
   else {
      Int unwind_count = CLG_(unwind_call_stack)(sp, 0);
      if (unwind_count > 0) {
         /* if unwinding was done, this actually is a return */
         jmpkind = jk_Return;
      }

      if (jmpkind == jk_Call) {
         delayed_push = True;

         csp = CLG_(current_call_stack).sp;
         if (call_emulation && csp > 0)
            sp = CLG_(current_call_stack).entry[csp-1].sp;
      }
   }

   /* Change to the new context if needed, taking delayed_push into account */
   if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
      CLG_(push_cxt)(CLG_(get_fn_node)(bb));
   }
   CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);

   /* If this is a freshly instrumented BBCC, assign the current context */
   bbcc = CLG_(get_bbcc)(bb);
   if (bbcc->cxt == 0) {
      CLG_ASSERT(bbcc->rec_array == 0);

      bbcc->cxt = CLG_(current_state).cxt;
      bbcc->rec_array =
         new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
      bbcc->rec_array[0] = bbcc;

      insert_bbcc_into_hash(bbcc);
   }
   else {
      /* get BBCC with current context */

      /* first check LRU of last bbcc executed */

      if (last_bbcc) {
         bbcc = last_bbcc->lru_next_bbcc;
         if (bbcc &&
             ((bbcc->bb != bb) ||
              (bbcc->cxt != CLG_(current_state).cxt)))
            bbcc = 0;
      }
      else
         bbcc = 0;

      if (!bbcc)
         bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
      if (!bbcc)
         bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);

      bb->last_bbcc = bbcc;
   }

   /* save for fast lookup */
   if (last_bbcc)
      last_bbcc->lru_next_bbcc = bbcc;

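   /* With separate recursion levels requested for this function
    * (cf. --separate-recs), each level gets its own BBCC clone:
    * 'level' is the current activation count of the function (bumped
    * if this BB starts a new call), clamped to separate_recursions;
    * slot level-1 of rec_array holds the clone for that level. */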
   if ((*CLG_(current_fn_stack).top)->separate_recursions > 1) {
      UInt level, idx;
      fn_node* top = *(CLG_(current_fn_stack).top);

      level = *CLG_(get_fn_entry)(top->number);

      if (delayed_push && !skip) {
         if (CLG_(clo).skip_direct_recursion) {
            /* a call was detected, which means that the source BB != 0 */
            CLG_ASSERT(CLG_(current_state).bbcc != 0);
            /* only increment the rec. level if called from a different function */
            if (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0])
               level++;
         }
         else level++;
      }
      if (level > top->separate_recursions)
         level = top->separate_recursions;

      if (level == 0) {
         /* can only happen if instrumentation was just switched on */
         level = 1;
         *CLG_(get_fn_entry)(top->number) = 1;
      }

      idx = level - 1;
      if (bbcc->rec_array[idx])
         bbcc = bbcc->rec_array[idx];
      else
         bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);

      CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
   }

   if (delayed_push) {
      if (!skip && CLG_(current_state).nonskipped) {
         /* a call from a skipped to a non-skipped function */
         CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
         /* FIXME: take the real passed count from shadow stack */
         passed = CLG_(current_state).bbcc->bb->cjmp_count;
      }
      CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
                            bbcc, sp, skip);
   }

   if (CLG_(clo).collect_jumps && (jmpkind == jk_Jump)) {

      /* Handle conditional jumps followed, i.e. trace arcs.
       * This uses JCC structures, too */

      jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
      CLG_ASSERT(jcc != 0);
      // Change from default, and check if already changed
      if (jcc->jmpkind == jk_Call)
         jcc->jmpkind = isConditionalJump ? jk_CondJump : jk_Jump;
      else {
         // FIXME: Why can this fail?
         // CLG_ASSERT(jcc->jmpkind == jmpkind);
      }

      jcc->call_counter++;
      if (isConditionalJump)
         CLG_(stat).jcnd_counter++;
      else
         CLG_(stat).jump_counter++;
   }

   CLG_(current_state).bbcc = bbcc;
   /* Even though this will be set in instrumented code directly before
    * side exits, it needs to be set to 0 here in case an exception
    * happens in the first instructions of the BB */
   CLG_(current_state).jmps_passed = 0;
   // needed for log_* handlers called in this BB
   CLG_(bb_base)   = bb->obj->offset + bb->offset;
   CLG_(cost_base) = bbcc->cost;

   CLG_DEBUGIF(1) {
      VG_(printf)("   ");
      CLG_(print_bbcc_fn)(bbcc);
      VG_(printf)("\n");
   }

   CLG_DEBUG(3, "- setup_bbcc (BB %#lx): Cost %p (Len %u), Instrs %u (Len %u)\n",
             bb_addr(bb), bbcc->cost, bb->cost_count,
             bb->instr_count, bb->instr_len);
   CLG_DEBUGIF(3)
      CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
   CLG_DEBUG(3, "\n");

   CLG_(stat).bb_executions++;
}