Home | History | Annotate | Download | only in callgrind
      1 /*--------------------------------------------------------------------*/
      2 /*--- Callgrind                                                    ---*/
      3 /*---                                                       bbcc.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Callgrind, a Valgrind tool for call tracing.
      8 
      9    Copyright (C) 2002-2013, Josef Weidendorfer (Josef.Weidendorfer (at) gmx.de)
     10 
     11    This program is free software; you can redistribute it and/or
     12    modify it under the terms of the GNU General Public License as
     13    published by the Free Software Foundation; either version 2 of the
     14    License, or (at your option) any later version.
     15 
     16    This program is distributed in the hope that it will be useful, but
     17    WITHOUT ANY WARRANTY; without even the implied warranty of
     18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     19    General Public License for more details.
     20 
     21    You should have received a copy of the GNU General Public License
     22    along with this program; if not, write to the Free Software
     23    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     24    02111-1307, USA.
     25 
     26    The GNU General Public License is contained in the file COPYING.
     27 */
     28 
     29 #include "global.h"
     30 #include "costs.h"
     31 
     32 #include "pub_tool_threadstate.h"
     33 
     34 /*------------------------------------------------------------*/
     35 /*--- BBCC operations                                      ---*/
     36 /*------------------------------------------------------------*/
     37 
     38 #define N_BBCC_INITIAL_ENTRIES  10437
     39 
     40 /* BBCC table (key is BB/Context), per thread, resizable */
     41 bbcc_hash current_bbccs;
     42 
     43 void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
     44 {
     45    Int i;
     46 
     47    CLG_ASSERT(bbccs != 0);
     48 
     49    bbccs->size    = N_BBCC_INITIAL_ENTRIES;
     50    bbccs->entries = 0;
     51    bbccs->table = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1",
     52                                       bbccs->size * sizeof(BBCC*));
     53 
     54    for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
     55 }
     56 
     57 void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
     58 {
     59   CLG_ASSERT(dst != 0);
     60 
     61   dst->size    = current_bbccs.size;
     62   dst->entries = current_bbccs.entries;
     63   dst->table   = current_bbccs.table;
     64 }
     65 
     66 bbcc_hash* CLG_(get_current_bbcc_hash)()
     67 {
     68   return &current_bbccs;
     69 }
     70 
     71 void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
     72 {
     73   CLG_ASSERT(h != 0);
     74 
     75   current_bbccs.size    = h->size;
     76   current_bbccs.entries = h->entries;
     77   current_bbccs.table   = h->table;
     78 }
     79 
     80 /*
     81  * Zero all costs of a BBCC
     82  */
     83 void CLG_(zero_bbcc)(BBCC* bbcc)
     84 {
     85   Int i;
     86   jCC* jcc;
     87 
     88   CLG_ASSERT(bbcc->cxt != 0);
     89   CLG_DEBUG(1, "  zero_bbcc: BB %#lx, Cxt %d "
     90 	   "(fn '%s', rec %d)\n",
     91 	   bb_addr(bbcc->bb),
     92 	   bbcc->cxt->base_number + bbcc->rec_index,
     93 	   bbcc->cxt->fn[0]->name,
     94 	   bbcc->rec_index);
     95 
     96   if ((bbcc->ecounter_sum ==0) &&
     97       (bbcc->ret_counter ==0)) return;
     98 
     99   for(i=0;i<bbcc->bb->cost_count;i++)
    100     bbcc->cost[i] = 0;
    101   for(i=0;i <= bbcc->bb->cjmp_count;i++) {
    102     bbcc->jmp[i].ecounter = 0;
    103     for(jcc=bbcc->jmp[i].jcc_list; jcc; jcc=jcc->next_from)
    104 	CLG_(init_cost)( CLG_(sets).full, jcc->cost );
    105   }
    106   bbcc->ecounter_sum = 0;
    107   bbcc->ret_counter = 0;
    108 }
    109 
    110 
    111 
    112 void CLG_(forall_bbccs)(void (*func)(BBCC*))
    113 {
    114   BBCC *bbcc, *bbcc2;
    115   int i, j;
    116 
    117   for (i = 0; i < current_bbccs.size; i++) {
    118     if ((bbcc=current_bbccs.table[i]) == NULL) continue;
    119     while (bbcc) {
    120       /* every bbcc should have a rec_array */
    121       CLG_ASSERT(bbcc->rec_array != 0);
    122 
    123       for(j=0;j<bbcc->cxt->fn[0]->separate_recursions;j++) {
    124 	if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;
    125 
    126 	(*func)(bbcc2);
    127       }
    128       bbcc = bbcc->next;
    129     }
    130   }
    131 }
    132 
    133 
    134 /* All BBCCs for recursion level 0 are inserted into a
    135  * thread specific hash table with key
    136  * - address of BB structure (unique, as never freed)
    137  * - current context (includes caller chain)
    138  * BBCCs for other recursion levels are in bbcc->rec_array.
    139  *
    140  * The hash is used in setup_bb(), i.e. to find the cost
    141  * counters to be changed in the execution of a BB.
    142  */
    143 
    144 static __inline__
    145 UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
    146 {
    147    CLG_ASSERT(bb != 0);
    148    CLG_ASSERT(cxt != 0);
    149 
    150    return ((Addr)bb + (Addr)cxt) % size;
    151 }
    152 
    153 
    154 /* Lookup for a BBCC in hash.
    155  */
    156 static
    157 BBCC* lookup_bbcc(BB* bb, Context* cxt)
    158 {
    159    BBCC* bbcc = bb->last_bbcc;
    160    UInt  idx;
    161 
    162    /* check LRU */
    163    if (bbcc->cxt == cxt) {
    164        if (!CLG_(clo).separate_threads) {
    165 	   /* if we don't dump threads separate, tid doesn't have to match */
    166 	   return bbcc;
    167        }
    168        if (bbcc->tid == CLG_(current_tid)) return bbcc;
    169    }
    170 
    171    CLG_(stat).bbcc_lru_misses++;
    172 
    173    idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
    174    bbcc = current_bbccs.table[idx];
    175    while (bbcc &&
    176 	  (bb      != bbcc->bb ||
    177 	   cxt     != bbcc->cxt)) {
    178        bbcc = bbcc->next;
    179    }
    180 
    181    CLG_DEBUG(2,"  lookup_bbcc(BB %#lx, Cxt %d, fn '%s'): %p (tid %d)\n",
    182 	    bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
    183 	    bbcc, bbcc ? bbcc->tid : 0);
    184 
    185    CLG_DEBUGIF(2)
    186      if (bbcc) CLG_(print_bbcc)(-2,bbcc);
    187 
    188    return bbcc;
    189 }
    190 
    191 
    192 /* double size of hash table 1 (addr->BBCC) */
    193 static void resize_bbcc_hash(void)
    194 {
    195     Int i, new_size, conflicts1 = 0, conflicts2 = 0;
    196     BBCC** new_table;
    197     UInt new_idx;
    198     BBCC *curr_BBCC, *next_BBCC;
    199 
    200     new_size = 2*current_bbccs.size+3;
    201     new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1",
    202                                     new_size * sizeof(BBCC*));
    203 
    204     if (!new_table) return;
    205 
    206     for (i = 0; i < new_size; i++)
    207       new_table[i] = NULL;
    208 
    209     for (i = 0; i < current_bbccs.size; i++) {
    210 	if (current_bbccs.table[i] == NULL) continue;
    211 
    212 	curr_BBCC = current_bbccs.table[i];
    213 	while (NULL != curr_BBCC) {
    214 	    next_BBCC = curr_BBCC->next;
    215 
    216 	    new_idx = bbcc_hash_idx(curr_BBCC->bb,
    217 				    curr_BBCC->cxt,
    218 				    new_size);
    219 
    220 	    curr_BBCC->next = new_table[new_idx];
    221 	    new_table[new_idx] = curr_BBCC;
    222 	    if (curr_BBCC->next) {
    223 		conflicts1++;
    224 		if (curr_BBCC->next->next)
    225 		    conflicts2++;
    226 	    }
    227 
    228 	    curr_BBCC = next_BBCC;
    229 	}
    230     }
    231 
    232     VG_(free)(current_bbccs.table);
    233 
    234 
    235     CLG_DEBUG(0,"Resize BBCC Hash: %d => %d (entries %d, conflicts %d/%d)\n",
    236 	     current_bbccs.size, new_size,
    237 	     current_bbccs.entries, conflicts1, conflicts2);
    238 
    239     current_bbccs.size = new_size;
    240     current_bbccs.table = new_table;
    241     CLG_(stat).bbcc_hash_resizes++;
    242 }
    243 
    244 
    245 static __inline
    246 BBCC** new_recursion(int size)
    247 {
    248     BBCC** bbccs;
    249     int i;
    250 
    251     bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size);
    252     for(i=0;i<size;i++)
    253 	bbccs[i] = 0;
    254 
    255     CLG_DEBUG(3,"  new_recursion(size %d): %p\n", size, bbccs);
    256 
    257     return bbccs;
    258 }
    259 
    260 
    261 /*
    262  * Allocate a new BBCC
    263  *
    264  * Uninitialized:
    265  * cxt, rec_index, rec_array, next_bbcc, next1, next2
    266  */
    267 static __inline__
    268 BBCC* new_bbcc(BB* bb)
    269 {
    270    BBCC* bbcc;
    271    Int i;
    272 
    273    /* We need cjmp_count+1 JmpData structs:
    274     * the last is for the unconditional jump/call/ret at end of BB
    275     */
    276    bbcc = (BBCC*)CLG_MALLOC("cl.bbcc.nb.1",
    277 			    sizeof(BBCC) +
    278 			    (bb->cjmp_count+1) * sizeof(JmpData));
    279    bbcc->bb  = bb;
    280    bbcc->tid = CLG_(current_tid);
    281 
    282    bbcc->ret_counter = 0;
    283    bbcc->skipped = 0;
    284    bbcc->cost = CLG_(get_costarray)(bb->cost_count);
    285    for(i=0;i<bb->cost_count;i++)
    286      bbcc->cost[i] = 0;
    287    for(i=0; i<=bb->cjmp_count; i++) {
    288        bbcc->jmp[i].ecounter = 0;
    289        bbcc->jmp[i].jcc_list = 0;
    290    }
    291    bbcc->ecounter_sum = 0;
    292 
    293    /* Init pointer caches (LRU) */
    294    bbcc->lru_next_bbcc = 0;
    295    bbcc->lru_from_jcc  = 0;
    296    bbcc->lru_to_jcc  = 0;
    297 
    298    CLG_(stat).distinct_bbccs++;
    299 
    300    CLG_DEBUG(3, "  new_bbcc(BB %#lx): %p (now %d)\n",
    301 	    bb_addr(bb), bbcc, CLG_(stat).distinct_bbccs);
    302 
    303    return bbcc;
    304 }
    305 
    306 
    307 /**
    308  * Inserts a new BBCC into hashes.
    309  * BBCC specific items must be set as this is used for the hash
    310  * keys:
    311  *  fn     : current function
    312  *  tid    : current thread ID
    313  *  from   : position where current function is called from
    314  *
    315  * Recursion level doesn't need to be set as this is not included
    316  * in the hash key: Only BBCCs with rec level 0 are in hashes.
    317  */
    318 static
    319 void insert_bbcc_into_hash(BBCC* bbcc)
    320 {
    321     UInt idx;
    322 
    323     CLG_ASSERT(bbcc->cxt != 0);
    324 
    325     CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %#lx, fn '%s')\n",
    326 	     bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);
    327 
    328     /* check fill degree of hash and resize if needed (>90%) */
    329     current_bbccs.entries++;
    330     if (100 * current_bbccs.entries / current_bbccs.size > 90)
    331 	resize_bbcc_hash();
    332 
    333     idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
    334     bbcc->next = current_bbccs.table[idx];
    335     current_bbccs.table[idx] = bbcc;
    336 
    337     CLG_DEBUG(3,"- insert_bbcc_into_hash: %d entries\n",
    338 	     current_bbccs.entries);
    339 }
    340 
    341 static const HChar* mangled_cxt(Context* cxt, int rec_index)
    342 {
    343     static HChar mangled[FN_NAME_LEN];
    344     int i, p;
    345 
    346     if (!cxt) return "(no context)";
    347 
    348     p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
    349     if (rec_index >0)
    350 	p += VG_(sprintf)(mangled+p, "'%d", rec_index +1);
    351     for(i=1;i<cxt->size;i++)
    352 	p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);
    353 
    354     return mangled;
    355 }
    356 
    357 
    358 /* Create a new BBCC as a copy of an existing one,
    359  * but with costs set to 0 and jcc chains empty.
    360  *
    361  * This is needed when a BB is executed in another context than
    362  * the one at instrumentation time of the BB.
    363  *
    364  * Use cases:
    365  *  rec_index == 0: clone from a BBCC with differing tid/cxt
    366  *                  and insert into hashes
    367  *  rec_index >0  : clone from a BBCC with same tid/cxt and rec_index 0
    368  *                  don't insert into hashes
    369  */
    370 static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
    371 {
    372     BBCC* bbcc;
    373 
    374     CLG_DEBUG(3,"+ clone_bbcc(BB %#lx, rec %d, fn %s)\n",
    375 	     bb_addr(orig->bb), rec_index, cxt->fn[0]->name);
    376 
    377     bbcc = new_bbcc(orig->bb);
    378 
    379     if (rec_index == 0) {
    380 
    381       /* hash insertion is only allowed if tid or cxt is different */
    382       CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
    383 		(orig->cxt != cxt));
    384 
    385       bbcc->rec_index = 0;
    386       bbcc->cxt = cxt;
    387       bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
    388       bbcc->rec_array[0] = bbcc;
    389 
    390       insert_bbcc_into_hash(bbcc);
    391     }
    392     else {
    393       if (CLG_(clo).separate_threads)
    394 	CLG_ASSERT(orig->tid == CLG_(current_tid));
    395 
    396       CLG_ASSERT(orig->cxt == cxt);
    397       CLG_ASSERT(orig->rec_array);
    398       CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
    399       CLG_ASSERT(orig->rec_array[rec_index] ==0);
    400 
    401       /* new BBCC will only have differing recursion level */
    402       bbcc->rec_index = rec_index;
    403       bbcc->cxt = cxt;
    404       bbcc->rec_array = orig->rec_array;
    405       bbcc->rec_array[rec_index] = bbcc;
    406     }
    407 
    408     /* update list of BBCCs for same BB */
    409     bbcc->next_bbcc = orig->bb->bbcc_list;
    410     orig->bb->bbcc_list = bbcc;
    411 
    412 
    413     CLG_DEBUGIF(3)
    414       CLG_(print_bbcc)(-2, bbcc);
    415 
    416     // FIXME: mangled_cxt returns a pointer to a static buffer that
    417     // gets overwritten with each invocation.
    418     CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %#lx\n"
    419 		"   orig %s\n"
    420 		"   new  %s\n",
    421 	     orig, rec_index, bb_addr(orig->bb),
    422 	     mangled_cxt(orig->cxt, orig->rec_index),
    423 	     mangled_cxt(bbcc->cxt, bbcc->rec_index));
    424 
    425     CLG_(stat).bbcc_clones++;
    426 
    427     return bbcc;
    428 };
    429 
    430 
    431 
    432 /* Get a pointer to the cost centre structure for given basic block
    433  * address. If created, the BBCC is inserted into the BBCC hash.
    434  * Also sets BB_seen_before by reference.
    435  *
    436  */
    437 BBCC* CLG_(get_bbcc)(BB* bb)
    438 {
    439    BBCC* bbcc;
    440 
    441    CLG_DEBUG(3, "+ get_bbcc(BB %#lx)\n", bb_addr(bb));
    442 
    443    bbcc = bb->bbcc_list;
    444 
    445    if (!bbcc) {
    446      bbcc = new_bbcc(bb);
    447 
    448      /* initialize BBCC */
    449      bbcc->cxt       = 0;
    450      bbcc->rec_array = 0;
    451      bbcc->rec_index = 0;
    452 
    453      bbcc->next_bbcc = bb->bbcc_list;
    454      bb->bbcc_list = bbcc;
    455      bb->last_bbcc = bbcc;
    456 
    457      CLG_DEBUGIF(3)
    458        CLG_(print_bbcc)(-2, bbcc);
    459    }
    460 
    461    CLG_DEBUG(3, "- get_bbcc(BB %#lx): BBCC %p\n",
    462 		bb_addr(bb), bbcc);
    463 
    464    return bbcc;
    465 }
    466 
    467 
    468 /* Callgrind manages its own call stack for each thread.
    469  * When leaving a function, a underflow can happen when
    470  * Callgrind's tracing was switched on in the middle of
    471  * a run, i.e. when Callgrind was not able to trace the
    472  * call instruction.
    473  * This function tries to reconstruct the original call.
    474  * As we know the return address (the address following
    475  * the CALL instruction), we can detect the function
    476  * we return back to, but the original call site is unknown.
    477  * We suppose a call site at return address - 1.
    478  * (TODO: other heuristic: lookup info of instrumented BBs).
    479  */
    480 static void handleUnderflow(BB* bb)
    481 {
    482   /* RET at top of call stack */
    483   BBCC* source_bbcc;
    484   BB* source_bb;
    485   Bool seen_before;
    486   fn_node* caller;
    487   int fn_number;
    488   unsigned *pactive;
    489   call_entry* call_entry_up;
    490 
    491   CLG_DEBUG(1,"  Callstack underflow !\n");
    492 
    493   /* we emulate an old call from the function we return to
    494    * by using (<return address> -1) */
    495   source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
    496   source_bbcc = CLG_(get_bbcc)(source_bb);
    497 
    498   /* seen_before can be true if RET from a signal handler */
    499   if (!seen_before) {
    500     source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
    501   }
    502   else if (CLG_(current_state).collect)
    503     source_bbcc->ecounter_sum++;
    504 
    505   /* Force a new top context, will be set active by push_cxt() */
    506   CLG_(current_fn_stack).top--;
    507   CLG_(current_state).cxt = 0;
    508   caller = CLG_(get_fn_node)(bb);
    509   CLG_(push_cxt)( caller );
    510 
    511   if (!seen_before) {
    512     /* set rec array for source BBCC: this is at rec level 1 */
    513     source_bbcc->rec_array = new_recursion(caller->separate_recursions);
    514     source_bbcc->rec_array[0] = source_bbcc;
    515 
    516     CLG_ASSERT(source_bbcc->cxt == 0);
    517     source_bbcc->cxt = CLG_(current_state).cxt;
    518     insert_bbcc_into_hash(source_bbcc);
    519   }
    520   CLG_ASSERT(CLG_(current_state).bbcc);
    521 
    522   /* correct active counts */
    523   fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
    524   pactive = CLG_(get_fn_entry)(fn_number);
    525   (*pactive)--;
    526 
    527   /* This assertion is not correct for reentrant
    528    * signal handlers */
    529   /* CLG_ASSERT(*pactive == 0); */
    530 
    531   CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
    532   /* back to current context */
    533   CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
    534   CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
    535 		       (Addr)-1, False);
    536   call_entry_up =
    537     &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]);
    538   /* assume this call is lasting since last dump or
    539    * for a signal handler since it's call */
    540   if (CLG_(current_state).sig == 0)
    541     CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
    542 		    CLG_(get_current_thread)()->lastdump_cost );
    543   else
    544     CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
    545 }
    546 
    547 
    548 /*
    549  * Helper function called at start of each instrumented BB to setup
    550  * pointer to costs for current thread/context/recursion level
    551  */
    552 
    553 VG_REGPARM(1)
    554 void CLG_(setup_bbcc)(BB* bb)
    555 {
    556   BBCC *bbcc, *last_bbcc;
    557   Bool  call_emulation = False, delayed_push = False, skip = False;
    558   Addr sp;
    559   BB* last_bb;
    560   ThreadId tid;
    561   ClgJumpKind jmpkind;
    562   Bool isConditionalJump;
    563   Int passed = 0, csp;
    564   Bool ret_without_call = False;
    565   Int popcount_on_return = 1;
    566 
    567   CLG_DEBUG(3,"+ setup_bbcc(BB %#lx)\n", bb_addr(bb));
    568 
    569   /* This is needed because thread switches can not reliable be tracked
    570    * with callback CLG_(run_thread) only: we have otherwise no way to get
    571    * the thread ID after a signal handler returns.
    572    * This could be removed again if that bug is fixed in Valgrind.
    573    * This is in the hot path but hopefully not to costly.
    574    */
    575   tid = VG_(get_running_tid)();
    576 #if 1
    577   /* CLG_(switch_thread) is a no-op when tid is equal to CLG_(current_tid).
    578    * As this is on the hot path, we only call CLG_(switch_thread)(tid)
    579    * if tid differs from the CLG_(current_tid).
    580    */
    581   if (UNLIKELY(tid != CLG_(current_tid)))
    582      CLG_(switch_thread)(tid);
    583 #else
    584   CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
    585 #endif
    586 
    587   sp = VG_(get_SP)(tid);
    588   last_bbcc = CLG_(current_state).bbcc;
    589   last_bb = last_bbcc ? last_bbcc->bb : 0;
    590 
    591   if (last_bb) {
    592       passed = CLG_(current_state).jmps_passed;
    593       CLG_ASSERT(passed <= last_bb->cjmp_count);
    594       jmpkind = last_bb->jmp[passed].jmpkind;
    595       isConditionalJump = (passed < last_bb->cjmp_count);
    596 
    597       if (CLG_(current_state).collect) {
    598 	if (!CLG_(current_state).nonskipped) {
    599 	  last_bbcc->ecounter_sum++;
    600 	  last_bbcc->jmp[passed].ecounter++;
    601 	  if (!CLG_(clo).simulate_cache) {
    602 	      /* update Ir cost */
    603               UInt instr_count = last_bb->jmp[passed].instr+1;
    604               CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
    605 	  }
    606 	}
    607 	else {
    608 	  /* do not increment exe counter of BBs in skipped functions, as it
    609 	   * would fool dumping code */
    610 	  if (!CLG_(clo).simulate_cache) {
    611 	      /* update Ir cost */
    612               UInt instr_count = last_bb->jmp[passed].instr+1;
    613               CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
    614               CLG_(current_state).nonskipped->skipped[ fullOffset(EG_IR) ]
    615 		+= instr_count;
    616 	  }
    617 	}
    618       }
    619 
    620       CLG_DEBUGIF(4) {
    621 	  CLG_(print_execstate)(-2, &CLG_(current_state) );
    622 	  CLG_(print_bbcc_cost)(-2, last_bbcc);
    623       }
    624   }
    625   else {
    626       jmpkind = jk_None;
    627       isConditionalJump = False;
    628   }
    629 
    630   /* Manipulate JmpKind if needed, only using BB specific info */
    631 
    632   csp = CLG_(current_call_stack).sp;
    633 
    634   /* A return not matching the top call in our callstack is a jump */
    635   if ( (jmpkind == jk_Return) && (csp >0)) {
    636       Int csp_up = csp-1;
    637       call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);
    638 
    639       /* We have a real return if
    640        * - the stack pointer (SP) left the current stack frame, or
    641        * - SP has the same value as when reaching the current function
    642        *   and the address of this BB is the return address of last call
    643        *   (we even allow to leave multiple frames if the SP stays the
    644        *    same and we find a matching return address)
    645        * The latter condition is needed because on PPC, SP can stay
    646        * the same over CALL=b(c)l / RET=b(c)lr boundaries
    647        */
    648       if (sp < top_ce->sp) popcount_on_return = 0;
    649       else if (top_ce->sp == sp) {
    650 	  while(1) {
    651 	      if (top_ce->ret_addr == bb_addr(bb)) break;
    652 	      if (csp_up>0) {
    653 		  csp_up--;
    654 		  top_ce = &(CLG_(current_call_stack).entry[csp_up]);
    655 		  if (top_ce->sp == sp) {
    656 		      popcount_on_return++;
    657 		      continue;
    658 		  }
    659 	      }
    660 	      popcount_on_return = 0;
    661 	      break;
    662 	  }
    663       }
    664       if (popcount_on_return == 0) {
    665 	  jmpkind = jk_Jump;
    666 	  ret_without_call = True;
    667       }
    668   }
    669 
    670   /* Should this jump be converted to call or pop/call ? */
    671   if (( jmpkind != jk_Return) &&
    672       ( jmpkind != jk_Call) && last_bb) {
    673 
    674     /* We simulate a JMP/Cont to be a CALL if
    675      * - jump is in another ELF object or section kind
    676      * - jump is to first instruction of a function (tail recursion)
    677      */
    678     if (ret_without_call ||
    679 	/* This is for detection of optimized tail recursion.
    680 	 * On PPC, this is only detected as call when going to another
    681 	 * function. The problem is that on PPC it can go wrong
    682 	 * more easily (no stack frame setup needed)
    683 	 */
    684 #if defined(VGA_ppc32)
    685 	(bb->is_entry && (last_bb->fn != bb->fn)) ||
    686 #else
    687 	bb->is_entry ||
    688 #endif
    689 	(last_bb->sect_kind != bb->sect_kind) ||
    690 	(last_bb->obj->number != bb->obj->number)) {
    691 
    692 	CLG_DEBUG(1,"     JMP: %s[%s] to %s[%s]%s!\n",
    693 		  last_bb->fn->name, last_bb->obj->name,
    694 		  bb->fn->name, bb->obj->name,
    695 		  ret_without_call?" (RET w/o CALL)":"");
    696 
    697 	if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp>0)) {
    698 
    699 	    call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);
    700 
    701 	    if (top_ce->jcc) {
    702 
    703 		CLG_DEBUG(1,"     Pop on Jump!\n");
    704 
    705 		/* change source for delayed push */
    706 		CLG_(current_state).bbcc = top_ce->jcc->from;
    707 		sp = top_ce->sp;
    708 		passed = top_ce->jcc->jmp;
    709 		CLG_(pop_call_stack)();
    710 	    }
    711 	    else {
    712 		CLG_ASSERT(CLG_(current_state).nonskipped != 0);
    713 	    }
    714 	}
    715 
    716 	jmpkind = jk_Call;
    717 	call_emulation = True;
    718     }
    719   }
    720 
    721   if (jmpkind == jk_Call)
    722     skip = CLG_(get_fn_node)(bb)->skip;
    723 
    724   CLG_DEBUGIF(1) {
    725     if (isConditionalJump)
    726       VG_(printf)("Cond-");
    727     switch(jmpkind) {
    728     case jk_None:   VG_(printf)("Fall-through"); break;
    729     case jk_Jump:   VG_(printf)("Jump"); break;
    730     case jk_Call:   VG_(printf)("Call"); break;
    731     case jk_Return: VG_(printf)("Return"); break;
    732     default:        tl_assert(0);
    733     }
    734     VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
    735 		last_bb ? bb_jmpaddr(last_bb) : 0,
    736 		bb_addr(bb), sp);
    737   }
    738 
    739   /* Handle CALL/RET and update context to get correct BBCC */
    740 
    741   if (jmpkind == jk_Return) {
    742 
    743     if ((csp == 0) ||
    744 	((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
    745 	 ( *(CLG_(current_fn_stack).top-1)==0)) ) {
    746 
    747       /* On an empty call stack or at a signal separation marker,
    748        * a RETURN generates an call stack underflow.
    749        */
    750       handleUnderflow(bb);
    751       CLG_(pop_call_stack)();
    752     }
    753     else {
    754 	CLG_ASSERT(popcount_on_return >0);
    755 	CLG_(unwind_call_stack)(sp, popcount_on_return);
    756     }
    757   }
    758   else {
    759     Int unwind_count = CLG_(unwind_call_stack)(sp, 0);
    760     if (unwind_count > 0) {
    761       /* if unwinding was done, this actually is a return */
    762       jmpkind = jk_Return;
    763     }
    764 
    765     if (jmpkind == jk_Call) {
    766       delayed_push = True;
    767 
    768       csp = CLG_(current_call_stack).sp;
    769       if (call_emulation && csp>0)
    770 	sp = CLG_(current_call_stack).entry[csp-1].sp;
    771 
    772     }
    773   }
    774 
    775   /* Change new context if needed, taking delayed_push into account */
    776   if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
    777     CLG_(push_cxt)(CLG_(get_fn_node)(bb));
    778   }
    779   CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);
    780 
    781   /* If there is a fresh instrumented BBCC, assign current context */
    782   bbcc = CLG_(get_bbcc)(bb);
    783   if (bbcc->cxt == 0) {
    784     CLG_ASSERT(bbcc->rec_array == 0);
    785 
    786     bbcc->cxt = CLG_(current_state).cxt;
    787     bbcc->rec_array =
    788       new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
    789     bbcc->rec_array[0] = bbcc;
    790 
    791     insert_bbcc_into_hash(bbcc);
    792   }
    793   else {
    794     /* get BBCC with current context */
    795 
    796     /* first check LRU of last bbcc executed */
    797 
    798     if (last_bbcc) {
    799       bbcc = last_bbcc->lru_next_bbcc;
    800       if (bbcc &&
    801 	  ((bbcc->bb != bb) ||
    802 	   (bbcc->cxt != CLG_(current_state).cxt)))
    803 	bbcc = 0;
    804     }
    805     else
    806       bbcc = 0;
    807 
    808     if (!bbcc)
    809       bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
    810     if (!bbcc)
    811       bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);
    812 
    813     bb->last_bbcc = bbcc;
    814   }
    815 
    816   /* save for fast lookup */
    817   if (last_bbcc)
    818     last_bbcc->lru_next_bbcc = bbcc;
    819 
    820   if ((*CLG_(current_fn_stack).top)->separate_recursions >1) {
    821     UInt level, idx;
    822     fn_node* top = *(CLG_(current_fn_stack).top);
    823 
    824     level = *CLG_(get_fn_entry)(top->number);
    825 
    826     if (delayed_push && !skip) {
    827       if (CLG_(clo).skip_direct_recursion) {
    828         /* a call was detected, which means that the source BB != 0 */
    829 	CLG_ASSERT(CLG_(current_state).bbcc != 0);
    830 	/* only increment rec. level if called from different function */
    831 	if (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0])
    832 	  level++;
    833       }
    834       else level++;
    835     }
    836     if (level> top->separate_recursions)
    837       level = top->separate_recursions;
    838 
    839     if (level == 0) {
    840       /* can only happen if instrumentation just was switched on */
    841       level = 1;
    842       *CLG_(get_fn_entry)(top->number) = 1;
    843     }
    844 
    845     idx = level -1;
    846     if (bbcc->rec_array[idx])
    847       bbcc = bbcc->rec_array[idx];
    848     else
    849       bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);
    850 
    851     CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
    852   }
    853 
    854   if (delayed_push) {
    855     if (!skip && CLG_(current_state).nonskipped) {
    856       /* a call from skipped to nonskipped */
    857       CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
    858       /* FIXME: take the real passed count from shadow stack */
    859       passed = CLG_(current_state).bbcc->bb->cjmp_count;
    860     }
    861     CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
    862 			 bbcc, sp, skip);
    863   }
    864 
    865   if (CLG_(clo).collect_jumps && (jmpkind == jk_Jump)) {
    866 
    867     /* Handle conditional jumps followed, i.e. trace arcs
    868      * This uses JCC structures, too */
    869 
    870     jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
    871     CLG_ASSERT(jcc != 0);
    872     // Change from default, and check if already changed
    873     if (jcc->jmpkind == jk_Call)
    874       jcc->jmpkind = isConditionalJump ? jk_CondJump : jk_Jump;
    875     else {
    876 	// FIXME: Why can this fail?
    877 	// CLG_ASSERT(jcc->jmpkind == jmpkind);
    878     }
    879 
    880     jcc->call_counter++;
    881     if (isConditionalJump)
    882       CLG_(stat).jcnd_counter++;
    883     else
    884       CLG_(stat).jump_counter++;
    885   }
    886 
    887   CLG_(current_state).bbcc = bbcc;
    888   /* Even though this will be set in instrumented code directly before
    889    * side exits, it needs to be set to 0 here in case an exception
    890    * happens in first instructions of the BB */
    891   CLG_(current_state).jmps_passed = 0;
    892   // needed for log_* handlers called in this BB
    893   CLG_(bb_base)   = bb->obj->offset + bb->offset;
    894   CLG_(cost_base) = bbcc->cost;
    895 
    896   CLG_DEBUGIF(1) {
    897     VG_(printf)("     ");
    898     CLG_(print_bbcc_fn)(bbcc);
    899     VG_(printf)("\n");
    900   }
    901 
    902   CLG_DEBUG(3,"- setup_bbcc (BB %#lx): Cost %p (Len %d), Instrs %d (Len %d)\n",
    903 	   bb_addr(bb), bbcc->cost, bb->cost_count,
    904 	   bb->instr_count, bb->instr_len);
    905   CLG_DEBUGIF(3)
    906     CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
    907   CLG_DEBUG(3,"\n");
    908 
    909   CLG_(stat).bb_executions++;
    910 }
    911