Home | History | Annotate | Download | only in callgrind
      1 /*--------------------------------------------------------------------*/
      2 /*--- Callgrind                                                    ---*/
      3 /*---                                                       dump.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Callgrind, a Valgrind tool for call tracing.
      8 
      9    Copyright (C) 2002-2010, Josef Weidendorfer (Josef.Weidendorfer (at) gmx.de)
     10 
     11    This program is free software; you can redistribute it and/or
     12    modify it under the terms of the GNU General Public License as
     13    published by the Free Software Foundation; either version 2 of the
     14    License, or (at your option) any later version.
     15 
     16    This program is distributed in the hope that it will be useful, but
     17    WITHOUT ANY WARRANTY; without even the implied warranty of
     18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     19    General Public License for more details.
     20 
     21    You should have received a copy of the GNU General Public License
     22    along with this program; if not, write to the Free Software
     23    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     24    02111-1307, USA.
     25 
     26    The GNU General Public License is contained in the file COPYING.
     27 */
     28 
     29 #include "config.h"
     30 #include "global.h"
     31 
     32 #include <pub_tool_threadstate.h>
     33 #include <pub_tool_libcfile.h>
     34 
     35 
/* Dump Part Counter; handed out read-only by CLG_(get_dump_counter)() */
static Int out_counter = 0;

/* Output file name and directory, returned by CLG_(get_out_file)() and
 * CLG_(get_out_directory)() after CLG_(init_dumps)() was called.
 */
static Char* out_file = 0;
static Char* out_directory = 0;
/* NOTE(review): presumably set by CLG_(init_dumps)() once it has run;
 * not visible in this part of the file - confirm. */
static Bool dumps_initialized = False;

/* Command */
static Char cmdbuf[BUF_LEN];

/* Total reads/writes/misses sum over all dumps and threads.
 * Updated during CC traversal at dump time.
 */
FullCost CLG_(total_cost) = 0;
/* Sum of all costs written by fprint_fcost() */
static FullCost dump_total_cost = 0;

/* Event mapping passed to fprint_cost() when cost lines are written */
EventMapping* CLG_(dumpmap) = 0;

/* Temporary output buffer for
 *  print_fn_pos, fprint_apos, fprint_fcost, fprint_jcc,
 *  fprint_fcc_ln, dump_run_info, dump_state_info
 */
static Char outbuf[FILENAME_LEN + FN_NAME_LEN + OBJ_NAME_LEN + COSTS_LEN];
     59 
     60 Int CLG_(get_dump_counter)(void)
     61 {
     62   return out_counter;
     63 }
     64 
     65 Char* CLG_(get_out_file)()
     66 {
     67     CLG_(init_dumps)();
     68     return out_file;
     69 }
     70 
     71 Char* CLG_(get_out_directory)()
     72 {
     73     CLG_(init_dumps)();
     74     return out_directory;
     75 }
     76 
     77 /*------------------------------------------------------------*/
     78 /*--- Output file related stuff                            ---*/
     79 /*------------------------------------------------------------*/
     80 
/* Boolean dumping array.
 * One allocation (dump_array) holds four consecutive flag ranges; the
 * *_dumped pointers below point to the start of each range inside it
 * (set up in init_dump_array). A flag is set once the name of the
 * object/file/function/context with that number was written, so that
 * only "(number)" needs to be written afterwards.
 */
static Bool* dump_array = 0;
static Int   dump_array_size = 0;
static Bool* obj_dumped = 0;
static Bool* file_dumped = 0;
static Bool* fn_dumped = 0;
static Bool* cxt_dumped = 0;
     88 
     89 static
     90 void reset_dump_array(void)
     91 {
     92     int i;
     93 
     94     CLG_ASSERT(dump_array != 0);
     95 
     96     for(i=0;i<dump_array_size;i++)
     97 	dump_array[i] = False;
     98 }
     99 
    100 static
    101 void init_dump_array(void)
    102 {
    103     dump_array_size = CLG_(stat).distinct_objs +
    104       CLG_(stat).distinct_files +
    105       CLG_(stat).distinct_fns +
    106       CLG_(stat).context_counter;
    107     CLG_ASSERT(dump_array == 0);
    108     dump_array = (Bool*) CLG_MALLOC("cl.dump.ida.1",
    109                                     dump_array_size * sizeof(Bool));
    110     obj_dumped  = dump_array;
    111     file_dumped = obj_dumped + CLG_(stat).distinct_objs;
    112     fn_dumped   = file_dumped + CLG_(stat).distinct_files;
    113     cxt_dumped  = fn_dumped + CLG_(stat).distinct_fns;
    114 
    115     reset_dump_array();
    116 
    117     CLG_DEBUG(1, "  init_dump_array: size %d\n", dump_array_size);
    118 }
    119 
    120 static __inline__
    121 void free_dump_array(void)
    122 {
    123     CLG_ASSERT(dump_array != 0);
    124     VG_(free)(dump_array);
    125 
    126     dump_array = 0;
    127     obj_dumped = 0;
    128     file_dumped = 0;
    129     fn_dumped = 0;
    130     cxt_dumped = 0;
    131 }
    132 
    133 
    134 /* Initialize to an invalid position */
    135 static __inline__
    136 void init_fpos(FnPos* p)
    137  {
    138     p->file = 0;
    139     p->fn = 0;
    140     p->obj = 0;
    141     p->cxt = 0;
    142     p->rec_index = 0;
    143 }
    144 
    145 
    146 #if 0
    147 static __inline__
    148 static void my_fwrite(Int fd, Char* buf, Int len)
    149 {
    150 	VG_(write)(fd, (void*)buf, len);
    151 }
    152 #else
    153 
    154 #define FWRITE_BUFSIZE 32000
    155 #define FWRITE_THROUGH 10000
    156 static Char fwrite_buf[FWRITE_BUFSIZE];
    157 static Int fwrite_pos;
    158 static Int fwrite_fd = -1;
    159 
    160 static __inline__
    161 void fwrite_flush(void)
    162 {
    163     if ((fwrite_fd>=0) && (fwrite_pos>0))
    164 	VG_(write)(fwrite_fd, (void*)fwrite_buf, fwrite_pos);
    165     fwrite_pos = 0;
    166 }
    167 
    168 static void my_fwrite(Int fd, Char* buf, Int len)
    169 {
    170     if (fwrite_fd != fd) {
    171 	fwrite_flush();
    172 	fwrite_fd = fd;
    173     }
    174     if (len > FWRITE_THROUGH) {
    175 	fwrite_flush();
    176 	VG_(write)(fd, (void*)buf, len);
    177 	return;
    178     }
    179     if (FWRITE_BUFSIZE - fwrite_pos <= len) fwrite_flush();
    180     VG_(strncpy)(fwrite_buf + fwrite_pos, buf, len);
    181     fwrite_pos += len;
    182 }
    183 #endif
    184 
    185 
    186 static void print_obj(Char* buf, obj_node* obj)
    187 {
    188     //int n;
    189 
    190     if (CLG_(clo).compress_strings) {
    191 	CLG_ASSERT(obj_dumped != 0);
    192 	if (obj_dumped[obj->number])
    193 	    /*n =*/ VG_(sprintf)(buf, "(%d)\n", obj->number);
    194 	else {
    195 	    /*n =*/ VG_(sprintf)(buf, "(%d) %s\n",
    196 			     obj->number, obj->name);
    197 	}
    198     }
    199     else
    200 	/*n =*/ VG_(sprintf)(buf, "%s\n", obj->name);
    201 
    202 #if 0
    203     /* add mapping parameters the first time a object is dumped
    204      * format: mp=0xSTART SIZE 0xOFFSET */
    205     if (!obj_dumped[obj->number]) {
    206 	obj_dumped[obj->number];
    207 	VG_(sprintf)(buf+n, "mp=%p %p %p\n",
    208 		     pos->obj->start, pos->obj->size, pos->obj->offset);
    209     }
    210 #else
    211     obj_dumped[obj->number] = True;
    212 #endif
    213 }
    214 
    215 static void print_file(Char* buf, file_node* file)
    216 {
    217     if (CLG_(clo).compress_strings) {
    218 	CLG_ASSERT(file_dumped != 0);
    219 	if (file_dumped[file->number])
    220 	    VG_(sprintf)(buf, "(%d)\n", file->number);
    221 	else {
    222 	    VG_(sprintf)(buf, "(%d) %s\n",
    223 			 file->number, file->name);
    224 	    file_dumped[file->number] = True;
    225 	}
    226     }
    227     else
    228 	VG_(sprintf)(buf, "%s\n", file->name);
    229 }
    230 
    231 /*
    232  * tag can be "fn", "cfn", "jfn"
    233  */
    234 static void print_fn(Int fd, Char* buf, Char* tag, fn_node* fn)
    235 {
    236     int p;
    237     p = VG_(sprintf)(buf, "%s=",tag);
    238     if (CLG_(clo).compress_strings) {
    239 	CLG_ASSERT(fn_dumped != 0);
    240 	if (fn_dumped[fn->number])
    241 	    p += VG_(sprintf)(buf+p, "(%d)\n", fn->number);
    242 	else {
    243 	    p += VG_(sprintf)(buf+p, "(%d) %s\n",
    244 			      fn->number, fn->name);
    245 	    fn_dumped[fn->number] = True;
    246 	}
    247     }
    248     else
    249 	p += VG_(sprintf)(buf+p, "%s\n", fn->name);
    250 
    251     my_fwrite(fd, buf, p);
    252 }
    253 
/*
 * Write the line "<tag>=<mangled name>\n" for context <cxt> with
 * recursion index <rec_index> to <fd>, using <buf> as scratch buffer.
 *
 * The mangled name is built from the names of all functions in the
 * context, separated by "'"; for rec_index > 0, "'<rec_index+1>" is put
 * directly after the first function.
 *
 * The flag index used for a context/recursion pair in cxt_dumped is
 * cxt->base_number + rec_index.
 *
 * Three output variants:
 * - compress_strings and compress_mangled: names of the single functions
 *   are written once as "(n) name", the mangled name itself only refers
 *   to these numbers.
 * - only compress_strings: "(N) <full mangled name>" the first time,
 *   "(N)" afterwards.
 * - no compression: full mangled name.
 */
static void print_mangled_fn(Int fd, Char* buf, Char* tag,
			     Context* cxt, int rec_index)
{
    int p, i;

    if (CLG_(clo).compress_strings && CLG_(clo).compress_mangled) {

	int n;
	Context* last;

	CLG_ASSERT(cxt_dumped != 0);
	/* already written before: the number is enough */
	if (cxt_dumped[cxt->base_number+rec_index]) {
	    p = VG_(sprintf)(buf, "%s=(%d)\n",
			     tag, cxt->base_number + rec_index);
	    my_fwrite(fd, buf, p);
	    return;
	}

	last = 0;
	/* make sure that for all context parts compressed data is written */
	for(i=cxt->size;i>0;i--) {
	    CLG_ASSERT(cxt->fn[i-1]->pure_cxt != 0);
	    n = cxt->fn[i-1]->pure_cxt->base_number;
	    if (cxt_dumped[n]) continue;
	    p = VG_(sprintf)(buf, "%s=(%d) %s\n",
			     tag, n, cxt->fn[i-1]->name);
	    my_fwrite(fd, buf, p);

	    cxt_dumped[n] = True;
	    /* remember the pure context written last */
	    last = cxt->fn[i-1]->pure_cxt;
	}
	/* If the last context was the context to print, we are finished */
	if ((last == cxt) && (rec_index == 0)) return;

	/* write the mangled name, referring to the parts by number only */
	p = VG_(sprintf)(buf, "%s=(%d) (%d)", tag,
			 cxt->base_number + rec_index,
			 cxt->fn[0]->pure_cxt->base_number);
	if (rec_index >0)
	    p += VG_(sprintf)(buf+p, "'%d", rec_index +1);
	for(i=1;i<cxt->size;i++)
	    p += VG_(sprintf)(buf+p, "'(%d)",
			      cxt->fn[i]->pure_cxt->base_number);
	p += VG_(sprintf)(buf+p, "\n");
	my_fwrite(fd, buf, p);

	cxt_dumped[cxt->base_number+rec_index] = True;
	return;
    }


    /* no compression of the mangled name itself */
    p = VG_(sprintf)(buf, "%s=", tag);
    if (CLG_(clo).compress_strings) {
	CLG_ASSERT(cxt_dumped != 0);
	if (cxt_dumped[cxt->base_number+rec_index]) {
	    /* already written before: the number is enough */
	    p += VG_(sprintf)(buf+p, "(%d)\n", cxt->base_number + rec_index);
	    my_fwrite(fd, buf, p);
	    return;
	}
	else {
	    /* first time: number, followed by the full name below */
	    p += VG_(sprintf)(buf+p, "(%d) ", cxt->base_number + rec_index);
	    cxt_dumped[cxt->base_number+rec_index] = True;
	}
    }

    /* full mangled name: fn0['rec]'fn1'fn2... */
    p += VG_(sprintf)(buf+p, "%s", cxt->fn[0]->name);
    if (rec_index >0)
	p += VG_(sprintf)(buf+p, "'%d", rec_index +1);
    for(i=1;i<cxt->size;i++)
	p += VG_(sprintf)(buf+p, "'%s", cxt->fn[i]->name);

    p += VG_(sprintf)(buf+p, "\n");
    my_fwrite(fd, buf, p);
}
    327 
    328 
    329 
/**
 * Print function position of the BBCC, but only print info differing to
 * the <last> position, update <last>
 * Return True if something changes.
 *
 * Lines possibly written, in this order:
 *  "rec="  recursion index  (only without name mangling)
 *  "frfn=" calling function, i.e. second function of the context
 *          (only without name mangling)
 *  "ob="   object of the first function of the context
 *  "fl="   file of the first function of the context
 *  "fn="   function name; with name mangling, this is the mangled name
 *          of the whole context
 */
static Bool print_fn_pos(int fd, FnPos* last, BBCC* bbcc)
{
    Bool res = False;

    CLG_DEBUGIF(3) {
	CLG_DEBUG(2, "+ print_fn_pos: ");
	CLG_(print_cxt)(16, bbcc->cxt, bbcc->rec_index);
    }

    if (!CLG_(clo).mangle_names) {
	if (last->rec_index != bbcc->rec_index) {
	    VG_(sprintf)(outbuf, "rec=%d\n\n", bbcc->rec_index);
	    my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
	    last->rec_index = bbcc->rec_index;
	    last->cxt = 0; /* reprint context */
	    res = True;
	}

	if (last->cxt != bbcc->cxt) {
	    /* the caller is the second entry of a context, if there is one */
	    fn_node* last_from = (last->cxt && last->cxt->size>1) ?
				 last->cxt->fn[1] : 0;
	    fn_node* curr_from = (bbcc->cxt && bbcc->cxt->size>1) ?
				 bbcc->cxt->fn[1] : 0;
	    if (curr_from == 0) {
		if (last_from != 0) {
		    /* switch back to no context */
		    VG_(sprintf)(outbuf, "frfn=(spontaneous)\n");
		    my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
		    res = True;
		}
	    }
	    else if (last_from != curr_from) {
		print_fn(fd,outbuf,"frfn", curr_from);
		res = True;
	    }
	    last->cxt = bbcc->cxt;
	}
    }

    if (last->obj != bbcc->cxt->fn[0]->file->obj) {
	VG_(sprintf)(outbuf, "ob=");
	/* the object name is put directly after the 3 characters of "ob=" */
	print_obj(outbuf+3, bbcc->cxt->fn[0]->file->obj);
	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
	last->obj = bbcc->cxt->fn[0]->file->obj;
	res = True;
    }

    if (last->file != bbcc->cxt->fn[0]->file) {
	VG_(sprintf)(outbuf, "fl=");
	/* the file name is put directly after the 3 characters of "fl=" */
	print_file(outbuf+3, bbcc->cxt->fn[0]->file);
	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
	last->file = bbcc->cxt->fn[0]->file;
	res = True;
    }

    if (!CLG_(clo).mangle_names) {
	if (last->fn != bbcc->cxt->fn[0]) {
	    print_fn(fd,outbuf, "fn", bbcc->cxt->fn[0]);
	    last->fn = bbcc->cxt->fn[0];
	    res = True;
	}
    }
    else {
	/* Print mangled name if context or rec_index changes */
	if ((last->rec_index != bbcc->rec_index) ||
	    (last->cxt != bbcc->cxt)) {

	    print_mangled_fn(fd, outbuf, "fn", bbcc->cxt, bbcc->rec_index);
	    last->fn = bbcc->cxt->fn[0];
	    last->rec_index = bbcc->rec_index;
	    res = True;
	}
    }

    /* with name mangling, last->cxt was not updated above */
    last->cxt = bbcc->cxt;

    CLG_DEBUG(2, "- print_fn_pos: %s\n", res ? "changed" : "");

    return res;
}
    415 
/* The debug lookup cache is useful if BBCCs for the same BB are
 * dumped directly in a row. This is a direct mapped cache:
 * an address is cached in slot (address % DEBUG_CACHE_SIZE) only.
 *
 * For each slot, the arrays hold the cached address, the file node and
 * line number found for it, and whether debug info was found at all
 * (see get_debug_pos).
 */
#define DEBUG_CACHE_SIZE 1777

static Addr       debug_cache_addr[DEBUG_CACHE_SIZE];
static file_node* debug_cache_file[DEBUG_CACHE_SIZE];
static int        debug_cache_line[DEBUG_CACHE_SIZE];
static Bool       debug_cache_info[DEBUG_CACHE_SIZE];
    425 
    426 static __inline__
    427 void init_debug_cache(void)
    428 {
    429     int i;
    430     for(i=0;i<DEBUG_CACHE_SIZE;i++) {
    431 	debug_cache_addr[i] = 0;
    432 	debug_cache_file[i] = 0;
    433 	debug_cache_line[i] = 0;
    434 	debug_cache_info[i] = 0;
    435     }
    436 }
    437 
/* Get the source position for address <addr>, which belongs to <bbcc>.
 *
 * On return, *p holds
 *  - file/line: from the debug info for <addr>, or the file node for "???"
 *    and line 0 if no debug info was found,
 *  - addr: <addr> minus the offset of the object of the BB,
 *  - bb_addr: the offset of the BB.
 *
 * File and line are taken from the debug lookup cache if the slot for
 * <addr> holds this address; otherwise the debug info is queried and the
 * result stored in the cache.
 *
 * Returns True if file/line debug info was found for <addr>.
 */
static /* __inline__ */
Bool get_debug_pos(BBCC* bbcc, Addr addr, AddrPos* p)
{
    Char file[FILENAME_LEN];
    Char dir[FILENAME_LEN];
    Bool found_file_line, found_dirname;

    int cachepos = addr % DEBUG_CACHE_SIZE;

    if (debug_cache_addr[cachepos] == addr) {
	/* cache hit */
	p->line = debug_cache_line[cachepos];
	p->file = debug_cache_file[cachepos];
	found_file_line = debug_cache_info[cachepos];
    }
    else {
	found_file_line = VG_(get_filename_linenum)(addr,
						    file, FILENAME_LEN,
						    dir, FILENAME_LEN,
						    &found_dirname,
						    &(p->line));
	if (!found_file_line) {
	    VG_(strcpy)(file, "???");
	    p->line = 0;
	}
	/* NOTE(review): relies on VG_(get_filename_linenum) setting
	 * found_dirname also when it returns False - confirm. */
	if (found_dirname) {
	    // +1 for the '/'.
	    CLG_ASSERT(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILENAME_LEN);
	    VG_(strcat)(dir, "/");     // Append '/'
	    VG_(strcat)(dir, file);    // Append file to dir
	    VG_(strcpy)(file, dir);    // Move dir+file to file
	}
	p->file    = CLG_(get_file_node)(bbcc->bb->obj, file);

	/* remember the result, replacing the previous content of the slot */
	debug_cache_info[cachepos] = found_file_line;
	debug_cache_addr[cachepos] = addr;
	debug_cache_line[cachepos] = p->line;
	debug_cache_file[cachepos] = p->file;
    }

    /* Address relative to the offset of the BB's object, and BB offset */
    p->addr = addr - bbcc->bb->obj->offset;
    p->bb_addr = bbcc->bb->offset;

    CLG_DEBUG(3, "  get_debug_pos(%#lx): BB %#lx, fn '%s', file '%s', line %u\n",
	     addr, bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name,
	     p->file->name, p->line);

    return found_file_line;
}
    487 
    488 
    489 /* copy file position and init cost */
    490 static void init_apos(AddrPos* p, Addr addr, Addr bbaddr, file_node* file)
    491 {
    492     p->addr    = addr;
    493     p->bb_addr = bbaddr;
    494     p->file    = file;
    495     p->line    = 0;
    496 }
    497 
    498 static void copy_apos(AddrPos* dst, AddrPos* src)
    499 {
    500     dst->addr    = src->addr;
    501     dst->bb_addr = src->bb_addr;
    502     dst->file    = src->file;
    503     dst->line    = src->line;
    504 }
    505 
    506 /* copy file position and init cost */
    507 static void init_fcost(AddrCost* c, Addr addr, Addr bbaddr, file_node* file)
    508 {
    509     init_apos( &(c->p), addr, bbaddr, file);
    510     /* FIXME: This is a memory leak as a AddrCost is inited multiple times */
    511     c->cost = CLG_(get_eventset_cost)( CLG_(sets).full );
    512     CLG_(init_cost)( CLG_(sets).full, c->cost );
    513 }
    514 
    515 
    516 /**
    517  * print position change inside of a BB (last -> curr)
    518  * this doesn't update last to curr!
    519  */
    520 static void fprint_apos(Int fd, AddrPos* curr, AddrPos* last, file_node* func_file)
    521 {
    522     CLG_ASSERT(curr->file != 0);
    523     CLG_DEBUG(2, "    print_apos(file '%s', line %d, bb %#lx, addr %#lx) fnFile '%s'\n",
    524 	     curr->file->name, curr->line, curr->bb_addr, curr->addr,
    525 	     func_file->name);
    526 
    527     if (curr->file != last->file) {
    528 
    529 	/* if we switch back to orig file, use fe=... */
    530 	if (curr->file == func_file)
    531 	    VG_(sprintf)(outbuf, "fe=");
    532 	else
    533 	    VG_(sprintf)(outbuf, "fi=");
    534 	print_file(outbuf+3, curr->file);
    535 	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
    536     }
    537 
    538     if (CLG_(clo).dump_bbs) {
    539 	if (curr->line != last->line) {
    540 	    VG_(sprintf)(outbuf, "ln=%d\n", curr->line);
    541 	    my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
    542 	}
    543     }
    544 }
    545 
    546 
    547 
    548 /**
    549  * Print a position.
    550  * This prints out differences if allowed
    551  *
    552  * This doesn't set last to curr afterwards!
    553  */
    554 static
    555 void fprint_pos(Int fd, AddrPos* curr, AddrPos* last)
    556 {
    557     if (0) //CLG_(clo).dump_bbs)
    558 	VG_(sprintf)(outbuf, "%lu ", curr->addr - curr->bb_addr);
    559     else {
    560 	int p = 0;
    561 	if (CLG_(clo).dump_instr) {
    562 	    int diff = curr->addr - last->addr;
    563 	    if ( CLG_(clo).compress_pos && (last->addr >0) &&
    564 		 (diff > -100) && (diff < 100)) {
    565 		if (diff >0)
    566 		    p = VG_(sprintf)(outbuf, "+%d ", diff);
    567 		else if (diff==0)
    568 		    p = VG_(sprintf)(outbuf, "* ");
    569 	        else
    570 		    p = VG_(sprintf)(outbuf, "%d ", diff);
    571 	    }
    572 	    else
    573 		p = VG_(sprintf)(outbuf, "%#lx ", curr->addr);
    574 	}
    575 
    576 	if (CLG_(clo).dump_bb) {
    577 	    int diff = curr->bb_addr - last->bb_addr;
    578 	    if ( CLG_(clo).compress_pos && (last->bb_addr >0) &&
    579 		 (diff > -100) && (diff < 100)) {
    580 		if (diff >0)
    581 		    p += VG_(sprintf)(outbuf+p, "+%d ", diff);
    582 		else if (diff==0)
    583 		    p += VG_(sprintf)(outbuf+p, "* ");
    584 	        else
    585 		    p += VG_(sprintf)(outbuf+p, "%d ", diff);
    586 	    }
    587 	    else
    588 		p += VG_(sprintf)(outbuf+p, "%#lx ", curr->bb_addr);
    589 	}
    590 
    591 	if (CLG_(clo).dump_line) {
    592 	    int diff = curr->line - last->line;
    593 	    if ( CLG_(clo).compress_pos && (last->line >0) &&
    594 		 (diff > -100) && (diff < 100)) {
    595 
    596 		if (diff >0)
    597 		    VG_(sprintf)(outbuf+p, "+%d ", diff);
    598 		else if (diff==0)
    599 		    VG_(sprintf)(outbuf+p, "* ");
    600 	        else
    601 		    VG_(sprintf)(outbuf+p, "%d ", diff);
    602 	    }
    603 	    else
    604 		VG_(sprintf)(outbuf+p, "%u ", curr->line);
    605 	}
    606     }
    607     my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
    608 }
    609 
    610 
    611 /**
    612  * Print events.
    613  */
    614 
    615 static
    616 void fprint_cost(int fd, EventMapping* es, ULong* cost)
    617 {
    618   int p = CLG_(sprint_mappingcost)(outbuf, es, cost);
    619   VG_(sprintf)(outbuf+p, "\n");
    620   my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
    621   return;
    622 }
    623 
    624 
    625 
    626 /* Write the cost of a source line; only that parts of the source
    627  * position are written that changed relative to last written position.
    628  * funcPos is the source position of the first line of actual function.
    629  * Something is written only if cost != 0; returns True in this case.
    630  */
    631 static void fprint_fcost(Int fd, AddrCost* c, AddrPos* last)
    632 {
    633   CLG_DEBUGIF(3) {
    634     CLG_DEBUG(2, "   print_fcost(file '%s', line %d, bb %#lx, addr %#lx):\n",
    635 	     c->p.file->name, c->p.line, c->p.bb_addr, c->p.addr);
    636     CLG_(print_cost)(-5, CLG_(sets).full, c->cost);
    637   }
    638 
    639   fprint_pos(fd, &(c->p), last);
    640   copy_apos( last, &(c->p) ); /* update last to current position */
    641 
    642   fprint_cost(fd, CLG_(dumpmap), c->cost);
    643 
    644   /* add cost to total */
    645   CLG_(add_and_zero_cost)( CLG_(sets).full, dump_total_cost, c->cost );
    646 }
    647 
    648 
    649 /* Write out the calls from jcc (at pos)
    650  */
    651 static void fprint_jcc(Int fd, jCC* jcc, AddrPos* curr, AddrPos* last, ULong ecounter)
    652 {
    653     static AddrPos target;
    654     file_node* file;
    655     obj_node*  obj;
    656 
    657     CLG_DEBUGIF(2) {
    658       CLG_DEBUG(2, "   fprint_jcc (jkind %d)\n", jcc->jmpkind);
    659       CLG_(print_jcc)(-10, jcc);
    660     }
    661 
    662     if (!get_debug_pos(jcc->to, bb_addr(jcc->to->bb), &target)) {
    663 	/* if we don't have debug info, don't switch to file "???" */
    664 	target.file = last->file;
    665     }
    666 
    667     if (jcc->from &&
    668 	(jcc->jmpkind == JmpCond || jcc->jmpkind == Ijk_Boring)) {
    669 
    670       /* this is a JCC for a followed conditional or boring jump. */
    671       CLG_ASSERT(CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost));
    672 
    673       /* objects among jumps should be the same.
    674        * Otherwise this jump would have been changed to a call
    675        *  (see setup_bbcc)
    676        */
    677       CLG_ASSERT(jcc->from->bb->obj == jcc->to->bb->obj);
    678 
    679 	/* only print if target position info is usefull */
    680 	if (!CLG_(clo).dump_instr && !CLG_(clo).dump_bb && target.line==0) {
    681 	  jcc->call_counter = 0;
    682 	  return;
    683 	}
    684 
    685 	/* Different files/functions are possible e.g. with longjmp's
    686 	 * which change the stack, and thus context
    687 	 */
    688 	if (last->file != target.file) {
    689 	    VG_(sprintf)(outbuf, "jfi=");
    690 	    print_file(outbuf+4, target.file);
    691 	    my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
    692 	}
    693 
    694 	if (jcc->from->cxt != jcc->to->cxt) {
    695 	    if (CLG_(clo).mangle_names)
    696 		print_mangled_fn(fd, outbuf, "jfn",
    697 				 jcc->to->cxt, jcc->to->rec_index);
    698 	    else
    699 		print_fn(fd, outbuf, "jfn", jcc->to->cxt->fn[0]);
    700 	}
    701 
    702 	if (jcc->jmpkind == JmpCond) {
    703 	    /* format: jcnd=<followed>/<executions> <target> */
    704 	    VG_(sprintf)(outbuf, "jcnd=%llu/%llu ",
    705 			 jcc->call_counter, ecounter);
    706 	}
    707 	else {
    708 	    /* format: jump=<jump count> <target> */
    709 	    VG_(sprintf)(outbuf, "jump=%llu ",
    710 			 jcc->call_counter);
    711 	}
    712 	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
    713 
    714 	fprint_pos(fd, &target, last);
    715 	my_fwrite(fd, "\n", 1);
    716 	fprint_pos(fd, curr, last);
    717 	my_fwrite(fd, "\n", 1);
    718 
    719 	jcc->call_counter = 0;
    720 	return;
    721     }
    722 
    723     CLG_ASSERT(jcc->to !=0);
    724 
    725     file = jcc->to->cxt->fn[0]->file;
    726     obj  = jcc->to->bb->obj;
    727 
    728     /* object of called position different to object of this function?*/
    729     if (jcc->from->cxt->fn[0]->file->obj != obj) {
    730 	VG_(sprintf)(outbuf, "cob=");
    731 	print_obj(outbuf+4, obj);
    732 	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
    733     }
    734 
    735     /* file of called position different to current file? */
    736     if (last->file != file) {
    737 	VG_(sprintf)(outbuf, "cfi=");
    738 	print_file(outbuf+4, file);
    739 	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
    740     }
    741 
    742     if (CLG_(clo).mangle_names)
    743 	print_mangled_fn(fd, outbuf, "cfn", jcc->to->cxt, jcc->to->rec_index);
    744     else
    745 	print_fn(fd, outbuf, "cfn", jcc->to->cxt->fn[0]);
    746 
    747     if (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost)) {
    748       VG_(sprintf)(outbuf, "calls=%llu ",
    749 		   jcc->call_counter);
    750 	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
    751 
    752 	fprint_pos(fd, &target, last);
    753 	my_fwrite(fd, "\n", 1);
    754 	fprint_pos(fd, curr, last);
    755 	fprint_cost(fd, CLG_(dumpmap), jcc->cost);
    756 
    757 	CLG_(init_cost)( CLG_(sets).full, jcc->cost );
    758 
    759 	jcc->call_counter = 0;
    760     }
    761 }
    762 
    763 
    764 
/* Cost summation of functions. We alternately use ccSum[0/1], with
 * ccSum[currSum] holding the sum for recently read lines with the same
 * line number, and ccSum[1-currSum] receiving the position of the next
 * instruction (see fprint_bbcc).
 */
static AddrCost ccSum[2];
static int currSum;
    770 
/*
 * Print all costs of a BBCC:
 * - FCCs of instructions
 * - JCCs of the unique jump of this BB
 * returns True if something was written
 *
 * Costs of consecutive instructions with the same source position are
 * summed up in ccSum[currSum] and written when the position changes
 * (or for every instruction, if BBs or instructions are dumped).
 * <last> is the position written last to <fd>.
 *
 * Afterwards, the execution counters of the BBCC are reset.
 */
static Bool fprint_bbcc(Int fd, BBCC* bbcc, AddrPos* last)
{
  InstrInfo* instr_info;
  ULong ecounter;
  Bool something_written = False;
  jCC* jcc;
  AddrCost *currCost, *newCost;
  Int jcc_count = 0, instr, i, jmp;
  BB* bb = bbcc->bb;

  CLG_ASSERT(bbcc->cxt != 0);
  CLG_DEBUGIF(1) {
    VG_(printf)("+ fprint_bbcc (Instr %d): ", bb->instr_count);
    CLG_(print_bbcc)(15, bbcc);
  }

  CLG_ASSERT(currSum == 0 || currSum == 1);
  /* currCost: running sum; newCost: gets position of next instruction */
  currCost = &(ccSum[currSum]);
  newCost  = &(ccSum[1-currSum]);

  /* execution count of the instructions up to the first side exit;
   * reduced below whenever a side exit is passed */
  ecounter = bbcc->ecounter_sum;
  jmp = 0;
  instr_info = &(bb->instr[0]);
  for(instr=0; instr<bb->instr_count; instr++, instr_info++) {

    /* get debug info of current instruction address and dump cost
     * if CLG_(clo).dump_bbs or file/line has changed
     */
    if (!get_debug_pos(bbcc, bb_addr(bb) + instr_info->instr_offset,
		       &(newCost->p))) {
      /* if we don't have debug info, don't switch to file "???" */
      newCost->p.file = bbcc->cxt->fn[0]->file;
    }

    if (CLG_(clo).dump_bbs || CLG_(clo).dump_instr ||
	(newCost->p.line != currCost->p.line) ||
	(newCost->p.file != currCost->p.file)) {

      if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
	something_written = True;

	fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
	fprint_fcost(fd, currCost, last);
      }

      /* switch buffers */
      currSum = 1 - currSum;
      currCost = &(ccSum[currSum]);
      newCost  = &(ccSum[1-currSum]);
    }

    /* add line cost to current cost sum */
    (*CLG_(cachesim).add_icost)(currCost->cost, bbcc, instr_info, ecounter);

    /* print jcc's if there are: only jumps */
    if (bb->jmp[jmp].instr == instr) {
	/* count the JCCs which have something to write */
	jcc_count=0;
	for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from)
	    if (((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
		(!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
	      jcc_count++;

	if (jcc_count>0) {
	    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
		/* no need to switch buffers, as position is the same */
		fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
		fprint_fcost(fd, currCost, last);
	    }
	    get_debug_pos(bbcc, bb_addr(bb)+instr_info->instr_offset, &(currCost->p));
	    fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
	    something_written = True;
	    for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
		if (((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
		    (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
		    fprint_jcc(fd, jcc, &(currCost->p), last, ecounter);
	    }
	}
    }

    /* update execution counter */
    if (jmp < bb->cjmp_count)
	if (bb->jmp[jmp].instr == instr) {
	    ecounter -= bbcc->jmp[jmp].ecounter;
	    jmp++;
	}
  }

  /* jCCs at end? If yes, dump cumulated line info first */
  jcc_count = 0;
  for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
      /* yes, if JCC only counts jmp arcs or cost >0 */
      if ( ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
	   (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
	  jcc_count++;
  }

  if ( (bbcc->skipped &&
	!CLG_(is_zero_cost)(CLG_(sets).full, bbcc->skipped)) ||
       (jcc_count>0) ) {

    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
      /* no need to switch buffers, as position is the same */
      fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
      fprint_fcost(fd, currCost, last);
    }

    /* what follows is attributed to the position of the jump of the BB */
    get_debug_pos(bbcc, bb_jmpaddr(bb), &(currCost->p));
    fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
    something_written = True;

    /* first, print skipped costs for calls */
    if (bbcc->skipped && !CLG_(is_zero_cost)( CLG_(sets).full,
					     bbcc->skipped )) {
      CLG_(add_and_zero_cost)( CLG_(sets).full,
			      currCost->cost, bbcc->skipped );
#if 0
      VG_(sprintf)(outbuf, "# Skipped\n");
      my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
#endif
      fprint_fcost(fd, currCost, last);
    }

    if (jcc_count > 0)
	for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
	    CLG_ASSERT(jcc->jmp == jmp);
	    if ( ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
		 (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))

		fprint_jcc(fd, jcc, &(currCost->p), last, ecounter);
	}
  }

  if (CLG_(clo).dump_bbs || CLG_(clo).dump_bb) {
    /* write cost still summed up, as the sum must not span BBs here */
    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
      something_written = True;

      fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
      fprint_fcost(fd, currCost, last);
    }
    if (CLG_(clo).dump_bbs) my_fwrite(fd, (void*)"\n", 1);

    /* when every cost was immediately written, we must have done so,
     * as this function is only called when there's cost in a BBCC
     */
    CLG_ASSERT(something_written);
  }

  /* reset execution counters; there is one jump entry more than
   * side exits (cjmp_count) */
  bbcc->ecounter_sum = 0;
  for(i=0; i<=bbcc->bb->cjmp_count; i++)
    bbcc->jmp[i].ecounter = 0;
  bbcc->ret_counter = 0;

  CLG_DEBUG(1, "- fprint_bbcc: JCCs %d\n", jcc_count);

  return something_written;
}
    933 
    934 /* order by
    935  *  recursion,
    936  *  from->bb->obj, from->bb->fn
    937  *  obj, fn[0]->file, fn
    938  *  address
    939  */
    940 static int my_cmp(BBCC** pbbcc1, BBCC** pbbcc2)
    941 {
    942 #if 0
    943     return (*pbbcc1)->bb->offset - (*pbbcc2)->bb->offset;
    944 #else
    945     BBCC *bbcc1 = *pbbcc1;
    946     BBCC *bbcc2 = *pbbcc2;
    947     Context* cxt1 = bbcc1->cxt;
    948     Context* cxt2 = bbcc2->cxt;
    949     int off = 1;
    950 
    951     if (cxt1->fn[0]->file->obj != cxt2->fn[0]->file->obj)
    952 	return cxt1->fn[0]->file->obj - cxt2->fn[0]->file->obj;
    953 
    954     if (cxt1->fn[0]->file != cxt2->fn[0]->file)
    955 	return cxt1->fn[0]->file - cxt2->fn[0]->file;
    956 
    957     if (cxt1->fn[0] != cxt2->fn[0])
    958 	return cxt1->fn[0] - cxt2->fn[0];
    959 
    960     if (bbcc1->rec_index != bbcc2->rec_index)
    961 	return bbcc1->rec_index - bbcc2->rec_index;
    962 
    963     while((off < cxt1->size) && (off < cxt2->size)) {
    964 	fn_node* ffn1 = cxt1->fn[off];
    965 	fn_node* ffn2 = cxt2->fn[off];
    966 	if (ffn1->file->obj != ffn2->file->obj)
    967 	    return ffn1->file->obj - ffn2->file->obj;
    968 	if (ffn1 != ffn2)
    969 	    return ffn1 - ffn2;
    970 	off++;
    971     }
    972     if      (cxt1->size > cxt2->size) return 1;
    973     else if (cxt1->size < cxt2->size) return -1;
    974 
    975     return bbcc1->bb->offset - bbcc2->bb->offset;
    976 #endif
    977 }
    978 
    979 
    980 
    981 
    982 
    983 /* modified version of:
    984  *
    985  * qsort -- qsort interface implemented by faster quicksort.
    986  * J. L. Bentley and M. D. McIlroy, SPE 23 (1993) 1249-1265.
    987  * Copyright 1993, John Wiley.
    988 */
    989 
    990 static __inline__
    991 void swapfunc(BBCC** a, BBCC** b, int n)
    992 {
    993     while(n>0) {
    994 	BBCC* t = *a; *a = *b; *b = t;
    995 	a++, b++;
    996 	n--;
    997     }
    998 }
    999 
   1000 static __inline__
   1001 void swap(BBCC** a, BBCC** b)
   1002 {
   1003     BBCC* t;
   1004     t = *a; *a = *b; *b = t;
   1005 }
   1006 
   1007 #define min(x, y) ((x)<=(y) ? (x) : (y))
   1008 
   1009 static
   1010 BBCC** med3(BBCC **a, BBCC **b, BBCC **c, int (*cmp)(BBCC**,BBCC**))
   1011 {	return cmp(a, b) < 0 ?
   1012 		  (cmp(b, c) < 0 ? b : cmp(a, c) < 0 ? c : a)
   1013 		: (cmp(b, c) > 0 ? b : cmp(a, c) > 0 ? c : a);
   1014 }
   1015 
   1016 static BBCC** qsort_start = 0;
   1017 
   1018 static void qsort(BBCC **a, int n, int (*cmp)(BBCC**,BBCC**))
   1019 {
   1020 	BBCC **pa, **pb, **pc, **pd, **pl, **pm, **pn, **pv;
   1021 	int s, r;
   1022 	BBCC* v;
   1023 
   1024 	CLG_DEBUG(8, "  qsort(%ld,%ld)\n", a-qsort_start + 0L, n + 0L);
   1025 
   1026 	if (n < 7) {	 /* Insertion sort on smallest arrays */
   1027 		for (pm = a+1; pm < a+n; pm++)
   1028 			for (pl = pm; pl > a && cmp(pl-1, pl) > 0; pl --)
   1029 				swap(pl, pl-1);
   1030 
   1031 		CLG_DEBUGIF(8) {
   1032 		    for (pm = a; pm < a+n; pm++) {
   1033 			VG_(printf)("   %3ld BB %#lx, ",
   1034                                     pm - qsort_start + 0L,
   1035 				    bb_addr((*pm)->bb));
   1036 			CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
   1037 		    }
   1038 		}
   1039 		return;
   1040 	}
   1041 	pm = a + n/2;    /* Small arrays, middle element */
   1042 	if (n > 7) {
   1043 		pl = a;
   1044 		pn = a + (n-1);
   1045 		if (n > 40) {    /* Big arrays, pseudomedian of 9 */
   1046 			s = n/8;
   1047 			pl = med3(pl, pl+s, pl+2*s, cmp);
   1048 			pm = med3(pm-s, pm, pm+s, cmp);
   1049 			pn = med3(pn-2*s, pn-s, pn, cmp);
   1050 		}
   1051 		pm = med3(pl, pm, pn, cmp); /* Mid-size, med of 3 */
   1052 	}
   1053 
   1054 
   1055 	v = *pm;
   1056 	pv = &v;
   1057 	pa = pb = a;
   1058 	pc = pd = a + (n-1);
   1059 	for (;;) {
   1060 		while ((pb <= pc) && ((r=cmp(pb, pv)) <= 0)) {
   1061 		    if (r==0) {
   1062 			/* same as pivot, to start */
   1063 			swap(pa,pb); pa++;
   1064 		    }
   1065 		    pb ++;
   1066 		}
   1067 		while ((pb <= pc) && ((r=cmp(pc, pv)) >= 0)) {
   1068 		    if (r==0) {
   1069 			/* same as pivot, to end */
   1070 			swap(pc,pd); pd--;
   1071 		    }
   1072 		    pc --;
   1073 		}
   1074 		if (pb > pc) { break; }
   1075 		swap(pb, pc);
   1076 		pb ++;
   1077 		pc --;
   1078 	}
   1079 	pb--;
   1080 	pc++;
   1081 
   1082 	/* put pivot from start into middle */
   1083 	if ((s = pa-a)>0) { for(r=0;r<s;r++) swap(a+r, pb+1-s+r); }
   1084 	/* put pivot from end into middle */
   1085 	if ((s = a+n-1-pd)>0) { for(r=0;r<s;r++) swap(pc+r, a+n-s+r); }
   1086 
   1087 	CLG_DEBUGIF(8) {
   1088 	  VG_(printf)("   PV BB %#lx, ", bb_addr((*pv)->bb));
   1089 	    CLG_(print_cxt)(9, (*pv)->cxt, (*pv)->rec_index);
   1090 
   1091 	    s = pb-pa+1;
   1092 	    VG_(printf)("    Lower %ld - %ld:\n",
   1093                         a-qsort_start + 0L,
   1094                         a+s-1-qsort_start + 0L);
   1095 	    for (r=0;r<s;r++) {
   1096 		pm = a+r;
   1097 		VG_(printf)("     %3ld BB %#lx, ",
   1098 			    pm-qsort_start + 0L,
   1099                             bb_addr((*pm)->bb));
   1100 		CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
   1101 	    }
   1102 
   1103 	    s = pd-pc+1;
   1104 	    VG_(printf)("    Upper %ld - %ld:\n",
   1105 			a+n-s-qsort_start + 0L,
   1106                         a+n-1-qsort_start + 0L);
   1107 	    for (r=0;r<s;r++) {
   1108 		pm = a+n-s+r;
   1109 		VG_(printf)("     %3ld BB %#lx, ",
   1110 			    pm-qsort_start + 0L,
   1111                             bb_addr((*pm)->bb));
   1112 		CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
   1113 	    }
   1114 	}
   1115 
   1116 	if ((s = pb+1-pa) > 1) qsort(a,     s, cmp);
   1117 	if ((s = pd+1-pc) > 1) qsort(a+n-s, s, cmp);
   1118 }
   1119 
   1120 
   1121 /* Helpers for prepare_dump */
   1122 
   1123 static Int    prepare_count;
   1124 static BBCC** prepare_ptr;
   1125 
   1126 
   1127 static void hash_addCount(BBCC* bbcc)
   1128 {
   1129   if ((bbcc->ecounter_sum > 0) || (bbcc->ret_counter>0))
   1130     prepare_count++;
   1131 }
   1132 
   1133 static void hash_addPtr(BBCC* bbcc)
   1134 {
   1135   if ((bbcc->ecounter_sum == 0) &&
   1136       (bbcc->ret_counter == 0)) return;
   1137 
   1138   *prepare_ptr = bbcc;
   1139   prepare_ptr++;
   1140 }
   1141 
   1142 
   1143 static void cs_addCount(thread_info* ti)
   1144 {
   1145   Int i;
   1146   BBCC* bbcc;
   1147 
   1148   /* add BBCCs with active call in call stack of current thread.
   1149    * update cost sums for active calls
   1150    */
   1151 
   1152   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
   1153     call_entry* e = &(CLG_(current_call_stack).entry[i]);
   1154     if (e->jcc == 0) continue;
   1155 
   1156     CLG_(add_diff_cost_lz)( CLG_(sets).full, &(e->jcc->cost),
   1157 			   e->enter_cost, CLG_(current_state).cost);
   1158     bbcc = e->jcc->from;
   1159 
   1160     CLG_DEBUG(1, " [%2d] (tid %d), added active: %s\n",
   1161 	     i,CLG_(current_tid),bbcc->cxt->fn[0]->name);
   1162 
   1163     if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
   1164       /* already counted */
   1165       continue;
   1166     }
   1167     prepare_count++;
   1168   }
   1169 }
   1170 
   1171 static void cs_addPtr(thread_info* ti)
   1172 {
   1173   Int i;
   1174   BBCC* bbcc;
   1175 
   1176   /* add BBCCs with active call in call stack of current thread.
   1177    * update cost sums for active calls
   1178    */
   1179 
   1180   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
   1181     call_entry* e = &(CLG_(current_call_stack).entry[i]);
   1182     if (e->jcc == 0) continue;
   1183 
   1184     bbcc = e->jcc->from;
   1185 
   1186     if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
   1187       /* already counted */
   1188       continue;
   1189     }
   1190 
   1191     *prepare_ptr = bbcc;
   1192     prepare_ptr++;
   1193   }
   1194 }
   1195 
   1196 
   1197 /**
   1198  * Put all BBCCs with costs into a sorted array.
   1199  * The returned arrays ends with a null pointer.
   1200  * Must be freed after dumping.
   1201  */
   1202 static
   1203 BBCC** prepare_dump(void)
   1204 {
   1205     BBCC **array;
   1206 
   1207     prepare_count = 0;
   1208 
   1209     /* if we do not separate among threads, this gives all */
   1210     /* count number of BBCCs with >0 executions */
   1211     CLG_(forall_bbccs)(hash_addCount);
   1212 
   1213     /* even if we do not separate among threads,
   1214      * call stacks are separated */
   1215     if (CLG_(clo).separate_threads)
   1216       cs_addCount(0);
   1217     else
   1218       CLG_(forall_threads)(cs_addCount);
   1219 
   1220     CLG_DEBUG(0, "prepare_dump: %d BBCCs\n", prepare_count);
   1221 
   1222     /* allocate bbcc array, insert BBCCs and sort */
   1223     prepare_ptr = array =
   1224       (BBCC**) CLG_MALLOC("cl.dump.pd.1",
   1225                           (prepare_count+1) * sizeof(BBCC*));
   1226 
   1227     CLG_(forall_bbccs)(hash_addPtr);
   1228 
   1229     if (CLG_(clo).separate_threads)
   1230       cs_addPtr(0);
   1231     else
   1232       CLG_(forall_threads)(cs_addPtr);
   1233 
   1234     CLG_ASSERT(array + prepare_count == prepare_ptr);
   1235 
   1236     /* end mark */
   1237     *prepare_ptr = 0;
   1238 
   1239     CLG_DEBUG(0,"             BBCCs inserted\n");
   1240 
   1241     qsort_start = array;
   1242     qsort(array, prepare_count, my_cmp);
   1243 
   1244     CLG_DEBUG(0,"             BBCCs sorted\n");
   1245 
   1246     return array;
   1247 }
   1248 
   1249 
   1250 
   1251 
   1252 static void fprint_cost_ln(int fd, Char* prefix,
   1253 			   EventMapping* em, ULong* cost)
   1254 {
   1255     int p;
   1256 
   1257     p = VG_(sprintf)(outbuf, "%s", prefix);
   1258     p += CLG_(sprint_mappingcost)(outbuf + p, em, cost);
   1259     VG_(sprintf)(outbuf + p, "\n");
   1260     my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
   1261 }
   1262 
   1263 static ULong bbs_done = 0;
   1264 static Char* filename = 0;
   1265 
   1266 static
   1267 void file_err(void)
   1268 {
   1269    VG_(message)(Vg_UserMsg,
   1270                 "Error: can not open cache simulation output file `%s'\n",
   1271                 filename );
   1272    VG_(exit)(1);
   1273 }
   1274 
   1275 /**
   1276  * Create a new dump file and write header.
   1277  *
   1278  * Naming: <CLG_(clo).filename_base>.<pid>[.<part>][-<tid>]
   1279  *         <part> is skipped for final dump (trigger==0)
   1280  *         <tid>  is skipped for thread 1 with CLG_(clo).separate_threads=no
   1281  *
   1282  * Returns the file descriptor, and -1 on error (no write permission)
   1283  */
   1284 static int new_dumpfile(Char buf[BUF_LEN], int tid, Char* trigger)
   1285 {
   1286     Bool appending = False;
   1287     int i, fd;
   1288     FullCost sum = 0;
   1289     SysRes res;
   1290 
   1291     CLG_ASSERT(dumps_initialized);
   1292     CLG_ASSERT(filename != 0);
   1293 
   1294     if (!CLG_(clo).combine_dumps) {
   1295 	i = VG_(sprintf)(filename, "%s", out_file);
   1296 
   1297 	if (trigger)
   1298 	    i += VG_(sprintf)(filename+i, ".%d", out_counter);
   1299 
   1300 	if (CLG_(clo).separate_threads)
   1301 	    VG_(sprintf)(filename+i, "-%02d", tid);
   1302 
   1303 	res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
   1304     }
   1305     else {
   1306 	VG_(sprintf)(filename, "%s", out_file);
   1307         res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_APPEND, 0);
   1308 	if (!sr_isError(res) && out_counter>1)
   1309 	    appending = True;
   1310     }
   1311 
   1312     if (sr_isError(res)) {
   1313 	res = VG_(open)(filename, VKI_O_CREAT|VKI_O_WRONLY,
   1314 			VKI_S_IRUSR|VKI_S_IWUSR);
   1315 	if (sr_isError(res)) {
   1316 	    /* If the file can not be opened for whatever reason (conflict
   1317 	       between multiple supervised processes?), give up now. */
   1318 	    file_err();
   1319 	}
   1320     }
   1321     fd = (Int) sr_Res(res);
   1322 
   1323     CLG_DEBUG(2, "  new_dumpfile '%s'\n", filename);
   1324 
   1325     if (!appending)
   1326 	reset_dump_array();
   1327 
   1328 
   1329     if (!appending) {
   1330 	/* version */
   1331 	VG_(sprintf)(buf, "version: 1\n");
   1332 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1333 
   1334 	/* creator */
   1335 	VG_(sprintf)(buf, "creator: callgrind-" VERSION "\n");
   1336 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1337 
   1338 	/* "pid:" line */
   1339 	VG_(sprintf)(buf, "pid: %d\n", VG_(getpid)());
   1340 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1341 
   1342 	/* "cmd:" line */
   1343 	VG_(strcpy)(buf, "cmd: ");
   1344 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1345 	my_fwrite(fd, (void*)cmdbuf, VG_(strlen)(cmdbuf));
   1346     }
   1347 
   1348     VG_(sprintf)(buf, "\npart: %d\n", out_counter);
   1349     my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1350     if (CLG_(clo).separate_threads) {
   1351 	VG_(sprintf)(buf, "thread: %d\n", tid);
   1352 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1353     }
   1354 
   1355     /* "desc:" lines */
   1356     if (!appending) {
   1357 	my_fwrite(fd, "\n", 1);
   1358 
   1359 #if 0
   1360 	/* Global options changing the tracing behaviour */
   1361 	VG_(sprintf)(buf, "\ndesc: Option: --skip-plt=%s\n",
   1362 		     CLG_(clo).skip_plt ? "yes" : "no");
   1363 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1364 	VG_(sprintf)(buf, "desc: Option: --collect-jumps=%s\n",
   1365 		     CLG_(clo).collect_jumps ? "yes" : "no");
   1366 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1367 	VG_(sprintf)(buf, "desc: Option: --separate-recs=%d\n",
   1368 		     CLG_(clo).separate_recursions);
   1369 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1370 	VG_(sprintf)(buf, "desc: Option: --separate-callers=%d\n",
   1371 		     CLG_(clo).separate_callers);
   1372 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1373 
   1374 	VG_(sprintf)(buf, "desc: Option: --dump-bbs=%s\n",
   1375 		     CLG_(clo).dump_bbs ? "yes" : "no");
   1376 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1377 	VG_(sprintf)(buf, "desc: Option: --separate-threads=%s\n",
   1378 		     CLG_(clo).separate_threads ? "yes" : "no");
   1379 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1380 #endif
   1381 
   1382 	(*CLG_(cachesim).getdesc)(buf);
   1383 	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1384     }
   1385 
   1386     VG_(sprintf)(buf, "\ndesc: Timerange: Basic block %llu - %llu\n",
   1387 		 bbs_done, CLG_(stat).bb_executions);
   1388 
   1389     my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1390     VG_(sprintf)(buf, "desc: Trigger: %s\n",
   1391 		 trigger ? trigger : (Char*)"Program termination");
   1392     my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1393 
   1394 #if 0
   1395    /* Output function specific config
   1396     * FIXME */
   1397    for (i = 0; i < N_FNCONFIG_ENTRIES; i++) {
   1398        fnc = fnc_table[i];
   1399        while (fnc) {
   1400 	   if (fnc->skip) {
   1401 	       VG_(sprintf)(buf, "desc: Option: --fn-skip=%s\n", fnc->name);
   1402 	       my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1403 	   }
   1404 	   if (fnc->dump_at_enter) {
   1405 	       VG_(sprintf)(buf, "desc: Option: --fn-dump-at-enter=%s\n",
   1406 			    fnc->name);
   1407 	       my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1408 	   }
   1409 	   if (fnc->dump_at_leave) {
   1410 	       VG_(sprintf)(buf, "desc: Option: --fn-dump-at-leave=%s\n",
   1411 			    fnc->name);
   1412 	       my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1413 	   }
   1414 	   if (fnc->separate_callers != CLG_(clo).separate_callers) {
   1415 	       VG_(sprintf)(buf, "desc: Option: --separate-callers%d=%s\n",
   1416 			    fnc->separate_callers, fnc->name);
   1417 	       my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1418 	   }
   1419 	   if (fnc->separate_recursions != CLG_(clo).separate_recursions) {
   1420 	       VG_(sprintf)(buf, "desc: Option: --separate-recs%d=%s\n",
   1421 			    fnc->separate_recursions, fnc->name);
   1422 	       my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1423 	   }
   1424 	   fnc = fnc->next;
   1425        }
   1426    }
   1427 #endif
   1428 
   1429    /* "positions:" line */
   1430    VG_(sprintf)(buf, "\npositions:%s%s%s\n",
   1431 		CLG_(clo).dump_instr ? " instr" : "",
   1432 		CLG_(clo).dump_bb    ? " bb" : "",
   1433 		CLG_(clo).dump_line  ? " line" : "");
   1434    my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1435 
   1436    /* "events:" line */
   1437    i = VG_(sprintf)(buf, "events: ");
   1438    CLG_(sprint_eventmapping)(buf+i, CLG_(dumpmap));
   1439    my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
   1440    my_fwrite(fd, "\n", 1);
   1441 
   1442    /* summary lines */
   1443    sum = CLG_(get_eventset_cost)( CLG_(sets).full );
   1444    CLG_(zero_cost)(CLG_(sets).full, sum);
   1445    if (CLG_(clo).separate_threads) {
   1446      thread_info* ti = CLG_(get_current_thread)();
   1447      CLG_(add_diff_cost)(CLG_(sets).full, sum, ti->lastdump_cost,
   1448 			   ti->states.entry[0]->cost);
   1449    }
   1450    else {
   1451      /* This function is called once for thread 1, where
   1452       * all costs are summed up when not dumping separate per thread.
   1453       * But this is not true for summary: we need to add all threads.
   1454       */
   1455      int t;
   1456      thread_info** thr = CLG_(get_threads)();
   1457      for(t=1;t<VG_N_THREADS;t++) {
   1458        if (!thr[t]) continue;
   1459        CLG_(add_diff_cost)(CLG_(sets).full, sum,
   1460 			  thr[t]->lastdump_cost,
   1461 			  thr[t]->states.entry[0]->cost);
   1462      }
   1463    }
   1464    fprint_cost_ln(fd, "summary: ", CLG_(dumpmap), sum);
   1465 
   1466    /* all dumped cost will be added to total_fcc */
   1467    CLG_(init_cost_lz)( CLG_(sets).full, &dump_total_cost );
   1468 
   1469    my_fwrite(fd, "\n\n",2);
   1470 
   1471    if (VG_(clo_verbosity) > 1)
   1472        VG_(message)(Vg_DebugMsg, "Dump to %s\n", filename);
   1473 
   1474    return fd;
   1475 }
   1476 
   1477 
   1478 static void close_dumpfile(int fd)
   1479 {
   1480     if (fd <0) return;
   1481 
   1482     fprint_cost_ln(fd, "totals: ", CLG_(dumpmap),
   1483 		   dump_total_cost);
   1484     //fprint_fcc_ln(fd, "summary: ", &dump_total_fcc);
   1485     CLG_(add_cost_lz)(CLG_(sets).full,
   1486 		     &CLG_(total_cost), dump_total_cost);
   1487 
   1488     fwrite_flush();
   1489     VG_(close)(fd);
   1490 
   1491     if (filename[0] == '.') {
   1492 	if (-1 == VG_(rename) (filename, filename+1)) {
   1493 	    /* Can not rename to correct file name: give out warning */
   1494 	    VG_(message)(Vg_DebugMsg, "Warning: Can not rename .%s to %s\n",
   1495 			 filename, filename);
   1496        }
   1497    }
   1498 }
   1499 
   1500 
   1501 /* Helper for print_bbccs */
   1502 
   1503 static Int   print_fd;
   1504 static Char* print_trigger;
   1505 static Char  print_buf[BUF_LEN];
   1506 
   1507 static void print_bbccs_of_thread(thread_info* ti)
   1508 {
   1509   BBCC **p, **array;
   1510   FnPos lastFnPos;
   1511   AddrPos lastAPos;
   1512 
   1513   CLG_DEBUG(1, "+ print_bbccs(tid %d)\n", CLG_(current_tid));
   1514 
   1515   print_fd = new_dumpfile(print_buf, CLG_(current_tid), print_trigger);
   1516   if (print_fd <0) {
   1517     CLG_DEBUG(1, "- print_bbccs(tid %d): No output...\n", CLG_(current_tid));
   1518     return;
   1519   }
   1520 
   1521   p = array = prepare_dump();
   1522   init_fpos(&lastFnPos);
   1523   init_apos(&lastAPos, 0, 0, 0);
   1524 
   1525   if (p) while(1) {
   1526 
   1527     /* on context/function change, print old cost buffer before */
   1528     if (lastFnPos.cxt && ((*p==0) ||
   1529 			 (lastFnPos.cxt != (*p)->cxt) ||
   1530 			 (lastFnPos.rec_index != (*p)->rec_index))) {
   1531       if (!CLG_(is_zero_cost)( CLG_(sets).full, ccSum[currSum].cost )) {
   1532 	/* no need to switch buffers, as position is the same */
   1533 	fprint_apos(print_fd, &(ccSum[currSum].p), &lastAPos,
   1534 		    lastFnPos.cxt->fn[0]->file);
   1535 	fprint_fcost(print_fd, &ccSum[currSum], &lastAPos);
   1536       }
   1537 
   1538       if (ccSum[currSum].p.file != lastFnPos.cxt->fn[0]->file) {
   1539 	/* switch back to file of function */
   1540 	VG_(sprintf)(print_buf, "fe=");
   1541 	print_file(print_buf+3, lastFnPos.cxt->fn[0]->file);
   1542 	my_fwrite(print_fd, (void*)print_buf, VG_(strlen)(print_buf));
   1543       }
   1544       my_fwrite(print_fd, "\n", 1);
   1545     }
   1546 
   1547     if (*p == 0) break;
   1548 
   1549     if (print_fn_pos(print_fd, &lastFnPos, *p)) {
   1550 
   1551       /* new function */
   1552       init_apos(&lastAPos, 0, 0, (*p)->cxt->fn[0]->file);
   1553       init_fcost(&ccSum[0], 0, 0, 0);
   1554       init_fcost(&ccSum[1], 0, 0, 0);
   1555       currSum = 0;
   1556     }
   1557 
   1558     if (CLG_(clo).dump_bbs) {
   1559 	/* FIXME: Specify Object of BB if different to object of fn */
   1560 	int i, pos = 0;
   1561 	ULong ecounter = (*p)->ecounter_sum;
   1562 	pos = VG_(sprintf)(print_buf, "bb=%#lx ", (*p)->bb->offset);
   1563 	for(i = 0; i<(*p)->bb->cjmp_count;i++) {
   1564 	    pos += VG_(sprintf)(print_buf+pos, "%d %llu ",
   1565 				(*p)->bb->jmp[i].instr,
   1566 				ecounter);
   1567 	    ecounter -= (*p)->jmp[i].ecounter;
   1568 	}
   1569 	VG_(sprintf)(print_buf+pos, "%d %llu\n",
   1570 		     (*p)->bb->instr_count,
   1571 		     ecounter);
   1572 	my_fwrite(print_fd, (void*)print_buf, VG_(strlen)(print_buf));
   1573     }
   1574 
   1575     fprint_bbcc(print_fd, *p, &lastAPos);
   1576 
   1577     p++;
   1578   }
   1579 
   1580   close_dumpfile(print_fd);
   1581   if (array) VG_(free)(array);
   1582 
   1583   /* set counters of last dump */
   1584   CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost,
   1585 		  CLG_(current_state).cost );
   1586 
   1587   CLG_DEBUG(1, "- print_bbccs(tid %d)\n", CLG_(current_tid));
   1588 }
   1589 
   1590 
   1591 static void print_bbccs(Char* trigger, Bool only_current_thread)
   1592 {
   1593   init_dump_array();
   1594   init_debug_cache();
   1595 
   1596   print_fd = -1;
   1597   print_trigger = trigger;
   1598 
   1599   if (!CLG_(clo).separate_threads) {
   1600     /* All BBCC/JCC costs is stored for thread 1 */
   1601     Int orig_tid = CLG_(current_tid);
   1602 
   1603     CLG_(switch_thread)(1);
   1604     print_bbccs_of_thread( CLG_(get_current_thread)() );
   1605     CLG_(switch_thread)(orig_tid);
   1606   }
   1607   else if (only_current_thread)
   1608     print_bbccs_of_thread( CLG_(get_current_thread)() );
   1609   else
   1610     CLG_(forall_threads)(print_bbccs_of_thread);
   1611 
   1612   free_dump_array();
   1613 }
   1614 
   1615 
   1616 void CLG_(dump_profile)(Char* trigger, Bool only_current_thread)
   1617 {
   1618    CLG_DEBUG(2, "+ dump_profile(Trigger '%s')\n",
   1619 	    trigger ? trigger : (Char*)"Prg.Term.");
   1620 
   1621    CLG_(init_dumps)();
   1622 
   1623    if (VG_(clo_verbosity) > 1)
   1624        VG_(message)(Vg_DebugMsg, "Start dumping at BB %llu (%s)...\n",
   1625 		    CLG_(stat).bb_executions,
   1626 		    trigger ? trigger : (Char*)"Prg.Term.");
   1627 
   1628    out_counter++;
   1629 
   1630    print_bbccs(trigger, only_current_thread);
   1631 
   1632    bbs_done = CLG_(stat).bb_executions++;
   1633 
   1634    if (VG_(clo_verbosity) > 1)
   1635      VG_(message)(Vg_DebugMsg, "Dumping done.\n");
   1636 }
   1637 
   1638 /* copy command to cmd buffer (could change) */
   1639 static
   1640 void init_cmdbuf(void)
   1641 {
   1642   Int i,j,size = 0;
   1643   HChar* argv;
   1644 
   1645   if (VG_(args_the_exename))
   1646       size = VG_(sprintf)(cmdbuf, " %s", VG_(args_the_exename));
   1647 
   1648   for(i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
   1649       argv = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
   1650       if (!argv) continue;
   1651       if ((size>0) && (size < BUF_LEN)) cmdbuf[size++] = ' ';
   1652       for(j=0;argv[j]!=0;j++)
   1653 	  if (size < BUF_LEN) cmdbuf[size++] = argv[j];
   1654   }
   1655 
   1656   if (size == BUF_LEN) size--;
   1657   cmdbuf[size] = 0;
   1658 }
   1659 
   1660 /*
   1661  * Set up file names for dump output: <out_directory>, <out_file>.
   1662  * <out_file> is derived from the output format string, which defaults
   1663  * to "callgrind.out.%p", where %p is replaced with the PID.
   1664  * For the final file name, on intermediate dumps a counter is appended,
   1665  * and further, if separate dumps per thread are requested, the thread ID.
   1666  *
   1667  * <out_file> always starts with a full absolute path.
   1668  * If the output format string represents a relative path, the current
   1669  * working directory at program start is used.
   1670  *
   1671  * This function has to be called every time a profile dump is generated
   1672  * to be able to react on PID changes.
   1673  */
   1674 void CLG_(init_dumps)()
   1675 {
   1676    Int lastSlash, i;
   1677    SysRes res;
   1678 
   1679    static int thisPID = 0;
   1680    int currentPID = VG_(getpid)();
   1681    if (currentPID == thisPID) {
   1682        /* already initialized, and no PID change */
   1683        CLG_ASSERT(out_file != 0);
   1684        return;
   1685    }
   1686    thisPID = currentPID;
   1687 
   1688    if (!CLG_(clo).out_format)
   1689      CLG_(clo).out_format = DEFAULT_OUTFORMAT;
   1690 
   1691    /* If a file name was already set, clean up before */
   1692    if (out_file) {
   1693        VG_(free)(out_file);
   1694        VG_(free)(out_directory);
   1695        VG_(free)(filename);
   1696        out_counter = 0;
   1697    }
   1698 
   1699    // Setup output filename.
   1700    out_file =
   1701        VG_(expand_file_name)("--callgrind-out-file", CLG_(clo).out_format);
   1702 
   1703    /* get base directory for dump/command/result files */
   1704    CLG_ASSERT(out_file[0] == '/');
   1705    lastSlash = 0;
   1706    i = 1;
   1707    while(out_file[i]) {
   1708        if (out_file[i] == '/') lastSlash = i;
   1709        i++;
   1710    }
   1711    i = lastSlash;
   1712    out_directory = (Char*) CLG_MALLOC("cl.dump.init_dumps.1", i+1);
   1713    VG_(strncpy)(out_directory, out_file, i);
   1714    out_directory[i] = 0;
   1715 
   1716    /* allocate space big enough for final filenames */
   1717    filename = (Char*) CLG_MALLOC("cl.dump.init_dumps.2",
   1718                                  VG_(strlen)(out_file)+32);
   1719    CLG_ASSERT(filename != 0);
   1720 
   1721    /* Make sure the output base file can be written.
   1722     * This is used for the dump at program termination.
   1723     * We stop with an error here if we can not create the
   1724     * file: This is probably because of missing rights,
   1725     * and trace parts wouldn't be allowed to be written, too.
   1726     */
   1727     VG_(strcpy)(filename, out_file);
   1728     res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
   1729     if (sr_isError(res)) {
   1730 	res = VG_(open)(filename, VKI_O_CREAT|VKI_O_WRONLY,
   1731 		       VKI_S_IRUSR|VKI_S_IWUSR);
   1732 	if (sr_isError(res)) {
   1733 	    file_err();
   1734 	}
   1735     }
   1736     if (!sr_isError(res)) VG_(close)( (Int)sr_Res(res) );
   1737 
   1738     if (!dumps_initialized)
   1739 	init_cmdbuf();
   1740 
   1741     dumps_initialized = True;
   1742 }
   1743