1 //--------------------------------------------------------------------*/ 2 //--- BBV: a SimPoint basic block vector generator bbv_main.c ---*/ 3 //--------------------------------------------------------------------*/ 4 5 /* 6 This file is part of BBV, a Valgrind tool for generating SimPoint 7 basic block vectors. 8 9 Copyright (C) 2006-2013 Vince Weaver 10 vince _at_ csl.cornell.edu 11 12 pcfile code is Copyright (C) 2006-2013 Oriol Prat 13 oriol.prat _at _ bsc.es 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 33 34 #include "pub_tool_basics.h" 35 #include "pub_tool_tooliface.h" 36 #include "pub_tool_options.h" /* command line options */ 37 38 #include "pub_tool_vki.h" /* vki_stat */ 39 #include "pub_tool_libcbase.h" /* VG_(strlen) */ 40 #include "pub_tool_libcfile.h" /* VG_(write) */ 41 #include "pub_tool_libcprint.h" /* VG_(printf) */ 42 #include "pub_tool_libcassert.h" /* VG_(exit) */ 43 #include "pub_tool_mallocfree.h" /* plain_free */ 44 #include "pub_tool_machine.h" /* VG_(fnptr_to_fnentry) */ 45 #include "pub_tool_debuginfo.h" /* VG_(get_fnname) */ 46 47 #include "pub_tool_oset.h" /* ordered set stuff */ 48 49 /* instruction special cases */ 50 #define REP_INSTRUCTION 0x1 51 #define FLDCW_INSTRUCTION 0x2 52 53 /* interval variables */ 54 #define DEFAULT_GRAIN_SIZE 100000000 /* 100 million by default */ 55 static Int interval_size=DEFAULT_GRAIN_SIZE; 56 57 /* filenames */ 58 static const HChar *clo_bb_out_file="bb.out.%p"; 59 static const HChar *clo_pc_out_file="pc.out.%p"; 60 static HChar *pc_out_file=NULL; 61 static HChar *bb_out_file=NULL; 62 63 64 /* output parameters */ 65 static Bool instr_count_only=False; 66 static Bool generate_pc_file=False; 67 68 /* write buffer */ 69 static HChar buf[1024]; 70 71 /* Global values */ 72 static OSet* instr_info_table; /* table that holds the basic block info */ 73 static Int block_num=1; /* global next block number */ 74 static Int current_thread=0; 75 static Int allocated_threads=1; 76 struct thread_info *bbv_thread=NULL; 77 78 /* Per-thread variables */ 79 struct thread_info { 80 ULong dyn_instr; /* Current retired instruction count */ 81 ULong total_instr; /* Total retired instruction count */ 82 Addr last_rep_addr; /* rep counting values */ 83 ULong rep_count; 84 ULong global_rep_count; 85 ULong unique_rep_count; 86 ULong fldcw_count; /* fldcw count */ 87 Int bbtrace_fd; /* file descriptor */ 88 }; 89 90 #define FUNCTION_NAME_LENGTH 20 91 92 struct BB_info { 93 Addr BB_addr; /* used as key, must be first */ 94 Int n_instrs; /* instructions in the basic block */ 95 Int block_num; /* unique block identifier */ 96 Int *inst_counter; /* times entered * num_instructions */ 97 Bool is_entry; /* is this block a function entry point */ 98 HChar fn_name[FUNCTION_NAME_LENGTH]; /* Function block is in */ 99 }; 100 101 102 /* dump the optional PC file, which contains basic block number to */ 103 /* instruction address and function name mappings */ 104 static void dumpPcFile(void) 105 { 106 struct BB_info *bb_elem; 107 Int pctrace_fd; 108 SysRes sres; 109 110 pc_out_file = 111 VG_(expand_file_name)("--pc-out-file", clo_pc_out_file); 112 113 sres = VG_(open)(pc_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY, 114 VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP); 115 if (sr_isError(sres)) { 116 VG_(umsg)("Error: cannot create pc file %s\n", pc_out_file); 117 VG_(exit)(1); 118 } else { 119 pctrace_fd = sr_Res(sres); 120 } 121 122 /* Loop through the table, printing the number, address, */ 123 /* and function name for each basic block */ 124 VG_(OSetGen_ResetIter)(instr_info_table); 125 while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) { 126 VG_(write)(pctrace_fd,"F",1); 127 VG_(sprintf)( buf,":%d:%x:%s\n", 128 bb_elem->block_num, 129 (Int)bb_elem->BB_addr, 130 bb_elem->fn_name); 131 VG_(write)(pctrace_fd, (void*)buf, VG_(strlen)(buf)); 132 } 133 134 VG_(close)(pctrace_fd); 135 } 136 137 static Int open_tracefile(Int thread_num) 138 { 139 SysRes sres; 140 HChar temp_string[2048]; 141 142 /* For thread 1, don't append any thread number */ 143 /* This lets the single-thread case not have any */ 144 /* extra values appended to the file name. */ 145 if (thread_num==1) { 146 VG_(strncpy)(temp_string,bb_out_file,2047); 147 } 148 else { 149 VG_(sprintf)(temp_string,"%s.%d",bb_out_file,thread_num); 150 } 151 152 sres = VG_(open)(temp_string, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY, 153 VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP); 154 155 if (sr_isError(sres)) { 156 VG_(umsg)("Error: cannot create bb file %s\n",temp_string); 157 VG_(exit)(1); 158 } 159 160 return sr_Res(sres); 161 } 162 163 static void handle_overflow(void) 164 { 165 struct BB_info *bb_elem; 166 167 if (bbv_thread[current_thread].dyn_instr > interval_size) { 168 169 if (!instr_count_only) { 170 171 /* If our output fd hasn't been opened, open it */ 172 if (bbv_thread[current_thread].bbtrace_fd < 0) { 173 bbv_thread[current_thread].bbtrace_fd=open_tracefile(current_thread); 174 } 175 176 /* put an entry to the bb.out file */ 177 178 VG_(write)(bbv_thread[current_thread].bbtrace_fd,"T",1); 179 180 VG_(OSetGen_ResetIter)(instr_info_table); 181 while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) { 182 if ( bb_elem->inst_counter[current_thread] != 0 ) { 183 VG_(sprintf)( buf,":%d:%d ", 184 bb_elem->block_num, 185 bb_elem->inst_counter[current_thread]); 186 VG_(write)(bbv_thread[current_thread].bbtrace_fd, 187 (void*)buf, VG_(strlen)(buf)); 188 bb_elem->inst_counter[current_thread] = 0; 189 } 190 } 191 192 VG_(write)(bbv_thread[current_thread].bbtrace_fd,"\n",1); 193 } 194 195 bbv_thread[current_thread].dyn_instr -= interval_size; 196 } 197 } 198 199 200 static void close_out_reps(void) 201 { 202 bbv_thread[current_thread].global_rep_count+=bbv_thread[current_thread].rep_count; 203 bbv_thread[current_thread].unique_rep_count++; 204 bbv_thread[current_thread].rep_count=0; 205 } 206 207 /* Generic function to get called each instruction */ 208 static VG_REGPARM(1) void per_instruction_BBV(struct BB_info *bbInfo) 209 { 210 Int n_instrs=1; 211 212 tl_assert(bbInfo); 213 214 /* we finished rep but didn't clear out count */ 215 if (bbv_thread[current_thread].rep_count) { 216 n_instrs++; 217 close_out_reps(); 218 } 219 220 bbInfo->inst_counter[current_thread]+=n_instrs; 221 222 bbv_thread[current_thread].total_instr+=n_instrs; 223 bbv_thread[current_thread].dyn_instr +=n_instrs; 224 225 handle_overflow(); 226 } 227 228 /* Function to get called if instruction has a rep prefix */ 229 static VG_REGPARM(1) void per_instruction_BBV_rep(Addr addr) 230 { 231 /* handle back-to-back rep instructions */ 232 if (bbv_thread[current_thread].last_rep_addr!=addr) { 233 if (bbv_thread[current_thread].rep_count) { 234 close_out_reps(); 235 bbv_thread[current_thread].total_instr++; 236 bbv_thread[current_thread].dyn_instr++; 237 } 238 bbv_thread[current_thread].last_rep_addr=addr; 239 } 240 241 bbv_thread[current_thread].rep_count++; 242 243 } 244 245 /* Function to call if our instruction has a fldcw instruction */ 246 static VG_REGPARM(1) void per_instruction_BBV_fldcw(struct BB_info *bbInfo) 247 { 248 Int n_instrs=1; 249 250 tl_assert(bbInfo); 251 252 /* we finished rep but didn't clear out count */ 253 if (bbv_thread[current_thread].rep_count) { 254 n_instrs++; 255 close_out_reps(); 256 } 257 258 /* count fldcw instructions */ 259 bbv_thread[current_thread].fldcw_count++; 260 261 bbInfo->inst_counter[current_thread]+=n_instrs; 262 263 bbv_thread[current_thread].total_instr+=n_instrs; 264 bbv_thread[current_thread].dyn_instr +=n_instrs; 265 266 handle_overflow(); 267 } 268 269 /* Check if the instruction pointed to is one that needs */ 270 /* special handling. If so, set a bit in the return */ 271 /* value indicating what type. */ 272 static Int get_inst_type(Int len, Addr addr) 273 { 274 int result=0; 275 276 #if defined(VGA_x86) || defined(VGA_amd64) 277 278 UChar *inst_pointer; 279 UChar inst_byte; 280 int i,possible_rep; 281 282 /* rep prefixed instructions are counted as one instruction on */ 283 /* x86 processors and must be handled as a special case */ 284 285 /* Also, the rep prefix is re-used as part of the opcode for */ 286 /* SSE instructions. So we need to specifically check for */ 287 /* the following: movs, cmps, scas, lods, stos, ins, outs */ 288 289 inst_pointer=(UChar *)addr; 290 i=0; 291 inst_byte=0; 292 possible_rep=0; 293 294 while (i<len) { 295 296 inst_byte=*inst_pointer; 297 298 if ( (inst_byte == 0x67) || /* size override prefix */ 299 (inst_byte == 0x66) || /* size override prefix */ 300 (inst_byte == 0x48) ) { /* 64-bit prefix */ 301 } else if ( (inst_byte == 0xf2) || /* rep prefix */ 302 (inst_byte == 0xf3) ) { /* repne prefix */ 303 possible_rep=1; 304 } else { 305 break; /* other byte, exit */ 306 } 307 308 i++; 309 inst_pointer++; 310 } 311 312 if ( possible_rep && 313 ( ( (inst_byte >= 0xa4) && /* movs,cmps,scas */ 314 (inst_byte <= 0xaf) ) || /* lods,stos */ 315 ( (inst_byte >= 0x6c) && 316 (inst_byte <= 0x6f) ) ) ) { /* ins,outs */ 317 318 result|=REP_INSTRUCTION; 319 } 320 321 /* fldcw instructions are double-counted by the hardware */ 322 /* performance counters on pentium 4 processors so it is */ 323 /* useful to have that count when doing validation work. */ 324 325 inst_pointer=(UChar *)addr; 326 if (len>1) { 327 /* FLDCW detection */ 328 /* opcode is 0xd9/5, ie 1101 1001 oo10 1mmm */ 329 if ((*inst_pointer==0xd9) && 330 (*(inst_pointer+1)<0xb0) && /* need this case of fldz, etc, count */ 331 ( (*(inst_pointer+1) & 0x38) == 0x28)) { 332 result|=FLDCW_INSTRUCTION; 333 } 334 } 335 336 #endif 337 return result; 338 } 339 340 341 342 /* Our instrumentation function */ 343 /* sbIn = super block to translate */ 344 /* layout = guest layout */ 345 /* gWordTy = size of guest word */ 346 /* hWordTy = size of host word */ 347 static IRSB* bbv_instrument ( VgCallbackClosure* closure, 348 IRSB* sbIn, VexGuestLayout* layout, 349 VexGuestExtents* vge, 350 VexArchInfo* archinfo_host, 351 IRType gWordTy, IRType hWordTy ) 352 { 353 Int i,n_instrs=1; 354 IRSB *sbOut; 355 IRStmt *st; 356 struct BB_info *bbInfo; 357 Addr64 origAddr,ourAddr; 358 IRDirty *di; 359 IRExpr **argv, *arg1; 360 Int regparms,opcode_type; 361 362 /* We don't handle a host/guest word size mismatch */ 363 if (gWordTy != hWordTy) { 364 VG_(tool_panic)("host/guest word size mismatch"); 365 } 366 367 /* Set up SB */ 368 sbOut = deepCopyIRSBExceptStmts(sbIn); 369 370 /* Copy verbatim any IR preamble preceding the first IMark */ 371 i = 0; 372 while ( (i < sbIn->stmts_used) && (sbIn->stmts[i]->tag!=Ist_IMark)) { 373 addStmtToIRSB( sbOut, sbIn->stmts[i] ); 374 i++; 375 } 376 377 /* Get the first statement */ 378 tl_assert(sbIn->stmts_used > 0); 379 st = sbIn->stmts[i]; 380 381 /* double check we are at a Mark statement */ 382 tl_assert(Ist_IMark == st->tag); 383 384 origAddr=st->Ist.IMark.addr; 385 386 /* Get the BB_info */ 387 bbInfo = VG_(OSetGen_Lookup)(instr_info_table, &origAddr); 388 389 if (bbInfo==NULL) { 390 391 /* BB never translated before (at this address, at least; */ 392 /* could have been unloaded and then reloaded elsewhere in memory) */ 393 394 /* allocate and initialize a new basic block structure */ 395 bbInfo=VG_(OSetGen_AllocNode)(instr_info_table, sizeof(struct BB_info)); 396 bbInfo->BB_addr = origAddr; 397 bbInfo->n_instrs = n_instrs; 398 bbInfo->inst_counter=VG_(calloc)("bbv_instrument", 399 allocated_threads, 400 sizeof(Int)); 401 402 /* assign a unique block number */ 403 bbInfo->block_num=block_num; 404 block_num++; 405 /* get function name and entry point information */ 406 VG_(get_fnname)(origAddr,bbInfo->fn_name,FUNCTION_NAME_LENGTH); 407 bbInfo->is_entry=VG_(get_fnname_if_entry)(origAddr, bbInfo->fn_name, 408 FUNCTION_NAME_LENGTH); 409 /* insert structure into table */ 410 VG_(OSetGen_Insert)( instr_info_table, bbInfo ); 411 } 412 413 /* Iterate through the basic block, putting the original */ 414 /* instructions in place, plus putting a call to updateBBV */ 415 /* for each original instruction */ 416 417 /* This is less efficient than only instrumenting the BB */ 418 /* But it gives proper results given the fact that */ 419 /* valgrind uses superblocks (not basic blocks) by default */ 420 421 422 while(i < sbIn->stmts_used) { 423 st=sbIn->stmts[i]; 424 425 if (st->tag == Ist_IMark) { 426 427 ourAddr = st->Ist.IMark.addr; 428 429 opcode_type=get_inst_type(st->Ist.IMark.len,ourAddr); 430 431 regparms=1; 432 arg1= mkIRExpr_HWord( (HWord)bbInfo); 433 argv= mkIRExprVec_1(arg1); 434 435 436 if (opcode_type&REP_INSTRUCTION) { 437 arg1= mkIRExpr_HWord(ourAddr); 438 argv= mkIRExprVec_1(arg1); 439 di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_rep", 440 VG_(fnptr_to_fnentry)( &per_instruction_BBV_rep ), 441 argv); 442 } 443 else if (opcode_type&FLDCW_INSTRUCTION) { 444 di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_fldcw", 445 VG_(fnptr_to_fnentry)( &per_instruction_BBV_fldcw ), 446 argv); 447 } 448 else { 449 di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV", 450 VG_(fnptr_to_fnentry)( &per_instruction_BBV ), 451 argv); 452 } 453 454 455 /* Insert our call */ 456 addStmtToIRSB( sbOut, IRStmt_Dirty(di)); 457 } 458 459 /* Insert the original instruction */ 460 addStmtToIRSB( sbOut, st ); 461 462 i++; 463 } 464 465 return sbOut; 466 } 467 468 static struct thread_info *allocate_new_thread(struct thread_info *old, 469 Int old_number, Int new_number) 470 { 471 struct thread_info *temp; 472 struct BB_info *bb_elem; 473 Int i; 474 475 temp=VG_(realloc)("bbv_main.c allocate_threads", 476 old, 477 new_number*sizeof(struct thread_info)); 478 479 /* init the new thread */ 480 /* We loop in case the new thread is not contiguous */ 481 for(i=old_number;i<new_number;i++) { 482 temp[i].last_rep_addr=0; 483 temp[i].dyn_instr=0; 484 temp[i].total_instr=0; 485 temp[i].global_rep_count=0; 486 temp[i].unique_rep_count=0; 487 temp[i].rep_count=0; 488 temp[i].fldcw_count=0; 489 temp[i].bbtrace_fd=-1; 490 } 491 /* expand the inst_counter on all allocated basic blocks */ 492 VG_(OSetGen_ResetIter)(instr_info_table); 493 while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) { 494 bb_elem->inst_counter = 495 VG_(realloc)("bbv_main.c inst_counter", 496 bb_elem->inst_counter, 497 new_number*sizeof(Int)); 498 for(i=old_number;i<new_number;i++) { 499 bb_elem->inst_counter[i]=0; 500 } 501 } 502 503 return temp; 504 } 505 506 static void bbv_thread_called ( ThreadId tid, ULong nDisp ) 507 { 508 if (tid >= allocated_threads) { 509 bbv_thread=allocate_new_thread(bbv_thread,allocated_threads,tid+1); 510 allocated_threads=tid+1; 511 } 512 current_thread=tid; 513 } 514 515 516 517 518 /*--------------------------------------------------------------------*/ 519 /*--- Setup ---*/ 520 /*--------------------------------------------------------------------*/ 521 522 static void bbv_post_clo_init(void) 523 { 524 bb_out_file = 525 VG_(expand_file_name)("--bb-out-file", clo_bb_out_file); 526 527 /* Try a closer approximation of basic blocks */ 528 /* This is the same as the command line option */ 529 /* --vex-guest-chase-thresh=0 */ 530 VG_(clo_vex_control).guest_chase_thresh = 0; 531 } 532 533 /* Parse the command line options */ 534 static Bool bbv_process_cmd_line_option(const HChar* arg) 535 { 536 if VG_INT_CLO (arg, "--interval-size", interval_size) {} 537 else if VG_STR_CLO (arg, "--bb-out-file", clo_bb_out_file) {} 538 else if VG_STR_CLO (arg, "--pc-out-file", clo_pc_out_file) { 539 generate_pc_file = True; 540 } 541 else if VG_BOOL_CLO (arg, "--instr-count-only", instr_count_only) {} 542 else { 543 return False; 544 } 545 546 return True; 547 } 548 549 static void bbv_print_usage(void) 550 { 551 VG_(printf)( 552 " --bb-out-file=<file> filename for BBV info\n" 553 " --pc-out-file=<file> filename for BB addresses and function names\n" 554 " --interval-size=<num> interval size\n" 555 " --instr-count-only=yes|no only print total instruction count\n" 556 ); 557 } 558 559 static void bbv_print_debug_usage(void) 560 { 561 VG_(printf)(" (none)\n"); 562 } 563 564 static void bbv_fini(Int exitcode) 565 { 566 Int i; 567 568 if (generate_pc_file) { 569 dumpPcFile(); 570 } 571 572 for(i=0;i<allocated_threads;i++) { 573 574 if (bbv_thread[i].total_instr!=0) { 575 576 VG_(sprintf)(buf,"\n\n" 577 "# Thread %d\n" 578 "# Total intervals: %d (Interval Size %d)\n" 579 "# Total instructions: %lld\n" 580 "# Total reps: %lld\n" 581 "# Unique reps: %lld\n" 582 "# Total fldcw instructions: %lld\n\n", 583 i, 584 (Int)(bbv_thread[i].total_instr/(ULong)interval_size), 585 interval_size, 586 bbv_thread[i].total_instr, 587 bbv_thread[i].global_rep_count, 588 bbv_thread[i].unique_rep_count, 589 bbv_thread[i].fldcw_count); 590 591 /* Print results to display */ 592 VG_(umsg)("%s\n", buf); 593 594 /* open the output file if it hasn't already */ 595 if (bbv_thread[i].bbtrace_fd < 0) { 596 bbv_thread[i].bbtrace_fd=open_tracefile(i); 597 } 598 /* Also print to results file */ 599 VG_(write)(bbv_thread[i].bbtrace_fd,(void*)buf,VG_(strlen)(buf)); 600 VG_(close)(bbv_thread[i].bbtrace_fd); 601 } 602 } 603 } 604 605 static void bbv_pre_clo_init(void) 606 { 607 VG_(details_name) ("exp-bbv"); 608 VG_(details_version) (NULL); 609 VG_(details_description) ("a SimPoint basic block vector generator"); 610 VG_(details_copyright_author)( 611 "Copyright (C) 2006-2013 Vince Weaver"); 612 VG_(details_bug_reports_to) (VG_BUGS_TO); 613 614 VG_(basic_tool_funcs) (bbv_post_clo_init, 615 bbv_instrument, 616 bbv_fini); 617 618 VG_(needs_command_line_options)(bbv_process_cmd_line_option, 619 bbv_print_usage, 620 bbv_print_debug_usage); 621 622 VG_(track_start_client_code)( bbv_thread_called ); 623 624 625 instr_info_table = VG_(OSetGen_Create)(/*keyOff*/0, 626 NULL, 627 VG_(malloc), "bbv.1", VG_(free)); 628 629 bbv_thread=allocate_new_thread(bbv_thread,0,allocated_threads); 630 } 631 632 VG_DETERMINE_INTERFACE_VERSION(bbv_pre_clo_init) 633 634 /*--------------------------------------------------------------------*/ 635 /*--- end ---*/ 636 /*--------------------------------------------------------------------*/ 637