1 2 /*--------------------------------------------------------------------*/ 3 /*--- An example Valgrind tool. lk_main.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Lackey, an example Valgrind tool that does 8 some simple program measurement and tracing. 9 10 Copyright (C) 2002-2015 Nicholas Nethercote 11 njn (at) valgrind.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 */ 30 31 // This tool shows how to do some basic instrumentation. 32 // 33 // There are four kinds of instrumentation it can do. They can be turned 34 // on/off independently with command line options: 35 // 36 // * --basic-counts : do basic counts, eg. number of instructions 37 // executed, jumps executed, etc. 38 // * --detailed-counts: do more detailed counts: number of loads, stores 39 // and ALU operations of different sizes. 40 // * --trace-mem=yes: trace all (data) memory accesses. 41 // * --trace-superblocks=yes: 42 // trace all superblock entries. Mostly of interest 43 // to the Valgrind developers. 44 // 45 // The code for each kind of instrumentation is guarded by a clo_* variable: 46 // clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs. 47 // 48 // If you want to modify any of the instrumentation code, look for the code 49 // that is guarded by the relevant clo_* variable (eg. clo_trace_mem) 50 // If you're not interested in the other kinds of instrumentation you can 51 // remove them. If you want to do more complex modifications, please read 52 // VEX/pub/libvex_ir.h to understand the intermediate representation. 53 // 54 // 55 // Specific Details about --trace-mem=yes 56 // -------------------------------------- 57 // Lackey's --trace-mem code is a good starting point for building Valgrind 58 // tools that act on memory loads and stores. It also could be used as is, 59 // with its output used as input to a post-mortem processing step. However, 60 // because memory traces can be very large, online analysis is generally 61 // better. 62 // 63 // It prints memory data access traces that look like this: 64 // 65 // I 0023C790,2 # instruction read at 0x0023C790 of size 2 66 // I 0023C792,5 67 // S BE80199C,4 # data store at 0xBE80199C of size 4 68 // I 0025242B,3 69 // L BE801950,4 # data load at 0xBE801950 of size 4 70 // I 0023D476,7 71 // M 0025747C,1 # data modify at 0x0025747C of size 1 72 // I 0023DC20,2 73 // L 00254962,1 74 // L BE801FB3,1 75 // I 00252305,1 76 // L 00254AEB,1 77 // S 00257998,1 78 // 79 // Every instruction executed has an "instr" event representing it. 80 // Instructions that do memory accesses are followed by one or more "load", 81 // "store" or "modify" events. Some instructions do more than one load or 82 // store, as in the last two examples in the above trace. 83 // 84 // Here are some examples of x86 instructions that do different combinations 85 // of loads, stores, and modifies. 86 // 87 // Instruction Memory accesses Event sequence 88 // ----------- --------------- -------------- 89 // add %eax, %ebx No loads or stores instr 90 // 91 // movl (%eax), %ebx loads (%eax) instr, load 92 // 93 // movl %eax, (%ebx) stores (%ebx) instr, store 94 // 95 // incl (%ecx) modifies (%ecx) instr, modify 96 // 97 // cmpsb loads (%esi), loads(%edi) instr, load, load 98 // 99 // call*l (%edx) loads (%edx), stores -4(%esp) instr, load, store 100 // pushl (%edx) loads (%edx), stores -4(%esp) instr, load, store 101 // movsw loads (%esi), stores (%edi) instr, load, store 102 // 103 // Instructions using x86 "rep" prefixes are traced as if they are repeated 104 // N times. 105 // 106 // Lackey with --trace-mem gives good traces, but they are not perfect, for 107 // the following reasons: 108 // 109 // - It does not trace into the OS kernel, so system calls and other kernel 110 // operations (eg. some scheduling and signal handling code) are ignored. 111 // 112 // - It could model loads and stores done at the system call boundary using 113 // the pre_mem_read/post_mem_write events. For example, if you call 114 // fstat() you know that the passed in buffer has been written. But it 115 // currently does not do this. 116 // 117 // - Valgrind replaces some code (not much) with its own, notably parts of 118 // code for scheduling operations and signal handling. This code is not 119 // traced. 120 // 121 // - There is no consideration of virtual-to-physical address mapping. 122 // This may not matter for many purposes. 123 // 124 // - Valgrind modifies the instruction stream in some very minor ways. For 125 // example, on x86 the bts, btc, btr instructions are incorrectly 126 // considered to always touch memory (this is a consequence of these 127 // instructions being very difficult to simulate). 128 // 129 // - Valgrind tools layout memory differently to normal programs, so the 130 // addresses you get will not be typical. Thus Lackey (and all Valgrind 131 // tools) is suitable for getting relative memory traces -- eg. if you 132 // want to analyse locality of memory accesses -- but is not good if 133 // absolute addresses are important. 134 // 135 // Despite all these warnings, Lackey's results should be good enough for a 136 // wide range of purposes. For example, Cachegrind shares all the above 137 // shortcomings and it is still useful. 138 // 139 // For further inspiration, you should look at cachegrind/cg_main.c which 140 // uses the same basic technique for tracing memory accesses, but also groups 141 // events together for processing into twos and threes so that fewer C calls 142 // are made and things run faster. 143 // 144 // Specific Details about --trace-superblocks=yes 145 // ---------------------------------------------- 146 // Valgrind splits code up into single entry, multiple exit blocks 147 // known as superblocks. By itself, --trace-superblocks=yes just 148 // prints a message as each superblock is run: 149 // 150 // SB 04013170 151 // SB 04013177 152 // SB 04013173 153 // SB 04013177 154 // 155 // The hex number is the address of the first instruction in the 156 // superblock. You can see the relationship more obviously if you use 157 // --trace-superblocks=yes and --trace-mem=yes together. Then a "SB" 158 // message at address X is immediately followed by an "instr:" message 159 // for that address, as the first instruction in the block is 160 // executed, for example: 161 // 162 // SB 04014073 163 // I 04014073,3 164 // L 7FEFFF7F8,8 165 // I 04014076,4 166 // I 0401407A,3 167 // I 0401407D,3 168 // I 04014080,3 169 // I 04014083,6 170 171 172 #include "pub_tool_basics.h" 173 #include "pub_tool_tooliface.h" 174 #include "pub_tool_libcassert.h" 175 #include "pub_tool_libcprint.h" 176 #include "pub_tool_debuginfo.h" 177 #include "pub_tool_libcbase.h" 178 #include "pub_tool_options.h" 179 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry) 180 181 /*------------------------------------------------------------*/ 182 /*--- Command line options ---*/ 183 /*------------------------------------------------------------*/ 184 185 /* Command line options controlling instrumentation kinds, as described at 186 * the top of this file. */ 187 static Bool clo_basic_counts = True; 188 static Bool clo_detailed_counts = False; 189 static Bool clo_trace_mem = False; 190 static Bool clo_trace_sbs = False; 191 192 /* The name of the function of which the number of calls (under 193 * --basic-counts=yes) is to be counted, with default. Override with command 194 * line option --fnname. */ 195 static const HChar* clo_fnname = "main"; 196 197 static Bool lk_process_cmd_line_option(const HChar* arg) 198 { 199 if VG_STR_CLO(arg, "--fnname", clo_fnname) {} 200 else if VG_BOOL_CLO(arg, "--basic-counts", clo_basic_counts) {} 201 else if VG_BOOL_CLO(arg, "--detailed-counts", clo_detailed_counts) {} 202 else if VG_BOOL_CLO(arg, "--trace-mem", clo_trace_mem) {} 203 else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {} 204 else 205 return False; 206 207 tl_assert(clo_fnname); 208 tl_assert(clo_fnname[0]); 209 return True; 210 } 211 212 static void lk_print_usage(void) 213 { 214 VG_(printf)( 215 " --basic-counts=no|yes count instructions, jumps, etc. [yes]\n" 216 " --detailed-counts=no|yes count loads, stores and alu ops [no]\n" 217 " --trace-mem=no|yes trace all loads and stores [no]\n" 218 " --trace-superblocks=no|yes trace all superblock entries [no]\n" 219 " --fnname=<name> count calls to <name> (only used if\n" 220 " --basic-count=yes) [main]\n" 221 ); 222 } 223 224 static void lk_print_debug_usage(void) 225 { 226 VG_(printf)( 227 " (none)\n" 228 ); 229 } 230 231 /*------------------------------------------------------------*/ 232 /*--- Stuff for --basic-counts ---*/ 233 /*------------------------------------------------------------*/ 234 235 /* Nb: use ULongs because the numbers can get very big */ 236 static ULong n_func_calls = 0; 237 static ULong n_SBs_entered = 0; 238 static ULong n_SBs_completed = 0; 239 static ULong n_IRStmts = 0; 240 static ULong n_guest_instrs = 0; 241 static ULong n_Jccs = 0; 242 static ULong n_Jccs_untaken = 0; 243 static ULong n_IJccs = 0; 244 static ULong n_IJccs_untaken = 0; 245 246 static void add_one_func_call(void) 247 { 248 n_func_calls++; 249 } 250 251 static void add_one_SB_entered(void) 252 { 253 n_SBs_entered++; 254 } 255 256 static void add_one_SB_completed(void) 257 { 258 n_SBs_completed++; 259 } 260 261 static void add_one_IRStmt(void) 262 { 263 n_IRStmts++; 264 } 265 266 static void add_one_guest_instr(void) 267 { 268 n_guest_instrs++; 269 } 270 271 static void add_one_Jcc(void) 272 { 273 n_Jccs++; 274 } 275 276 static void add_one_Jcc_untaken(void) 277 { 278 n_Jccs_untaken++; 279 } 280 281 static void add_one_inverted_Jcc(void) 282 { 283 n_IJccs++; 284 } 285 286 static void add_one_inverted_Jcc_untaken(void) 287 { 288 n_IJccs_untaken++; 289 } 290 291 /*------------------------------------------------------------*/ 292 /*--- Stuff for --detailed-counts ---*/ 293 /*------------------------------------------------------------*/ 294 295 typedef 296 IRExpr 297 IRAtom; 298 299 /* --- Operations --- */ 300 301 typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op; 302 303 #define N_OPS 3 304 305 306 /* --- Types --- */ 307 308 #define N_TYPES 14 309 310 static Int type2index ( IRType ty ) 311 { 312 switch (ty) { 313 case Ity_I1: return 0; 314 case Ity_I8: return 1; 315 case Ity_I16: return 2; 316 case Ity_I32: return 3; 317 case Ity_I64: return 4; 318 case Ity_I128: return 5; 319 case Ity_F32: return 6; 320 case Ity_F64: return 7; 321 case Ity_F128: return 8; 322 case Ity_V128: return 9; 323 case Ity_V256: return 10; 324 case Ity_D32: return 11; 325 case Ity_D64: return 12; 326 case Ity_D128: return 13; 327 default: tl_assert(0); 328 } 329 } 330 331 static const HChar* nameOfTypeIndex ( Int i ) 332 { 333 switch (i) { 334 case 0: return "I1"; break; 335 case 1: return "I8"; break; 336 case 2: return "I16"; break; 337 case 3: return "I32"; break; 338 case 4: return "I64"; break; 339 case 5: return "I128"; break; 340 case 6: return "F32"; break; 341 case 7: return "F64"; break; 342 case 8: return "F128"; break; 343 case 9: return "V128"; break; 344 case 10: return "V256"; break; 345 case 11: return "D32"; break; 346 case 12: return "D64"; break; 347 case 13: return "D128"; break; 348 default: tl_assert(0); 349 } 350 } 351 352 353 /* --- Counts --- */ 354 355 static ULong detailCounts[N_OPS][N_TYPES]; 356 357 /* The helper that is called from the instrumented code. */ 358 static VG_REGPARM(1) 359 void increment_detail(ULong* detail) 360 { 361 (*detail)++; 362 } 363 364 /* A helper that adds the instrumentation for a detail. guard :: 365 Ity_I1 is the guarding condition for the event. If NULL it is 366 assumed to mean "always True". */ 367 static void instrument_detail(IRSB* sb, Op op, IRType type, IRAtom* guard) 368 { 369 IRDirty* di; 370 IRExpr** argv; 371 const UInt typeIx = type2index(type); 372 373 tl_assert(op < N_OPS); 374 tl_assert(typeIx < N_TYPES); 375 376 argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) ); 377 di = unsafeIRDirty_0_N( 1, "increment_detail", 378 VG_(fnptr_to_fnentry)( &increment_detail ), 379 argv); 380 if (guard) di->guard = guard; 381 addStmtToIRSB( sb, IRStmt_Dirty(di) ); 382 } 383 384 /* Summarize and print the details. */ 385 static void print_details ( void ) 386 { 387 Int typeIx; 388 VG_(umsg)(" Type Loads Stores AluOps\n"); 389 VG_(umsg)(" -------------------------------------------\n"); 390 for (typeIx = 0; typeIx < N_TYPES; typeIx++) { 391 VG_(umsg)(" %-4s %'12llu %'12llu %'12llu\n", 392 nameOfTypeIndex( typeIx ), 393 detailCounts[OpLoad ][typeIx], 394 detailCounts[OpStore][typeIx], 395 detailCounts[OpAlu ][typeIx] 396 ); 397 } 398 } 399 400 401 /*------------------------------------------------------------*/ 402 /*--- Stuff for --trace-mem ---*/ 403 /*------------------------------------------------------------*/ 404 405 #define MAX_DSIZE 512 406 407 typedef 408 enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm } 409 EventKind; 410 411 typedef 412 struct { 413 EventKind ekind; 414 IRAtom* addr; 415 Int size; 416 IRAtom* guard; /* :: Ity_I1, or NULL=="always True" */ 417 } 418 Event; 419 420 /* Up to this many unnotified events are allowed. Must be at least two, 421 so that reads and writes to the same address can be merged into a modify. 422 Beyond that, larger numbers just potentially induce more spilling due to 423 extending live ranges of address temporaries. */ 424 #define N_EVENTS 4 425 426 /* Maintain an ordered list of memory events which are outstanding, in 427 the sense that no IR has yet been generated to do the relevant 428 helper calls. The SB is scanned top to bottom and memory events 429 are added to the end of the list, merging with the most recent 430 notified event where possible (Dw immediately following Dr and 431 having the same size and EA can be merged). 432 433 This merging is done so that for architectures which have 434 load-op-store instructions (x86, amd64), the instr is treated as if 435 it makes just one memory reference (a modify), rather than two (a 436 read followed by a write at the same address). 437 438 At various points the list will need to be flushed, that is, IR 439 generated from it. That must happen before any possible exit from 440 the block (the end, or an IRStmt_Exit). Flushing also takes place 441 when there is no space to add a new event, and before entering a 442 RMW (read-modify-write) section on processors supporting LL/SC. 443 444 If we require the simulation statistics to be up to date with 445 respect to possible memory exceptions, then the list would have to 446 be flushed before each memory reference. That's a pain so we don't 447 bother. 448 449 Flushing the list consists of walking it start to end and emitting 450 instrumentation IR for each event, in the order in which they 451 appear. */ 452 453 static Event events[N_EVENTS]; 454 static Int events_used = 0; 455 456 457 static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size) 458 { 459 VG_(printf)("I %08lx,%lu\n", addr, size); 460 } 461 462 static VG_REGPARM(2) void trace_load(Addr addr, SizeT size) 463 { 464 VG_(printf)(" L %08lx,%lu\n", addr, size); 465 } 466 467 static VG_REGPARM(2) void trace_store(Addr addr, SizeT size) 468 { 469 VG_(printf)(" S %08lx,%lu\n", addr, size); 470 } 471 472 static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size) 473 { 474 VG_(printf)(" M %08lx,%lu\n", addr, size); 475 } 476 477 478 static void flushEvents(IRSB* sb) 479 { 480 Int i; 481 const HChar* helperName; 482 void* helperAddr; 483 IRExpr** argv; 484 IRDirty* di; 485 Event* ev; 486 487 for (i = 0; i < events_used; i++) { 488 489 ev = &events[i]; 490 491 // Decide on helper fn to call and args to pass it. 492 switch (ev->ekind) { 493 case Event_Ir: helperName = "trace_instr"; 494 helperAddr = trace_instr; break; 495 496 case Event_Dr: helperName = "trace_load"; 497 helperAddr = trace_load; break; 498 499 case Event_Dw: helperName = "trace_store"; 500 helperAddr = trace_store; break; 501 502 case Event_Dm: helperName = "trace_modify"; 503 helperAddr = trace_modify; break; 504 default: 505 tl_assert(0); 506 } 507 508 // Add the helper. 509 argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) ); 510 di = unsafeIRDirty_0_N( /*regparms*/2, 511 helperName, VG_(fnptr_to_fnentry)( helperAddr ), 512 argv ); 513 if (ev->guard) { 514 di->guard = ev->guard; 515 } 516 addStmtToIRSB( sb, IRStmt_Dirty(di) ); 517 } 518 519 events_used = 0; 520 } 521 522 // WARNING: If you aren't interested in instruction reads, you can omit the 523 // code that adds calls to trace_instr() in flushEvents(). However, you 524 // must still call this function, addEvent_Ir() -- it is necessary to add 525 // the Ir events to the events list so that merging of paired load/store 526 // events into modify events works correctly. 527 static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize ) 528 { 529 Event* evt; 530 tl_assert(clo_trace_mem); 531 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB) 532 || VG_CLREQ_SZB == isize ); 533 if (events_used == N_EVENTS) 534 flushEvents(sb); 535 tl_assert(events_used >= 0 && events_used < N_EVENTS); 536 evt = &events[events_used]; 537 evt->ekind = Event_Ir; 538 evt->addr = iaddr; 539 evt->size = isize; 540 evt->guard = NULL; 541 events_used++; 542 } 543 544 /* Add a guarded read event. */ 545 static 546 void addEvent_Dr_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard ) 547 { 548 Event* evt; 549 tl_assert(clo_trace_mem); 550 tl_assert(isIRAtom(daddr)); 551 tl_assert(dsize >= 1 && dsize <= MAX_DSIZE); 552 if (events_used == N_EVENTS) 553 flushEvents(sb); 554 tl_assert(events_used >= 0 && events_used < N_EVENTS); 555 evt = &events[events_used]; 556 evt->ekind = Event_Dr; 557 evt->addr = daddr; 558 evt->size = dsize; 559 evt->guard = guard; 560 events_used++; 561 } 562 563 /* Add an ordinary read event, by adding a guarded read event with an 564 always-true guard. */ 565 static 566 void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize ) 567 { 568 addEvent_Dr_guarded(sb, daddr, dsize, NULL); 569 } 570 571 /* Add a guarded write event. */ 572 static 573 void addEvent_Dw_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard ) 574 { 575 Event* evt; 576 tl_assert(clo_trace_mem); 577 tl_assert(isIRAtom(daddr)); 578 tl_assert(dsize >= 1 && dsize <= MAX_DSIZE); 579 if (events_used == N_EVENTS) 580 flushEvents(sb); 581 tl_assert(events_used >= 0 && events_used < N_EVENTS); 582 evt = &events[events_used]; 583 evt->ekind = Event_Dw; 584 evt->addr = daddr; 585 evt->size = dsize; 586 evt->guard = guard; 587 events_used++; 588 } 589 590 /* Add an ordinary write event. Try to merge it with an immediately 591 preceding ordinary read event of the same size to the same 592 address. */ 593 static 594 void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize ) 595 { 596 Event* lastEvt; 597 Event* evt; 598 tl_assert(clo_trace_mem); 599 tl_assert(isIRAtom(daddr)); 600 tl_assert(dsize >= 1 && dsize <= MAX_DSIZE); 601 602 // Is it possible to merge this write with the preceding read? 603 lastEvt = &events[events_used-1]; 604 if (events_used > 0 605 && lastEvt->ekind == Event_Dr 606 && lastEvt->size == dsize 607 && lastEvt->guard == NULL 608 && eqIRAtom(lastEvt->addr, daddr)) 609 { 610 lastEvt->ekind = Event_Dm; 611 return; 612 } 613 614 // No. Add as normal. 615 if (events_used == N_EVENTS) 616 flushEvents(sb); 617 tl_assert(events_used >= 0 && events_used < N_EVENTS); 618 evt = &events[events_used]; 619 evt->ekind = Event_Dw; 620 evt->size = dsize; 621 evt->addr = daddr; 622 evt->guard = NULL; 623 events_used++; 624 } 625 626 627 /*------------------------------------------------------------*/ 628 /*--- Stuff for --trace-superblocks ---*/ 629 /*------------------------------------------------------------*/ 630 631 static void trace_superblock(Addr addr) 632 { 633 VG_(printf)("SB %08lx\n", addr); 634 } 635 636 637 /*------------------------------------------------------------*/ 638 /*--- Basic tool functions ---*/ 639 /*------------------------------------------------------------*/ 640 641 static void lk_post_clo_init(void) 642 { 643 Int op, tyIx; 644 645 if (clo_detailed_counts) { 646 for (op = 0; op < N_OPS; op++) 647 for (tyIx = 0; tyIx < N_TYPES; tyIx++) 648 detailCounts[op][tyIx] = 0; 649 } 650 } 651 652 static 653 IRSB* lk_instrument ( VgCallbackClosure* closure, 654 IRSB* sbIn, 655 const VexGuestLayout* layout, 656 const VexGuestExtents* vge, 657 const VexArchInfo* archinfo_host, 658 IRType gWordTy, IRType hWordTy ) 659 { 660 IRDirty* di; 661 Int i; 662 IRSB* sbOut; 663 IRTypeEnv* tyenv = sbIn->tyenv; 664 Addr iaddr = 0, dst; 665 UInt ilen = 0; 666 Bool condition_inverted = False; 667 668 if (gWordTy != hWordTy) { 669 /* We don't currently support this case. */ 670 VG_(tool_panic)("host/guest word size mismatch"); 671 } 672 673 /* Set up SB */ 674 sbOut = deepCopyIRSBExceptStmts(sbIn); 675 676 // Copy verbatim any IR preamble preceding the first IMark 677 i = 0; 678 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) { 679 addStmtToIRSB( sbOut, sbIn->stmts[i] ); 680 i++; 681 } 682 683 if (clo_basic_counts) { 684 /* Count this superblock. */ 685 di = unsafeIRDirty_0_N( 0, "add_one_SB_entered", 686 VG_(fnptr_to_fnentry)( &add_one_SB_entered ), 687 mkIRExprVec_0() ); 688 addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); 689 } 690 691 if (clo_trace_sbs) { 692 /* Print this superblock's address. */ 693 di = unsafeIRDirty_0_N( 694 0, "trace_superblock", 695 VG_(fnptr_to_fnentry)( &trace_superblock ), 696 mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) ) 697 ); 698 addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); 699 } 700 701 if (clo_trace_mem) { 702 events_used = 0; 703 } 704 705 for (/*use current i*/; i < sbIn->stmts_used; i++) { 706 IRStmt* st = sbIn->stmts[i]; 707 if (!st || st->tag == Ist_NoOp) continue; 708 709 if (clo_basic_counts) { 710 /* Count one VEX statement. */ 711 di = unsafeIRDirty_0_N( 0, "add_one_IRStmt", 712 VG_(fnptr_to_fnentry)( &add_one_IRStmt ), 713 mkIRExprVec_0() ); 714 addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); 715 } 716 717 switch (st->tag) { 718 case Ist_NoOp: 719 case Ist_AbiHint: 720 case Ist_Put: 721 case Ist_PutI: 722 case Ist_MBE: 723 addStmtToIRSB( sbOut, st ); 724 break; 725 726 case Ist_IMark: 727 if (clo_basic_counts) { 728 /* Needed to be able to check for inverted condition in Ist_Exit */ 729 iaddr = st->Ist.IMark.addr; 730 ilen = st->Ist.IMark.len; 731 732 /* Count guest instruction. */ 733 di = unsafeIRDirty_0_N( 0, "add_one_guest_instr", 734 VG_(fnptr_to_fnentry)( &add_one_guest_instr ), 735 mkIRExprVec_0() ); 736 addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); 737 738 /* An unconditional branch to a known destination in the 739 * guest's instructions can be represented, in the IRSB to 740 * instrument, by the VEX statements that are the 741 * translation of that known destination. This feature is 742 * called 'SB chasing' and can be influenced by command 743 * line option --vex-guest-chase-thresh. 744 * 745 * To get an accurate count of the calls to a specific 746 * function, taking SB chasing into account, we need to 747 * check for each guest instruction (Ist_IMark) if it is 748 * the entry point of a function. 749 */ 750 tl_assert(clo_fnname); 751 tl_assert(clo_fnname[0]); 752 const HChar *fnname; 753 if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr, 754 &fnname) 755 && 0 == VG_(strcmp)(fnname, clo_fnname)) { 756 di = unsafeIRDirty_0_N( 757 0, "add_one_func_call", 758 VG_(fnptr_to_fnentry)( &add_one_func_call ), 759 mkIRExprVec_0() ); 760 addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); 761 } 762 } 763 if (clo_trace_mem) { 764 // WARNING: do not remove this function call, even if you 765 // aren't interested in instruction reads. See the comment 766 // above the function itself for more detail. 767 addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ), 768 st->Ist.IMark.len ); 769 } 770 addStmtToIRSB( sbOut, st ); 771 break; 772 773 case Ist_WrTmp: 774 // Add a call to trace_load() if --trace-mem=yes. 775 if (clo_trace_mem) { 776 IRExpr* data = st->Ist.WrTmp.data; 777 if (data->tag == Iex_Load) { 778 addEvent_Dr( sbOut, data->Iex.Load.addr, 779 sizeofIRType(data->Iex.Load.ty) ); 780 } 781 } 782 if (clo_detailed_counts) { 783 IRExpr* expr = st->Ist.WrTmp.data; 784 IRType type = typeOfIRExpr(sbOut->tyenv, expr); 785 tl_assert(type != Ity_INVALID); 786 switch (expr->tag) { 787 case Iex_Load: 788 instrument_detail( sbOut, OpLoad, type, NULL/*guard*/ ); 789 break; 790 case Iex_Unop: 791 case Iex_Binop: 792 case Iex_Triop: 793 case Iex_Qop: 794 case Iex_ITE: 795 instrument_detail( sbOut, OpAlu, type, NULL/*guard*/ ); 796 break; 797 default: 798 break; 799 } 800 } 801 addStmtToIRSB( sbOut, st ); 802 break; 803 804 case Ist_Store: { 805 IRExpr* data = st->Ist.Store.data; 806 IRType type = typeOfIRExpr(tyenv, data); 807 tl_assert(type != Ity_INVALID); 808 if (clo_trace_mem) { 809 addEvent_Dw( sbOut, st->Ist.Store.addr, 810 sizeofIRType(type) ); 811 } 812 if (clo_detailed_counts) { 813 instrument_detail( sbOut, OpStore, type, NULL/*guard*/ ); 814 } 815 addStmtToIRSB( sbOut, st ); 816 break; 817 } 818 819 case Ist_StoreG: { 820 IRStoreG* sg = st->Ist.StoreG.details; 821 IRExpr* data = sg->data; 822 IRType type = typeOfIRExpr(tyenv, data); 823 tl_assert(type != Ity_INVALID); 824 if (clo_trace_mem) { 825 addEvent_Dw_guarded( sbOut, sg->addr, 826 sizeofIRType(type), sg->guard ); 827 } 828 if (clo_detailed_counts) { 829 instrument_detail( sbOut, OpStore, type, sg->guard ); 830 } 831 addStmtToIRSB( sbOut, st ); 832 break; 833 } 834 835 case Ist_LoadG: { 836 IRLoadG* lg = st->Ist.LoadG.details; 837 IRType type = Ity_INVALID; /* loaded type */ 838 IRType typeWide = Ity_INVALID; /* after implicit widening */ 839 typeOfIRLoadGOp(lg->cvt, &typeWide, &type); 840 tl_assert(type != Ity_INVALID); 841 if (clo_trace_mem) { 842 addEvent_Dr_guarded( sbOut, lg->addr, 843 sizeofIRType(type), lg->guard ); 844 } 845 if (clo_detailed_counts) { 846 instrument_detail( sbOut, OpLoad, type, lg->guard ); 847 } 848 addStmtToIRSB( sbOut, st ); 849 break; 850 } 851 852 case Ist_Dirty: { 853 if (clo_trace_mem) { 854 Int dsize; 855 IRDirty* d = st->Ist.Dirty.details; 856 if (d->mFx != Ifx_None) { 857 // This dirty helper accesses memory. Collect the details. 858 tl_assert(d->mAddr != NULL); 859 tl_assert(d->mSize != 0); 860 dsize = d->mSize; 861 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) 862 addEvent_Dr( sbOut, d->mAddr, dsize ); 863 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) 864 addEvent_Dw( sbOut, d->mAddr, dsize ); 865 } else { 866 tl_assert(d->mAddr == NULL); 867 tl_assert(d->mSize == 0); 868 } 869 } 870 addStmtToIRSB( sbOut, st ); 871 break; 872 } 873 874 case Ist_CAS: { 875 /* We treat it as a read and a write of the location. I 876 think that is the same behaviour as it was before IRCAS 877 was introduced, since prior to that point, the Vex 878 front ends would translate a lock-prefixed instruction 879 into a (normal) read followed by a (normal) write. */ 880 Int dataSize; 881 IRType dataTy; 882 IRCAS* cas = st->Ist.CAS.details; 883 tl_assert(cas->addr != NULL); 884 tl_assert(cas->dataLo != NULL); 885 dataTy = typeOfIRExpr(tyenv, cas->dataLo); 886 dataSize = sizeofIRType(dataTy); 887 if (cas->dataHi != NULL) 888 dataSize *= 2; /* since it's a doubleword-CAS */ 889 if (clo_trace_mem) { 890 addEvent_Dr( sbOut, cas->addr, dataSize ); 891 addEvent_Dw( sbOut, cas->addr, dataSize ); 892 } 893 if (clo_detailed_counts) { 894 instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ ); 895 if (cas->dataHi != NULL) /* dcas */ 896 instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ ); 897 instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ ); 898 if (cas->dataHi != NULL) /* dcas */ 899 instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ ); 900 } 901 addStmtToIRSB( sbOut, st ); 902 break; 903 } 904 905 case Ist_LLSC: { 906 IRType dataTy; 907 if (st->Ist.LLSC.storedata == NULL) { 908 /* LL */ 909 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result); 910 if (clo_trace_mem) { 911 addEvent_Dr( sbOut, st->Ist.LLSC.addr, 912 sizeofIRType(dataTy) ); 913 /* flush events before LL, helps SC to succeed */ 914 flushEvents(sbOut); 915 } 916 if (clo_detailed_counts) 917 instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ ); 918 } else { 919 /* SC */ 920 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata); 921 if (clo_trace_mem) 922 addEvent_Dw( sbOut, st->Ist.LLSC.addr, 923 sizeofIRType(dataTy) ); 924 if (clo_detailed_counts) 925 instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ ); 926 } 927 addStmtToIRSB( sbOut, st ); 928 break; 929 } 930 931 case Ist_Exit: 932 if (clo_basic_counts) { 933 // The condition of a branch was inverted by VEX if a taken 934 // branch is in fact a fall trough according to client address 935 tl_assert(iaddr != 0); 936 dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 : 937 st->Ist.Exit.dst->Ico.U64; 938 condition_inverted = (dst == iaddr + ilen); 939 940 /* Count Jcc */ 941 if (!condition_inverted) 942 di = unsafeIRDirty_0_N( 0, "add_one_Jcc", 943 VG_(fnptr_to_fnentry)( &add_one_Jcc ), 944 mkIRExprVec_0() ); 945 else 946 di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc", 947 VG_(fnptr_to_fnentry)( 948 &add_one_inverted_Jcc ), 949 mkIRExprVec_0() ); 950 951 addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); 952 } 953 if (clo_trace_mem) { 954 flushEvents(sbOut); 955 } 956 957 addStmtToIRSB( sbOut, st ); // Original statement 958 959 if (clo_basic_counts) { 960 /* Count non-taken Jcc */ 961 if (!condition_inverted) 962 di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken", 963 VG_(fnptr_to_fnentry)( 964 &add_one_Jcc_untaken ), 965 mkIRExprVec_0() ); 966 else 967 di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken", 968 VG_(fnptr_to_fnentry)( 969 &add_one_inverted_Jcc_untaken ), 970 mkIRExprVec_0() ); 971 972 addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); 973 } 974 break; 975 976 default: 977 ppIRStmt(st); 978 tl_assert(0); 979 } 980 } 981 982 if (clo_basic_counts) { 983 /* Count this basic block. */ 984 di = unsafeIRDirty_0_N( 0, "add_one_SB_completed", 985 VG_(fnptr_to_fnentry)( &add_one_SB_completed ), 986 mkIRExprVec_0() ); 987 addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); 988 } 989 990 if (clo_trace_mem) { 991 /* At the end of the sbIn. Flush outstandings. */ 992 flushEvents(sbOut); 993 } 994 995 return sbOut; 996 } 997 998 static void lk_fini(Int exitcode) 999 { 1000 tl_assert(clo_fnname); 1001 tl_assert(clo_fnname[0]); 1002 1003 if (clo_basic_counts) { 1004 ULong total_Jccs = n_Jccs + n_IJccs; 1005 ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken; 1006 1007 VG_(umsg)("Counted %'llu call%s to %s()\n", 1008 n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname); 1009 1010 VG_(umsg)("\n"); 1011 VG_(umsg)("Jccs:\n"); 1012 VG_(umsg)(" total: %'llu\n", total_Jccs); 1013 VG_(umsg)(" taken: %'llu (%.0f%%)\n", 1014 taken_Jccs, taken_Jccs * 100.0 / total_Jccs ?: 1); 1015 1016 VG_(umsg)("\n"); 1017 VG_(umsg)("Executed:\n"); 1018 VG_(umsg)(" SBs entered: %'llu\n", n_SBs_entered); 1019 VG_(umsg)(" SBs completed: %'llu\n", n_SBs_completed); 1020 VG_(umsg)(" guest instrs: %'llu\n", n_guest_instrs); 1021 VG_(umsg)(" IRStmts: %'llu\n", n_IRStmts); 1022 1023 VG_(umsg)("\n"); 1024 VG_(umsg)("Ratios:\n"); 1025 tl_assert(n_SBs_entered); // Paranoia time. 1026 VG_(umsg)(" guest instrs : SB entered = %'llu : 10\n", 1027 10 * n_guest_instrs / n_SBs_entered); 1028 VG_(umsg)(" IRStmts : SB entered = %'llu : 10\n", 1029 10 * n_IRStmts / n_SBs_entered); 1030 tl_assert(n_guest_instrs); // Paranoia time. 1031 VG_(umsg)(" IRStmts : guest instr = %'llu : 10\n", 1032 10 * n_IRStmts / n_guest_instrs); 1033 } 1034 1035 if (clo_detailed_counts) { 1036 VG_(umsg)("\n"); 1037 VG_(umsg)("IR-level counts by type:\n"); 1038 print_details(); 1039 } 1040 1041 if (clo_basic_counts) { 1042 VG_(umsg)("\n"); 1043 VG_(umsg)("Exit code: %d\n", exitcode); 1044 } 1045 } 1046 1047 static void lk_pre_clo_init(void) 1048 { 1049 VG_(details_name) ("Lackey"); 1050 VG_(details_version) (NULL); 1051 VG_(details_description) ("an example Valgrind tool"); 1052 VG_(details_copyright_author)( 1053 "Copyright (C) 2002-2015, and GNU GPL'd, by Nicholas Nethercote."); 1054 VG_(details_bug_reports_to) (VG_BUGS_TO); 1055 VG_(details_avg_translation_sizeB) ( 200 ); 1056 1057 VG_(basic_tool_funcs) (lk_post_clo_init, 1058 lk_instrument, 1059 lk_fini); 1060 VG_(needs_command_line_options)(lk_process_cmd_line_option, 1061 lk_print_usage, 1062 lk_print_debug_usage); 1063 } 1064 1065 VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init) 1066 1067 /*--------------------------------------------------------------------*/ 1068 /*--- end lk_main.c ---*/ 1069 /*--------------------------------------------------------------------*/ 1070