1 //===-- msandr.cc ---------------------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is a part of MemorySanitizer. 11 // 12 // DynamoRio client for MemorySanitizer. 13 // 14 // MemorySanitizer requires that all program code is instrumented. Any memory 15 // store that can turn an uninitialized value into an initialized value must be 16 // observed by the tool, otherwise we risk reporting a false UMR. 17 // 18 // This also includes any libraries that the program depends on. 19 // 20 // In the case when rebuilding all program dependencies with MemorySanitizer is 21 // problematic, an experimental MSanDR tool (the code you are currently looking 22 // at) can be used. It is a DynamoRio-based tool that uses dynamic 23 // instrumentation to 24 // * Unpoison all memory stores. 25 // * Unpoison TLS slots used by MemorySanitizer to pass function arguments and 26 // return value shadow on anything that looks like a function call or a return 27 // from a function. 28 // 29 // This tool does not detect the use of uninitialized values in uninstrumented 30 // libraries. It merely gets rid of false positives by marking all data that 31 // passes through uninstrumented code as fully initialized. 32 //===----------------------------------------------------------------------===// 33 34 #include <dr_api.h> 35 #include <drutil.h> 36 #include <drmgr.h> 37 #include <drsyscall.h> 38 39 #include <sys/mman.h> 40 #include <sys/syscall.h> /* for SYS_mmap */ 41 42 #include <string.h> 43 44 // XXX: it seems setting macro in CMakeLists.txt does not work, 45 // so manually set it here now. 46 47 // Building msandr client for running in DynamoRIO hybrid mode, 48 // which allows some module running natively. 49 // TODO: turn it on by default when hybrid is stable enough 50 // #define MSANDR_NATIVE_EXEC 51 52 #ifndef MSANDR_NATIVE_EXEC 53 #include <algorithm> 54 #include <set> 55 #include <string> 56 #include <vector> 57 #endif 58 59 #define TESTALL(mask, var) (((mask) & (var)) == (mask)) 60 #define TESTANY(mask, var) (((mask) & (var)) != 0) 61 62 #define CHECK_IMPL(condition, file, line) \ 63 do { \ 64 if (!(condition)) { \ 65 dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line); \ 66 dr_abort(); \ 67 } \ 68 } while (0) // TODO: stacktrace 69 70 #define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__) 71 72 #define VERBOSITY 0 73 74 // Building msandr client for standalone test that does not need to 75 // run with msan build executables. Disable by default. 76 // #define MSANDR_STANDALONE_TEST 77 78 #define NUM_TLS_RETVAL 1 79 #define NUM_TLS_PARAM 6 80 81 #ifdef MSANDR_STANDALONE_TEST 82 // For testing purpose, we map app to shadow memory at [0x100000, 0x20000). 83 // Normally, the app starts at 0x400000: 84 // 00400000-004e0000 r-xp 00000000 fc:00 524343 /bin/bash 85 // so there should be no problem. 86 # define SHADOW_MEMORY_BASE ((void *)0x100000) 87 # define SHADOW_MEMORY_SIZE (0x100000) 88 # define SHADOW_MEMORY_MASK (SHADOW_MEMORY_SIZE - 4 /* to avoid overflow */) 89 #else 90 // shadow memory range [0x200000000000, 0x400000000000) 91 // assuming no app memory below 0x200000000000 92 # define SHADOW_MEMORY_MASK 0x3fffffffffffULL 93 #endif /* MSANDR_STANDALONE_TEST */ 94 95 typedef void *(*WrapperFn)(void *); 96 extern "C" void __msan_set_indirect_call_wrapper(WrapperFn wrapper); 97 extern "C" void __msan_dr_is_initialized(); 98 99 namespace { 100 101 int msan_retval_tls_offset; 102 int msan_param_tls_offset; 103 104 #ifndef MSANDR_NATIVE_EXEC 105 class ModuleData { 106 public: 107 ModuleData(); 108 ModuleData(const module_data_t *info); 109 // Yes, we want default copy, assign, and dtor semantics. 110 111 public: 112 app_pc start_; 113 app_pc end_; 114 // Full path to the module. 115 std::string path_; 116 module_handle_t handle_; 117 bool should_instrument_; 118 bool executed_; 119 }; 120 121 // A vector of loaded modules sorted by module bounds. We lookup the current PC 122 // in here from the bb event. This is better than an rb tree because the lookup 123 // is faster and the bb event occurs far more than the module load event. 124 std::vector<ModuleData> g_module_list; 125 126 ModuleData::ModuleData() 127 : start_(NULL), end_(NULL), path_(""), handle_(NULL), 128 should_instrument_(false), executed_(false) { 129 } 130 131 ModuleData::ModuleData(const module_data_t *info) 132 : start_(info->start), end_(info->end), path_(info->full_path), 133 handle_(info->handle), 134 // We'll check the black/white lists later and adjust this. 135 should_instrument_(true), executed_(false) { 136 } 137 #endif /* !MSANDR_NATIVE_EXEC */ 138 139 int(*__msan_get_retval_tls_offset)(); 140 int(*__msan_get_param_tls_offset)(); 141 void (*__msan_unpoison)(void *base, size_t size); 142 bool (*__msan_is_in_loader)(); 143 144 #ifdef MSANDR_STANDALONE_TEST 145 uint mock_msan_retval_tls_offset; 146 uint mock_msan_param_tls_offset; 147 static int mock_msan_get_retval_tls_offset() { 148 return (int)mock_msan_retval_tls_offset; 149 } 150 151 static int mock_msan_get_param_tls_offset() { 152 return (int)mock_msan_param_tls_offset; 153 } 154 155 static void mock_msan_unpoison(void *base, size_t size) { 156 /* do nothing */ 157 } 158 159 static bool mock_msan_is_in_loader() { 160 return false; 161 } 162 #endif /* MSANDR_STANDALONE_TEST */ 163 164 static generic_func_t LookupCallback(module_data_t *app, const char *name) { 165 #ifdef MSANDR_STANDALONE_TEST 166 if (strcmp("__msan_get_retval_tls_offset", name) == 0) { 167 return (generic_func_t)mock_msan_get_retval_tls_offset; 168 } else if (strcmp("__msan_get_param_tls_offset", name) == 0) { 169 return (generic_func_t)mock_msan_get_param_tls_offset; 170 } else if (strcmp("__msan_unpoison", name) == 0) { 171 return (generic_func_t)mock_msan_unpoison; 172 } else if (strcmp("__msan_is_in_loader", name) == 0) { 173 return (generic_func_t)mock_msan_is_in_loader; 174 } 175 CHECK(false); 176 return NULL; 177 #else /* !MSANDR_STANDALONE_TEST */ 178 generic_func_t callback = dr_get_proc_address(app->handle, name); 179 if (callback == NULL) { 180 dr_printf("Couldn't find `%s` in %s\n", name, app->full_path); 181 CHECK(callback); 182 } 183 return callback; 184 #endif /* !MSANDR_STANDALONE_TEST */ 185 } 186 187 void InitializeMSanCallbacks() { 188 module_data_t *app = dr_lookup_module_by_name(dr_get_application_name()); 189 if (!app) { 190 dr_printf("%s - oops, dr_lookup_module_by_name failed!\n", 191 dr_get_application_name()); 192 CHECK(app); 193 } 194 195 __msan_get_retval_tls_offset = (int (*)()) 196 LookupCallback(app, "__msan_get_retval_tls_offset"); 197 __msan_get_param_tls_offset = (int (*)()) 198 LookupCallback(app, "__msan_get_param_tls_offset"); 199 __msan_unpoison = (void(*)(void *, size_t)) 200 LookupCallback(app, "__msan_unpoison"); 201 __msan_is_in_loader = (bool (*)()) 202 LookupCallback(app, "__msan_is_in_loader"); 203 204 dr_free_module_data(app); 205 } 206 207 // FIXME: Handle absolute addresses and PC-relative addresses. 208 // FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have 209 // a zero base anyway. 210 bool OperandIsInteresting(opnd_t opnd) { 211 return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS && 212 opnd_get_segment(opnd) != DR_SEG_GS); 213 } 214 215 bool WantToInstrument(instr_t *instr) { 216 // TODO: skip push instructions? 217 switch (instr_get_opcode(instr)) { 218 // FIXME: support the instructions excluded below: 219 case OP_rep_cmps: 220 // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx 221 return false; 222 } 223 224 // Labels appear due to drutil_expand_rep_string() 225 if (instr_is_label(instr)) 226 return false; 227 228 CHECK(instr_ok_to_mangle(instr) == true); 229 230 if (instr_writes_memory(instr)) { 231 for (int d = 0; d < instr_num_dsts(instr); d++) { 232 opnd_t op = instr_get_dst(instr, d); 233 if (OperandIsInteresting(op)) 234 return true; 235 } 236 } 237 238 return false; 239 } 240 241 #define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what); 242 #define PREF(at, what) instrlist_meta_preinsert(bb, at, what); 243 244 void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op, 245 bool is_write) { 246 bool need_to_restore_eflags = false; 247 uint flags = instr_get_arith_flags(instr); 248 // TODO: do something smarter with flags and spills in general? 249 // For example, spill them only once for a sequence of instrumented 250 // instructions that don't change/read flags. 251 252 if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) { 253 if (VERBOSITY > 1) 254 dr_printf("Spilling eflags...\n"); 255 need_to_restore_eflags = true; 256 // TODO: Maybe sometimes don't need to 'seto'. 257 // TODO: Maybe sometimes don't want to spill XAX here? 258 // TODO: No need to spill XAX here if XAX is not used in the BB. 259 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 260 dr_save_arith_flags_to_xax(drcontext, bb, instr); 261 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); 262 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 263 } 264 265 #if 0 266 dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n", 267 opnd_is_memory_reference(op), opnd_is_base_disp(op), 268 opnd_is_base_disp(op) ? opnd_get_index(op) : -1, 269 opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op), 270 opnd_is_base_disp(op) ? opnd_get_disp(op) : -1); 271 #endif 272 273 reg_id_t R1; 274 bool address_in_R1 = false; 275 if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL && 276 opnd_get_disp(op) == 0) { 277 // If this is a simple access with no offset or index, we can just use the 278 // base for R1. 279 address_in_R1 = true; 280 R1 = opnd_get_base(op); 281 } else { 282 // Otherwise, we need to compute the addr into R1. 283 // TODO: reuse some spare register? e.g. r15 on x64 284 // TODO: might be used as a non-mem-ref register? 285 R1 = DR_REG_XAX; 286 } 287 CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong. 288 289 // Pick R2 from R8 to R15. 290 // It's OK if the instr uses R2 elsewhere, since we'll restore it before instr. 291 reg_id_t R2; 292 for (R2 = DR_REG_R8; R2 <= DR_REG_R15; R2++) { 293 if (!opnd_uses_reg(op, R2)) 294 break; 295 } 296 CHECK((R2 <= DR_REG_R15) && R1 != R2); 297 298 // Save the current values of R1 and R2. 299 dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); 300 // TODO: Something smarter than spilling a "fixed" register R2? 301 dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); 302 303 if (!address_in_R1) 304 CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2)); 305 PRE(instr, mov_imm(drcontext, opnd_create_reg(R2), 306 OPND_CREATE_INT64(SHADOW_MEMORY_MASK))); 307 PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2))); 308 #ifdef MSANDR_STANDALONE_TEST 309 PRE(instr, add(drcontext, opnd_create_reg(R1), 310 OPND_CREATE_INT32(SHADOW_MEMORY_BASE))); 311 #endif 312 // There is no mov_st of a 64-bit immediate, so... 313 opnd_size_t op_size = opnd_get_size(op); 314 CHECK(op_size != OPSZ_NA); 315 uint access_size = opnd_size_in_bytes(op_size); 316 if (access_size <= 4 || op_size == OPSZ_PTR /* x64 support sign extension */) { 317 instr_t *label = INSTR_CREATE_label(drcontext); 318 opnd_t immed; 319 if (op_size == OPSZ_PTR || op_size == OPSZ_4) 320 immed = OPND_CREATE_INT32(0); 321 else 322 immed = opnd_create_immed_int((ptr_int_t) 0, op_size); 323 // we check if target is 0 before write to reduce unnecessary memory stores. 324 PRE(instr, cmp(drcontext, 325 opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size), 326 immed)); 327 PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label))); 328 PRE(instr, mov_st(drcontext, 329 opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size), 330 immed)); 331 PREF(instr, label); 332 } else { 333 // FIXME: tail? 334 for (uint ofs = 0; ofs < access_size; ofs += 4) { 335 instr_t *label = INSTR_CREATE_label(drcontext); 336 opnd_t immed = OPND_CREATE_INT32(0); 337 PRE(instr, cmp(drcontext, OPND_CREATE_MEM32(R1, ofs), immed)); 338 PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label))); 339 PRE(instr, mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), immed)); 340 PREF(instr, label) 341 } 342 } 343 344 // Restore the registers and flags. 345 dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); 346 dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); 347 348 // TODO: move aflags save/restore to per instr instead of per opnd 349 if (need_to_restore_eflags) { 350 if (VERBOSITY > 1) 351 dr_printf("Restoring eflags\n"); 352 // TODO: Check if it's reverse to the dr_restore_reg above and optimize. 353 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 354 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); 355 dr_restore_arith_flags_from_xax(drcontext, bb, instr); 356 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 357 } 358 359 // The original instruction is left untouched. The above instrumentation is just 360 // a prefix. 361 } 362 363 void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) { 364 #ifdef MSANDR_STANDALONE_TEST 365 PRE(instr, 366 mov_st(drcontext, 367 opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */, 368 DR_REG_NULL, DR_REG_NULL, 369 0, msan_retval_tls_offset, 370 OPSZ_PTR), 371 OPND_CREATE_INT32(0))); 372 #else /* !MSANDR_STANDALONE_TEST */ 373 # ifdef MSANDR_NATIVE_EXEC 374 /* For optimized native exec, -mangle_app_seg and -private_loader are turned off, 375 * so we can reference msan_retval_tls_offset directly. 376 */ 377 PRE(instr, 378 mov_st(drcontext, 379 opnd_create_far_base_disp(DR_SEG_FS, DR_REG_NULL, DR_REG_NULL, 0, 380 msan_retval_tls_offset, OPSZ_PTR), 381 OPND_CREATE_INT32(0))); 382 # else /* !MSANDR_NATIVE_EXEC */ 383 /* XXX: the code below only works if -mangle_app_seg and -private_loader, 384 * which is turned off for optimized native exec 385 */ 386 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 387 388 // Clobbers nothing except xax. 389 bool res = 390 dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); 391 CHECK(res); 392 393 // TODO: unpoison more bytes? 394 PRE(instr, 395 mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset), 396 OPND_CREATE_INT32(0))); 397 398 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 399 # endif /* !MSANDR_NATIVE_EXEC */ 400 // The original instruction is left untouched. The above instrumentation is just 401 // a prefix. 402 #endif /* !MSANDR_STANDALONE_TEST */ 403 } 404 405 void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb, 406 instr_t *instr) { 407 #ifdef MSANDR_STANDALONE_TEST 408 for (int i = 0; i < NUM_TLS_PARAM; ++i) { 409 PRE(instr, 410 mov_st(drcontext, 411 opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */, 412 DR_REG_NULL, DR_REG_NULL, 413 0, 414 msan_param_tls_offset + 415 i * sizeof(void *), 416 OPSZ_PTR), 417 OPND_CREATE_INT32(0))); 418 } 419 #else /* !MSANDR_STANDALONE_TEST */ 420 # ifdef MSANDR_NATIVE_EXEC 421 for (int i = 0; i < NUM_TLS_PARAM; ++i) { 422 PRE(instr, 423 mov_st(drcontext, 424 opnd_create_far_base_disp(DR_SEG_FS, DR_REG_NULL, DR_REG_NULL, 0, 425 msan_param_tls_offset + i*sizeof(void*), 426 OPSZ_PTR), 427 OPND_CREATE_INT32(0))); 428 } 429 # else /* !MSANDR_NATIVE_EXEC */ 430 /* XXX: the code below only works if -mangle_app_seg and -private_loader, 431 * which is turned off for optimized native exec 432 */ 433 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 434 435 // Clobbers nothing except xax. 436 bool res = 437 dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); 438 CHECK(res); 439 440 // TODO: unpoison more bytes? 441 for (int i = 0; i < NUM_TLS_PARAM; ++i) { 442 PRE(instr, 443 mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset + 444 i * sizeof(void *)), 445 OPND_CREATE_INT32(0))); 446 } 447 448 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 449 # endif /* !MSANDR_NATIVE_EXEC */ 450 // The original instruction is left untouched. The above instrumentation is just 451 // a prefix. 452 #endif /* !MSANDR_STANDALONE_TEST */ 453 } 454 455 #ifndef MSANDR_NATIVE_EXEC 456 // For use with binary search. Modules shouldn't overlap, so we shouldn't have 457 // to look at end_. If that can happen, we won't support such an application. 458 bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) { 459 return left.start_ < right.start_; 460 } 461 462 // Look up the module containing PC. Should be relatively fast, as its called 463 // for each bb instrumentation. 464 ModuleData *LookupModuleByPC(app_pc pc) { 465 ModuleData fake_mod_data; 466 fake_mod_data.start_ = pc; 467 std::vector<ModuleData>::iterator it = 468 lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data, 469 ModuleDataCompareStart); 470 // if (it == g_module_list.end()) 471 // return NULL; 472 if (it == g_module_list.end() || pc < it->start_) 473 --it; 474 CHECK(it->start_ <= pc); 475 if (pc >= it->end_) { 476 // We're past the end of this module. We shouldn't be in the next module, 477 // or lower_bound lied to us. 478 ++it; 479 CHECK(it == g_module_list.end() || pc < it->start_); 480 return NULL; 481 } 482 483 // OK, we found the module. 484 return &*it; 485 } 486 487 bool ShouldInstrumentNonModuleCode() { return true; } 488 489 bool ShouldInstrumentModule(ModuleData *mod_data) { 490 // TODO(rnk): Flags for blacklist would get wired in here. 491 generic_func_t p = 492 dr_get_proc_address(mod_data->handle_, "__msan_track_origins"); 493 return !p; 494 } 495 496 bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) { 497 ModuleData *mod_data = LookupModuleByPC(pc); 498 if (pmod_data) 499 *pmod_data = mod_data; 500 if (mod_data != NULL) { 501 // This module is on a blacklist. 502 if (!mod_data->should_instrument_) { 503 return false; 504 } 505 } else if (!ShouldInstrumentNonModuleCode()) { 506 return false; 507 } 508 return true; 509 } 510 #endif /* !MSANDR_NATIVE_CLIENT */ 511 512 // TODO(rnk): Make sure we instrument after __msan_init. 513 dr_emit_flags_t 514 event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb, 515 bool for_trace, bool translating) { 516 #ifndef MSANDR_NATIVE_EXEC 517 app_pc pc = dr_fragment_app_pc(tag); 518 if (ShouldInstrumentPc(pc, NULL)) 519 CHECK(drutil_expand_rep_string(drcontext, bb)); 520 #else /* MSANDR_NATIVE_EXEC */ 521 CHECK(drutil_expand_rep_string(drcontext, bb)); 522 #endif /* MSANDR_NATIVE_EXEC */ 523 return DR_EMIT_PERSISTABLE; 524 } 525 526 dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb, 527 bool for_trace, bool translating) { 528 app_pc pc = dr_fragment_app_pc(tag); 529 #ifndef MSANDR_NATIVE_EXEC 530 ModuleData *mod_data; 531 532 if (!ShouldInstrumentPc(pc, &mod_data)) 533 return DR_EMIT_PERSISTABLE; 534 535 if (VERBOSITY > 1) 536 dr_printf("============================================================\n"); 537 if (VERBOSITY > 0) { 538 std::string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>"); 539 if (mod_data && !mod_data->executed_) { 540 mod_data->executed_ = true; // Nevermind this race. 541 dr_printf("Executing from new module: %s\n", mod_path.c_str()); 542 } 543 dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc, 544 mod_path.c_str(), translating ? "true" : "false"); 545 if (mod_data) { 546 // Match standard sanitizer trace format for free symbols. 547 // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) 548 dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(), 549 pc - mod_data->start_); 550 } 551 } 552 #endif /* !MSANDR_NATIVE_EXEC */ 553 554 if (VERBOSITY > 1) { 555 instrlist_disassemble(drcontext, pc, bb, STDOUT); 556 instr_t *instr; 557 for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) { 558 dr_printf("opcode: %d\n", instr_get_opcode(instr)); 559 } 560 } 561 562 for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) { 563 int opcode = instr_get_opcode(i); 564 if (opcode == OP_ret || opcode == OP_ret_far) { 565 InstrumentReturn(drcontext, bb, i); 566 continue; 567 } 568 569 // These instructions hopefully cover all cases where control is transferred 570 // to a function in a different module (we only care about calls into 571 // compiler-instrumented modules). 572 // * call_ind is used for normal indirect calls. 573 // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT 574 // stub includes a jump to an address from GOT). 575 if (opcode == OP_call_ind || opcode == OP_call_far_ind || 576 opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) { 577 InstrumentIndirectBranch(drcontext, bb, i); 578 continue; 579 } 580 581 if (!WantToInstrument(i)) 582 continue; 583 584 if (VERBOSITY > 1) { 585 app_pc orig_pc = dr_fragment_app_pc(tag); 586 uint flags = instr_get_arith_flags(i); 587 dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n", 588 instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags); 589 } 590 591 if (instr_writes_memory(i)) { 592 // Instrument memory writes 593 // bool instrumented_anything = false; 594 for (int d = 0; d < instr_num_dsts(i); d++) { 595 opnd_t op = instr_get_dst(i, d); 596 if (!OperandIsInteresting(op)) 597 continue; 598 599 // CHECK(!instrumented_anything); 600 // instrumented_anything = true; 601 InstrumentMops(drcontext, bb, i, op, true); 602 break; // only instrumenting the first dst 603 } 604 } 605 } 606 607 // TODO: optimize away redundant restore-spill pairs? 608 609 if (VERBOSITY > 1) { 610 pc = dr_fragment_app_pc(tag); 611 dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc); 612 instrlist_disassemble(drcontext, pc, bb, STDOUT); 613 } 614 return DR_EMIT_PERSISTABLE; 615 } 616 617 #ifndef MSANDR_NATIVE_EXEC 618 void event_module_load(void *drcontext, const module_data_t *info, 619 bool loaded) { 620 // Insert the module into the list while maintaining the ordering. 621 ModuleData mod_data(info); 622 std::vector<ModuleData>::iterator it = 623 upper_bound(g_module_list.begin(), g_module_list.end(), mod_data, 624 ModuleDataCompareStart); 625 it = g_module_list.insert(it, mod_data); 626 // Check if we should instrument this module. 627 it->should_instrument_ = ShouldInstrumentModule(&*it); 628 dr_module_set_should_instrument(info->handle, it->should_instrument_); 629 630 if (VERBOSITY > 0) 631 dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n", 632 info->full_path, info->start, info->end, 633 it->should_instrument_ ? "on" : "off"); 634 } 635 636 void event_module_unload(void *drcontext, const module_data_t *info) { 637 if (VERBOSITY > 0) 638 dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path, 639 info->start, info->end); 640 641 // Remove the module from the list. 642 ModuleData mod_data(info); 643 std::vector<ModuleData>::iterator it = 644 lower_bound(g_module_list.begin(), g_module_list.end(), mod_data, 645 ModuleDataCompareStart); 646 // It's a bug if we didn't actually find the module. 647 CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ && 648 it->end_ == mod_data.end_ && it->path_ == mod_data.path_); 649 g_module_list.erase(it); 650 } 651 #endif /* !MSANDR_NATIVE_EXEC */ 652 653 void event_exit() { 654 // Clean up so DR doesn't tell us we're leaking memory. 655 drsys_exit(); 656 drutil_exit(); 657 drmgr_exit(); 658 659 #ifdef MSANDR_STANDALONE_TEST 660 /* free tls */ 661 bool res; 662 res = dr_raw_tls_cfree(msan_retval_tls_offset, NUM_TLS_RETVAL); 663 CHECK(res); 664 res = dr_raw_tls_cfree(msan_param_tls_offset, NUM_TLS_PARAM); 665 CHECK(res); 666 /* we do not bother to free the shadow memory */ 667 #endif /* !MSANDR_STANDALONE_TEST */ 668 if (VERBOSITY > 0) 669 dr_printf("==DRMSAN== DONE\n"); 670 } 671 672 bool event_filter_syscall(void *drcontext, int sysnum) { 673 // FIXME: only intercept syscalls with memory effects. 674 return true; /* intercept everything */ 675 } 676 677 bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) { 678 CHECK(arg->valid); 679 680 if (arg->pre) 681 return true; 682 if (!TESTANY(DRSYS_PARAM_OUT, arg->mode)) 683 return true; 684 685 size_t sz = arg->size; 686 687 if (sz > 0xFFFFFFFF) { 688 drmf_status_t res; 689 drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; 690 const char *name; 691 res = drsys_syscall_name(syscall, &name); 692 CHECK(res == DRMF_SUCCESS); 693 694 dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!" 695 " Clipping to %llu.\n", 696 name, arg->ordinal, (unsigned long long) sz, 697 (unsigned long long)(sz & 0xFFFFFFFF)); 698 } 699 700 if (VERBOSITY > 0) { 701 drmf_status_t res; 702 drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; 703 const char *name; 704 res = drsys_syscall_name(syscall, &name); 705 CHECK(res == DRMF_SUCCESS); 706 dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n", 707 name, arg->ordinal, arg->start_addr, 708 (char *)arg->start_addr + sz); 709 } 710 711 // We don't switch to the app context because __msan_unpoison() doesn't need 712 // TLS segments. 713 __msan_unpoison(arg->start_addr, sz); 714 715 return true; /* keep going */ 716 } 717 718 bool event_pre_syscall(void *drcontext, int sysnum) { 719 drsys_syscall_t *syscall; 720 drsys_sysnum_t sysnum_full; 721 bool known; 722 drsys_param_type_t ret_type; 723 drmf_status_t res; 724 const char *name; 725 726 res = drsys_cur_syscall(drcontext, &syscall); 727 CHECK(res == DRMF_SUCCESS); 728 729 res = drsys_syscall_number(syscall, &sysnum_full); 730 CHECK(res == DRMF_SUCCESS); 731 CHECK(sysnum == sysnum_full.number); 732 733 res = drsys_syscall_is_known(syscall, &known); 734 CHECK(res == DRMF_SUCCESS); 735 736 res = drsys_syscall_name(syscall, &name); 737 CHECK(res == DRMF_SUCCESS); 738 739 res = drsys_syscall_return_type(syscall, &ret_type); 740 CHECK(res == DRMF_SUCCESS); 741 CHECK(ret_type != DRSYS_TYPE_INVALID); 742 CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN); 743 744 res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL); 745 CHECK(res == DRMF_SUCCESS); 746 747 return true; 748 } 749 750 static bool IsInLoader(void *drcontext) { 751 // TODO: This segment swap is inefficient. DR should just let us query the 752 // app segment base, which it has. Alternatively, if we disable 753 // -mangle_app_seg, then we won't need the swap. 754 bool need_swap = !dr_using_app_state(drcontext); 755 if (need_swap) 756 dr_switch_to_app_state(drcontext); 757 bool is_in_loader = __msan_is_in_loader(); 758 if (need_swap) 759 dr_switch_to_dr_state(drcontext); 760 return is_in_loader; 761 } 762 763 void event_post_syscall(void *drcontext, int sysnum) { 764 drsys_syscall_t *syscall; 765 drsys_sysnum_t sysnum_full; 766 bool success = false; 767 drmf_status_t res; 768 769 res = drsys_cur_syscall(drcontext, &syscall); 770 CHECK(res == DRMF_SUCCESS); 771 772 res = drsys_syscall_number(syscall, &sysnum_full); 773 CHECK(res == DRMF_SUCCESS); 774 CHECK(sysnum == sysnum_full.number); 775 776 res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext), 777 &success); 778 CHECK(res == DRMF_SUCCESS); 779 780 if (success) { 781 res = 782 drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall); 783 CHECK(res == DRMF_SUCCESS); 784 } 785 786 // Our normal mmap interceptor can't intercept calls from the loader itself. 787 // This means we don't clear the shadow for calls to dlopen. For now, we 788 // solve this by intercepting mmap from ld.so here, but ideally we'd have a 789 // solution that doesn't rely on msandr. 790 // 791 // Be careful not to intercept maps done by the msan rtl. Otherwise we end up 792 // unpoisoning vast regions of memory and OOMing. 793 // TODO: __msan_unpoison() could "flush" large regions of memory like tsan 794 // does instead of doing a large memset. However, we need the memory to be 795 // zeroed, where as tsan does not, so plain madvise is not enough. 796 if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) { 797 if (IsInLoader(drcontext)) { 798 app_pc base = (app_pc)dr_syscall_get_result(drcontext); 799 ptr_uint_t size; 800 drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size); 801 CHECK(res == DRMF_SUCCESS); 802 if (VERBOSITY > 0) 803 dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size); 804 // We don't switch to the app context because __msan_unpoison() doesn't 805 // need TLS segments. 806 __msan_unpoison(base, size); 807 } 808 } 809 } 810 811 } // namespace 812 813 DR_EXPORT void dr_init(client_id_t id) { 814 drmf_status_t res; 815 816 drmgr_init(); 817 drutil_init(); 818 819 #ifndef MSANDR_NATIVE_EXEC 820 // We should use drconfig to ignore these applications. 821 std::string app_name = dr_get_application_name(); 822 // This blacklist will still run these apps through DR's code cache. On the 823 // other hand, we are able to follow children of these apps. 824 // FIXME: Once DR has detach, we could just detach here. Alternatively, 825 // if DR had a fork or exec hook to let us decide there, that would be nice. 826 // FIXME: make the blacklist cmd-adjustable. 827 if (app_name == "python" || app_name == "python2.7" || app_name == "bash" || 828 app_name == "sh" || app_name == "true" || app_name == "exit" || 829 app_name == "yes" || app_name == "echo") 830 return; 831 #endif /* !MSANDR_NATIVE_EXEC */ 832 833 drsys_options_t ops; 834 memset(&ops, 0, sizeof(ops)); 835 ops.struct_size = sizeof(ops); 836 ops.analyze_unknown_syscalls = false; 837 838 res = drsys_init(id, &ops); 839 CHECK(res == DRMF_SUCCESS); 840 841 dr_register_filter_syscall_event(event_filter_syscall); 842 drmgr_register_pre_syscall_event(event_pre_syscall); 843 drmgr_register_post_syscall_event(event_post_syscall); 844 res = drsys_filter_all_syscalls(); 845 CHECK(res == DRMF_SUCCESS); 846 847 #ifdef MSANDR_STANDALONE_TEST 848 reg_id_t reg_seg; 849 /* alloc tls */ 850 if (!dr_raw_tls_calloc(®_seg, &mock_msan_retval_tls_offset, NUM_TLS_RETVAL, 0)) 851 CHECK(false); 852 CHECK(reg_seg == DR_SEG_GS /* x64 only! */); 853 if (!dr_raw_tls_calloc(®_seg, &mock_msan_param_tls_offset, NUM_TLS_PARAM, 0)) 854 CHECK(false); 855 CHECK(reg_seg == DR_SEG_GS /* x64 only! */); 856 /* alloc shadow memory */ 857 if (mmap(SHADOW_MEMORY_BASE, SHADOW_MEMORY_SIZE, PROT_READ|PROT_WRITE, 858 MAP_PRIVATE | MAP_ANON, -1, 0) != SHADOW_MEMORY_BASE) { 859 CHECK(false); 860 } 861 #endif /* MSANDR_STANDALONE_TEST */ 862 InitializeMSanCallbacks(); 863 864 // FIXME: the shadow is initialized earlier when DR calls one of our wrapper 865 // functions. This may change one day. 866 // TODO: make this more robust. 867 868 void *drcontext = dr_get_current_drcontext(); 869 870 dr_switch_to_app_state(drcontext); 871 msan_retval_tls_offset = __msan_get_retval_tls_offset(); 872 msan_param_tls_offset = __msan_get_param_tls_offset(); 873 dr_switch_to_dr_state(drcontext); 874 if (VERBOSITY > 0) { 875 dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset); 876 dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset); 877 } 878 879 // Standard DR events. 880 dr_register_exit_event(event_exit); 881 882 drmgr_priority_t priority = { 883 sizeof(priority), /* size of struct */ 884 "msandr", /* name of our operation */ 885 NULL, /* optional name of operation we should precede */ 886 NULL, /* optional name of operation we should follow */ 887 0 888 }; /* numeric priority */ 889 890 drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority); 891 drmgr_register_bb_instru2instru_event(event_basic_block, &priority); 892 #ifndef MSANDR_NATIVE_EXEC 893 drmgr_register_module_load_event(event_module_load); 894 drmgr_register_module_unload_event(event_module_unload); 895 #endif /* MSANDR_NATIVE_EXEC */ 896 __msan_dr_is_initialized(); 897 __msan_set_indirect_call_wrapper(dr_app_handle_mbr_target); 898 if (VERBOSITY > 0) 899 dr_printf("==MSANDR== Starting!\n"); 900 } 901