1 //===-- msandr.cc ---------------------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is a part of MemorySanitizer. 11 // 12 // DynamoRio client for MemorySanitizer. 13 // 14 // MemorySanitizer requires that all program code is instrumented. Any memory 15 // store that can turn an uninitialized value into an initialized value must be 16 // observed by the tool, otherwise we risk reporting a false UMR. 17 // 18 // This also includes any libraries that the program depends on. 19 // 20 // In the case when rebuilding all program dependencies with MemorySanitizer is 21 // problematic, an experimental MSanDR tool (the code you are currently looking 22 // at) can be used. It is a DynamoRio-based tool that uses dynamic 23 // instrumentation to 24 // * Unpoison all memory stores. 25 // * Unpoison TLS slots used by MemorySanitizer to pass function arguments and 26 // return value shadow on anything that looks like a function call or a return 27 // from a function. 28 // 29 // This tool does not detect the use of uninitialized values in uninstrumented 30 // libraries. It merely gets rid of false positives by marking all data that 31 // passes through uninstrumented code as fully initialized. 32 //===----------------------------------------------------------------------===// 33 34 #include <dr_api.h> 35 #include <drutil.h> 36 #include <drmgr.h> 37 #include <drsyscall.h> 38 39 #include <sys/mman.h> 40 #include <sys/syscall.h> /* for SYS_mmap */ 41 42 #include <algorithm> 43 #include <string> 44 #include <set> 45 #include <vector> 46 #include <string.h> 47 48 using std::string; 49 50 #define TESTALL(mask, var) (((mask) & (var)) == (mask)) 51 #define TESTANY(mask, var) (((mask) & (var)) != 0) 52 53 #define CHECK_IMPL(condition, file, line) \ 54 do { \ 55 if (!(condition)) { \ 56 dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line); \ 57 dr_abort(); \ 58 } \ 59 } while (0) // TODO: stacktrace 60 61 #define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__) 62 63 #define VERBOSITY 0 64 65 namespace { 66 67 class ModuleData { 68 public: 69 ModuleData(); 70 ModuleData(const module_data_t *info); 71 // Yes, we want default copy, assign, and dtor semantics. 72 73 public: 74 app_pc start_; 75 app_pc end_; 76 // Full path to the module. 77 string path_; 78 module_handle_t handle_; 79 bool should_instrument_; 80 bool executed_; 81 }; 82 83 string g_app_path; 84 85 int msan_retval_tls_offset; 86 int msan_param_tls_offset; 87 88 // A vector of loaded modules sorted by module bounds. We lookup the current PC 89 // in here from the bb event. This is better than an rb tree because the lookup 90 // is faster and the bb event occurs far more than the module load event. 91 std::vector<ModuleData> g_module_list; 92 93 ModuleData::ModuleData() 94 : start_(NULL), end_(NULL), path_(""), handle_(NULL), 95 should_instrument_(false), executed_(false) { 96 } 97 98 ModuleData::ModuleData(const module_data_t *info) 99 : start_(info->start), end_(info->end), path_(info->full_path), 100 handle_(info->handle), 101 // We'll check the black/white lists later and adjust this. 102 should_instrument_(true), executed_(false) { 103 } 104 105 int(*__msan_get_retval_tls_offset)(); 106 int(*__msan_get_param_tls_offset)(); 107 void (*__msan_unpoison)(void *base, size_t size); 108 bool (*__msan_is_in_loader)(); 109 110 static generic_func_t LookupCallback(module_data_t *app, const char *name) { 111 generic_func_t callback = dr_get_proc_address(app->handle, name); 112 if (callback == NULL) { 113 dr_printf("Couldn't find `%s` in %s\n", name, app->full_path); 114 CHECK(callback); 115 } 116 return callback; 117 } 118 119 void InitializeMSanCallbacks() { 120 module_data_t *app = dr_lookup_module_by_name(dr_get_application_name()); 121 if (!app) { 122 dr_printf("%s - oops, dr_lookup_module_by_name failed!\n", 123 dr_get_application_name()); 124 CHECK(app); 125 } 126 g_app_path = app->full_path; 127 128 __msan_get_retval_tls_offset = (int (*)()) 129 LookupCallback(app, "__msan_get_retval_tls_offset"); 130 __msan_get_param_tls_offset = (int (*)()) 131 LookupCallback(app, "__msan_get_param_tls_offset"); 132 __msan_unpoison = (void(*)(void *, size_t)) 133 LookupCallback(app, "__msan_unpoison"); 134 __msan_is_in_loader = (bool (*)()) 135 LookupCallback(app, "__msan_is_in_loader"); 136 137 dr_free_module_data(app); 138 } 139 140 // FIXME: Handle absolute addresses and PC-relative addresses. 141 // FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have 142 // a zero base anyway. 143 bool OperandIsInteresting(opnd_t opnd) { 144 return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS && 145 opnd_get_segment(opnd) != DR_SEG_GS); 146 } 147 148 bool WantToInstrument(instr_t *instr) { 149 // TODO: skip push instructions? 150 switch (instr_get_opcode(instr)) { 151 // FIXME: support the instructions excluded below: 152 case OP_rep_cmps: 153 // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx 154 return false; 155 } 156 157 // Labels appear due to drutil_expand_rep_string() 158 if (instr_is_label(instr)) 159 return false; 160 161 CHECK(instr_ok_to_mangle(instr) == true); 162 163 if (instr_writes_memory(instr)) { 164 for (int d = 0; d < instr_num_dsts(instr); d++) { 165 opnd_t op = instr_get_dst(instr, d); 166 if (OperandIsInteresting(op)) 167 return true; 168 } 169 } 170 171 return false; 172 } 173 174 #define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what); 175 #define PREF(at, what) instrlist_meta_preinsert(bb, at, what); 176 177 void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op, 178 bool is_write) { 179 bool need_to_restore_eflags = false; 180 uint flags = instr_get_arith_flags(instr); 181 // TODO: do something smarter with flags and spills in general? 182 // For example, spill them only once for a sequence of instrumented 183 // instructions that don't change/read flags. 184 185 if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) { 186 if (VERBOSITY > 1) 187 dr_printf("Spilling eflags...\n"); 188 need_to_restore_eflags = true; 189 // TODO: Maybe sometimes don't need to 'seto'. 190 // TODO: Maybe sometimes don't want to spill XAX here? 191 // TODO: No need to spill XAX here if XAX is not used in the BB. 192 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 193 dr_save_arith_flags_to_xax(drcontext, bb, instr); 194 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); 195 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 196 } 197 198 #if 0 199 dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n", 200 opnd_is_memory_reference(op), opnd_is_base_disp(op), 201 opnd_is_base_disp(op) ? opnd_get_index(op) : -1, 202 opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op), 203 opnd_is_base_disp(op) ? opnd_get_disp(op) : -1); 204 #endif 205 206 reg_id_t R1; 207 bool address_in_R1 = false; 208 if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL && 209 opnd_get_disp(op) == 0) { 210 // If this is a simple access with no offset or index, we can just use the 211 // base for R1. 212 address_in_R1 = true; 213 R1 = opnd_get_base(op); 214 } else { 215 // Otherwise, we need to compute the addr into R1. 216 // TODO: reuse some spare register? e.g. r15 on x64 217 // TODO: might be used as a non-mem-ref register? 218 R1 = DR_REG_XAX; 219 } 220 CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong. 221 222 // Pick R2 that's not R1 or used by the operand. It's OK if the instr uses 223 // R2 elsewhere, since we'll restore it before instr. 224 reg_id_t GPR_TO_USE_FOR_R2[] = { 225 DR_REG_XAX, DR_REG_XBX, DR_REG_XCX, DR_REG_XDX 226 // Don't forget to update the +4 below if you add anything else! 227 }; 228 std::set<reg_id_t> unused_registers(GPR_TO_USE_FOR_R2, GPR_TO_USE_FOR_R2 + 4); 229 unused_registers.erase(R1); 230 for (int j = 0; j < opnd_num_regs_used(op); j++) { 231 unused_registers.erase(opnd_get_reg_used(op, j)); 232 } 233 234 CHECK(unused_registers.size() > 0); 235 reg_id_t R2 = *unused_registers.begin(); 236 CHECK(R1 != R2); 237 238 // Save the current values of R1 and R2. 239 dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); 240 // TODO: Something smarter than spilling a "fixed" register R2? 241 dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); 242 243 if (!address_in_R1) 244 CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2)); 245 PRE(instr, mov_imm(drcontext, opnd_create_reg(R2), 246 OPND_CREATE_INT64(0xffffbfffffffffff))); 247 PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2))); 248 // There is no mov_st of a 64-bit immediate, so... 249 opnd_size_t op_size = opnd_get_size(op); 250 CHECK(op_size != OPSZ_NA); 251 uint access_size = opnd_size_in_bytes(op_size); 252 if (access_size <= 4) { 253 PRE(instr, 254 mov_st(drcontext, opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size), 255 opnd_create_immed_int((ptr_int_t) 0, op_size))); 256 } else { 257 // FIXME: tail? 258 for (uint ofs = 0; ofs < access_size; ofs += 4) { 259 PRE(instr, 260 mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), OPND_CREATE_INT32(0))); 261 } 262 } 263 264 // Restore the registers and flags. 265 dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); 266 dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); 267 268 if (need_to_restore_eflags) { 269 if (VERBOSITY > 1) 270 dr_printf("Restoring eflags\n"); 271 // TODO: Check if it's reverse to the dr_restore_reg above and optimize. 272 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 273 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); 274 dr_restore_arith_flags_from_xax(drcontext, bb, instr); 275 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 276 } 277 278 // The original instruction is left untouched. The above instrumentation is just 279 // a prefix. 280 } 281 282 void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) { 283 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 284 285 // Clobbers nothing except xax. 286 bool res = 287 dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); 288 CHECK(res); 289 290 // TODO: unpoison more bytes? 291 PRE(instr, 292 mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset), 293 OPND_CREATE_INT32(0))); 294 295 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 296 297 // The original instruction is left untouched. The above instrumentation is just 298 // a prefix. 299 } 300 301 void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb, 302 instr_t *instr) { 303 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 304 305 // Clobbers nothing except xax. 306 bool res = 307 dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); 308 CHECK(res); 309 310 // TODO: unpoison more bytes? 311 for (int i = 0; i < 6; ++i) { 312 PRE(instr, 313 mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset + 314 i * sizeof(void *)), 315 OPND_CREATE_INT32(0))); 316 } 317 318 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); 319 320 // The original instruction is left untouched. The above instrumentation is just 321 // a prefix. 322 } 323 324 // For use with binary search. Modules shouldn't overlap, so we shouldn't have 325 // to look at end_. If that can happen, we won't support such an application. 326 bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) { 327 return left.start_ < right.start_; 328 } 329 330 // Look up the module containing PC. Should be relatively fast, as its called 331 // for each bb instrumentation. 332 ModuleData *LookupModuleByPC(app_pc pc) { 333 ModuleData fake_mod_data; 334 fake_mod_data.start_ = pc; 335 std::vector<ModuleData>::iterator it = 336 lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data, 337 ModuleDataCompareStart); 338 // if (it == g_module_list.end()) 339 // return NULL; 340 if (it == g_module_list.end() || pc < it->start_) 341 --it; 342 CHECK(it->start_ <= pc); 343 if (pc >= it->end_) { 344 // We're past the end of this module. We shouldn't be in the next module, 345 // or lower_bound lied to us. 346 ++it; 347 CHECK(it == g_module_list.end() || pc < it->start_); 348 return NULL; 349 } 350 351 // OK, we found the module. 352 return &*it; 353 } 354 355 bool ShouldInstrumentNonModuleCode() { return true; } 356 357 bool ShouldInstrumentModule(ModuleData *mod_data) { 358 // TODO(rnk): Flags for blacklist would get wired in here. 359 generic_func_t p = 360 dr_get_proc_address(mod_data->handle_, "__msan_track_origins"); 361 return !p; 362 } 363 364 bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) { 365 ModuleData *mod_data = LookupModuleByPC(pc); 366 if (pmod_data) 367 *pmod_data = mod_data; 368 if (mod_data != NULL) { 369 // This module is on a blacklist. 370 if (!mod_data->should_instrument_) { 371 return false; 372 } 373 } else if (!ShouldInstrumentNonModuleCode()) { 374 return false; 375 } 376 return true; 377 } 378 379 // TODO(rnk): Make sure we instrument after __msan_init. 380 dr_emit_flags_t 381 event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb, 382 bool for_trace, bool translating) { 383 app_pc pc = dr_fragment_app_pc(tag); 384 385 if (ShouldInstrumentPc(pc, NULL)) 386 CHECK(drutil_expand_rep_string(drcontext, bb)); 387 388 return DR_EMIT_PERSISTABLE; 389 } 390 391 dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb, 392 bool for_trace, bool translating) { 393 app_pc pc = dr_fragment_app_pc(tag); 394 ModuleData *mod_data; 395 396 if (!ShouldInstrumentPc(pc, &mod_data)) 397 return DR_EMIT_PERSISTABLE; 398 399 if (VERBOSITY > 1) 400 dr_printf("============================================================\n"); 401 if (VERBOSITY > 0) { 402 string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>"); 403 if (mod_data && !mod_data->executed_) { 404 mod_data->executed_ = true; // Nevermind this race. 405 dr_printf("Executing from new module: %s\n", mod_path.c_str()); 406 } 407 dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc, 408 mod_path.c_str(), translating ? "true" : "false"); 409 if (mod_data) { 410 // Match standard sanitizer trace format for free symbols. 411 // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) 412 dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(), 413 pc - mod_data->start_); 414 } 415 } 416 if (VERBOSITY > 1) { 417 instrlist_disassemble(drcontext, pc, bb, STDOUT); 418 instr_t *instr; 419 for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) { 420 dr_printf("opcode: %d\n", instr_get_opcode(instr)); 421 } 422 } 423 424 for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) { 425 int opcode = instr_get_opcode(i); 426 if (opcode == OP_ret || opcode == OP_ret_far) { 427 InstrumentReturn(drcontext, bb, i); 428 continue; 429 } 430 431 // These instructions hopefully cover all cases where control is transferred 432 // to a function in a different module (we only care about calls into 433 // compiler-instrumented modules). 434 // * call_ind is used for normal indirect calls. 435 // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT 436 // stub includes a jump to an address from GOT). 437 if (opcode == OP_call_ind || opcode == OP_call_far_ind || 438 opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) { 439 InstrumentIndirectBranch(drcontext, bb, i); 440 continue; 441 } 442 443 if (!WantToInstrument(i)) 444 continue; 445 446 if (VERBOSITY > 1) { 447 app_pc orig_pc = dr_fragment_app_pc(tag); 448 uint flags = instr_get_arith_flags(i); 449 dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n", 450 instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags); 451 } 452 453 if (instr_writes_memory(i)) { 454 // Instrument memory writes 455 // bool instrumented_anything = false; 456 for (int d = 0; d < instr_num_dsts(i); d++) { 457 opnd_t op = instr_get_dst(i, d); 458 if (!OperandIsInteresting(op)) 459 continue; 460 461 // CHECK(!instrumented_anything); 462 // instrumented_anything = true; 463 InstrumentMops(drcontext, bb, i, op, true); 464 break; // only instrumenting the first dst 465 } 466 } 467 } 468 469 // TODO: optimize away redundant restore-spill pairs? 470 471 if (VERBOSITY > 1) { 472 pc = dr_fragment_app_pc(tag); 473 dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc); 474 instrlist_disassemble(drcontext, pc, bb, STDOUT); 475 } 476 return DR_EMIT_PERSISTABLE; 477 } 478 479 void event_module_load(void *drcontext, const module_data_t *info, 480 bool loaded) { 481 // Insert the module into the list while maintaining the ordering. 482 ModuleData mod_data(info); 483 std::vector<ModuleData>::iterator it = 484 upper_bound(g_module_list.begin(), g_module_list.end(), mod_data, 485 ModuleDataCompareStart); 486 it = g_module_list.insert(it, mod_data); 487 // Check if we should instrument this module. 488 it->should_instrument_ = ShouldInstrumentModule(&*it); 489 dr_module_set_should_instrument(info->handle, it->should_instrument_); 490 491 if (VERBOSITY > 0) 492 dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n", 493 info->full_path, info->start, info->end, 494 it->should_instrument_ ? "on" : "off"); 495 } 496 497 void event_module_unload(void *drcontext, const module_data_t *info) { 498 if (VERBOSITY > 0) 499 dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path, 500 info->start, info->end); 501 502 // Remove the module from the list. 503 ModuleData mod_data(info); 504 std::vector<ModuleData>::iterator it = 505 lower_bound(g_module_list.begin(), g_module_list.end(), mod_data, 506 ModuleDataCompareStart); 507 // It's a bug if we didn't actually find the module. 508 CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ && 509 it->end_ == mod_data.end_ && it->path_ == mod_data.path_); 510 g_module_list.erase(it); 511 } 512 513 void event_exit() { 514 // Clean up so DR doesn't tell us we're leaking memory. 515 drsys_exit(); 516 drutil_exit(); 517 drmgr_exit(); 518 519 if (VERBOSITY > 0) 520 dr_printf("==DRMSAN== DONE\n"); 521 } 522 523 bool event_filter_syscall(void *drcontext, int sysnum) { 524 // FIXME: only intercept syscalls with memory effects. 525 return true; /* intercept everything */ 526 } 527 528 bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) { 529 CHECK(arg->valid); 530 531 if (arg->pre) 532 return true; 533 if (!TESTANY(DRSYS_PARAM_OUT, arg->mode)) 534 return true; 535 536 size_t sz = arg->size; 537 538 if (sz > 0xFFFFFFFF) { 539 drmf_status_t res; 540 drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; 541 const char *name; 542 res = drsys_syscall_name(syscall, &name); 543 CHECK(res == DRMF_SUCCESS); 544 545 dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!" 546 " Clipping to %llu.\n", 547 name, arg->ordinal, (unsigned long long) sz, 548 (unsigned long long)(sz & 0xFFFFFFFF)); 549 } 550 551 if (VERBOSITY > 0) { 552 drmf_status_t res; 553 drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; 554 const char *name; 555 res = drsys_syscall_name(syscall, &name); 556 dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n", 557 name, arg->ordinal, arg->start_addr, 558 (char *)arg->start_addr + sz); 559 } 560 561 // We don't switch to the app context because __msan_unpoison() doesn't need 562 // TLS segments. 563 __msan_unpoison(arg->start_addr, sz); 564 565 return true; /* keep going */ 566 } 567 568 bool event_pre_syscall(void *drcontext, int sysnum) { 569 drsys_syscall_t *syscall; 570 drsys_sysnum_t sysnum_full; 571 bool known; 572 drsys_param_type_t ret_type; 573 drmf_status_t res; 574 const char *name; 575 576 res = drsys_cur_syscall(drcontext, &syscall); 577 CHECK(res == DRMF_SUCCESS); 578 579 res = drsys_syscall_number(syscall, &sysnum_full); 580 CHECK(res == DRMF_SUCCESS); 581 CHECK(sysnum == sysnum_full.number); 582 583 res = drsys_syscall_is_known(syscall, &known); 584 CHECK(res == DRMF_SUCCESS); 585 586 res = drsys_syscall_name(syscall, &name); 587 CHECK(res == DRMF_SUCCESS); 588 589 res = drsys_syscall_return_type(syscall, &ret_type); 590 CHECK(res == DRMF_SUCCESS); 591 CHECK(ret_type != DRSYS_TYPE_INVALID); 592 CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN); 593 594 res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL); 595 CHECK(res == DRMF_SUCCESS); 596 597 return true; 598 } 599 600 static bool IsInLoader(void *drcontext) { 601 // TODO: This segment swap is inefficient. DR should just let us query the 602 // app segment base, which it has. Alternatively, if we disable 603 // -mangle_app_seg, then we won't need the swap. 604 bool need_swap = !dr_using_app_state(drcontext); 605 if (need_swap) 606 dr_switch_to_app_state(drcontext); 607 bool is_in_loader = __msan_is_in_loader(); 608 if (need_swap) 609 dr_switch_to_dr_state(drcontext); 610 return is_in_loader; 611 } 612 613 void event_post_syscall(void *drcontext, int sysnum) { 614 drsys_syscall_t *syscall; 615 drsys_sysnum_t sysnum_full; 616 bool success = false; 617 drmf_status_t res; 618 619 res = drsys_cur_syscall(drcontext, &syscall); 620 CHECK(res == DRMF_SUCCESS); 621 622 res = drsys_syscall_number(syscall, &sysnum_full); 623 CHECK(res == DRMF_SUCCESS); 624 CHECK(sysnum == sysnum_full.number); 625 626 res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext), 627 &success); 628 CHECK(res == DRMF_SUCCESS); 629 630 if (success) { 631 res = 632 drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall); 633 CHECK(res == DRMF_SUCCESS); 634 } 635 636 // Our normal mmap interceptor can't intercept calls from the loader itself. 637 // This means we don't clear the shadow for calls to dlopen. For now, we 638 // solve this by intercepting mmap from ld.so here, but ideally we'd have a 639 // solution that doesn't rely on msandr. 640 // 641 // Be careful not to intercept maps done by the msan rtl. Otherwise we end up 642 // unpoisoning vast regions of memory and OOMing. 643 // TODO: __msan_unpoison() could "flush" large regions of memory like tsan 644 // does instead of doing a large memset. However, we need the memory to be 645 // zeroed, where as tsan does not, so plain madvise is not enough. 646 if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) { 647 if (IsInLoader(drcontext)) { 648 app_pc base = (app_pc)dr_syscall_get_result(drcontext); 649 ptr_uint_t size; 650 drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size); 651 CHECK(res == DRMF_SUCCESS); 652 if (VERBOSITY > 0) 653 dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size); 654 // We don't switch to the app context because __msan_unpoison() doesn't 655 // need TLS segments. 656 __msan_unpoison(base, size); 657 } 658 } 659 } 660 661 } // namespace 662 663 DR_EXPORT void dr_init(client_id_t id) { 664 drmf_status_t res; 665 666 drmgr_init(); 667 drutil_init(); 668 669 string app_name = dr_get_application_name(); 670 // This blacklist will still run these apps through DR's code cache. On the 671 // other hand, we are able to follow children of these apps. 672 // FIXME: Once DR has detach, we could just detach here. Alternatively, 673 // if DR had a fork or exec hook to let us decide there, that would be nice. 674 // FIXME: make the blacklist cmd-adjustable. 675 if (app_name == "python" || app_name == "python2.7" || app_name == "bash" || 676 app_name == "sh" || app_name == "true" || app_name == "exit" || 677 app_name == "yes" || app_name == "echo") 678 return; 679 680 drsys_options_t ops; 681 memset(&ops, 0, sizeof(ops)); 682 ops.struct_size = sizeof(ops); 683 ops.analyze_unknown_syscalls = false; 684 685 res = drsys_init(id, &ops); 686 CHECK(res == DRMF_SUCCESS); 687 688 dr_register_filter_syscall_event(event_filter_syscall); 689 drmgr_register_pre_syscall_event(event_pre_syscall); 690 drmgr_register_post_syscall_event(event_post_syscall); 691 res = drsys_filter_all_syscalls(); 692 CHECK(res == DRMF_SUCCESS); 693 694 InitializeMSanCallbacks(); 695 696 // FIXME: the shadow is initialized earlier when DR calls one of our wrapper 697 // functions. This may change one day. 698 // TODO: make this more robust. 699 700 void *drcontext = dr_get_current_drcontext(); 701 702 dr_switch_to_app_state(drcontext); 703 msan_retval_tls_offset = __msan_get_retval_tls_offset(); 704 msan_param_tls_offset = __msan_get_param_tls_offset(); 705 dr_switch_to_dr_state(drcontext); 706 if (VERBOSITY > 0) { 707 dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset); 708 dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset); 709 } 710 711 // Standard DR events. 712 dr_register_exit_event(event_exit); 713 714 drmgr_priority_t priority = { 715 sizeof(priority), /* size of struct */ 716 "msandr", /* name of our operation */ 717 NULL, /* optional name of operation we should precede */ 718 NULL, /* optional name of operation we should follow */ 719 0 720 }; /* numeric priority */ 721 722 drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority); 723 drmgr_register_bb_instru2instru_event(event_basic_block, &priority); 724 drmgr_register_module_load_event(event_module_load); 725 drmgr_register_module_unload_event(event_module_unload); 726 if (VERBOSITY > 0) 727 dr_printf("==MSANDR== Starting!\n"); 728 } 729