1 //===-- dfsan.cc ----------------------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is a part of DataFlowSanitizer. 11 // 12 // DataFlowSanitizer runtime. This file defines the public interface to 13 // DataFlowSanitizer as well as the definition of certain runtime functions 14 // called automatically by the compiler (specifically the instrumentation pass 15 // in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp). 16 // 17 // The public interface is defined in include/sanitizer/dfsan_interface.h whose 18 // functions are prefixed dfsan_ while the compiler interface functions are 19 // prefixed __dfsan_. 20 //===----------------------------------------------------------------------===// 21 22 #include "sanitizer_common/sanitizer_atomic.h" 23 #include "sanitizer_common/sanitizer_common.h" 24 #include "sanitizer_common/sanitizer_flags.h" 25 #include "sanitizer_common/sanitizer_flag_parser.h" 26 #include "sanitizer_common/sanitizer_libc.h" 27 28 #include "dfsan/dfsan.h" 29 30 using namespace __dfsan; 31 32 typedef atomic_uint16_t atomic_dfsan_label; 33 static const dfsan_label kInitializingLabel = -1; 34 35 static const uptr kNumLabels = 1 << (sizeof(dfsan_label) * 8); 36 37 static atomic_dfsan_label __dfsan_last_label; 38 static dfsan_label_info __dfsan_label_info[kNumLabels]; 39 40 Flags __dfsan::flags_data; 41 42 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_retval_tls; 43 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_arg_tls[64]; 44 45 // On Linux/x86_64, memory is laid out as follows: 46 // 47 // +--------------------+ 0x800000000000 (top of memory) 48 // | application memory | 49 // +--------------------+ 0x700000008000 (kAppAddr) 50 // | | 51 // | unused | 52 // | | 53 // +--------------------+ 0x200200000000 (kUnusedAddr) 54 // | union table | 55 // +--------------------+ 0x200000000000 (kUnionTableAddr) 56 // | shadow memory | 57 // +--------------------+ 0x000000010000 (kShadowAddr) 58 // | reserved by kernel | 59 // +--------------------+ 0x000000000000 60 // 61 // To derive a shadow memory address from an application memory address, 62 // bits 44-46 are cleared to bring the address into the range 63 // [0x000000008000,0x100000000000). Then the address is shifted left by 1 to 64 // account for the double byte representation of shadow labels and move the 65 // address into the shadow memory range. See the function shadow_for below. 66 67 // On Linux/MIPS64, memory is laid out as follows: 68 // 69 // +--------------------+ 0x10000000000 (top of memory) 70 // | application memory | 71 // +--------------------+ 0xF000008000 (kAppAddr) 72 // | | 73 // | unused | 74 // | | 75 // +--------------------+ 0x2200000000 (kUnusedAddr) 76 // | union table | 77 // +--------------------+ 0x2000000000 (kUnionTableAddr) 78 // | shadow memory | 79 // +--------------------+ 0x0000010000 (kShadowAddr) 80 // | reserved by kernel | 81 // +--------------------+ 0x0000000000 82 83 typedef atomic_dfsan_label dfsan_union_table_t[kNumLabels][kNumLabels]; 84 85 #if defined(__x86_64__) 86 static const uptr kShadowAddr = 0x10000; 87 static const uptr kUnionTableAddr = 0x200000000000; 88 static const uptr kUnusedAddr = kUnionTableAddr + sizeof(dfsan_union_table_t); 89 static const uptr kAppAddr = 0x700000008000; 90 #elif defined(__mips64) 91 static const uptr kShadowAddr = 0x10000; 92 static const uptr kUnionTableAddr = 0x2000000000; 93 static const uptr kUnusedAddr = kUnionTableAddr + sizeof(dfsan_union_table_t); 94 static const uptr kAppAddr = 0xF000008000; 95 #else 96 # error "DFSan not supported for this platform!" 97 #endif 98 99 static atomic_dfsan_label *union_table(dfsan_label l1, dfsan_label l2) { 100 return &(*(dfsan_union_table_t *) kUnionTableAddr)[l1][l2]; 101 } 102 103 // Checks we do not run out of labels. 104 static void dfsan_check_label(dfsan_label label) { 105 if (label == kInitializingLabel) { 106 Report("FATAL: DataFlowSanitizer: out of labels\n"); 107 Die(); 108 } 109 } 110 111 // Resolves the union of two unequal labels. Nonequality is a precondition for 112 // this function (the instrumentation pass inlines the equality test). 113 extern "C" SANITIZER_INTERFACE_ATTRIBUTE 114 dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) { 115 DCHECK_NE(l1, l2); 116 117 if (l1 == 0) 118 return l2; 119 if (l2 == 0) 120 return l1; 121 122 if (l1 > l2) 123 Swap(l1, l2); 124 125 atomic_dfsan_label *table_ent = union_table(l1, l2); 126 // We need to deal with the case where two threads concurrently request 127 // a union of the same pair of labels. If the table entry is uninitialized, 128 // (i.e. 0) use a compare-exchange to set the entry to kInitializingLabel 129 // (i.e. -1) to mark that we are initializing it. 130 dfsan_label label = 0; 131 if (atomic_compare_exchange_strong(table_ent, &label, kInitializingLabel, 132 memory_order_acquire)) { 133 // Check whether l2 subsumes l1. We don't need to check whether l1 134 // subsumes l2 because we are guaranteed here that l1 < l2, and (at least 135 // in the cases we are interested in) a label may only subsume labels 136 // created earlier (i.e. with a lower numerical value). 137 if (__dfsan_label_info[l2].l1 == l1 || 138 __dfsan_label_info[l2].l2 == l1) { 139 label = l2; 140 } else { 141 label = 142 atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1; 143 dfsan_check_label(label); 144 __dfsan_label_info[label].l1 = l1; 145 __dfsan_label_info[label].l2 = l2; 146 } 147 atomic_store(table_ent, label, memory_order_release); 148 } else if (label == kInitializingLabel) { 149 // Another thread is initializing the entry. Wait until it is finished. 150 do { 151 internal_sched_yield(); 152 label = atomic_load(table_ent, memory_order_acquire); 153 } while (label == kInitializingLabel); 154 } 155 return label; 156 } 157 158 extern "C" SANITIZER_INTERFACE_ATTRIBUTE 159 dfsan_label __dfsan_union_load(const dfsan_label *ls, uptr n) { 160 dfsan_label label = ls[0]; 161 for (uptr i = 1; i != n; ++i) { 162 dfsan_label next_label = ls[i]; 163 if (label != next_label) 164 label = __dfsan_union(label, next_label); 165 } 166 return label; 167 } 168 169 extern "C" SANITIZER_INTERFACE_ATTRIBUTE 170 void __dfsan_unimplemented(char *fname) { 171 if (flags().warn_unimplemented) 172 Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n", 173 fname); 174 } 175 176 // Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function 177 // to try to figure out where labels are being introduced in a nominally 178 // label-free program. 179 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_nonzero_label() { 180 if (flags().warn_nonzero_labels) 181 Report("WARNING: DataFlowSanitizer: saw nonzero label\n"); 182 } 183 184 // Indirect call to an uninstrumented vararg function. We don't have a way of 185 // handling these at the moment. 186 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void 187 __dfsan_vararg_wrapper(const char *fname) { 188 Report("FATAL: DataFlowSanitizer: unsupported indirect call to vararg " 189 "function %s\n", fname); 190 Die(); 191 } 192 193 // Like __dfsan_union, but for use from the client or custom functions. Hence 194 // the equality comparison is done here before calling __dfsan_union. 195 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label 196 dfsan_union(dfsan_label l1, dfsan_label l2) { 197 if (l1 == l2) 198 return l1; 199 return __dfsan_union(l1, l2); 200 } 201 202 extern "C" SANITIZER_INTERFACE_ATTRIBUTE 203 dfsan_label dfsan_create_label(const char *desc, void *userdata) { 204 dfsan_label label = 205 atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1; 206 dfsan_check_label(label); 207 __dfsan_label_info[label].l1 = __dfsan_label_info[label].l2 = 0; 208 __dfsan_label_info[label].desc = desc; 209 __dfsan_label_info[label].userdata = userdata; 210 return label; 211 } 212 213 extern "C" SANITIZER_INTERFACE_ATTRIBUTE 214 void __dfsan_set_label(dfsan_label label, void *addr, uptr size) { 215 for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp) { 216 // Don't write the label if it is already the value we need it to be. 217 // In a program where most addresses are not labeled, it is common that 218 // a page of shadow memory is entirely zeroed. The Linux copy-on-write 219 // implementation will share all of the zeroed pages, making a copy of a 220 // page when any value is written. The un-sharing will happen even if 221 // the value written does not change the value in memory. Avoiding the 222 // write when both |label| and |*labelp| are zero dramatically reduces 223 // the amount of real memory used by large programs. 224 if (label == *labelp) 225 continue; 226 227 *labelp = label; 228 } 229 } 230 231 SANITIZER_INTERFACE_ATTRIBUTE 232 void dfsan_set_label(dfsan_label label, void *addr, uptr size) { 233 __dfsan_set_label(label, addr, size); 234 } 235 236 SANITIZER_INTERFACE_ATTRIBUTE 237 void dfsan_add_label(dfsan_label label, void *addr, uptr size) { 238 for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp) 239 if (*labelp != label) 240 *labelp = __dfsan_union(*labelp, label); 241 } 242 243 // Unlike the other dfsan interface functions the behavior of this function 244 // depends on the label of one of its arguments. Hence it is implemented as a 245 // custom function. 246 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label 247 __dfsw_dfsan_get_label(long data, dfsan_label data_label, 248 dfsan_label *ret_label) { 249 *ret_label = 0; 250 return data_label; 251 } 252 253 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label 254 dfsan_read_label(const void *addr, uptr size) { 255 if (size == 0) 256 return 0; 257 return __dfsan_union_load(shadow_for(addr), size); 258 } 259 260 extern "C" SANITIZER_INTERFACE_ATTRIBUTE 261 const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) { 262 return &__dfsan_label_info[label]; 263 } 264 265 extern "C" SANITIZER_INTERFACE_ATTRIBUTE int 266 dfsan_has_label(dfsan_label label, dfsan_label elem) { 267 if (label == elem) 268 return true; 269 const dfsan_label_info *info = dfsan_get_label_info(label); 270 if (info->l1 != 0) { 271 return dfsan_has_label(info->l1, elem) || dfsan_has_label(info->l2, elem); 272 } else { 273 return false; 274 } 275 } 276 277 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label 278 dfsan_has_label_with_desc(dfsan_label label, const char *desc) { 279 const dfsan_label_info *info = dfsan_get_label_info(label); 280 if (info->l1 != 0) { 281 return dfsan_has_label_with_desc(info->l1, desc) || 282 dfsan_has_label_with_desc(info->l2, desc); 283 } else { 284 return internal_strcmp(desc, info->desc) == 0; 285 } 286 } 287 288 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr 289 dfsan_get_label_count(void) { 290 dfsan_label max_label_allocated = 291 atomic_load(&__dfsan_last_label, memory_order_relaxed); 292 293 return static_cast<uptr>(max_label_allocated); 294 } 295 296 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void 297 dfsan_dump_labels(int fd) { 298 dfsan_label last_label = 299 atomic_load(&__dfsan_last_label, memory_order_relaxed); 300 301 for (uptr l = 1; l <= last_label; ++l) { 302 char buf[64]; 303 internal_snprintf(buf, sizeof(buf), "%u %u %u ", l, 304 __dfsan_label_info[l].l1, __dfsan_label_info[l].l2); 305 WriteToFile(fd, buf, internal_strlen(buf)); 306 if (__dfsan_label_info[l].l1 == 0 && __dfsan_label_info[l].desc) { 307 WriteToFile(fd, __dfsan_label_info[l].desc, 308 internal_strlen(__dfsan_label_info[l].desc)); 309 } 310 WriteToFile(fd, "\n", 1); 311 } 312 } 313 314 void Flags::SetDefaults() { 315 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; 316 #include "dfsan_flags.inc" 317 #undef DFSAN_FLAG 318 } 319 320 static void RegisterDfsanFlags(FlagParser *parser, Flags *f) { 321 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) \ 322 RegisterFlag(parser, #Name, Description, &f->Name); 323 #include "dfsan_flags.inc" 324 #undef DFSAN_FLAG 325 } 326 327 static void InitializeFlags() { 328 FlagParser parser; 329 RegisterDfsanFlags(&parser, &flags()); 330 flags().SetDefaults(); 331 parser.ParseString(GetEnv("DFSAN_OPTIONS")); 332 } 333 334 static void dfsan_fini() { 335 if (internal_strcmp(flags().dump_labels_at_exit, "") != 0) { 336 fd_t fd = OpenFile(flags().dump_labels_at_exit, WrOnly); 337 if (fd == kInvalidFd) { 338 Report("WARNING: DataFlowSanitizer: unable to open output file %s\n", 339 flags().dump_labels_at_exit); 340 return; 341 } 342 343 Report("INFO: DataFlowSanitizer: dumping labels to %s\n", 344 flags().dump_labels_at_exit); 345 dfsan_dump_labels(fd); 346 CloseFile(fd); 347 } 348 } 349 350 #ifdef DFSAN_NOLIBC 351 extern "C" void dfsan_init() { 352 #else 353 static void dfsan_init(int argc, char **argv, char **envp) { 354 #endif 355 MmapFixedNoReserve(kShadowAddr, kUnusedAddr - kShadowAddr); 356 357 // Protect the region of memory we don't use, to preserve the one-to-one 358 // mapping from application to shadow memory. But if ASLR is disabled, Linux 359 // will load our executable in the middle of our unused region. This mostly 360 // works so long as the program doesn't use too much memory. We support this 361 // case by disabling memory protection when ASLR is disabled. 362 uptr init_addr = (uptr)&dfsan_init; 363 if (!(init_addr >= kUnusedAddr && init_addr < kAppAddr)) 364 MmapNoAccess(kUnusedAddr, kAppAddr - kUnusedAddr); 365 366 InitializeFlags(); 367 InitializeInterceptors(); 368 369 // Register the fini callback to run when the program terminates successfully 370 // or it is killed by the runtime. 371 Atexit(dfsan_fini); 372 SetDieCallback(dfsan_fini); 373 374 __dfsan_label_info[kInitializingLabel].desc = "<init label>"; 375 } 376 377 #if !defined(DFSAN_NOLIBC) && SANITIZER_CAN_USE_PREINIT_ARRAY 378 __attribute__((section(".preinit_array"), used)) 379 static void (*dfsan_init_ptr)(int, char **, char **) = dfsan_init; 380 #endif 381