Home | History | Annotate | Download | only in dfsan
      1 //===-- dfsan.cc ----------------------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is a part of DataFlowSanitizer.
     11 //
     12 // DataFlowSanitizer runtime.  This file defines the public interface to
     13 // DataFlowSanitizer as well as the definition of certain runtime functions
     14 // called automatically by the compiler (specifically the instrumentation pass
     15 // in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).
     16 //
     17 // The public interface is defined in include/sanitizer/dfsan_interface.h whose
     18 // functions are prefixed dfsan_ while the compiler interface functions are
     19 // prefixed __dfsan_.
     20 //===----------------------------------------------------------------------===//
     21 
     22 #include "sanitizer_common/sanitizer_atomic.h"
     23 #include "sanitizer_common/sanitizer_common.h"
     24 #include "sanitizer_common/sanitizer_flags.h"
     25 #include "sanitizer_common/sanitizer_flag_parser.h"
     26 #include "sanitizer_common/sanitizer_libc.h"
     27 
     28 #include "dfsan/dfsan.h"
     29 
     30 using namespace __dfsan;
     31 
     32 typedef atomic_uint16_t atomic_dfsan_label;
     33 static const dfsan_label kInitializingLabel = -1;
     34 
     35 static const uptr kNumLabels = 1 << (sizeof(dfsan_label) * 8);
     36 
     37 static atomic_dfsan_label __dfsan_last_label;
     38 static dfsan_label_info __dfsan_label_info[kNumLabels];
     39 
     40 Flags __dfsan::flags_data;
     41 
     42 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_retval_tls;
     43 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_arg_tls[64];
     44 
     45 // On Linux/x86_64, memory is laid out as follows:
     46 //
     47 // +--------------------+ 0x800000000000 (top of memory)
     48 // | application memory |
     49 // +--------------------+ 0x700000008000 (kAppAddr)
     50 // |                    |
     51 // |       unused       |
     52 // |                    |
     53 // +--------------------+ 0x200200000000 (kUnusedAddr)
     54 // |    union table     |
     55 // +--------------------+ 0x200000000000 (kUnionTableAddr)
     56 // |   shadow memory    |
     57 // +--------------------+ 0x000000010000 (kShadowAddr)
     58 // | reserved by kernel |
     59 // +--------------------+ 0x000000000000
     60 //
     61 // To derive a shadow memory address from an application memory address,
     62 // bits 44-46 are cleared to bring the address into the range
     63 // [0x000000008000,0x100000000000).  Then the address is shifted left by 1 to
     64 // account for the double byte representation of shadow labels and move the
     65 // address into the shadow memory range.  See the function shadow_for below.
     66 
     67 // On Linux/MIPS64, memory is laid out as follows:
     68 //
     69 // +--------------------+ 0x10000000000 (top of memory)
     70 // | application memory |
     71 // +--------------------+ 0xF000008000 (kAppAddr)
     72 // |                    |
     73 // |       unused       |
     74 // |                    |
     75 // +--------------------+ 0x2200000000 (kUnusedAddr)
     76 // |    union table     |
     77 // +--------------------+ 0x2000000000 (kUnionTableAddr)
     78 // |   shadow memory    |
     79 // +--------------------+ 0x0000010000 (kShadowAddr)
     80 // | reserved by kernel |
     81 // +--------------------+ 0x0000000000
     82 
     83 typedef atomic_dfsan_label dfsan_union_table_t[kNumLabels][kNumLabels];
     84 
     85 #if defined(__x86_64__)
     86 static const uptr kShadowAddr = 0x10000;
     87 static const uptr kUnionTableAddr = 0x200000000000;
     88 static const uptr kUnusedAddr = kUnionTableAddr + sizeof(dfsan_union_table_t);
     89 static const uptr kAppAddr = 0x700000008000;
     90 #elif defined(__mips64)
     91 static const uptr kShadowAddr = 0x10000;
     92 static const uptr kUnionTableAddr = 0x2000000000;
     93 static const uptr kUnusedAddr = kUnionTableAddr + sizeof(dfsan_union_table_t);
     94 static const uptr kAppAddr = 0xF000008000;
     95 #else
     96 # error "DFSan not supported for this platform!"
     97 #endif
     98 
     99 static atomic_dfsan_label *union_table(dfsan_label l1, dfsan_label l2) {
    100   return &(*(dfsan_union_table_t *) kUnionTableAddr)[l1][l2];
    101 }
    102 
    103 // Checks we do not run out of labels.
    104 static void dfsan_check_label(dfsan_label label) {
    105   if (label == kInitializingLabel) {
    106     Report("FATAL: DataFlowSanitizer: out of labels\n");
    107     Die();
    108   }
    109 }
    110 
    111 // Resolves the union of two unequal labels.  Nonequality is a precondition for
    112 // this function (the instrumentation pass inlines the equality test).
    113 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
    114 dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) {
    115   DCHECK_NE(l1, l2);
    116 
    117   if (l1 == 0)
    118     return l2;
    119   if (l2 == 0)
    120     return l1;
    121 
    122   if (l1 > l2)
    123     Swap(l1, l2);
    124 
    125   atomic_dfsan_label *table_ent = union_table(l1, l2);
    126   // We need to deal with the case where two threads concurrently request
    127   // a union of the same pair of labels.  If the table entry is uninitialized,
    128   // (i.e. 0) use a compare-exchange to set the entry to kInitializingLabel
    129   // (i.e. -1) to mark that we are initializing it.
    130   dfsan_label label = 0;
    131   if (atomic_compare_exchange_strong(table_ent, &label, kInitializingLabel,
    132                                      memory_order_acquire)) {
    133     // Check whether l2 subsumes l1.  We don't need to check whether l1
    134     // subsumes l2 because we are guaranteed here that l1 < l2, and (at least
    135     // in the cases we are interested in) a label may only subsume labels
    136     // created earlier (i.e. with a lower numerical value).
    137     if (__dfsan_label_info[l2].l1 == l1 ||
    138         __dfsan_label_info[l2].l2 == l1) {
    139       label = l2;
    140     } else {
    141       label =
    142         atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
    143       dfsan_check_label(label);
    144       __dfsan_label_info[label].l1 = l1;
    145       __dfsan_label_info[label].l2 = l2;
    146     }
    147     atomic_store(table_ent, label, memory_order_release);
    148   } else if (label == kInitializingLabel) {
    149     // Another thread is initializing the entry.  Wait until it is finished.
    150     do {
    151       internal_sched_yield();
    152       label = atomic_load(table_ent, memory_order_acquire);
    153     } while (label == kInitializingLabel);
    154   }
    155   return label;
    156 }
    157 
    158 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
    159 dfsan_label __dfsan_union_load(const dfsan_label *ls, uptr n) {
    160   dfsan_label label = ls[0];
    161   for (uptr i = 1; i != n; ++i) {
    162     dfsan_label next_label = ls[i];
    163     if (label != next_label)
    164       label = __dfsan_union(label, next_label);
    165   }
    166   return label;
    167 }
    168 
    169 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
    170 void __dfsan_unimplemented(char *fname) {
    171   if (flags().warn_unimplemented)
    172     Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n",
    173            fname);
    174 }
    175 
    176 // Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function
    177 // to try to figure out where labels are being introduced in a nominally
    178 // label-free program.
    179 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_nonzero_label() {
    180   if (flags().warn_nonzero_labels)
    181     Report("WARNING: DataFlowSanitizer: saw nonzero label\n");
    182 }
    183 
    184 // Indirect call to an uninstrumented vararg function. We don't have a way of
    185 // handling these at the moment.
    186 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
    187 __dfsan_vararg_wrapper(const char *fname) {
    188   Report("FATAL: DataFlowSanitizer: unsupported indirect call to vararg "
    189          "function %s\n", fname);
    190   Die();
    191 }
    192 
    193 // Like __dfsan_union, but for use from the client or custom functions.  Hence
    194 // the equality comparison is done here before calling __dfsan_union.
    195 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
    196 dfsan_union(dfsan_label l1, dfsan_label l2) {
    197   if (l1 == l2)
    198     return l1;
    199   return __dfsan_union(l1, l2);
    200 }
    201 
    202 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
    203 dfsan_label dfsan_create_label(const char *desc, void *userdata) {
    204   dfsan_label label =
    205     atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
    206   dfsan_check_label(label);
    207   __dfsan_label_info[label].l1 = __dfsan_label_info[label].l2 = 0;
    208   __dfsan_label_info[label].desc = desc;
    209   __dfsan_label_info[label].userdata = userdata;
    210   return label;
    211 }
    212 
    213 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
    214 void __dfsan_set_label(dfsan_label label, void *addr, uptr size) {
    215   for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp) {
    216     // Don't write the label if it is already the value we need it to be.
    217     // In a program where most addresses are not labeled, it is common that
    218     // a page of shadow memory is entirely zeroed.  The Linux copy-on-write
    219     // implementation will share all of the zeroed pages, making a copy of a
    220     // page when any value is written.  The un-sharing will happen even if
    221     // the value written does not change the value in memory.  Avoiding the
    222     // write when both |label| and |*labelp| are zero dramatically reduces
    223     // the amount of real memory used by large programs.
    224     if (label == *labelp)
    225       continue;
    226 
    227     *labelp = label;
    228   }
    229 }
    230 
    231 SANITIZER_INTERFACE_ATTRIBUTE
    232 void dfsan_set_label(dfsan_label label, void *addr, uptr size) {
    233   __dfsan_set_label(label, addr, size);
    234 }
    235 
    236 SANITIZER_INTERFACE_ATTRIBUTE
    237 void dfsan_add_label(dfsan_label label, void *addr, uptr size) {
    238   for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp)
    239     if (*labelp != label)
    240       *labelp = __dfsan_union(*labelp, label);
    241 }
    242 
    243 // Unlike the other dfsan interface functions the behavior of this function
    244 // depends on the label of one of its arguments.  Hence it is implemented as a
    245 // custom function.
    246 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
    247 __dfsw_dfsan_get_label(long data, dfsan_label data_label,
    248                        dfsan_label *ret_label) {
    249   *ret_label = 0;
    250   return data_label;
    251 }
    252 
    253 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
    254 dfsan_read_label(const void *addr, uptr size) {
    255   if (size == 0)
    256     return 0;
    257   return __dfsan_union_load(shadow_for(addr), size);
    258 }
    259 
    260 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
    261 const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) {
    262   return &__dfsan_label_info[label];
    263 }
    264 
    265 extern "C" SANITIZER_INTERFACE_ATTRIBUTE int
    266 dfsan_has_label(dfsan_label label, dfsan_label elem) {
    267   if (label == elem)
    268     return true;
    269   const dfsan_label_info *info = dfsan_get_label_info(label);
    270   if (info->l1 != 0) {
    271     return dfsan_has_label(info->l1, elem) || dfsan_has_label(info->l2, elem);
    272   } else {
    273     return false;
    274   }
    275 }
    276 
    277 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
    278 dfsan_has_label_with_desc(dfsan_label label, const char *desc) {
    279   const dfsan_label_info *info = dfsan_get_label_info(label);
    280   if (info->l1 != 0) {
    281     return dfsan_has_label_with_desc(info->l1, desc) ||
    282            dfsan_has_label_with_desc(info->l2, desc);
    283   } else {
    284     return internal_strcmp(desc, info->desc) == 0;
    285   }
    286 }
    287 
    288 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
    289 dfsan_get_label_count(void) {
    290   dfsan_label max_label_allocated =
    291       atomic_load(&__dfsan_last_label, memory_order_relaxed);
    292 
    293   return static_cast<uptr>(max_label_allocated);
    294 }
    295 
    296 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
    297 dfsan_dump_labels(int fd) {
    298   dfsan_label last_label =
    299       atomic_load(&__dfsan_last_label, memory_order_relaxed);
    300 
    301   for (uptr l = 1; l <= last_label; ++l) {
    302     char buf[64];
    303     internal_snprintf(buf, sizeof(buf), "%u %u %u ", l,
    304                       __dfsan_label_info[l].l1, __dfsan_label_info[l].l2);
    305     WriteToFile(fd, buf, internal_strlen(buf));
    306     if (__dfsan_label_info[l].l1 == 0 && __dfsan_label_info[l].desc) {
    307       WriteToFile(fd, __dfsan_label_info[l].desc,
    308                   internal_strlen(__dfsan_label_info[l].desc));
    309     }
    310     WriteToFile(fd, "\n", 1);
    311   }
    312 }
    313 
    314 void Flags::SetDefaults() {
    315 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
    316 #include "dfsan_flags.inc"
    317 #undef DFSAN_FLAG
    318 }
    319 
    320 static void RegisterDfsanFlags(FlagParser *parser, Flags *f) {
    321 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) \
    322   RegisterFlag(parser, #Name, Description, &f->Name);
    323 #include "dfsan_flags.inc"
    324 #undef DFSAN_FLAG
    325 }
    326 
    327 static void InitializeFlags() {
    328   FlagParser parser;
    329   RegisterDfsanFlags(&parser, &flags());
    330   flags().SetDefaults();
    331   parser.ParseString(GetEnv("DFSAN_OPTIONS"));
    332 }
    333 
    334 static void dfsan_fini() {
    335   if (internal_strcmp(flags().dump_labels_at_exit, "") != 0) {
    336     fd_t fd = OpenFile(flags().dump_labels_at_exit, WrOnly);
    337     if (fd == kInvalidFd) {
    338       Report("WARNING: DataFlowSanitizer: unable to open output file %s\n",
    339              flags().dump_labels_at_exit);
    340       return;
    341     }
    342 
    343     Report("INFO: DataFlowSanitizer: dumping labels to %s\n",
    344            flags().dump_labels_at_exit);
    345     dfsan_dump_labels(fd);
    346     CloseFile(fd);
    347   }
    348 }
    349 
    350 #ifdef DFSAN_NOLIBC
    351 extern "C" void dfsan_init() {
    352 #else
    353 static void dfsan_init(int argc, char **argv, char **envp) {
    354 #endif
    355   MmapFixedNoReserve(kShadowAddr, kUnusedAddr - kShadowAddr);
    356 
    357   // Protect the region of memory we don't use, to preserve the one-to-one
    358   // mapping from application to shadow memory. But if ASLR is disabled, Linux
    359   // will load our executable in the middle of our unused region. This mostly
    360   // works so long as the program doesn't use too much memory. We support this
    361   // case by disabling memory protection when ASLR is disabled.
    362   uptr init_addr = (uptr)&dfsan_init;
    363   if (!(init_addr >= kUnusedAddr && init_addr < kAppAddr))
    364     MmapNoAccess(kUnusedAddr, kAppAddr - kUnusedAddr);
    365 
    366   InitializeFlags();
    367   InitializeInterceptors();
    368 
    369   // Register the fini callback to run when the program terminates successfully
    370   // or it is killed by the runtime.
    371   Atexit(dfsan_fini);
    372   SetDieCallback(dfsan_fini);
    373 
    374   __dfsan_label_info[kInitializingLabel].desc = "<init label>";
    375 }
    376 
    // When the libc-based build can use .preinit_array, place a pointer to
    // dfsan_init in that section (presumably so the runtime is initialized
    // during early process startup -- confirm against the loader's handling of
    // .preinit_array).
    #if !defined(DFSAN_NOLIBC) && SANITIZER_CAN_USE_PREINIT_ARRAY
    __attribute__((section(".preinit_array"), used))
    static void (*dfsan_init_ptr)(int, char **, char **) = &dfsan_init;
    #endif
    380 #endif
    381