Home | History | Annotate | Download | only in llvm-mc-fuzzer
      1 //===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //===----------------------------------------------------------------------===//
     11 
     12 #include "FuzzerInterface.h"
     13 #include "llvm-c/Disassembler.h"
     14 #include "llvm-c/Target.h"
     15 #include "llvm/MC/SubtargetFeature.h"
     16 #include "llvm/Support/CommandLine.h"
     17 #include "llvm/Support/raw_ostream.h"
     18 
     19 using namespace llvm;
     20 
     21 const unsigned AssemblyTextBufSize = 80;
     22 
     23 enum ActionType {
     24   AC_Assemble,
     25   AC_Disassemble
     26 };
     27 
     28 static cl::opt<ActionType>
     29 Action(cl::desc("Action to perform:"),
     30        cl::init(AC_Assemble),
     31        cl::values(clEnumValN(AC_Assemble, "assemble",
     32                              "Assemble a .s file (default)"),
     33                   clEnumValN(AC_Disassemble, "disassemble",
     34                              "Disassemble strings of hex bytes"),
     35                   clEnumValEnd));
     36 
     37 static cl::opt<std::string>
     38     TripleName("triple", cl::desc("Target triple to assemble for, "
     39                                   "see -version for available targets"));
     40 
     41 static cl::opt<std::string>
     42     MCPU("mcpu",
     43          cl::desc("Target a specific cpu type (-mcpu=help for details)"),
     44          cl::value_desc("cpu-name"), cl::init(""));
     45 
     46 // This is useful for variable-length instruction sets.
     47 static cl::opt<unsigned> InsnLimit(
     48     "insn-limit",
     49     cl::desc("Limit the number of instructions to process (0 for no limit)"),
     50     cl::value_desc("count"), cl::init(0));
     51 
     52 static cl::list<std::string>
     53     MAttrs("mattr", cl::CommaSeparated,
     54            cl::desc("Target specific attributes (-mattr=help for details)"),
     55            cl::value_desc("a1,+a2,-a3,..."));
     56 // The feature string derived from -mattr's values.
     57 std::string FeaturesStr;
     58 
     59 static cl::list<std::string>
     60     FuzzerArgs("fuzzer-args", cl::Positional,
     61                cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
     62                cl::PositionalEatsArgs);
     63 static std::vector<char *> ModifiedArgv;
     64 
     65 int DisassembleOneInput(const uint8_t *Data, size_t Size) {
     66   char AssemblyText[AssemblyTextBufSize];
     67 
     68   std::vector<uint8_t> DataCopy(Data, Data + Size);
     69 
     70   LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures(
     71       TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0,
     72       nullptr, nullptr);
     73   assert(Ctx);
     74   uint8_t *p = DataCopy.data();
     75   unsigned Consumed;
     76   unsigned InstructionsProcessed = 0;
     77   do {
     78     Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText,
     79                                      AssemblyTextBufSize);
     80     Size -= Consumed;
     81     p += Consumed;
     82 
     83     InstructionsProcessed ++;
     84     if (InsnLimit != 0 && InstructionsProcessed < InsnLimit)
     85       break;
     86   } while (Consumed != 0);
     87   LLVMDisasmDispose(Ctx);
     88   return 0;
     89 }
     90 
     91 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     92   if (Action == AC_Assemble)
     93     errs() << "error: -assemble is not implemented\n";
     94   else if (Action == AC_Disassemble)
     95     return DisassembleOneInput(Data, Size);
     96 
     97   llvm_unreachable("Unknown action");
     98   return 0;
     99 }
    100 
    101 int LLVMFuzzerInitialize(int *argc, char ***argv) {
    102   // The command line is unusual compared to other fuzzers due to the need to
    103   // specify the target. Options like -triple, -mcpu, and -mattr work like
    104   // their counterparts in llvm-mc, while -fuzzer-args collects options for the
    105   // fuzzer itself.
    106   //
    107   // Examples:
    108   //
    109   // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
    110   // 4-bytes each and use the contents of ./corpus as the test corpus:
    111   //   llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
    112   //       -fuzzer-args -max_len=4 -runs=100000 ./corpus
    113   //
    114   // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
    115   // feature enabled using up to 64-byte inputs:
    116   //   llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
    117   //       -disassemble -fuzzer-args ./corpus
    118   //
    119   // If your aim is to find instructions that are not tested, then it is
    120   // advisable to constrain the maximum input size to a single instruction
    121   // using -max_len as in the first example. This results in a test corpus of
    122   // individual instructions that test unique paths. Without this constraint,
    123   // there will be considerable redundancy in the corpus.
    124 
    125   char **OriginalArgv = *argv;
    126 
    127   LLVMInitializeAllTargetInfos();
    128   LLVMInitializeAllTargetMCs();
    129   LLVMInitializeAllDisassemblers();
    130 
    131   cl::ParseCommandLineOptions(*argc, OriginalArgv);
    132 
    133   // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
    134   // the driver can parse its arguments.
    135   //
    136   // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
    137   // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
    138   // non-const buffer to avoid the need to clean up when the fuzzer terminates.
    139   ModifiedArgv.push_back(OriginalArgv[0]);
    140   for (const auto &FuzzerArg : FuzzerArgs) {
    141     for (int i = 1; i < *argc; ++i) {
    142       if (FuzzerArg == OriginalArgv[i])
    143         ModifiedArgv.push_back(OriginalArgv[i]);
    144     }
    145   }
    146   *argc = ModifiedArgv.size();
    147   *argv = ModifiedArgv.data();
    148 
    149   // Package up features to be passed to target/subtarget
    150   // We have to pass it via a global since the callback doesn't
    151   // permit any user data.
    152   if (MAttrs.size()) {
    153     SubtargetFeatures Features;
    154     for (unsigned i = 0; i != MAttrs.size(); ++i)
    155       Features.AddFeature(MAttrs[i]);
    156     FeaturesStr = Features.getString();
    157   }
    158 
    159   return 0;
    160 }
    161