1 //===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 //===----------------------------------------------------------------------===// 11 12 #include "FuzzerInterface.h" 13 #include "llvm-c/Disassembler.h" 14 #include "llvm-c/Target.h" 15 #include "llvm/MC/SubtargetFeature.h" 16 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/raw_ostream.h" 18 19 using namespace llvm; 20 21 const unsigned AssemblyTextBufSize = 80; 22 23 enum ActionType { 24 AC_Assemble, 25 AC_Disassemble 26 }; 27 28 static cl::opt<ActionType> 29 Action(cl::desc("Action to perform:"), 30 cl::init(AC_Assemble), 31 cl::values(clEnumValN(AC_Assemble, "assemble", 32 "Assemble a .s file (default)"), 33 clEnumValN(AC_Disassemble, "disassemble", 34 "Disassemble strings of hex bytes"), 35 clEnumValEnd)); 36 37 static cl::opt<std::string> 38 TripleName("triple", cl::desc("Target triple to assemble for, " 39 "see -version for available targets")); 40 41 static cl::opt<std::string> 42 MCPU("mcpu", 43 cl::desc("Target a specific cpu type (-mcpu=help for details)"), 44 cl::value_desc("cpu-name"), cl::init("")); 45 46 // This is useful for variable-length instruction sets. 47 static cl::opt<unsigned> InsnLimit( 48 "insn-limit", 49 cl::desc("Limit the number of instructions to process (0 for no limit)"), 50 cl::value_desc("count"), cl::init(0)); 51 52 static cl::list<std::string> 53 MAttrs("mattr", cl::CommaSeparated, 54 cl::desc("Target specific attributes (-mattr=help for details)"), 55 cl::value_desc("a1,+a2,-a3,...")); 56 // The feature string derived from -mattr's values. 57 std::string FeaturesStr; 58 59 static cl::list<std::string> 60 FuzzerArgs("fuzzer-args", cl::Positional, 61 cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore, 62 cl::PositionalEatsArgs); 63 static std::vector<char *> ModifiedArgv; 64 65 int DisassembleOneInput(const uint8_t *Data, size_t Size) { 66 char AssemblyText[AssemblyTextBufSize]; 67 68 std::vector<uint8_t> DataCopy(Data, Data + Size); 69 70 LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures( 71 TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0, 72 nullptr, nullptr); 73 assert(Ctx); 74 uint8_t *p = DataCopy.data(); 75 unsigned Consumed; 76 unsigned InstructionsProcessed = 0; 77 do { 78 Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText, 79 AssemblyTextBufSize); 80 Size -= Consumed; 81 p += Consumed; 82 83 InstructionsProcessed ++; 84 if (InsnLimit != 0 && InstructionsProcessed < InsnLimit) 85 break; 86 } while (Consumed != 0); 87 LLVMDisasmDispose(Ctx); 88 return 0; 89 } 90 91 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { 92 if (Action == AC_Assemble) 93 errs() << "error: -assemble is not implemented\n"; 94 else if (Action == AC_Disassemble) 95 return DisassembleOneInput(Data, Size); 96 97 llvm_unreachable("Unknown action"); 98 return 0; 99 } 100 101 int LLVMFuzzerInitialize(int *argc, char ***argv) { 102 // The command line is unusual compared to other fuzzers due to the need to 103 // specify the target. Options like -triple, -mcpu, and -mattr work like 104 // their counterparts in llvm-mc, while -fuzzer-args collects options for the 105 // fuzzer itself. 106 // 107 // Examples: 108 // 109 // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to 110 // 4-bytes each and use the contents of ./corpus as the test corpus: 111 // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \ 112 // -fuzzer-args -max_len=4 -runs=100000 ./corpus 113 // 114 // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA 115 // feature enabled using up to 64-byte inputs: 116 // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \ 117 // -disassemble -fuzzer-args ./corpus 118 // 119 // If your aim is to find instructions that are not tested, then it is 120 // advisable to constrain the maximum input size to a single instruction 121 // using -max_len as in the first example. This results in a test corpus of 122 // individual instructions that test unique paths. Without this constraint, 123 // there will be considerable redundancy in the corpus. 124 125 char **OriginalArgv = *argv; 126 127 LLVMInitializeAllTargetInfos(); 128 LLVMInitializeAllTargetMCs(); 129 LLVMInitializeAllDisassemblers(); 130 131 cl::ParseCommandLineOptions(*argc, OriginalArgv); 132 133 // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that 134 // the driver can parse its arguments. 135 // 136 // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. 137 // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a 138 // non-const buffer to avoid the need to clean up when the fuzzer terminates. 139 ModifiedArgv.push_back(OriginalArgv[0]); 140 for (const auto &FuzzerArg : FuzzerArgs) { 141 for (int i = 1; i < *argc; ++i) { 142 if (FuzzerArg == OriginalArgv[i]) 143 ModifiedArgv.push_back(OriginalArgv[i]); 144 } 145 } 146 *argc = ModifiedArgv.size(); 147 *argv = ModifiedArgv.data(); 148 149 // Package up features to be passed to target/subtarget 150 // We have to pass it via a global since the callback doesn't 151 // permit any user data. 152 if (MAttrs.size()) { 153 SubtargetFeatures Features; 154 for (unsigned i = 0; i != MAttrs.size(); ++i) 155 Features.AddFeature(MAttrs[i]); 156 FeaturesStr = Features.getString(); 157 } 158 159 return 0; 160 } 161