Home | History | Annotate | Download | only in lib
      1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
      2 //
      3 //                      The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "FileAnalysis.h"
     11 #include "GraphBuilder.h"
     12 
     13 #include "llvm/BinaryFormat/ELF.h"
     14 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
     15 #include "llvm/MC/MCAsmInfo.h"
     16 #include "llvm/MC/MCContext.h"
     17 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
     18 #include "llvm/MC/MCInst.h"
     19 #include "llvm/MC/MCInstPrinter.h"
     20 #include "llvm/MC/MCInstrAnalysis.h"
     21 #include "llvm/MC/MCInstrDesc.h"
     22 #include "llvm/MC/MCInstrInfo.h"
     23 #include "llvm/MC/MCObjectFileInfo.h"
     24 #include "llvm/MC/MCRegisterInfo.h"
     25 #include "llvm/MC/MCSubtargetInfo.h"
     26 #include "llvm/Object/Binary.h"
     27 #include "llvm/Object/COFF.h"
     28 #include "llvm/Object/ELFObjectFile.h"
     29 #include "llvm/Object/ObjectFile.h"
     30 #include "llvm/Support/Casting.h"
     31 #include "llvm/Support/CommandLine.h"
     32 #include "llvm/Support/Error.h"
     33 #include "llvm/Support/MemoryBuffer.h"
     34 #include "llvm/Support/TargetRegistry.h"
     35 #include "llvm/Support/TargetSelect.h"
     36 #include "llvm/Support/raw_ostream.h"
     37 
     38 
     39 using Instr = llvm::cfi_verify::FileAnalysis::Instr;
     40 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
     41 
     42 namespace llvm {
     43 namespace cfi_verify {
     44 
     45 bool IgnoreDWARFFlag;
     46 
     47 static cl::opt<bool, true> IgnoreDWARFArg(
     48     "ignore-dwarf",
     49     cl::desc(
     50         "Ignore all DWARF data. This relaxes the requirements for all "
     51         "statically linked libraries to have been compiled with '-g', but "
     52         "will result in false positives for 'CFI unprotected' instructions."),
     53     cl::location(IgnoreDWARFFlag), cl::init(false));
     54 
     55 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
     56   switch (Status) {
     57   case CFIProtectionStatus::PROTECTED:
     58     return "PROTECTED";
     59   case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
     60     return "FAIL_NOT_INDIRECT_CF";
     61   case CFIProtectionStatus::FAIL_ORPHANS:
     62     return "FAIL_ORPHANS";
     63   case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
     64     return "FAIL_BAD_CONDITIONAL_BRANCH";
     65   case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
     66     return "FAIL_REGISTER_CLOBBERED";
     67   case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
     68     return "FAIL_INVALID_INSTRUCTION";
     69   }
     70   llvm_unreachable("Attempted to stringify an unknown enum value.");
     71 }
     72 
     73 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
     74   // Open the filename provided.
     75   Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
     76       object::createBinary(Filename);
     77   if (!BinaryOrErr)
     78     return BinaryOrErr.takeError();
     79 
     80   // Construct the object and allow it to take ownership of the binary.
     81   object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
     82   FileAnalysis Analysis(std::move(Binary));
     83 
     84   Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
     85   if (!Analysis.Object)
     86     return make_error<UnsupportedDisassembly>("Failed to cast object");
     87 
     88   switch (Analysis.Object->getArch()) {
     89     case Triple::x86:
     90     case Triple::x86_64:
     91     case Triple::aarch64:
     92     case Triple::aarch64_be:
     93       break;
     94     default:
     95       return make_error<UnsupportedDisassembly>("Unsupported architecture.");
     96   }
     97 
     98   Analysis.ObjectTriple = Analysis.Object->makeTriple();
     99   Analysis.Features = Analysis.Object->getFeatures();
    100 
    101   // Init the rest of the object.
    102   if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
    103     return std::move(InitResponse);
    104 
    105   if (auto SectionParseResponse = Analysis.parseCodeSections())
    106     return std::move(SectionParseResponse);
    107 
    108   return std::move(Analysis);
    109 }
    110 
    111 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
    112     : Binary(std::move(Binary)) {}
    113 
    114 FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
    115                            const SubtargetFeatures &Features)
    116     : ObjectTriple(ObjectTriple), Features(Features) {}
    117 
    118 const Instr *
    119 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
    120   std::map<uint64_t, Instr>::const_iterator KV =
    121       Instructions.find(InstrMeta.VMAddress);
    122   if (KV == Instructions.end() || KV == Instructions.begin())
    123     return nullptr;
    124 
    125   if (!(--KV)->second.Valid)
    126     return nullptr;
    127 
    128   return &KV->second;
    129 }
    130 
    131 const Instr *
    132 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
    133   std::map<uint64_t, Instr>::const_iterator KV =
    134       Instructions.find(InstrMeta.VMAddress);
    135   if (KV == Instructions.end() || ++KV == Instructions.end())
    136     return nullptr;
    137 
    138   if (!KV->second.Valid)
    139     return nullptr;
    140 
    141   return &KV->second;
    142 }
    143 
    144 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
    145   for (const auto &Operand : InstrMeta.Instruction) {
    146     if (Operand.isReg())
    147       return true;
    148   }
    149   return false;
    150 }
    151 
    152 const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
    153   const auto &InstrKV = Instructions.find(Address);
    154   if (InstrKV == Instructions.end())
    155     return nullptr;
    156 
    157   return &InstrKV->second;
    158 }
    159 
    160 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
    161   const auto &InstrKV = Instructions.find(Address);
    162   assert(InstrKV != Instructions.end() && "Address doesn't exist.");
    163   return InstrKV->second;
    164 }
    165 
    166 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
    167   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
    168   return InstrDesc.isTrap();
    169 }
    170 
    171 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
    172   if (!InstrMeta.Valid)
    173     return false;
    174 
    175   if (isCFITrap(InstrMeta))
    176     return false;
    177 
    178   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
    179   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
    180     return InstrDesc.isConditionalBranch();
    181 
    182   return true;
    183 }
    184 
    185 const Instr *
    186 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
    187   if (!InstrMeta.Valid)
    188     return nullptr;
    189 
    190   if (isCFITrap(InstrMeta))
    191     return nullptr;
    192 
    193   const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
    194   const Instr *NextMetaPtr;
    195   if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
    196     if (InstrDesc.isConditionalBranch())
    197       return nullptr;
    198 
    199     uint64_t Target;
    200     if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
    201                              InstrMeta.InstructionSize, Target))
    202       return nullptr;
    203 
    204     NextMetaPtr = getInstruction(Target);
    205   } else {
    206     NextMetaPtr =
    207         getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
    208   }
    209 
    210   if (!NextMetaPtr || !NextMetaPtr->Valid)
    211     return nullptr;
    212 
    213   return NextMetaPtr;
    214 }
    215 
    216 std::set<const Instr *>
    217 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
    218   std::set<const Instr *> CFCrossReferences;
    219   const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
    220 
    221   if (PrevInstruction && canFallThrough(*PrevInstruction))
    222     CFCrossReferences.insert(PrevInstruction);
    223 
    224   const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
    225   if (TargetRefsKV == StaticBranchTargetings.end())
    226     return CFCrossReferences;
    227 
    228   for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
    229     const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
    230     if (SourceInstrKV == Instructions.end()) {
    231       errs() << "Failed to find source instruction at address "
    232              << format_hex(SourceInstrAddress, 2)
    233              << " for the cross-reference to instruction at address "
    234              << format_hex(InstrMeta.VMAddress, 2) << ".\n";
    235       continue;
    236     }
    237 
    238     CFCrossReferences.insert(&SourceInstrKV->second);
    239   }
    240 
    241   return CFCrossReferences;
    242 }
    243 
    244 const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const {
    245   return IndirectInstructions;
    246 }
    247 
    248 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
    249   return RegisterInfo.get();
    250 }
    251 
    252 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
    253 
    254 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
    255   return MIA.get();
    256 }
    257 
    258 Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) {
    259   assert(Symbolizer != nullptr && "Symbolizer is invalid.");
    260   return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
    261 }
    262 
    263 CFIProtectionStatus
    264 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
    265   const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
    266   if (!InstrMetaPtr)
    267     return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
    268 
    269   const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
    270   if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
    271     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
    272 
    273   if (!usesRegisterOperand(*InstrMetaPtr))
    274     return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
    275 
    276   if (!Graph.OrphanedNodes.empty())
    277     return CFIProtectionStatus::FAIL_ORPHANS;
    278 
    279   for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
    280     if (!BranchNode.CFIProtection)
    281       return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
    282   }
    283 
    284   if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
    285     return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
    286 
    287   return CFIProtectionStatus::PROTECTED;
    288 }
    289 
    290 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
    291   assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");
    292 
    293   // Get the set of registers we must check to ensure they're not clobbered.
    294   const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
    295   DenseSet<unsigned> RegisterNumbers;
    296   for (const auto &Operand : IndirectCF.Instruction) {
    297     if (Operand.isReg())
    298       RegisterNumbers.insert(Operand.getReg());
    299   }
    300   assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");
    301 
    302   // Now check all branches to indirect CFs and ensure no clobbering happens.
    303   for (const auto &Branch : Graph.ConditionalBranchNodes) {
    304     uint64_t Node;
    305     if (Branch.IndirectCFIsOnTargetPath)
    306       Node = Branch.Target;
    307     else
    308       Node = Branch.Fallthrough;
    309 
    310     // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
    311     // we allow them one load.
    312     bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad();
    313 
    314     // We walk backwards from the indirect CF.  It is the last node returned by
    315     // Graph.flattenAddress, so we skip it since we already handled it.
    316     DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers;
    317     std::vector<uint64_t> Nodes = Graph.flattenAddress(Node);
    318     for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) {
    319       Node = *I;
    320       const Instr &NodeInstr = getInstructionOrDie(Node);
    321       const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());
    322 
    323       for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end();
    324            RI != RE; ++RI) {
    325         unsigned RegNum = *RI;
    326         if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
    327                                       *RegisterInfo)) {
    328           if (!canLoad || !InstrDesc.mayLoad())
    329             return Node;
    330           canLoad = false;
    331           CurRegisterNumbers.erase(RI);
    332           // Add the registers this load reads to those we check for clobbers.
    333           for (unsigned i = InstrDesc.getNumDefs(),
    334                         e = InstrDesc.getNumOperands(); i != e; i++) {
    335             const auto Operand = NodeInstr.Instruction.getOperand(i);
    336             if (Operand.isReg())
    337               CurRegisterNumbers.insert(Operand.getReg());
    338           }
    339           break;
    340         }
    341       }
    342     }
    343   }
    344 
    345   return Graph.BaseAddress;
    346 }
    347 
    348 void FileAnalysis::printInstruction(const Instr &InstrMeta,
    349                                     raw_ostream &OS) const {
    350   Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
    351 }
    352 
    353 Error FileAnalysis::initialiseDisassemblyMembers() {
    354   std::string TripleName = ObjectTriple.getTriple();
    355   ArchName = "";
    356   MCPU = "";
    357   std::string ErrorString;
    358 
    359   Symbolizer.reset(new LLVMSymbolizer());
    360 
    361   ObjectTarget =
    362       TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
    363   if (!ObjectTarget)
    364     return make_error<UnsupportedDisassembly>(
    365         (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
    366          "\", failed with error: " + ErrorString)
    367             .str());
    368 
    369   RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
    370   if (!RegisterInfo)
    371     return make_error<UnsupportedDisassembly>(
    372         "Failed to initialise RegisterInfo.");
    373 
    374   AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
    375   if (!AsmInfo)
    376     return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
    377 
    378   SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
    379       TripleName, MCPU, Features.getString()));
    380   if (!SubtargetInfo)
    381     return make_error<UnsupportedDisassembly>(
    382         "Failed to initialise SubtargetInfo.");
    383 
    384   MII.reset(ObjectTarget->createMCInstrInfo());
    385   if (!MII)
    386     return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
    387 
    388   Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
    389 
    390   Disassembler.reset(
    391       ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
    392 
    393   if (!Disassembler)
    394     return make_error<UnsupportedDisassembly>(
    395         "No disassembler available for target");
    396 
    397   MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
    398 
    399   Printer.reset(ObjectTarget->createMCInstPrinter(
    400       ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
    401       *RegisterInfo));
    402 
    403   return Error::success();
    404 }
    405 
    406 Error FileAnalysis::parseCodeSections() {
    407   if (!IgnoreDWARFFlag) {
    408     std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
    409     if (!DWARF)
    410       return make_error<StringError>("Could not create DWARF information.",
    411                                      inconvertibleErrorCode());
    412 
    413     bool LineInfoValid = false;
    414 
    415     for (auto &Unit : DWARF->compile_units()) {
    416       const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
    417       if (LineTable && !LineTable->Rows.empty()) {
    418         LineInfoValid = true;
    419         break;
    420       }
    421     }
    422 
    423     if (!LineInfoValid)
    424       return make_error<StringError>(
    425           "DWARF line information missing. Did you compile with '-g'?",
    426           inconvertibleErrorCode());
    427   }
    428 
    429   for (const object::SectionRef &Section : Object->sections()) {
    430     // Ensure only executable sections get analysed.
    431     if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
    432       continue;
    433 
    434     StringRef SectionContents;
    435     if (Section.getContents(SectionContents))
    436       return make_error<StringError>("Failed to retrieve section contents",
    437                                      inconvertibleErrorCode());
    438 
    439     ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(),
    440                                    Section.getSize());
    441     parseSectionContents(SectionBytes, Section.getAddress());
    442   }
    443   return Error::success();
    444 }
    445 
    446 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
    447                                         uint64_t SectionAddress) {
    448   assert(Symbolizer && "Symbolizer is uninitialised.");
    449   MCInst Instruction;
    450   Instr InstrMeta;
    451   uint64_t InstructionSize;
    452 
    453   for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
    454     bool ValidInstruction =
    455         Disassembler->getInstruction(Instruction, InstructionSize,
    456                                      SectionBytes.drop_front(Byte), 0, nulls(),
    457                                      outs()) == MCDisassembler::Success;
    458 
    459     Byte += InstructionSize;
    460 
    461     uint64_t VMAddress = SectionAddress + Byte - InstructionSize;
    462     InstrMeta.Instruction = Instruction;
    463     InstrMeta.VMAddress = VMAddress;
    464     InstrMeta.InstructionSize = InstructionSize;
    465     InstrMeta.Valid = ValidInstruction;
    466 
    467     addInstruction(InstrMeta);
    468 
    469     if (!ValidInstruction)
    470       continue;
    471 
    472     // Skip additional parsing for instructions that do not affect the control
    473     // flow.
    474     const auto &InstrDesc = MII->get(Instruction.getOpcode());
    475     if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
    476       continue;
    477 
    478     uint64_t Target;
    479     if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
    480       // If the target can be evaluated, it's not indirect.
    481       StaticBranchTargetings[Target].push_back(VMAddress);
    482       continue;
    483     }
    484 
    485     if (!usesRegisterOperand(InstrMeta))
    486       continue;
    487 
    488     if (InstrDesc.isReturn())
    489       continue;
    490 
    491     // Check if this instruction exists in the range of the DWARF metadata.
    492     if (!IgnoreDWARFFlag) {
    493       auto LineInfo =
    494           Symbolizer->symbolizeCode(Object->getFileName(), VMAddress);
    495       if (!LineInfo) {
    496         handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
    497           errs() << "Symbolizer failed to get line: " << E.message() << "\n";
    498         });
    499         continue;
    500       }
    501 
    502       if (LineInfo->FileName == "<invalid>")
    503         continue;
    504     }
    505 
    506     IndirectInstructions.insert(VMAddress);
    507   }
    508 }
    509 
    510 void FileAnalysis::addInstruction(const Instr &Instruction) {
    511   const auto &KV =
    512       Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
    513   if (!KV.second) {
    514     errs() << "Failed to add instruction at address "
    515            << format_hex(Instruction.VMAddress, 2)
    516            << ": Instruction at this address already exists.\n";
    517     exit(EXIT_FAILURE);
    518   }
    519 }
    520 
    521 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
    522 
    523 char UnsupportedDisassembly::ID;
    524 void UnsupportedDisassembly::log(raw_ostream &OS) const {
    525   OS << "Could not initialise disassembler: " << Text;
    526 }
    527 
    528 std::error_code UnsupportedDisassembly::convertToErrorCode() const {
    529   return std::error_code();
    530 }
    531 
    532 } // namespace cfi_verify
    533 } // namespace llvm
    534