1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "FileAnalysis.h" 11 #include "GraphBuilder.h" 12 13 #include "llvm/BinaryFormat/ELF.h" 14 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 15 #include "llvm/MC/MCAsmInfo.h" 16 #include "llvm/MC/MCContext.h" 17 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 18 #include "llvm/MC/MCInst.h" 19 #include "llvm/MC/MCInstPrinter.h" 20 #include "llvm/MC/MCInstrAnalysis.h" 21 #include "llvm/MC/MCInstrDesc.h" 22 #include "llvm/MC/MCInstrInfo.h" 23 #include "llvm/MC/MCObjectFileInfo.h" 24 #include "llvm/MC/MCRegisterInfo.h" 25 #include "llvm/MC/MCSubtargetInfo.h" 26 #include "llvm/Object/Binary.h" 27 #include "llvm/Object/COFF.h" 28 #include "llvm/Object/ELFObjectFile.h" 29 #include "llvm/Object/ObjectFile.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/CommandLine.h" 32 #include "llvm/Support/Error.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Support/TargetRegistry.h" 35 #include "llvm/Support/TargetSelect.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 39 using Instr = llvm::cfi_verify::FileAnalysis::Instr; 40 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer; 41 42 namespace llvm { 43 namespace cfi_verify { 44 45 bool IgnoreDWARFFlag; 46 47 static cl::opt<bool, true> IgnoreDWARFArg( 48 "ignore-dwarf", 49 cl::desc( 50 "Ignore all DWARF data. This relaxes the requirements for all " 51 "statically linked libraries to have been compiled with '-g', but " 52 "will result in false positives for 'CFI unprotected' instructions."), 53 cl::location(IgnoreDWARFFlag), cl::init(false)); 54 55 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) { 56 switch (Status) { 57 case CFIProtectionStatus::PROTECTED: 58 return "PROTECTED"; 59 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF: 60 return "FAIL_NOT_INDIRECT_CF"; 61 case CFIProtectionStatus::FAIL_ORPHANS: 62 return "FAIL_ORPHANS"; 63 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH: 64 return "FAIL_BAD_CONDITIONAL_BRANCH"; 65 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED: 66 return "FAIL_REGISTER_CLOBBERED"; 67 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION: 68 return "FAIL_INVALID_INSTRUCTION"; 69 } 70 llvm_unreachable("Attempted to stringify an unknown enum value."); 71 } 72 73 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) { 74 // Open the filename provided. 75 Expected<object::OwningBinary<object::Binary>> BinaryOrErr = 76 object::createBinary(Filename); 77 if (!BinaryOrErr) 78 return BinaryOrErr.takeError(); 79 80 // Construct the object and allow it to take ownership of the binary. 81 object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get()); 82 FileAnalysis Analysis(std::move(Binary)); 83 84 Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary()); 85 if (!Analysis.Object) 86 return make_error<UnsupportedDisassembly>("Failed to cast object"); 87 88 switch (Analysis.Object->getArch()) { 89 case Triple::x86: 90 case Triple::x86_64: 91 case Triple::aarch64: 92 case Triple::aarch64_be: 93 break; 94 default: 95 return make_error<UnsupportedDisassembly>("Unsupported architecture."); 96 } 97 98 Analysis.ObjectTriple = Analysis.Object->makeTriple(); 99 Analysis.Features = Analysis.Object->getFeatures(); 100 101 // Init the rest of the object. 102 if (auto InitResponse = Analysis.initialiseDisassemblyMembers()) 103 return std::move(InitResponse); 104 105 if (auto SectionParseResponse = Analysis.parseCodeSections()) 106 return std::move(SectionParseResponse); 107 108 return std::move(Analysis); 109 } 110 111 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary) 112 : Binary(std::move(Binary)) {} 113 114 FileAnalysis::FileAnalysis(const Triple &ObjectTriple, 115 const SubtargetFeatures &Features) 116 : ObjectTriple(ObjectTriple), Features(Features) {} 117 118 const Instr * 119 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const { 120 std::map<uint64_t, Instr>::const_iterator KV = 121 Instructions.find(InstrMeta.VMAddress); 122 if (KV == Instructions.end() || KV == Instructions.begin()) 123 return nullptr; 124 125 if (!(--KV)->second.Valid) 126 return nullptr; 127 128 return &KV->second; 129 } 130 131 const Instr * 132 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const { 133 std::map<uint64_t, Instr>::const_iterator KV = 134 Instructions.find(InstrMeta.VMAddress); 135 if (KV == Instructions.end() || ++KV == Instructions.end()) 136 return nullptr; 137 138 if (!KV->second.Valid) 139 return nullptr; 140 141 return &KV->second; 142 } 143 144 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const { 145 for (const auto &Operand : InstrMeta.Instruction) { 146 if (Operand.isReg()) 147 return true; 148 } 149 return false; 150 } 151 152 const Instr *FileAnalysis::getInstruction(uint64_t Address) const { 153 const auto &InstrKV = Instructions.find(Address); 154 if (InstrKV == Instructions.end()) 155 return nullptr; 156 157 return &InstrKV->second; 158 } 159 160 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const { 161 const auto &InstrKV = Instructions.find(Address); 162 assert(InstrKV != Instructions.end() && "Address doesn't exist."); 163 return InstrKV->second; 164 } 165 166 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const { 167 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 168 return InstrDesc.isTrap(); 169 } 170 171 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const { 172 if (!InstrMeta.Valid) 173 return false; 174 175 if (isCFITrap(InstrMeta)) 176 return false; 177 178 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 179 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) 180 return InstrDesc.isConditionalBranch(); 181 182 return true; 183 } 184 185 const Instr * 186 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { 187 if (!InstrMeta.Valid) 188 return nullptr; 189 190 if (isCFITrap(InstrMeta)) 191 return nullptr; 192 193 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 194 const Instr *NextMetaPtr; 195 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) { 196 if (InstrDesc.isConditionalBranch()) 197 return nullptr; 198 199 uint64_t Target; 200 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 201 InstrMeta.InstructionSize, Target)) 202 return nullptr; 203 204 NextMetaPtr = getInstruction(Target); 205 } else { 206 NextMetaPtr = 207 getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize); 208 } 209 210 if (!NextMetaPtr || !NextMetaPtr->Valid) 211 return nullptr; 212 213 return NextMetaPtr; 214 } 215 216 std::set<const Instr *> 217 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const { 218 std::set<const Instr *> CFCrossReferences; 219 const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta); 220 221 if (PrevInstruction && canFallThrough(*PrevInstruction)) 222 CFCrossReferences.insert(PrevInstruction); 223 224 const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress); 225 if (TargetRefsKV == StaticBranchTargetings.end()) 226 return CFCrossReferences; 227 228 for (uint64_t SourceInstrAddress : TargetRefsKV->second) { 229 const auto &SourceInstrKV = Instructions.find(SourceInstrAddress); 230 if (SourceInstrKV == Instructions.end()) { 231 errs() << "Failed to find source instruction at address " 232 << format_hex(SourceInstrAddress, 2) 233 << " for the cross-reference to instruction at address " 234 << format_hex(InstrMeta.VMAddress, 2) << ".\n"; 235 continue; 236 } 237 238 CFCrossReferences.insert(&SourceInstrKV->second); 239 } 240 241 return CFCrossReferences; 242 } 243 244 const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const { 245 return IndirectInstructions; 246 } 247 248 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const { 249 return RegisterInfo.get(); 250 } 251 252 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); } 253 254 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const { 255 return MIA.get(); 256 } 257 258 Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) { 259 assert(Symbolizer != nullptr && "Symbolizer is invalid."); 260 return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address); 261 } 262 263 CFIProtectionStatus 264 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const { 265 const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress); 266 if (!InstrMetaPtr) 267 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION; 268 269 const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode()); 270 if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo)) 271 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 272 273 if (!usesRegisterOperand(*InstrMetaPtr)) 274 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 275 276 if (!Graph.OrphanedNodes.empty()) 277 return CFIProtectionStatus::FAIL_ORPHANS; 278 279 for (const auto &BranchNode : Graph.ConditionalBranchNodes) { 280 if (!BranchNode.CFIProtection) 281 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH; 282 } 283 284 if (indirectCFOperandClobber(Graph) != Graph.BaseAddress) 285 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED; 286 287 return CFIProtectionStatus::PROTECTED; 288 } 289 290 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const { 291 assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty."); 292 293 // Get the set of registers we must check to ensure they're not clobbered. 294 const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress); 295 DenseSet<unsigned> RegisterNumbers; 296 for (const auto &Operand : IndirectCF.Instruction) { 297 if (Operand.isReg()) 298 RegisterNumbers.insert(Operand.getReg()); 299 } 300 assert(RegisterNumbers.size() && "Zero register operands on indirect CF."); 301 302 // Now check all branches to indirect CFs and ensure no clobbering happens. 303 for (const auto &Branch : Graph.ConditionalBranchNodes) { 304 uint64_t Node; 305 if (Branch.IndirectCFIsOnTargetPath) 306 Node = Branch.Target; 307 else 308 Node = Branch.Fallthrough; 309 310 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so 311 // we allow them one load. 312 bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad(); 313 314 // We walk backwards from the indirect CF. It is the last node returned by 315 // Graph.flattenAddress, so we skip it since we already handled it. 316 DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers; 317 std::vector<uint64_t> Nodes = Graph.flattenAddress(Node); 318 for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) { 319 Node = *I; 320 const Instr &NodeInstr = getInstructionOrDie(Node); 321 const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode()); 322 323 for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end(); 324 RI != RE; ++RI) { 325 unsigned RegNum = *RI; 326 if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum, 327 *RegisterInfo)) { 328 if (!canLoad || !InstrDesc.mayLoad()) 329 return Node; 330 canLoad = false; 331 CurRegisterNumbers.erase(RI); 332 // Add the registers this load reads to those we check for clobbers. 333 for (unsigned i = InstrDesc.getNumDefs(), 334 e = InstrDesc.getNumOperands(); i != e; i++) { 335 const auto Operand = NodeInstr.Instruction.getOperand(i); 336 if (Operand.isReg()) 337 CurRegisterNumbers.insert(Operand.getReg()); 338 } 339 break; 340 } 341 } 342 } 343 } 344 345 return Graph.BaseAddress; 346 } 347 348 void FileAnalysis::printInstruction(const Instr &InstrMeta, 349 raw_ostream &OS) const { 350 Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get()); 351 } 352 353 Error FileAnalysis::initialiseDisassemblyMembers() { 354 std::string TripleName = ObjectTriple.getTriple(); 355 ArchName = ""; 356 MCPU = ""; 357 std::string ErrorString; 358 359 Symbolizer.reset(new LLVMSymbolizer()); 360 361 ObjectTarget = 362 TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString); 363 if (!ObjectTarget) 364 return make_error<UnsupportedDisassembly>( 365 (Twine("Couldn't find target \"") + ObjectTriple.getTriple() + 366 "\", failed with error: " + ErrorString) 367 .str()); 368 369 RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName)); 370 if (!RegisterInfo) 371 return make_error<UnsupportedDisassembly>( 372 "Failed to initialise RegisterInfo."); 373 374 AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName)); 375 if (!AsmInfo) 376 return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo."); 377 378 SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo( 379 TripleName, MCPU, Features.getString())); 380 if (!SubtargetInfo) 381 return make_error<UnsupportedDisassembly>( 382 "Failed to initialise SubtargetInfo."); 383 384 MII.reset(ObjectTarget->createMCInstrInfo()); 385 if (!MII) 386 return make_error<UnsupportedDisassembly>("Failed to initialise MII."); 387 388 Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI)); 389 390 Disassembler.reset( 391 ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context)); 392 393 if (!Disassembler) 394 return make_error<UnsupportedDisassembly>( 395 "No disassembler available for target"); 396 397 MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get())); 398 399 Printer.reset(ObjectTarget->createMCInstPrinter( 400 ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII, 401 *RegisterInfo)); 402 403 return Error::success(); 404 } 405 406 Error FileAnalysis::parseCodeSections() { 407 if (!IgnoreDWARFFlag) { 408 std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object); 409 if (!DWARF) 410 return make_error<StringError>("Could not create DWARF information.", 411 inconvertibleErrorCode()); 412 413 bool LineInfoValid = false; 414 415 for (auto &Unit : DWARF->compile_units()) { 416 const auto &LineTable = DWARF->getLineTableForUnit(Unit.get()); 417 if (LineTable && !LineTable->Rows.empty()) { 418 LineInfoValid = true; 419 break; 420 } 421 } 422 423 if (!LineInfoValid) 424 return make_error<StringError>( 425 "DWARF line information missing. Did you compile with '-g'?", 426 inconvertibleErrorCode()); 427 } 428 429 for (const object::SectionRef &Section : Object->sections()) { 430 // Ensure only executable sections get analysed. 431 if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR)) 432 continue; 433 434 StringRef SectionContents; 435 if (Section.getContents(SectionContents)) 436 return make_error<StringError>("Failed to retrieve section contents", 437 inconvertibleErrorCode()); 438 439 ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(), 440 Section.getSize()); 441 parseSectionContents(SectionBytes, Section.getAddress()); 442 } 443 return Error::success(); 444 } 445 446 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes, 447 uint64_t SectionAddress) { 448 assert(Symbolizer && "Symbolizer is uninitialised."); 449 MCInst Instruction; 450 Instr InstrMeta; 451 uint64_t InstructionSize; 452 453 for (uint64_t Byte = 0; Byte < SectionBytes.size();) { 454 bool ValidInstruction = 455 Disassembler->getInstruction(Instruction, InstructionSize, 456 SectionBytes.drop_front(Byte), 0, nulls(), 457 outs()) == MCDisassembler::Success; 458 459 Byte += InstructionSize; 460 461 uint64_t VMAddress = SectionAddress + Byte - InstructionSize; 462 InstrMeta.Instruction = Instruction; 463 InstrMeta.VMAddress = VMAddress; 464 InstrMeta.InstructionSize = InstructionSize; 465 InstrMeta.Valid = ValidInstruction; 466 467 addInstruction(InstrMeta); 468 469 if (!ValidInstruction) 470 continue; 471 472 // Skip additional parsing for instructions that do not affect the control 473 // flow. 474 const auto &InstrDesc = MII->get(Instruction.getOpcode()); 475 if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) 476 continue; 477 478 uint64_t Target; 479 if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { 480 // If the target can be evaluated, it's not indirect. 481 StaticBranchTargetings[Target].push_back(VMAddress); 482 continue; 483 } 484 485 if (!usesRegisterOperand(InstrMeta)) 486 continue; 487 488 if (InstrDesc.isReturn()) 489 continue; 490 491 // Check if this instruction exists in the range of the DWARF metadata. 492 if (!IgnoreDWARFFlag) { 493 auto LineInfo = 494 Symbolizer->symbolizeCode(Object->getFileName(), VMAddress); 495 if (!LineInfo) { 496 handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) { 497 errs() << "Symbolizer failed to get line: " << E.message() << "\n"; 498 }); 499 continue; 500 } 501 502 if (LineInfo->FileName == "<invalid>") 503 continue; 504 } 505 506 IndirectInstructions.insert(VMAddress); 507 } 508 } 509 510 void FileAnalysis::addInstruction(const Instr &Instruction) { 511 const auto &KV = 512 Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction)); 513 if (!KV.second) { 514 errs() << "Failed to add instruction at address " 515 << format_hex(Instruction.VMAddress, 2) 516 << ": Instruction at this address already exists.\n"; 517 exit(EXIT_FAILURE); 518 } 519 } 520 521 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {} 522 523 char UnsupportedDisassembly::ID; 524 void UnsupportedDisassembly::log(raw_ostream &OS) const { 525 OS << "Could not initialise disassembler: " << Text; 526 } 527 528 std::error_code UnsupportedDisassembly::convertToErrorCode() const { 529 return std::error_code(); 530 } 531 532 } // namespace cfi_verify 533 } // namespace llvm 534