Home | History | Annotate | Download | only in Disassembler
      1 //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "AArch64ExternalSymbolizer.h"
     11 #include "AArch64Subtarget.h"
     12 #include "MCTargetDesc/AArch64AddressingModes.h"
     13 #include "Utils/AArch64BaseInfo.h"
     14 #include "llvm/MC/MCContext.h"
     15 #include "llvm/MC/MCExpr.h"
     16 #include "llvm/MC/MCInst.h"
     17 #include "llvm/Support/Format.h"
     18 #include "llvm/Support/raw_ostream.h"
     19 
     20 using namespace llvm;
     21 
     22 #define DEBUG_TYPE "aarch64-disassembler"
     23 
     24 static MCSymbolRefExpr::VariantKind
     25 getVariant(uint64_t LLVMDisassembler_VariantKind) {
     26   switch (LLVMDisassembler_VariantKind) {
     27   case LLVMDisassembler_VariantKind_None:
     28     return MCSymbolRefExpr::VK_None;
     29   case LLVMDisassembler_VariantKind_ARM64_PAGE:
     30     return MCSymbolRefExpr::VK_PAGE;
     31   case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
     32     return MCSymbolRefExpr::VK_PAGEOFF;
     33   case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
     34     return MCSymbolRefExpr::VK_GOTPAGE;
     35   case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
     36     return MCSymbolRefExpr::VK_GOTPAGEOFF;
     37   case LLVMDisassembler_VariantKind_ARM64_TLVP:
     38   case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
     39   default:
     40     llvm_unreachable("bad LLVMDisassembler_VariantKind");
     41   }
     42 }
     43 
     44 /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
     45 /// operand in place of the immediate Value in the MCInst.  The immediate
     46 /// Value has not had any PC adjustment made by the caller. If the instruction
     47 /// is a branch that adds the PC to the immediate Value then isBranch is
     48 /// Success, else Fail. If GetOpInfo is non-null, then it is called to get any
     49 /// symbolic information at the Address for this instrution.  If that returns
     50 /// non-zero then the symbolic information it returns is used to create an
     51 /// MCExpr and that is added as an operand to the MCInst.  If GetOpInfo()
     52 /// returns zero and isBranch is Success then a symbol look up for
     53 /// Address + Value is done and if a symbol is found an MCExpr is created with
     54 /// that, else an MCExpr with Address + Value is created.  If GetOpInfo()
     55 /// returns zero and isBranch is Fail then the Opcode of the MCInst is
     56 /// tested and for ADRP an other instructions that help to load of pointers
     57 /// a symbol look up is done to see it is returns a specific reference type
     58 /// to add to the comment stream.  This function returns Success if it adds
     59 /// an operand to the MCInst and Fail otherwise.
     60 bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
     61     MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
     62     bool IsBranch, uint64_t Offset, uint64_t InstSize) {
     63   // FIXME: This method shares a lot of code with
     64   //        MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
     65   //        refactor the MCExternalSymbolizer interface to allow more of this
     66   //        implementation to be shared.
     67   //
     68   struct LLVMOpInfo1 SymbolicOp;
     69   memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
     70   SymbolicOp.Value = Value;
     71   uint64_t ReferenceType;
     72   const char *ReferenceName;
     73   if (!GetOpInfo ||
     74       !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
     75     if (IsBranch) {
     76       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
     77       const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
     78                                       Address, &ReferenceName);
     79       if (Name) {
     80         SymbolicOp.AddSymbol.Name = Name;
     81         SymbolicOp.AddSymbol.Present = true;
     82         SymbolicOp.Value = 0;
     83       } else {
     84         SymbolicOp.Value = Address + Value;
     85       }
     86       if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
     87         CommentStream << "symbol stub for: " << ReferenceName;
     88       else if (ReferenceType ==
     89                LLVMDisassembler_ReferenceType_Out_Objc_Message)
     90         CommentStream << "Objc message: " << ReferenceName;
     91     } else if (MI.getOpcode() == AArch64::ADRP) {
     92         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
     93         // otool expects the fully encoded ADRP instruction to be passed in as
     94         // the value here, so reconstruct it:
     95         const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
     96         uint32_t EncodedInst = 0x90000000;
     97         EncodedInst |= (Value & 0x3) << 29; // immlo
     98         EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
     99         EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
    100         SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
    101                      &ReferenceName);
    102         CommentStream << format("0x%llx",
    103                                 0xfffffffffffff000LL & (Address + Value));
    104     } else if (MI.getOpcode() == AArch64::ADDXri ||
    105                MI.getOpcode() == AArch64::LDRXui ||
    106                MI.getOpcode() == AArch64::LDRXl ||
    107                MI.getOpcode() == AArch64::ADR) {
    108       if (MI.getOpcode() == AArch64::ADDXri)
    109         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
    110       else if (MI.getOpcode() == AArch64::LDRXui)
    111         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
    112       if (MI.getOpcode() == AArch64::LDRXl) {
    113         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
    114         SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
    115                      &ReferenceName);
    116       } else if (MI.getOpcode() == AArch64::ADR) {
    117         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
    118         SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
    119                             &ReferenceName);
    120       } else {
    121         const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
    122         // otool expects the fully encoded ADD/LDR instruction to be passed in
    123         // as the value here, so reconstruct it:
    124         unsigned EncodedInst =
    125           MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
    126         EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
    127         EncodedInst |=
    128           MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
    129         EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
    130 
    131         SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
    132                      &ReferenceName);
    133       }
    134       if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
    135         CommentStream << "literal pool symbol address: " << ReferenceName;
    136       else if (ReferenceType ==
    137                LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
    138         CommentStream << "literal pool for: \"";
    139         CommentStream.write_escaped(ReferenceName);
    140         CommentStream << "\"";
    141       } else if (ReferenceType ==
    142                LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
    143         CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
    144       else if (ReferenceType ==
    145                LLVMDisassembler_ReferenceType_Out_Objc_Message)
    146         CommentStream << "Objc message: " << ReferenceName;
    147       else if (ReferenceType ==
    148                LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
    149         CommentStream << "Objc message ref: " << ReferenceName;
    150       else if (ReferenceType ==
    151                LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
    152         CommentStream << "Objc selector ref: " << ReferenceName;
    153       else if (ReferenceType ==
    154                LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
    155         CommentStream << "Objc class ref: " << ReferenceName;
    156       // For these instructions, the SymbolLookUp() above is just to get the
    157       // ReferenceType and ReferenceName.  We want to make sure not to
    158       // fall through so we don't build an MCExpr to leave the disassembly
    159       // of the immediate values of these instructions to the InstPrinter.
    160       return false;
    161     } else {
    162       return false;
    163     }
    164   }
    165 
    166   const MCExpr *Add = nullptr;
    167   if (SymbolicOp.AddSymbol.Present) {
    168     if (SymbolicOp.AddSymbol.Name) {
    169       StringRef Name(SymbolicOp.AddSymbol.Name);
    170       MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
    171       MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
    172       if (Variant != MCSymbolRefExpr::VK_None)
    173         Add = MCSymbolRefExpr::create(Sym, Variant, Ctx);
    174       else
    175         Add = MCSymbolRefExpr::create(Sym, Ctx);
    176     } else {
    177       Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx);
    178     }
    179   }
    180 
    181   const MCExpr *Sub = nullptr;
    182   if (SymbolicOp.SubtractSymbol.Present) {
    183     if (SymbolicOp.SubtractSymbol.Name) {
    184       StringRef Name(SymbolicOp.SubtractSymbol.Name);
    185       MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
    186       Sub = MCSymbolRefExpr::create(Sym, Ctx);
    187     } else {
    188       Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx);
    189     }
    190   }
    191 
    192   const MCExpr *Off = nullptr;
    193   if (SymbolicOp.Value != 0)
    194     Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
    195 
    196   const MCExpr *Expr;
    197   if (Sub) {
    198     const MCExpr *LHS;
    199     if (Add)
    200       LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
    201     else
    202       LHS = MCUnaryExpr::createMinus(Sub, Ctx);
    203     if (Off)
    204       Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
    205     else
    206       Expr = LHS;
    207   } else if (Add) {
    208     if (Off)
    209       Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
    210     else
    211       Expr = Add;
    212   } else {
    213     if (Off)
    214       Expr = Off;
    215     else
    216       Expr = MCConstantExpr::create(0, Ctx);
    217   }
    218 
    219   MI.addOperand(MCOperand::createExpr(Expr));
    220 
    221   return true;
    222 }
    223