Home | History | Annotate | Download | only in Reader
      1 //===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This header defines the BitcodeReader class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef BITCODE_READER_H
     15 #define BITCODE_READER_H
     16 
     17 #include "llvm/ADT/DenseMap.h"
     18 #include "llvm/Bitcode/BitstreamReader.h"
     19 #include "llvm/Bitcode/LLVMBitCodes.h"
     20 #include "llvm/IR/Attributes.h"
     21 #include "llvm/IR/GVMaterializer.h"
     22 #include "llvm/IR/OperandTraits.h"
     23 #include "llvm/IR/Type.h"
     24 #include "llvm/IR/ValueHandle.h"
     25 #include <system_error>
     26 #include <vector>
     27 
     28 namespace llvm {
     29   class Comdat;
     30   class MemoryBuffer;
     31   class LLVMContext;
     32 
     33 //===----------------------------------------------------------------------===//
     34 //                          BitcodeReaderValueList Class
     35 //===----------------------------------------------------------------------===//
     36 
     37 class BitcodeReaderValueList {
     38   std::vector<WeakVH> ValuePtrs;
     39 
     40   /// ResolveConstants - As we resolve forward-referenced constants, we add
     41   /// information about them to this vector.  This allows us to resolve them in
     42   /// bulk instead of resolving each reference at a time.  See the code in
     43   /// ResolveConstantForwardRefs for more information about this.
     44   ///
     45   /// The key of this vector is the placeholder constant, the value is the slot
     46   /// number that holds the resolved value.
     47   typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
     48   ResolveConstantsTy ResolveConstants;
     49   LLVMContext &Context;
     50 public:
     51   BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
     52   ~BitcodeReaderValueList() {
     53     assert(ResolveConstants.empty() && "Constants not resolved?");
     54   }
     55 
     56   // vector compatibility methods
     57   unsigned size() const { return ValuePtrs.size(); }
     58   void resize(unsigned N) { ValuePtrs.resize(N); }
     59   void push_back(Value *V) {
     60     ValuePtrs.push_back(V);
     61   }
     62 
     63   void clear() {
     64     assert(ResolveConstants.empty() && "Constants not resolved?");
     65     ValuePtrs.clear();
     66   }
     67 
     68   Value *operator[](unsigned i) const {
     69     assert(i < ValuePtrs.size());
     70     return ValuePtrs[i];
     71   }
     72 
     73   Value *back() const { return ValuePtrs.back(); }
     74     void pop_back() { ValuePtrs.pop_back(); }
     75   bool empty() const { return ValuePtrs.empty(); }
     76   void shrinkTo(unsigned N) {
     77     assert(N <= size() && "Invalid shrinkTo request!");
     78     ValuePtrs.resize(N);
     79   }
     80 
     81   Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
     82   Value *getValueFwdRef(unsigned Idx, Type *Ty);
     83 
     84   void AssignValue(Value *V, unsigned Idx);
     85 
     86   /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
     87   /// resolves any forward references.
     88   void ResolveConstantForwardRefs();
     89 };
     90 
     91 
     92 //===----------------------------------------------------------------------===//
     93 //                          BitcodeReaderMDValueList Class
     94 //===----------------------------------------------------------------------===//
     95 
     96 class BitcodeReaderMDValueList {
     97   std::vector<WeakVH> MDValuePtrs;
     98 
     99   LLVMContext &Context;
    100 public:
    101   BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {}
    102 
    103   // vector compatibility methods
    104   unsigned size() const       { return MDValuePtrs.size(); }
    105   void resize(unsigned N)     { MDValuePtrs.resize(N); }
    106   void push_back(Value *V)    { MDValuePtrs.push_back(V);  }
    107   void clear()                { MDValuePtrs.clear();  }
    108   Value *back() const         { return MDValuePtrs.back(); }
    109   void pop_back()             { MDValuePtrs.pop_back(); }
    110   bool empty() const          { return MDValuePtrs.empty(); }
    111 
    112   Value *operator[](unsigned i) const {
    113     assert(i < MDValuePtrs.size());
    114     return MDValuePtrs[i];
    115   }
    116 
    117   void shrinkTo(unsigned N) {
    118     assert(N <= size() && "Invalid shrinkTo request!");
    119     MDValuePtrs.resize(N);
    120   }
    121 
    122   Value *getValueFwdRef(unsigned Idx);
    123   void AssignValue(Value *V, unsigned Idx);
    124 };
    125 
    126 class BitcodeReader : public GVMaterializer {
    127   LLVMContext &Context;
    128   Module *TheModule;
    129   std::unique_ptr<MemoryBuffer> Buffer;
    130   std::unique_ptr<BitstreamReader> StreamFile;
    131   BitstreamCursor Stream;
    132   DataStreamer *LazyStreamer;
    133   uint64_t NextUnreadBit;
    134   bool SeenValueSymbolTable;
    135 
    136   std::vector<Type*> TypeList;
    137   BitcodeReaderValueList ValueList;
    138   BitcodeReaderMDValueList MDValueList;
    139   std::vector<Comdat *> ComdatList;
    140   SmallVector<Instruction *, 64> InstructionList;
    141   SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
    142 
    143   std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
    144   std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
    145   std::vector<std::pair<Function*, unsigned> > FunctionPrefixes;
    146 
    147   SmallVector<Instruction*, 64> InstsWithTBAATag;
    148 
    149   /// MAttributes - The set of attributes by index.  Index zero in the
    150   /// file is for null, and is thus not represented here.  As such all indices
    151   /// are off by one.
    152   std::vector<AttributeSet> MAttributes;
    153 
    154   /// \brief The set of attribute groups.
    155   std::map<unsigned, AttributeSet> MAttributeGroups;
    156 
    157   /// FunctionBBs - While parsing a function body, this is a list of the basic
    158   /// blocks for the function.
    159   std::vector<BasicBlock*> FunctionBBs;
    160 
    161   // When reading the module header, this list is populated with functions that
    162   // have bodies later in the file.
    163   std::vector<Function*> FunctionsWithBodies;
    164 
    165   // When intrinsic functions are encountered which require upgrading they are
    166   // stored here with their replacement function.
    167   typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
    168   UpgradedIntrinsicMap UpgradedIntrinsics;
    169 
    170   // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
    171   DenseMap<unsigned, unsigned> MDKindMap;
    172 
    173   // Several operations happen after the module header has been read, but
    174   // before function bodies are processed. This keeps track of whether
    175   // we've done this yet.
    176   bool SeenFirstFunctionBody;
    177 
    178   /// DeferredFunctionInfo - When function bodies are initially scanned, this
    179   /// map contains info about where to find deferred function body in the
    180   /// stream.
    181   DenseMap<Function*, uint64_t> DeferredFunctionInfo;
    182 
    183   /// BlockAddrFwdRefs - These are blockaddr references to basic blocks.  These
    184   /// are resolved lazily when functions are loaded.
    185   typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
    186   DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
    187 
    188   /// UseRelativeIDs - Indicates that we are using a new encoding for
    189   /// instruction operands where most operands in the current
    190   /// FUNCTION_BLOCK are encoded relative to the instruction number,
    191   /// for a more compact encoding.  Some instruction operands are not
    192   /// relative to the instruction ID: basic block numbers, and types.
    193   /// Once the old style function blocks have been phased out, we would
    194   /// not need this flag.
    195   bool UseRelativeIDs;
    196 
    197   static const std::error_category &BitcodeErrorCategory();
    198 
    199 public:
    200   enum ErrorType {
    201     BitcodeStreamInvalidSize,
    202     ConflictingMETADATA_KINDRecords,
    203     CouldNotFindFunctionInStream,
    204     ExpectedConstant,
    205     InsufficientFunctionProtos,
    206     InvalidBitcodeSignature,
    207     InvalidBitcodeWrapperHeader,
    208     InvalidConstantReference,
    209     InvalidID, // A read identifier is not found in the table it should be in.
    210     InvalidInstructionWithNoBB,
    211     InvalidRecord, // A read record doesn't have the expected size or structure
    212     InvalidTypeForValue, // Type read OK, but is invalid for its use
    213     InvalidTYPETable,
    214     InvalidType, // We were unable to read a type
    215     MalformedBlock, // We are unable to advance in the stream.
    216     MalformedGlobalInitializerSet,
    217     InvalidMultipleBlocks, // We found multiple blocks of a kind that should
    218                            // have only one
    219     NeverResolvedValueFoundInFunction,
    220     InvalidValue // Invalid version, inst number, attr number, etc
    221   };
    222 
    223   std::error_code Error(ErrorType E) {
    224     return std::error_code(E, BitcodeErrorCategory());
    225   }
    226 
    227   explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
    228       : Context(C), TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr),
    229         NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
    230         MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {}
    231   explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
    232       : Context(C), TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer),
    233         NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
    234         MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {}
    235   ~BitcodeReader() { FreeState(); }
    236 
    237   void materializeForwardReferencedFunctions();
    238 
    239   void FreeState();
    240 
    241   void releaseBuffer() override;
    242 
    243   bool isMaterializable(const GlobalValue *GV) const override;
    244   bool isDematerializable(const GlobalValue *GV) const override;
    245   std::error_code Materialize(GlobalValue *GV) override;
    246   std::error_code MaterializeModule(Module *M) override;
    247   void Dematerialize(GlobalValue *GV) override;
    248 
    249   /// @brief Main interface to parsing a bitcode buffer.
    250   /// @returns true if an error occurred.
    251   std::error_code ParseBitcodeInto(Module *M);
    252 
    253   /// @brief Cheap mechanism to just extract module triple
    254   /// @returns true if an error occurred.
    255   ErrorOr<std::string> parseTriple();
    256 
    257   static uint64_t decodeSignRotatedValue(uint64_t V);
    258 
    259 private:
    260   Type *getTypeByID(unsigned ID);
    261   Value *getFnValueByID(unsigned ID, Type *Ty) {
    262     if (Ty && Ty->isMetadataTy())
    263       return MDValueList.getValueFwdRef(ID);
    264     return ValueList.getValueFwdRef(ID, Ty);
    265   }
    266   BasicBlock *getBasicBlock(unsigned ID) const {
    267     if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
    268     return FunctionBBs[ID];
    269   }
    270   AttributeSet getAttributes(unsigned i) const {
    271     if (i-1 < MAttributes.size())
    272       return MAttributes[i-1];
    273     return AttributeSet();
    274   }
    275 
    276   /// getValueTypePair - Read a value/type pair out of the specified record from
    277   /// slot 'Slot'.  Increment Slot past the number of slots used in the record.
    278   /// Return true on failure.
    279   bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
    280                         unsigned InstNum, Value *&ResVal) {
    281     if (Slot == Record.size()) return true;
    282     unsigned ValNo = (unsigned)Record[Slot++];
    283     // Adjust the ValNo, if it was encoded relative to the InstNum.
    284     if (UseRelativeIDs)
    285       ValNo = InstNum - ValNo;
    286     if (ValNo < InstNum) {
    287       // If this is not a forward reference, just return the value we already
    288       // have.
    289       ResVal = getFnValueByID(ValNo, nullptr);
    290       return ResVal == nullptr;
    291     } else if (Slot == Record.size()) {
    292       return true;
    293     }
    294 
    295     unsigned TypeNo = (unsigned)Record[Slot++];
    296     ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
    297     return ResVal == nullptr;
    298   }
    299 
    300   /// popValue - Read a value out of the specified record from slot 'Slot'.
    301   /// Increment Slot past the number of slots used by the value in the record.
    302   /// Return true if there is an error.
    303   bool popValue(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
    304                 unsigned InstNum, Type *Ty, Value *&ResVal) {
    305     if (getValue(Record, Slot, InstNum, Ty, ResVal))
    306       return true;
    307     // All values currently take a single record slot.
    308     ++Slot;
    309     return false;
    310   }
    311 
    312   /// getValue -- Like popValue, but does not increment the Slot number.
    313   bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
    314                 unsigned InstNum, Type *Ty, Value *&ResVal) {
    315     ResVal = getValue(Record, Slot, InstNum, Ty);
    316     return ResVal == nullptr;
    317   }
    318 
    319   /// getValue -- Version of getValue that returns ResVal directly,
    320   /// or 0 if there is an error.
    321   Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
    322                   unsigned InstNum, Type *Ty) {
    323     if (Slot == Record.size()) return nullptr;
    324     unsigned ValNo = (unsigned)Record[Slot];
    325     // Adjust the ValNo, if it was encoded relative to the InstNum.
    326     if (UseRelativeIDs)
    327       ValNo = InstNum - ValNo;
    328     return getFnValueByID(ValNo, Ty);
    329   }
    330 
    331   /// getValueSigned -- Like getValue, but decodes signed VBRs.
    332   Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
    333                         unsigned InstNum, Type *Ty) {
    334     if (Slot == Record.size()) return nullptr;
    335     unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
    336     // Adjust the ValNo, if it was encoded relative to the InstNum.
    337     if (UseRelativeIDs)
    338       ValNo = InstNum - ValNo;
    339     return getFnValueByID(ValNo, Ty);
    340   }
    341 
    342   std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
    343   std::error_code ParseModule(bool Resume);
    344   std::error_code ParseAttributeBlock();
    345   std::error_code ParseAttributeGroupBlock();
    346   std::error_code ParseTypeTable();
    347   std::error_code ParseTypeTableBody();
    348 
    349   std::error_code ParseValueSymbolTable();
    350   std::error_code ParseConstants();
    351   std::error_code RememberAndSkipFunctionBody();
    352   std::error_code ParseFunctionBody(Function *F);
    353   std::error_code GlobalCleanup();
    354   std::error_code ResolveGlobalAndAliasInits();
    355   std::error_code ParseMetadata();
    356   std::error_code ParseMetadataAttachment();
    357   ErrorOr<std::string> parseModuleTriple();
    358   std::error_code ParseUseLists();
    359   std::error_code InitStream();
    360   std::error_code InitStreamFromBuffer();
    361   std::error_code InitLazyStream();
    362   std::error_code FindFunctionInStream(
    363       Function *F,
    364       DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
    365 };
    366 
    367 } // End llvm namespace
    368 
    369 #endif
    370