Home | History | Annotate | Download | only in Bitcode
      1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This header defines interfaces to read LLVM bitcode files/streams.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_BITCODE_BITCODEREADER_H
     15 #define LLVM_BITCODE_BITCODEREADER_H
     16 
     17 #include "llvm/ADT/ArrayRef.h"
     18 #include "llvm/ADT/StringRef.h"
     19 #include "llvm/Bitcode/BitCodes.h"
     20 #include "llvm/IR/ModuleSummaryIndex.h"
     21 #include "llvm/Support/Endian.h"
     22 #include "llvm/Support/Error.h"
     23 #include "llvm/Support/ErrorOr.h"
     24 #include "llvm/Support/MemoryBuffer.h"
     25 #include <cstdint>
     26 #include <memory>
     27 #include <string>
     28 #include <system_error>
     29 #include <vector>
     30 namespace llvm {
     31 
     32 class LLVMContext;
     33 class Module;
     34 
     35   // These functions are for converting Expected/Error values to
     36   // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
     37   // Remove these functions once no longer needed by the C and libLTO APIs.
     38 
     39   std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
     40 
     41   template <typename T>
     42   ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
     43     if (!Val)
     44       return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
     45     return std::move(*Val);
     46   }
     47 
     48   struct BitcodeFileContents;
     49 
     50   /// Basic information extracted from a bitcode module to be used for LTO.
  struct BitcodeLTOInfo {
    /// Whether the module is to be compiled with ThinLTO.
    bool IsThinLTO;
    /// Whether the module has a summary.
    bool HasSummary;
  };
     55 
     56   /// Represents a module in a bitcode file.
     57   class BitcodeModule {
     58     // This covers the identification (if present) and module blocks.
     59     ArrayRef<uint8_t> Buffer;
     60     StringRef ModuleIdentifier;
     61 
     62     // The string table used to interpret this module.
     63     StringRef Strtab;
     64 
     65     // The bitstream location of the IDENTIFICATION_BLOCK.
     66     uint64_t IdentificationBit;
     67 
     68     // The bitstream location of this module's MODULE_BLOCK.
     69     uint64_t ModuleBit;
     70 
     71     BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
     72                   uint64_t IdentificationBit, uint64_t ModuleBit)
     73         : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
     74           IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
     75 
     76     // Calls the ctor.
     77     friend Expected<BitcodeFileContents>
     78     getBitcodeFileContents(MemoryBufferRef Buffer);
     79 
     80     Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
     81                                                     bool MaterializeAll,
     82                                                     bool ShouldLazyLoadMetadata,
     83                                                     bool IsImporting);
     84 
     85   public:
     86     StringRef getBuffer() const {
     87       return StringRef((const char *)Buffer.begin(), Buffer.size());
     88     }
     89 
     90     StringRef getStrtab() const { return Strtab; }
     91 
     92     StringRef getModuleIdentifier() const { return ModuleIdentifier; }
     93 
     94     /// Read the bitcode module and prepare for lazy deserialization of function
     95     /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
     96     /// If IsImporting is true, this module is being parsed for ThinLTO
     97     /// importing into another module.
     98     Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
     99                                                     bool ShouldLazyLoadMetadata,
    100                                                     bool IsImporting);
    101 
    102     /// Read the entire bitcode module and return it.
    103     Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
    104 
    105     /// Returns information about the module to be used for LTO: whether to
    106     /// compile with ThinLTO, and whether it has a summary.
    107     Expected<BitcodeLTOInfo> getLTOInfo();
    108 
    109     /// Parse the specified bitcode buffer, returning the module summary index.
    110     Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
    111 
    112     /// Parse the specified bitcode buffer and merge its module summary index
    113     /// into CombinedIndex.
    114     Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
    115                       uint64_t ModuleId);
    116   };
    117 
  struct BitcodeFileContents {
    /// The BitcodeModule entries for the file (presumably one per module
    /// found in it -- confirm against getBitcodeFileContents).
    std::vector<BitcodeModule> Mods;
    /// Symtab: raw contents of the symbol table embedded in the bitcode file.
    /// StrtabForSymtab: presumably the string table that symbol table refers
    /// to -- TODO confirm against the reader implementation.
    StringRef Symtab, StrtabForSymtab;
  };
    122 
    123   /// Returns the contents of a bitcode file. This includes the raw contents of
    124   /// the symbol table embedded in the bitcode file. Clients which require a
    125   /// symbol table should prefer to use irsymtab::read instead of this function
    126   /// because it creates a reader for the irsymtab and handles upgrading bitcode
    127   /// files without a symbol table or with an old symbol table.
    128   Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
    129 
    130   /// Returns a list of modules in the specified bitcode buffer.
    131   Expected<std::vector<BitcodeModule>>
    132   getBitcodeModuleList(MemoryBufferRef Buffer);
    133 
    134   /// Read the header of the specified bitcode buffer and prepare for lazy
    135   /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
    136   /// lazily load metadata as well. If IsImporting is true, this module is
    137   /// being parsed for ThinLTO importing into another module.
    138   Expected<std::unique_ptr<Module>>
    139   getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
    140                        bool ShouldLazyLoadMetadata = false,
    141                        bool IsImporting = false);
    142 
    143   /// Like getLazyBitcodeModule, except that the module takes ownership of
    144   /// the memory buffer if successful. If successful, this moves Buffer. On
    145   /// error, this *does not* move Buffer. If IsImporting is true, this module is
    146   /// being parsed for ThinLTO importing into another module.
    147   Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
    148       std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
    149       bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
    150 
  /// Read the header of the specified bitcode buffer and extract just the
  /// triple information. If successful, this returns a string. On failure,
  /// the error is reported through the returned Expected.
    154   Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
    155 
    156   /// Return true if \p Buffer contains a bitcode file with ObjC code (category
    157   /// or class) in it.
    158   Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
    159 
  /// Read the header of the specified bitcode buffer and extract just the
  /// producer string information. If successful, this returns a string. On
  /// failure, the error is reported through the returned Expected.
    163   Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
    164 
    165   /// Read the specified bitcode file, returning the module.
    166   Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
    167                                                      LLVMContext &Context);
    168 
    169   /// Returns LTO information for the specified bitcode file.
    170   Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
    171 
    172   /// Parse the specified bitcode buffer, returning the module summary index.
    173   Expected<std::unique_ptr<ModuleSummaryIndex>>
    174   getModuleSummaryIndex(MemoryBufferRef Buffer);
    175 
    176   /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
    177   Error readModuleSummaryIndex(MemoryBufferRef Buffer,
    178                                ModuleSummaryIndex &CombinedIndex,
    179                                uint64_t ModuleId);
    180 
    181   /// Parse the module summary index out of an IR file and return the module
    182   /// summary index object if found, or an empty summary if not. If Path refers
    183   /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
    184   /// this function will return nullptr.
    185   Expected<std::unique_ptr<ModuleSummaryIndex>>
    186   getModuleSummaryIndexForFile(StringRef Path,
    187                                bool IgnoreEmptyThinLTOIndexFile = false);
    188 
    189   /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
    190   /// for an LLVM IR bitcode wrapper.
    191   inline bool isBitcodeWrapper(const unsigned char *BufPtr,
    192                                const unsigned char *BufEnd) {
    193     // See if you can find the hidden message in the magic bytes :-).
    194     // (Hint: it's a little-endian encoding.)
    195     return BufPtr != BufEnd &&
    196            BufPtr[0] == 0xDE &&
    197            BufPtr[1] == 0xC0 &&
    198            BufPtr[2] == 0x17 &&
    199            BufPtr[3] == 0x0B;
    200   }
    201 
    202   /// isRawBitcode - Return true if the given bytes are the magic bytes for
    203   /// raw LLVM IR bitcode (without a wrapper).
    204   inline bool isRawBitcode(const unsigned char *BufPtr,
    205                            const unsigned char *BufEnd) {
    206     // These bytes sort of have a hidden message, but it's not in
    207     // little-endian this time, and it's a little redundant.
    208     return BufPtr != BufEnd &&
    209            BufPtr[0] == 'B' &&
    210            BufPtr[1] == 'C' &&
    211            BufPtr[2] == 0xc0 &&
    212            BufPtr[3] == 0xde;
    213   }
    214 
    215   /// isBitcode - Return true if the given bytes are the magic bytes for
    216   /// LLVM IR bitcode, either with or without a wrapper.
    217   inline bool isBitcode(const unsigned char *BufPtr,
    218                         const unsigned char *BufEnd) {
    219     return isBitcodeWrapper(BufPtr, BufEnd) ||
    220            isRawBitcode(BufPtr, BufEnd);
    221   }
    222 
    223   /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
    224   /// header for padding or other reasons.  The format of this header is:
    225   ///
    226   /// struct bc_header {
    227   ///   uint32_t Magic;         // 0x0B17C0DE
    228   ///   uint32_t Version;       // Version, currently always 0.
    229   ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
    230   ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
    231   ///   ... potentially other gunk ...
    232   /// };
    233   ///
    234   /// This function is called when we find a file with a matching magic number.
    235   /// In this case, skip down to the subsection of the file that is actually a
    236   /// BC file.
    237   /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
    238   /// contain the whole bitcode file.
    239   inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
    240                                        const unsigned char *&BufEnd,
    241                                        bool VerifyBufferSize) {
    242     // Must contain the offset and size field!
    243     if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
    244       return true;
    245 
    246     unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
    247     unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
    248     uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
    249 
    250     // Verify that Offset+Size fits in the file.
    251     if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
    252       return true;
    253     BufPtr += Offset;
    254     BufEnd = BufPtr+Size;
    255     return false;
    256   }
    257 
    258   const std::error_category &BitcodeErrorCategory();
    259   enum class BitcodeError { CorruptedBitcode = 1 };
    260   inline std::error_code make_error_code(BitcodeError E) {
    261     return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
    262   }
    263 
    264 } // end namespace llvm
    265 
namespace std {

// Registers llvm::BitcodeError as an error-code enumeration so that its
// values convert implicitly to std::error_code (through make_error_code).
template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};

} // end namespace std
    271 
    272 #endif // LLVM_BITCODE_BITCODEREADER_H
    273