Home | History | Annotate | Download | only in Support
      1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H
     11 #define LLVM_SUPPORT_DATAEXTRACTOR_H
     12 
     13 #include "llvm/ADT/StringRef.h"
     14 #include "llvm/Support/DataTypes.h"
     15 
     16 namespace llvm {
     17 
     18 /// An auxiliary type to facilitate extraction of 3-byte entities.
     19 struct Uint24 {
     20   uint8_t Bytes[3];
     21   Uint24(uint8_t U) {
     22     Bytes[0] = Bytes[1] = Bytes[2] = U;
     23   }
     24   Uint24(uint8_t U0, uint8_t U1, uint8_t U2) {
     25     Bytes[0] = U0; Bytes[1] = U1; Bytes[2] = U2;
     26   }
     27   uint32_t getAsUint32(bool IsLittleEndian) const {
     28     int LoIx = IsLittleEndian ? 0 : 2;
     29     return Bytes[LoIx] + (Bytes[1] << 8) + (Bytes[2-LoIx] << 16);
     30   }
     31 };
     32 
     33 using uint24_t = Uint24;
     34 static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3");
     35 
     36 /// Needed by swapByteOrder().
     37 inline uint24_t getSwappedBytes(uint24_t C) {
     38   return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]);
     39 }
     40 
     41 class DataExtractor {
     42   StringRef Data;
     43   uint8_t IsLittleEndian;
     44   uint8_t AddressSize;
     45 public:
     46   /// Construct with a buffer that is owned by the caller.
     47   ///
     48   /// This constructor allows us to use data that is owned by the
     49   /// caller. The data must stay around as long as this object is
     50   /// valid.
     51   DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
     52     : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
     53 
     54   /// \brief Get the data pointed to by this extractor.
     55   StringRef getData() const { return Data; }
     56   /// \brief Get the endianness for this extractor.
     57   bool isLittleEndian() const { return IsLittleEndian; }
     58   /// \brief Get the address size for this extractor.
     59   uint8_t getAddressSize() const { return AddressSize; }
     60   /// \brief Set the address size for this extractor.
     61   void setAddressSize(uint8_t Size) { AddressSize = Size; }
     62 
     63   /// Extract a C string from \a *offset_ptr.
     64   ///
     65   /// Returns a pointer to a C String from the data at the offset
     66   /// pointed to by \a offset_ptr. A variable length NULL terminated C
     67   /// string will be extracted and the \a offset_ptr will be
     68   /// updated with the offset of the byte that follows the NULL
     69   /// terminator byte.
     70   ///
     71   /// @param[in,out] offset_ptr
     72   ///     A pointer to an offset within the data that will be advanced
     73   ///     by the appropriate number of bytes if the value is extracted
     74   ///     correctly. If the offset is out of bounds or there are not
     75   ///     enough bytes to extract this value, the offset will be left
     76   ///     unmodified.
     77   ///
     78   /// @return
     79   ///     A pointer to the C string value in the data. If the offset
     80   ///     pointed to by \a offset_ptr is out of bounds, or if the
     81   ///     offset plus the length of the C string is out of bounds,
     82   ///     NULL will be returned.
     83   const char *getCStr(uint32_t *offset_ptr) const;
     84 
     85   /// Extract a C string from \a *OffsetPtr.
     86   ///
     87   /// Returns a StringRef for the C String from the data at the offset
     88   /// pointed to by \a OffsetPtr. A variable length NULL terminated C
     89   /// string will be extracted and the \a OffsetPtr will be
     90   /// updated with the offset of the byte that follows the NULL
     91   /// terminator byte.
     92   ///
     93   /// \param[in,out] OffsetPtr
     94   ///     A pointer to an offset within the data that will be advanced
     95   ///     by the appropriate number of bytes if the value is extracted
     96   ///     correctly. If the offset is out of bounds or there are not
     97   ///     enough bytes to extract this value, the offset will be left
     98   ///     unmodified.
     99   ///
    100   /// \return
    101   ///     A StringRef for the C string value in the data. If the offset
    102   ///     pointed to by \a OffsetPtr is out of bounds, or if the
    103   ///     offset plus the length of the C string is out of bounds,
    104   ///     a default-initialized StringRef will be returned.
    105   StringRef getCStrRef(uint32_t *OffsetPtr) const;
    106 
    107   /// Extract an unsigned integer of size \a byte_size from \a
    108   /// *offset_ptr.
    109   ///
    110   /// Extract a single unsigned integer value and update the offset
    111   /// pointed to by \a offset_ptr. The size of the extracted integer
    112   /// is specified by the \a byte_size argument. \a byte_size should
    113   /// have a value greater than or equal to one and less than or equal
    114   /// to eight since the return value is 64 bits wide. Any
    115   /// \a byte_size values less than 1 or greater than 8 will result in
    116   /// nothing being extracted, and zero being returned.
    117   ///
    118   /// @param[in,out] offset_ptr
    119   ///     A pointer to an offset within the data that will be advanced
    120   ///     by the appropriate number of bytes if the value is extracted
    121   ///     correctly. If the offset is out of bounds or there are not
    122   ///     enough bytes to extract this value, the offset will be left
    123   ///     unmodified.
    124   ///
    125   /// @param[in] byte_size
    126   ///     The size in byte of the integer to extract.
    127   ///
    128   /// @return
    129   ///     The unsigned integer value that was extracted, or zero on
    130   ///     failure.
    131   uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const;
    132 
    133   /// Extract an signed integer of size \a byte_size from \a *offset_ptr.
    134   ///
    135   /// Extract a single signed integer value (sign extending if required)
    136   /// and update the offset pointed to by \a offset_ptr. The size of
    137   /// the extracted integer is specified by the \a byte_size argument.
    138   /// \a byte_size should have a value greater than or equal to one
    139   /// and less than or equal to eight since the return value is 64
    140   /// bits wide. Any \a byte_size values less than 1 or greater than
    141   /// 8 will result in nothing being extracted, and zero being returned.
    142   ///
    143   /// @param[in,out] offset_ptr
    144   ///     A pointer to an offset within the data that will be advanced
    145   ///     by the appropriate number of bytes if the value is extracted
    146   ///     correctly. If the offset is out of bounds or there are not
    147   ///     enough bytes to extract this value, the offset will be left
    148   ///     unmodified.
    149   ///
    150   /// @param[in] size
    151   ///     The size in bytes of the integer to extract.
    152   ///
    153   /// @return
    154   ///     The sign extended signed integer value that was extracted,
    155   ///     or zero on failure.
    156   int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const;
    157 
    158   //------------------------------------------------------------------
    159   /// Extract an pointer from \a *offset_ptr.
    160   ///
    161   /// Extract a single pointer from the data and update the offset
    162   /// pointed to by \a offset_ptr. The size of the extracted pointer
    163   /// is \a getAddressSize(), so the address size has to be
    164   /// set correctly prior to extracting any pointer values.
    165   ///
    166   /// @param[in,out] offset_ptr
    167   ///     A pointer to an offset within the data that will be advanced
    168   ///     by the appropriate number of bytes if the value is extracted
    169   ///     correctly. If the offset is out of bounds or there are not
    170   ///     enough bytes to extract this value, the offset will be left
    171   ///     unmodified.
    172   ///
    173   /// @return
    174   ///     The extracted pointer value as a 64 integer.
    175   uint64_t getAddress(uint32_t *offset_ptr) const {
    176     return getUnsigned(offset_ptr, AddressSize);
    177   }
    178 
    179   /// Extract a uint8_t value from \a *offset_ptr.
    180   ///
    181   /// Extract a single uint8_t from the binary data at the offset
    182   /// pointed to by \a offset_ptr, and advance the offset on success.
    183   ///
    184   /// @param[in,out] offset_ptr
    185   ///     A pointer to an offset within the data that will be advanced
    186   ///     by the appropriate number of bytes if the value is extracted
    187   ///     correctly. If the offset is out of bounds or there are not
    188   ///     enough bytes to extract this value, the offset will be left
    189   ///     unmodified.
    190   ///
    191   /// @return
    192   ///     The extracted uint8_t value.
    193   uint8_t getU8(uint32_t *offset_ptr) const;
    194 
    195   /// Extract \a count uint8_t values from \a *offset_ptr.
    196   ///
    197   /// Extract \a count uint8_t values from the binary data at the
    198   /// offset pointed to by \a offset_ptr, and advance the offset on
    199   /// success. The extracted values are copied into \a dst.
    200   ///
    201   /// @param[in,out] offset_ptr
    202   ///     A pointer to an offset within the data that will be advanced
    203   ///     by the appropriate number of bytes if the value is extracted
    204   ///     correctly. If the offset is out of bounds or there are not
    205   ///     enough bytes to extract this value, the offset will be left
    206   ///     unmodified.
    207   ///
    208   /// @param[out] dst
    209   ///     A buffer to copy \a count uint8_t values into. \a dst must
    210   ///     be large enough to hold all requested data.
    211   ///
    212   /// @param[in] count
    213   ///     The number of uint8_t values to extract.
    214   ///
    215   /// @return
    216   ///     \a dst if all values were properly extracted and copied,
    217   ///     NULL otherise.
    218   uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const;
    219 
    220   //------------------------------------------------------------------
    221   /// Extract a uint16_t value from \a *offset_ptr.
    222   ///
    223   /// Extract a single uint16_t from the binary data at the offset
    224   /// pointed to by \a offset_ptr, and update the offset on success.
    225   ///
    226   /// @param[in,out] offset_ptr
    227   ///     A pointer to an offset within the data that will be advanced
    228   ///     by the appropriate number of bytes if the value is extracted
    229   ///     correctly. If the offset is out of bounds or there are not
    230   ///     enough bytes to extract this value, the offset will be left
    231   ///     unmodified.
    232   ///
    233   /// @return
    234   ///     The extracted uint16_t value.
    235   //------------------------------------------------------------------
    236   uint16_t getU16(uint32_t *offset_ptr) const;
    237 
    238   /// Extract \a count uint16_t values from \a *offset_ptr.
    239   ///
    240   /// Extract \a count uint16_t values from the binary data at the
    241   /// offset pointed to by \a offset_ptr, and advance the offset on
    242   /// success. The extracted values are copied into \a dst.
    243   ///
    244   /// @param[in,out] offset_ptr
    245   ///     A pointer to an offset within the data that will be advanced
    246   ///     by the appropriate number of bytes if the value is extracted
    247   ///     correctly. If the offset is out of bounds or there are not
    248   ///     enough bytes to extract this value, the offset will be left
    249   ///     unmodified.
    250   ///
    251   /// @param[out] dst
    252   ///     A buffer to copy \a count uint16_t values into. \a dst must
    253   ///     be large enough to hold all requested data.
    254   ///
    255   /// @param[in] count
    256   ///     The number of uint16_t values to extract.
    257   ///
    258   /// @return
    259   ///     \a dst if all values were properly extracted and copied,
    260   ///     NULL otherise.
    261   uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const;
    262 
    263   /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
    264   /// in a uint32_t.
    265   ///
    266   /// Extract 3 bytes from the binary data at the offset pointed to by
    267   /// \a offset_ptr, construct a uint32_t from them and update the offset
    268   /// on success.
    269   ///
    270   /// @param[in,out] offset_ptr
    271   ///     A pointer to an offset within the data that will be advanced
    272   ///     by the 3 bytes if the value is extracted correctly. If the offset
    273   ///     is out of bounds or there are not enough bytes to extract this value,
    274   ///     the offset will be left unmodified.
    275   ///
    276   /// @return
    277   ///     The extracted 24-bit value represented in a uint32_t.
    278   uint32_t getU24(uint32_t *offset_ptr) const;
    279 
    280   /// Extract a uint32_t value from \a *offset_ptr.
    281   ///
    282   /// Extract a single uint32_t from the binary data at the offset
    283   /// pointed to by \a offset_ptr, and update the offset on success.
    284   ///
    285   /// @param[in,out] offset_ptr
    286   ///     A pointer to an offset within the data that will be advanced
    287   ///     by the appropriate number of bytes if the value is extracted
    288   ///     correctly. If the offset is out of bounds or there are not
    289   ///     enough bytes to extract this value, the offset will be left
    290   ///     unmodified.
    291   ///
    292   /// @return
    293   ///     The extracted uint32_t value.
    294   uint32_t getU32(uint32_t *offset_ptr) const;
    295 
    296   /// Extract \a count uint32_t values from \a *offset_ptr.
    297   ///
    298   /// Extract \a count uint32_t values from the binary data at the
    299   /// offset pointed to by \a offset_ptr, and advance the offset on
    300   /// success. The extracted values are copied into \a dst.
    301   ///
    302   /// @param[in,out] offset_ptr
    303   ///     A pointer to an offset within the data that will be advanced
    304   ///     by the appropriate number of bytes if the value is extracted
    305   ///     correctly. If the offset is out of bounds or there are not
    306   ///     enough bytes to extract this value, the offset will be left
    307   ///     unmodified.
    308   ///
    309   /// @param[out] dst
    310   ///     A buffer to copy \a count uint32_t values into. \a dst must
    311   ///     be large enough to hold all requested data.
    312   ///
    313   /// @param[in] count
    314   ///     The number of uint32_t values to extract.
    315   ///
    316   /// @return
    317   ///     \a dst if all values were properly extracted and copied,
    318   ///     NULL otherise.
    319   uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const;
    320 
    321   /// Extract a uint64_t value from \a *offset_ptr.
    322   ///
    323   /// Extract a single uint64_t from the binary data at the offset
    324   /// pointed to by \a offset_ptr, and update the offset on success.
    325   ///
    326   /// @param[in,out] offset_ptr
    327   ///     A pointer to an offset within the data that will be advanced
    328   ///     by the appropriate number of bytes if the value is extracted
    329   ///     correctly. If the offset is out of bounds or there are not
    330   ///     enough bytes to extract this value, the offset will be left
    331   ///     unmodified.
    332   ///
    333   /// @return
    334   ///     The extracted uint64_t value.
    335   uint64_t getU64(uint32_t *offset_ptr) const;
    336 
    337   /// Extract \a count uint64_t values from \a *offset_ptr.
    338   ///
    339   /// Extract \a count uint64_t values from the binary data at the
    340   /// offset pointed to by \a offset_ptr, and advance the offset on
    341   /// success. The extracted values are copied into \a dst.
    342   ///
    343   /// @param[in,out] offset_ptr
    344   ///     A pointer to an offset within the data that will be advanced
    345   ///     by the appropriate number of bytes if the value is extracted
    346   ///     correctly. If the offset is out of bounds or there are not
    347   ///     enough bytes to extract this value, the offset will be left
    348   ///     unmodified.
    349   ///
    350   /// @param[out] dst
    351   ///     A buffer to copy \a count uint64_t values into. \a dst must
    352   ///     be large enough to hold all requested data.
    353   ///
    354   /// @param[in] count
    355   ///     The number of uint64_t values to extract.
    356   ///
    357   /// @return
    358   ///     \a dst if all values were properly extracted and copied,
    359   ///     NULL otherise.
    360   uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const;
    361 
    362   /// Extract a signed LEB128 value from \a *offset_ptr.
    363   ///
    364   /// Extracts an signed LEB128 number from this object's data
    365   /// starting at the offset pointed to by \a offset_ptr. The offset
    366   /// pointed to by \a offset_ptr will be updated with the offset of
    367   /// the byte following the last extracted byte.
    368   ///
    369   /// @param[in,out] offset_ptr
    370   ///     A pointer to an offset within the data that will be advanced
    371   ///     by the appropriate number of bytes if the value is extracted
    372   ///     correctly. If the offset is out of bounds or there are not
    373   ///     enough bytes to extract this value, the offset will be left
    374   ///     unmodified.
    375   ///
    376   /// @return
    377   ///     The extracted signed integer value.
    378   int64_t getSLEB128(uint32_t *offset_ptr) const;
    379 
    380   /// Extract a unsigned LEB128 value from \a *offset_ptr.
    381   ///
    382   /// Extracts an unsigned LEB128 number from this object's data
    383   /// starting at the offset pointed to by \a offset_ptr. The offset
    384   /// pointed to by \a offset_ptr will be updated with the offset of
    385   /// the byte following the last extracted byte.
    386   ///
    387   /// @param[in,out] offset_ptr
    388   ///     A pointer to an offset within the data that will be advanced
    389   ///     by the appropriate number of bytes if the value is extracted
    390   ///     correctly. If the offset is out of bounds or there are not
    391   ///     enough bytes to extract this value, the offset will be left
    392   ///     unmodified.
    393   ///
    394   /// @return
    395   ///     The extracted unsigned integer value.
    396   uint64_t getULEB128(uint32_t *offset_ptr) const;
    397 
    398   /// Test the validity of \a offset.
    399   ///
    400   /// @return
    401   ///     \b true if \a offset is a valid offset into the data in this
    402   ///     object, \b false otherwise.
    403   bool isValidOffset(uint32_t offset) const { return Data.size() > offset; }
    404 
    405   /// Test the availability of \a length bytes of data from \a offset.
    406   ///
    407   /// @return
    408   ///     \b true if \a offset is a valid offset and there are \a
    409   ///     length bytes available at that offset, \b false otherwise.
    410   bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const {
    411     return offset + length >= offset && isValidOffset(offset + length - 1);
    412   }
    413 
    414   /// Test the availability of enough bytes of data for a pointer from
    415   /// \a offset. The size of a pointer is \a getAddressSize().
    416   ///
    417   /// @return
    418   ///     \b true if \a offset is a valid offset and there are enough
    419   ///     bytes for a pointer available at that offset, \b false
    420   ///     otherwise.
    421   bool isValidOffsetForAddress(uint32_t offset) const {
    422     return isValidOffsetForDataOfSize(offset, AddressSize);
    423   }
    424 };
    425 
    426 } // namespace llvm
    427 
    428 #endif
    429