1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H 11 #define LLVM_SUPPORT_DATAEXTRACTOR_H 12 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/DataTypes.h" 15 16 namespace llvm { 17 18 /// An auxiliary type to facilitate extraction of 3-byte entities. 19 struct Uint24 { 20 uint8_t Bytes[3]; 21 Uint24(uint8_t U) { 22 Bytes[0] = Bytes[1] = Bytes[2] = U; 23 } 24 Uint24(uint8_t U0, uint8_t U1, uint8_t U2) { 25 Bytes[0] = U0; Bytes[1] = U1; Bytes[2] = U2; 26 } 27 uint32_t getAsUint32(bool IsLittleEndian) const { 28 int LoIx = IsLittleEndian ? 0 : 2; 29 return Bytes[LoIx] + (Bytes[1] << 8) + (Bytes[2-LoIx] << 16); 30 } 31 }; 32 33 using uint24_t = Uint24; 34 static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3"); 35 36 /// Needed by swapByteOrder(). 37 inline uint24_t getSwappedBytes(uint24_t C) { 38 return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]); 39 } 40 41 class DataExtractor { 42 StringRef Data; 43 uint8_t IsLittleEndian; 44 uint8_t AddressSize; 45 public: 46 /// Construct with a buffer that is owned by the caller. 47 /// 48 /// This constructor allows us to use data that is owned by the 49 /// caller. The data must stay around as long as this object is 50 /// valid. 51 DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize) 52 : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {} 53 54 /// \brief Get the data pointed to by this extractor. 55 StringRef getData() const { return Data; } 56 /// \brief Get the endianness for this extractor. 57 bool isLittleEndian() const { return IsLittleEndian; } 58 /// \brief Get the address size for this extractor. 59 uint8_t getAddressSize() const { return AddressSize; } 60 /// \brief Set the address size for this extractor. 61 void setAddressSize(uint8_t Size) { AddressSize = Size; } 62 63 /// Extract a C string from \a *offset_ptr. 64 /// 65 /// Returns a pointer to a C String from the data at the offset 66 /// pointed to by \a offset_ptr. A variable length NULL terminated C 67 /// string will be extracted and the \a offset_ptr will be 68 /// updated with the offset of the byte that follows the NULL 69 /// terminator byte. 70 /// 71 /// @param[in,out] offset_ptr 72 /// A pointer to an offset within the data that will be advanced 73 /// by the appropriate number of bytes if the value is extracted 74 /// correctly. If the offset is out of bounds or there are not 75 /// enough bytes to extract this value, the offset will be left 76 /// unmodified. 77 /// 78 /// @return 79 /// A pointer to the C string value in the data. If the offset 80 /// pointed to by \a offset_ptr is out of bounds, or if the 81 /// offset plus the length of the C string is out of bounds, 82 /// NULL will be returned. 83 const char *getCStr(uint32_t *offset_ptr) const; 84 85 /// Extract a C string from \a *OffsetPtr. 86 /// 87 /// Returns a StringRef for the C String from the data at the offset 88 /// pointed to by \a OffsetPtr. A variable length NULL terminated C 89 /// string will be extracted and the \a OffsetPtr will be 90 /// updated with the offset of the byte that follows the NULL 91 /// terminator byte. 92 /// 93 /// \param[in,out] OffsetPtr 94 /// A pointer to an offset within the data that will be advanced 95 /// by the appropriate number of bytes if the value is extracted 96 /// correctly. If the offset is out of bounds or there are not 97 /// enough bytes to extract this value, the offset will be left 98 /// unmodified. 99 /// 100 /// \return 101 /// A StringRef for the C string value in the data. If the offset 102 /// pointed to by \a OffsetPtr is out of bounds, or if the 103 /// offset plus the length of the C string is out of bounds, 104 /// a default-initialized StringRef will be returned. 105 StringRef getCStrRef(uint32_t *OffsetPtr) const; 106 107 /// Extract an unsigned integer of size \a byte_size from \a 108 /// *offset_ptr. 109 /// 110 /// Extract a single unsigned integer value and update the offset 111 /// pointed to by \a offset_ptr. The size of the extracted integer 112 /// is specified by the \a byte_size argument. \a byte_size should 113 /// have a value greater than or equal to one and less than or equal 114 /// to eight since the return value is 64 bits wide. Any 115 /// \a byte_size values less than 1 or greater than 8 will result in 116 /// nothing being extracted, and zero being returned. 117 /// 118 /// @param[in,out] offset_ptr 119 /// A pointer to an offset within the data that will be advanced 120 /// by the appropriate number of bytes if the value is extracted 121 /// correctly. If the offset is out of bounds or there are not 122 /// enough bytes to extract this value, the offset will be left 123 /// unmodified. 124 /// 125 /// @param[in] byte_size 126 /// The size in byte of the integer to extract. 127 /// 128 /// @return 129 /// The unsigned integer value that was extracted, or zero on 130 /// failure. 131 uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const; 132 133 /// Extract an signed integer of size \a byte_size from \a *offset_ptr. 134 /// 135 /// Extract a single signed integer value (sign extending if required) 136 /// and update the offset pointed to by \a offset_ptr. The size of 137 /// the extracted integer is specified by the \a byte_size argument. 138 /// \a byte_size should have a value greater than or equal to one 139 /// and less than or equal to eight since the return value is 64 140 /// bits wide. Any \a byte_size values less than 1 or greater than 141 /// 8 will result in nothing being extracted, and zero being returned. 142 /// 143 /// @param[in,out] offset_ptr 144 /// A pointer to an offset within the data that will be advanced 145 /// by the appropriate number of bytes if the value is extracted 146 /// correctly. If the offset is out of bounds or there are not 147 /// enough bytes to extract this value, the offset will be left 148 /// unmodified. 149 /// 150 /// @param[in] size 151 /// The size in bytes of the integer to extract. 152 /// 153 /// @return 154 /// The sign extended signed integer value that was extracted, 155 /// or zero on failure. 156 int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const; 157 158 //------------------------------------------------------------------ 159 /// Extract an pointer from \a *offset_ptr. 160 /// 161 /// Extract a single pointer from the data and update the offset 162 /// pointed to by \a offset_ptr. The size of the extracted pointer 163 /// is \a getAddressSize(), so the address size has to be 164 /// set correctly prior to extracting any pointer values. 165 /// 166 /// @param[in,out] offset_ptr 167 /// A pointer to an offset within the data that will be advanced 168 /// by the appropriate number of bytes if the value is extracted 169 /// correctly. If the offset is out of bounds or there are not 170 /// enough bytes to extract this value, the offset will be left 171 /// unmodified. 172 /// 173 /// @return 174 /// The extracted pointer value as a 64 integer. 175 uint64_t getAddress(uint32_t *offset_ptr) const { 176 return getUnsigned(offset_ptr, AddressSize); 177 } 178 179 /// Extract a uint8_t value from \a *offset_ptr. 180 /// 181 /// Extract a single uint8_t from the binary data at the offset 182 /// pointed to by \a offset_ptr, and advance the offset on success. 183 /// 184 /// @param[in,out] offset_ptr 185 /// A pointer to an offset within the data that will be advanced 186 /// by the appropriate number of bytes if the value is extracted 187 /// correctly. If the offset is out of bounds or there are not 188 /// enough bytes to extract this value, the offset will be left 189 /// unmodified. 190 /// 191 /// @return 192 /// The extracted uint8_t value. 193 uint8_t getU8(uint32_t *offset_ptr) const; 194 195 /// Extract \a count uint8_t values from \a *offset_ptr. 196 /// 197 /// Extract \a count uint8_t values from the binary data at the 198 /// offset pointed to by \a offset_ptr, and advance the offset on 199 /// success. The extracted values are copied into \a dst. 200 /// 201 /// @param[in,out] offset_ptr 202 /// A pointer to an offset within the data that will be advanced 203 /// by the appropriate number of bytes if the value is extracted 204 /// correctly. If the offset is out of bounds or there are not 205 /// enough bytes to extract this value, the offset will be left 206 /// unmodified. 207 /// 208 /// @param[out] dst 209 /// A buffer to copy \a count uint8_t values into. \a dst must 210 /// be large enough to hold all requested data. 211 /// 212 /// @param[in] count 213 /// The number of uint8_t values to extract. 214 /// 215 /// @return 216 /// \a dst if all values were properly extracted and copied, 217 /// NULL otherise. 218 uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const; 219 220 //------------------------------------------------------------------ 221 /// Extract a uint16_t value from \a *offset_ptr. 222 /// 223 /// Extract a single uint16_t from the binary data at the offset 224 /// pointed to by \a offset_ptr, and update the offset on success. 225 /// 226 /// @param[in,out] offset_ptr 227 /// A pointer to an offset within the data that will be advanced 228 /// by the appropriate number of bytes if the value is extracted 229 /// correctly. If the offset is out of bounds or there are not 230 /// enough bytes to extract this value, the offset will be left 231 /// unmodified. 232 /// 233 /// @return 234 /// The extracted uint16_t value. 235 //------------------------------------------------------------------ 236 uint16_t getU16(uint32_t *offset_ptr) const; 237 238 /// Extract \a count uint16_t values from \a *offset_ptr. 239 /// 240 /// Extract \a count uint16_t values from the binary data at the 241 /// offset pointed to by \a offset_ptr, and advance the offset on 242 /// success. The extracted values are copied into \a dst. 243 /// 244 /// @param[in,out] offset_ptr 245 /// A pointer to an offset within the data that will be advanced 246 /// by the appropriate number of bytes if the value is extracted 247 /// correctly. If the offset is out of bounds or there are not 248 /// enough bytes to extract this value, the offset will be left 249 /// unmodified. 250 /// 251 /// @param[out] dst 252 /// A buffer to copy \a count uint16_t values into. \a dst must 253 /// be large enough to hold all requested data. 254 /// 255 /// @param[in] count 256 /// The number of uint16_t values to extract. 257 /// 258 /// @return 259 /// \a dst if all values were properly extracted and copied, 260 /// NULL otherise. 261 uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const; 262 263 /// Extract a 24-bit unsigned value from \a *offset_ptr and return it 264 /// in a uint32_t. 265 /// 266 /// Extract 3 bytes from the binary data at the offset pointed to by 267 /// \a offset_ptr, construct a uint32_t from them and update the offset 268 /// on success. 269 /// 270 /// @param[in,out] offset_ptr 271 /// A pointer to an offset within the data that will be advanced 272 /// by the 3 bytes if the value is extracted correctly. If the offset 273 /// is out of bounds or there are not enough bytes to extract this value, 274 /// the offset will be left unmodified. 275 /// 276 /// @return 277 /// The extracted 24-bit value represented in a uint32_t. 278 uint32_t getU24(uint32_t *offset_ptr) const; 279 280 /// Extract a uint32_t value from \a *offset_ptr. 281 /// 282 /// Extract a single uint32_t from the binary data at the offset 283 /// pointed to by \a offset_ptr, and update the offset on success. 284 /// 285 /// @param[in,out] offset_ptr 286 /// A pointer to an offset within the data that will be advanced 287 /// by the appropriate number of bytes if the value is extracted 288 /// correctly. If the offset is out of bounds or there are not 289 /// enough bytes to extract this value, the offset will be left 290 /// unmodified. 291 /// 292 /// @return 293 /// The extracted uint32_t value. 294 uint32_t getU32(uint32_t *offset_ptr) const; 295 296 /// Extract \a count uint32_t values from \a *offset_ptr. 297 /// 298 /// Extract \a count uint32_t values from the binary data at the 299 /// offset pointed to by \a offset_ptr, and advance the offset on 300 /// success. The extracted values are copied into \a dst. 301 /// 302 /// @param[in,out] offset_ptr 303 /// A pointer to an offset within the data that will be advanced 304 /// by the appropriate number of bytes if the value is extracted 305 /// correctly. If the offset is out of bounds or there are not 306 /// enough bytes to extract this value, the offset will be left 307 /// unmodified. 308 /// 309 /// @param[out] dst 310 /// A buffer to copy \a count uint32_t values into. \a dst must 311 /// be large enough to hold all requested data. 312 /// 313 /// @param[in] count 314 /// The number of uint32_t values to extract. 315 /// 316 /// @return 317 /// \a dst if all values were properly extracted and copied, 318 /// NULL otherise. 319 uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const; 320 321 /// Extract a uint64_t value from \a *offset_ptr. 322 /// 323 /// Extract a single uint64_t from the binary data at the offset 324 /// pointed to by \a offset_ptr, and update the offset on success. 325 /// 326 /// @param[in,out] offset_ptr 327 /// A pointer to an offset within the data that will be advanced 328 /// by the appropriate number of bytes if the value is extracted 329 /// correctly. If the offset is out of bounds or there are not 330 /// enough bytes to extract this value, the offset will be left 331 /// unmodified. 332 /// 333 /// @return 334 /// The extracted uint64_t value. 335 uint64_t getU64(uint32_t *offset_ptr) const; 336 337 /// Extract \a count uint64_t values from \a *offset_ptr. 338 /// 339 /// Extract \a count uint64_t values from the binary data at the 340 /// offset pointed to by \a offset_ptr, and advance the offset on 341 /// success. The extracted values are copied into \a dst. 342 /// 343 /// @param[in,out] offset_ptr 344 /// A pointer to an offset within the data that will be advanced 345 /// by the appropriate number of bytes if the value is extracted 346 /// correctly. If the offset is out of bounds or there are not 347 /// enough bytes to extract this value, the offset will be left 348 /// unmodified. 349 /// 350 /// @param[out] dst 351 /// A buffer to copy \a count uint64_t values into. \a dst must 352 /// be large enough to hold all requested data. 353 /// 354 /// @param[in] count 355 /// The number of uint64_t values to extract. 356 /// 357 /// @return 358 /// \a dst if all values were properly extracted and copied, 359 /// NULL otherise. 360 uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const; 361 362 /// Extract a signed LEB128 value from \a *offset_ptr. 363 /// 364 /// Extracts an signed LEB128 number from this object's data 365 /// starting at the offset pointed to by \a offset_ptr. The offset 366 /// pointed to by \a offset_ptr will be updated with the offset of 367 /// the byte following the last extracted byte. 368 /// 369 /// @param[in,out] offset_ptr 370 /// A pointer to an offset within the data that will be advanced 371 /// by the appropriate number of bytes if the value is extracted 372 /// correctly. If the offset is out of bounds or there are not 373 /// enough bytes to extract this value, the offset will be left 374 /// unmodified. 375 /// 376 /// @return 377 /// The extracted signed integer value. 378 int64_t getSLEB128(uint32_t *offset_ptr) const; 379 380 /// Extract a unsigned LEB128 value from \a *offset_ptr. 381 /// 382 /// Extracts an unsigned LEB128 number from this object's data 383 /// starting at the offset pointed to by \a offset_ptr. The offset 384 /// pointed to by \a offset_ptr will be updated with the offset of 385 /// the byte following the last extracted byte. 386 /// 387 /// @param[in,out] offset_ptr 388 /// A pointer to an offset within the data that will be advanced 389 /// by the appropriate number of bytes if the value is extracted 390 /// correctly. If the offset is out of bounds or there are not 391 /// enough bytes to extract this value, the offset will be left 392 /// unmodified. 393 /// 394 /// @return 395 /// The extracted unsigned integer value. 396 uint64_t getULEB128(uint32_t *offset_ptr) const; 397 398 /// Test the validity of \a offset. 399 /// 400 /// @return 401 /// \b true if \a offset is a valid offset into the data in this 402 /// object, \b false otherwise. 403 bool isValidOffset(uint32_t offset) const { return Data.size() > offset; } 404 405 /// Test the availability of \a length bytes of data from \a offset. 406 /// 407 /// @return 408 /// \b true if \a offset is a valid offset and there are \a 409 /// length bytes available at that offset, \b false otherwise. 410 bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const { 411 return offset + length >= offset && isValidOffset(offset + length - 1); 412 } 413 414 /// Test the availability of enough bytes of data for a pointer from 415 /// \a offset. The size of a pointer is \a getAddressSize(). 416 /// 417 /// @return 418 /// \b true if \a offset is a valid offset and there are enough 419 /// bytes for a pointer available at that offset, \b false 420 /// otherwise. 421 bool isValidOffsetForAddress(uint32_t offset) const { 422 return isValidOffsetForDataOfSize(offset, AddressSize); 423 } 424 }; 425 426 } // namespace llvm 427 428 #endif 429