1 //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H 11 #define LLVM_SUPPORT_BINARYSTREAMARRAY_H 12 13 #include "llvm/ADT/ArrayRef.h" 14 #include "llvm/ADT/iterator.h" 15 #include "llvm/Support/BinaryStreamRef.h" 16 #include "llvm/Support/Error.h" 17 #include <cassert> 18 #include <cstdint> 19 20 /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file 21 /// provides two different array implementations. 22 /// 23 /// VarStreamArray - Arrays of variable length records. The user specifies 24 /// an Extractor type that can extract a record from a given offset and 25 /// return the number of bytes consumed by the record. 26 /// 27 /// FixedStreamArray - Arrays of fixed length records. This is similar in 28 /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the 29 /// elements of the array need not be laid out in contiguous memory. 30 namespace llvm { 31 32 /// VarStreamArrayExtractor is intended to be specialized to provide customized 33 /// extraction logic. On input it receives a BinaryStreamRef pointing to the 34 /// beginning of the next record, but where the length of the record is not yet 35 /// known. Upon completion, it should return an appropriate Error instance if 36 /// a record could not be extracted, or if one could be extracted it should 37 /// return success and set Len to the number of bytes this record occupied in 38 /// the underlying stream, and it should fill out the fields of the value type 39 /// Item appropriately to represent the current record. 40 /// 41 /// You can specialize this template for your own custom value types to avoid 42 /// having to specify a second template argument to VarStreamArray (documented 43 /// below). 44 template <typename T> struct VarStreamArrayExtractor { 45 // Method intentionally deleted. You must provide an explicit specialization 46 // with the following method implemented. 47 Error operator()(BinaryStreamRef Stream, uint32_t &Len, 48 T &Item) const = delete; 49 }; 50 51 /// VarStreamArray represents an array of variable length records backed by a 52 /// stream. This could be a contiguous sequence of bytes in memory, it could 53 /// be a file on disk, or it could be a PDB stream where bytes are stored as 54 /// discontiguous blocks in a file. Usually it is desirable to treat arrays 55 /// as contiguous blocks of memory, but doing so with large PDB files, for 56 /// example, could mean allocating huge amounts of memory just to allow 57 /// re-ordering of stream data to be contiguous before iterating over it. By 58 /// abstracting this out, we need not duplicate this memory, and we can 59 /// iterate over arrays in arbitrarily formatted streams. Elements are parsed 60 /// lazily on iteration, so there is no upfront cost associated with building 61 /// or copying a VarStreamArray, no matter how large it may be. 62 /// 63 /// You create a VarStreamArray by specifying a ValueType and an Extractor type. 64 /// If you do not specify an Extractor type, you are expected to specialize 65 /// VarStreamArrayExtractor<T> for your ValueType. 66 /// 67 /// By default an Extractor is default constructed in the class, but in some 68 /// cases you might find it useful for an Extractor to maintain state across 69 /// extractions. In this case you can provide your own Extractor through a 70 /// secondary constructor. The following examples show various ways of 71 /// creating a VarStreamArray. 72 /// 73 /// // Will use VarStreamArrayExtractor<MyType> as the extractor. 74 /// VarStreamArray<MyType> MyTypeArray; 75 /// 76 /// // Will use a default-constructed MyExtractor as the extractor. 77 /// VarStreamArray<MyType, MyExtractor> MyTypeArray2; 78 /// 79 /// // Will use the specific instance of MyExtractor provided. 80 /// // MyExtractor need not be default-constructible in this case. 81 /// MyExtractor E(SomeContext); 82 /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E); 83 /// 84 85 template <typename ValueType, typename Extractor> class VarStreamArrayIterator; 86 87 template <typename ValueType, 88 typename Extractor = VarStreamArrayExtractor<ValueType>> 89 class VarStreamArray { 90 friend class VarStreamArrayIterator<ValueType, Extractor>; 91 92 public: 93 typedef VarStreamArrayIterator<ValueType, Extractor> Iterator; 94 95 VarStreamArray() = default; 96 97 explicit VarStreamArray(const Extractor &E) : E(E) {} 98 99 explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {} 100 101 VarStreamArray(BinaryStreamRef Stream, const Extractor &E) 102 : Stream(Stream), E(E) {} 103 104 Iterator begin(bool *HadError = nullptr) const { 105 return Iterator(*this, E, HadError); 106 } 107 108 bool valid() const { return Stream.valid(); } 109 110 Iterator end() const { return Iterator(E); } 111 112 bool empty() const { return Stream.getLength() == 0; } 113 114 /// \brief given an offset into the array's underlying stream, return an 115 /// iterator to the record at that offset. This is considered unsafe 116 /// since the behavior is undefined if \p Offset does not refer to the 117 /// beginning of a valid record. 118 Iterator at(uint32_t Offset) const { 119 return Iterator(*this, E, Offset, nullptr); 120 } 121 122 const Extractor &getExtractor() const { return E; } 123 Extractor &getExtractor() { return E; } 124 125 BinaryStreamRef getUnderlyingStream() const { return Stream; } 126 void setUnderlyingStream(BinaryStreamRef S) { Stream = S; } 127 128 private: 129 BinaryStreamRef Stream; 130 Extractor E; 131 }; 132 133 template <typename ValueType, typename Extractor> 134 class VarStreamArrayIterator 135 : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, 136 std::forward_iterator_tag, ValueType> { 137 typedef VarStreamArrayIterator<ValueType, Extractor> IterType; 138 typedef VarStreamArray<ValueType, Extractor> ArrayType; 139 140 public: 141 VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, 142 bool *HadError) 143 : VarStreamArrayIterator(Array, E, 0, HadError) {} 144 145 VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, 146 uint32_t Offset, bool *HadError) 147 : IterRef(Array.Stream.drop_front(Offset)), Extract(E), 148 Array(&Array), AbsOffset(Offset), HadError(HadError) { 149 if (IterRef.getLength() == 0) 150 moveToEnd(); 151 else { 152 auto EC = Extract(IterRef, ThisLen, ThisValue); 153 if (EC) { 154 consumeError(std::move(EC)); 155 markError(); 156 } 157 } 158 } 159 160 VarStreamArrayIterator() = default; 161 explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} 162 ~VarStreamArrayIterator() = default; 163 164 bool operator==(const IterType &R) const { 165 if (Array && R.Array) { 166 // Both have a valid array, make sure they're same. 167 assert(Array == R.Array); 168 return IterRef == R.IterRef; 169 } 170 171 // Both iterators are at the end. 172 if (!Array && !R.Array) 173 return true; 174 175 // One is not at the end and one is. 176 return false; 177 } 178 179 const ValueType &operator*() const { 180 assert(Array && !HasError); 181 return ThisValue; 182 } 183 184 ValueType &operator*() { 185 assert(Array && !HasError); 186 return ThisValue; 187 } 188 189 IterType &operator+=(unsigned N) { 190 for (unsigned I = 0; I < N; ++I) { 191 // We are done with the current record, discard it so that we are 192 // positioned at the next record. 193 AbsOffset += ThisLen; 194 IterRef = IterRef.drop_front(ThisLen); 195 if (IterRef.getLength() == 0) { 196 // There is nothing after the current record, we must make this an end 197 // iterator. 198 moveToEnd(); 199 } else { 200 // There is some data after the current record. 201 auto EC = Extract(IterRef, ThisLen, ThisValue); 202 if (EC) { 203 consumeError(std::move(EC)); 204 markError(); 205 } else if (ThisLen == 0) { 206 // An empty record? Make this an end iterator. 207 moveToEnd(); 208 } 209 } 210 } 211 return *this; 212 } 213 214 uint32_t offset() const { return AbsOffset; } 215 uint32_t getRecordLength() const { return ThisLen; } 216 217 private: 218 void moveToEnd() { 219 Array = nullptr; 220 ThisLen = 0; 221 } 222 void markError() { 223 moveToEnd(); 224 HasError = true; 225 if (HadError != nullptr) 226 *HadError = true; 227 } 228 229 ValueType ThisValue; 230 BinaryStreamRef IterRef; 231 Extractor Extract; 232 const ArrayType *Array{nullptr}; 233 uint32_t ThisLen{0}; 234 uint32_t AbsOffset{0}; 235 bool HasError{false}; 236 bool *HadError{nullptr}; 237 }; 238 239 template <typename T> class FixedStreamArrayIterator; 240 241 /// FixedStreamArray is similar to VarStreamArray, except with each record 242 /// having a fixed-length. As with VarStreamArray, there is no upfront 243 /// cost associated with building or copying a FixedStreamArray, as the 244 /// memory for each element is not read from the backing stream until that 245 /// element is iterated. 246 template <typename T> class FixedStreamArray { 247 friend class FixedStreamArrayIterator<T>; 248 249 public: 250 typedef FixedStreamArrayIterator<T> Iterator; 251 252 FixedStreamArray() = default; 253 explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { 254 assert(Stream.getLength() % sizeof(T) == 0); 255 } 256 257 bool operator==(const FixedStreamArray<T> &Other) const { 258 return Stream == Other.Stream; 259 } 260 261 bool operator!=(const FixedStreamArray<T> &Other) const { 262 return !(*this == Other); 263 } 264 265 FixedStreamArray &operator=(const FixedStreamArray &) = default; 266 267 const T &operator[](uint32_t Index) const { 268 assert(Index < size()); 269 uint32_t Off = Index * sizeof(T); 270 ArrayRef<uint8_t> Data; 271 if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) { 272 assert(false && "Unexpected failure reading from stream"); 273 // This should never happen since we asserted that the stream length was 274 // an exact multiple of the element size. 275 consumeError(std::move(EC)); 276 } 277 assert(llvm::alignmentAdjustment(Data.data(), alignof(T)) == 0); 278 return *reinterpret_cast<const T *>(Data.data()); 279 } 280 281 uint32_t size() const { return Stream.getLength() / sizeof(T); } 282 283 bool empty() const { return size() == 0; } 284 285 FixedStreamArrayIterator<T> begin() const { 286 return FixedStreamArrayIterator<T>(*this, 0); 287 } 288 289 FixedStreamArrayIterator<T> end() const { 290 return FixedStreamArrayIterator<T>(*this, size()); 291 } 292 293 const T &front() const { return *begin(); } 294 const T &back() const { 295 FixedStreamArrayIterator<T> I = end(); 296 return *(--I); 297 } 298 299 BinaryStreamRef getUnderlyingStream() const { return Stream; } 300 301 private: 302 BinaryStreamRef Stream; 303 }; 304 305 template <typename T> 306 class FixedStreamArrayIterator 307 : public iterator_facade_base<FixedStreamArrayIterator<T>, 308 std::random_access_iterator_tag, const T> { 309 310 public: 311 FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index) 312 : Array(Array), Index(Index) {} 313 314 FixedStreamArrayIterator<T> & 315 operator=(const FixedStreamArrayIterator<T> &Other) { 316 Array = Other.Array; 317 Index = Other.Index; 318 return *this; 319 } 320 321 const T &operator*() const { return Array[Index]; } 322 const T &operator*() { return Array[Index]; } 323 324 bool operator==(const FixedStreamArrayIterator<T> &R) const { 325 assert(Array == R.Array); 326 return (Index == R.Index) && (Array == R.Array); 327 } 328 329 FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) { 330 Index += N; 331 return *this; 332 } 333 334 FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) { 335 assert(std::ptrdiff_t(Index) >= N); 336 Index -= N; 337 return *this; 338 } 339 340 std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const { 341 assert(Array == R.Array); 342 assert(Index >= R.Index); 343 return Index - R.Index; 344 } 345 346 bool operator<(const FixedStreamArrayIterator<T> &RHS) const { 347 assert(Array == RHS.Array); 348 return Index < RHS.Index; 349 } 350 351 private: 352 FixedStreamArray<T> Array; 353 uint32_t Index; 354 }; 355 356 } // namespace llvm 357 358 #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H 359