1 //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" 11 #include "llvm/ADT/SmallString.h" 12 #include "llvm/ADT/StringExtras.h" 13 #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" 14 #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" 15 #include "llvm/DebugInfo/CodeView/TypeIndex.h" 16 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" 17 #include "llvm/DebugInfo/CodeView/TypeRecord.h" 18 #include "llvm/Support/Error.h" 19 20 using namespace llvm; 21 using namespace llvm::codeview; 22 23 static inline size_t slotForIndex(TypeIndex Idx) { 24 assert(!Idx.isSimple() && "simple type indices have no slots"); 25 return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex; 26 } 27 28 namespace { 29 30 /// Implementation of CodeView type stream merging. 31 /// 32 /// A CodeView type stream is a series of records that reference each other 33 /// through type indices. A type index is either "simple", meaning it is less 34 /// than 0x1000 and refers to a builtin type, or it is complex, meaning it 35 /// refers to a prior type record in the current stream. The type index of a 36 /// record is equal to the number of records before it in the stream plus 37 /// 0x1000. 38 /// 39 /// Type records are only allowed to use type indices smaller than their own, so 40 /// a type stream is effectively a topologically sorted DAG. Cycles occuring in 41 /// the type graph of the source program are resolved with forward declarations 42 /// of composite types. This class implements the following type stream merging 43 /// algorithm, which relies on this DAG structure: 44 /// 45 /// - Begin with a new empty stream, and a new empty hash table that maps from 46 /// type record contents to new type index. 47 /// - For each new type stream, maintain a map from source type index to 48 /// destination type index. 49 /// - For each record, copy it and rewrite its type indices to be valid in the 50 /// destination type stream. 51 /// - If the new type record is not already present in the destination stream 52 /// hash table, append it to the destination type stream, assign it the next 53 /// type index, and update the two hash tables. 54 /// - If the type record already exists in the destination stream, discard it 55 /// and update the type index map to forward the source type index to the 56 /// existing destination type index. 57 /// 58 /// As an additional complication, type stream merging actually produces two 59 /// streams: an item (or IPI) stream and a type stream, as this is what is 60 /// actually stored in the final PDB. We choose which records go where by 61 /// looking at the record kind. 62 class TypeStreamMerger { 63 public: 64 explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest) 65 : IndexMap(SourceToDest) { 66 SourceToDest.clear(); 67 } 68 69 static const TypeIndex Untranslated; 70 71 // Local hashing entry points 72 Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds, 73 MergingTypeTableBuilder &DestTypes, 74 const CVTypeArray &IdsAndTypes); 75 Error mergeIdRecords(MergingTypeTableBuilder &Dest, 76 ArrayRef<TypeIndex> TypeSourceToDest, 77 const CVTypeArray &Ids); 78 Error mergeTypeRecords(MergingTypeTableBuilder &Dest, 79 const CVTypeArray &Types); 80 81 // Global hashing entry points 82 Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, 83 GlobalTypeTableBuilder &DestTypes, 84 const CVTypeArray &IdsAndTypes, 85 ArrayRef<GloballyHashedType> Hashes); 86 Error mergeIdRecords(GlobalTypeTableBuilder &Dest, 87 ArrayRef<TypeIndex> TypeSourceToDest, 88 const CVTypeArray &Ids, 89 ArrayRef<GloballyHashedType> Hashes); 90 Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types, 91 ArrayRef<GloballyHashedType> Hashes); 92 93 private: 94 Error doit(const CVTypeArray &Types); 95 96 Error remapAllTypes(const CVTypeArray &Types); 97 98 Error remapType(const CVType &Type); 99 100 void addMapping(TypeIndex Idx); 101 102 inline bool remapTypeIndex(TypeIndex &Idx) { 103 // If we're mapping a pure index stream, then IndexMap only contains 104 // mappings from OldIdStream -> NewIdStream, in which case we will need to 105 // use the special mapping from OldTypeStream -> NewTypeStream which was 106 // computed externally. Regardless, we use this special map if and only if 107 // we are doing an id-only mapping. 108 if (!hasTypeStream()) 109 return remapIndex(Idx, TypeLookup); 110 111 assert(TypeLookup.empty()); 112 return remapIndex(Idx, IndexMap); 113 } 114 inline bool remapItemIndex(TypeIndex &Idx) { 115 assert(hasIdStream()); 116 return remapIndex(Idx, IndexMap); 117 } 118 119 bool hasTypeStream() const { 120 return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream); 121 } 122 123 bool hasIdStream() const { 124 return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream); 125 } 126 127 ArrayRef<uint8_t> remapIndices(const CVType &OriginalType, 128 MutableArrayRef<uint8_t> Storage); 129 130 inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) { 131 if (LLVM_LIKELY(remapIndexSimple(Idx, Map))) 132 return true; 133 134 return remapIndexFallback(Idx, Map); 135 } 136 137 inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const { 138 // Simple types are unchanged. 139 if (Idx.isSimple()) 140 return true; 141 142 // Check if this type index refers to a record we've already translated 143 // successfully. If it refers to a type later in the stream or a record we 144 // had to defer, defer it until later pass. 145 unsigned MapPos = slotForIndex(Idx); 146 if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated)) 147 return false; 148 149 Idx = Map[MapPos]; 150 return true; 151 } 152 153 bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map); 154 155 Error errorCorruptRecord() const { 156 return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record); 157 } 158 159 Optional<Error> LastError; 160 161 bool UseGlobalHashes = false; 162 163 bool IsSecondPass = false; 164 165 unsigned NumBadIndices = 0; 166 167 TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; 168 169 MergingTypeTableBuilder *DestIdStream = nullptr; 170 MergingTypeTableBuilder *DestTypeStream = nullptr; 171 172 GlobalTypeTableBuilder *DestGlobalIdStream = nullptr; 173 GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr; 174 175 ArrayRef<GloballyHashedType> GlobalHashes; 176 177 // If we're only mapping id records, this array contains the mapping for 178 // type records. 179 ArrayRef<TypeIndex> TypeLookup; 180 181 /// Map from source type index to destination type index. Indexed by source 182 /// type index minus 0x1000. 183 SmallVectorImpl<TypeIndex> &IndexMap; 184 185 /// Temporary storage that we use to copy a record's data while re-writing 186 /// its type indices. 187 SmallVector<uint8_t, 256> RemapStorage; 188 }; 189 190 } // end anonymous namespace 191 192 const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); 193 194 static bool isIdRecord(TypeLeafKind K) { 195 switch (K) { 196 case TypeLeafKind::LF_FUNC_ID: 197 case TypeLeafKind::LF_MFUNC_ID: 198 case TypeLeafKind::LF_STRING_ID: 199 case TypeLeafKind::LF_SUBSTR_LIST: 200 case TypeLeafKind::LF_BUILDINFO: 201 case TypeLeafKind::LF_UDT_SRC_LINE: 202 case TypeLeafKind::LF_UDT_MOD_SRC_LINE: 203 return true; 204 default: 205 return false; 206 } 207 } 208 209 void TypeStreamMerger::addMapping(TypeIndex Idx) { 210 if (!IsSecondPass) { 211 assert(IndexMap.size() == slotForIndex(CurIndex) && 212 "visitKnownRecord should add one index map entry"); 213 IndexMap.push_back(Idx); 214 } else { 215 assert(slotForIndex(CurIndex) < IndexMap.size()); 216 IndexMap[slotForIndex(CurIndex)] = Idx; 217 } 218 } 219 220 bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx, 221 ArrayRef<TypeIndex> Map) { 222 size_t MapPos = slotForIndex(Idx); 223 224 // If this is the second pass and this index isn't in the map, then it points 225 // outside the current type stream, and this is a corrupt record. 226 if (IsSecondPass && MapPos >= Map.size()) { 227 // FIXME: Print a more useful error. We can give the current record and the 228 // index that we think its pointing to. 229 if (LastError) 230 LastError = joinErrors(std::move(*LastError), errorCorruptRecord()); 231 else 232 LastError = errorCorruptRecord(); 233 } 234 235 ++NumBadIndices; 236 237 // This type index is invalid. Remap this to "not translated by cvpack", 238 // and return failure. 239 Idx = Untranslated; 240 return false; 241 } 242 243 // Local hashing entry points 244 Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest, 245 const CVTypeArray &Types) { 246 DestTypeStream = &Dest; 247 UseGlobalHashes = false; 248 249 return doit(Types); 250 } 251 252 Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest, 253 ArrayRef<TypeIndex> TypeSourceToDest, 254 const CVTypeArray &Ids) { 255 DestIdStream = &Dest; 256 TypeLookup = TypeSourceToDest; 257 UseGlobalHashes = false; 258 259 return doit(Ids); 260 } 261 262 Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds, 263 MergingTypeTableBuilder &DestTypes, 264 const CVTypeArray &IdsAndTypes) { 265 DestIdStream = &DestIds; 266 DestTypeStream = &DestTypes; 267 UseGlobalHashes = false; 268 return doit(IdsAndTypes); 269 } 270 271 // Global hashing entry points 272 Error TypeStreamMerger::mergeTypeRecords(GlobalTypeTableBuilder &Dest, 273 const CVTypeArray &Types, 274 ArrayRef<GloballyHashedType> Hashes) { 275 DestGlobalTypeStream = &Dest; 276 UseGlobalHashes = true; 277 GlobalHashes = Hashes; 278 279 return doit(Types); 280 } 281 282 Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest, 283 ArrayRef<TypeIndex> TypeSourceToDest, 284 const CVTypeArray &Ids, 285 ArrayRef<GloballyHashedType> Hashes) { 286 DestGlobalIdStream = &Dest; 287 TypeLookup = TypeSourceToDest; 288 UseGlobalHashes = true; 289 GlobalHashes = Hashes; 290 291 return doit(Ids); 292 } 293 294 Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, 295 GlobalTypeTableBuilder &DestTypes, 296 const CVTypeArray &IdsAndTypes, 297 ArrayRef<GloballyHashedType> Hashes) { 298 DestGlobalIdStream = &DestIds; 299 DestGlobalTypeStream = &DestTypes; 300 UseGlobalHashes = true; 301 GlobalHashes = Hashes; 302 return doit(IdsAndTypes); 303 } 304 305 Error TypeStreamMerger::doit(const CVTypeArray &Types) { 306 if (auto EC = remapAllTypes(Types)) 307 return EC; 308 309 // If we found bad indices but no other errors, try doing another pass and see 310 // if we can resolve the indices that weren't in the map on the first pass. 311 // This may require multiple passes, but we should always make progress. MASM 312 // is the only known CodeView producer that makes type streams that aren't 313 // topologically sorted. The standard library contains MASM-produced objects, 314 // so this is important to handle correctly, but we don't have to be too 315 // efficient. MASM type streams are usually very small. 316 while (!LastError && NumBadIndices > 0) { 317 unsigned BadIndicesRemaining = NumBadIndices; 318 IsSecondPass = true; 319 NumBadIndices = 0; 320 CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); 321 322 if (auto EC = remapAllTypes(Types)) 323 return EC; 324 325 assert(NumBadIndices <= BadIndicesRemaining && 326 "second pass found more bad indices"); 327 if (!LastError && NumBadIndices == BadIndicesRemaining) { 328 return llvm::make_error<CodeViewError>( 329 cv_error_code::corrupt_record, "input type graph contains cycles"); 330 } 331 } 332 333 if (LastError) 334 return std::move(*LastError); 335 return Error::success(); 336 } 337 338 Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) { 339 BinaryStreamRef Stream = Types.getUnderlyingStream(); 340 ArrayRef<uint8_t> Buffer; 341 cantFail(Stream.readBytes(0, Stream.getLength(), Buffer)); 342 343 return forEachCodeViewRecord<CVType>( 344 Buffer, [this](const CVType &T) { return remapType(T); }); 345 } 346 347 Error TypeStreamMerger::remapType(const CVType &Type) { 348 auto DoSerialize = 349 [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> { 350 return remapIndices(Type, Storage); 351 }; 352 353 TypeIndex DestIdx = Untranslated; 354 if (LLVM_LIKELY(UseGlobalHashes)) { 355 GlobalTypeTableBuilder &Dest = 356 isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; 357 GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; 358 DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize); 359 } else { 360 MergingTypeTableBuilder &Dest = 361 isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; 362 363 RemapStorage.resize(Type.RecordData.size()); 364 ArrayRef<uint8_t> Result = DoSerialize(RemapStorage); 365 if (!Result.empty()) 366 DestIdx = Dest.insertRecordBytes(Result); 367 } 368 addMapping(DestIdx); 369 370 ++CurIndex; 371 assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) && 372 "visitKnownRecord should add one index map entry"); 373 return Error::success(); 374 } 375 376 ArrayRef<uint8_t> 377 TypeStreamMerger::remapIndices(const CVType &OriginalType, 378 MutableArrayRef<uint8_t> Storage) { 379 SmallVector<TiReference, 4> Refs; 380 discoverTypeIndices(OriginalType.RecordData, Refs); 381 if (Refs.empty()) 382 return OriginalType.RecordData; 383 384 ::memcpy(Storage.data(), OriginalType.RecordData.data(), 385 OriginalType.RecordData.size()); 386 387 uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix); 388 389 for (auto &Ref : Refs) { 390 TypeIndex *DestTIs = 391 reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset); 392 393 for (size_t I = 0; I < Ref.Count; ++I) { 394 TypeIndex &TI = DestTIs[I]; 395 bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI) 396 : remapTypeIndex(TI); 397 if (LLVM_UNLIKELY(!Success)) 398 return {}; 399 } 400 } 401 return Storage; 402 } 403 404 Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest, 405 SmallVectorImpl<TypeIndex> &SourceToDest, 406 const CVTypeArray &Types) { 407 TypeStreamMerger M(SourceToDest); 408 return M.mergeTypeRecords(Dest, Types); 409 } 410 411 Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest, 412 ArrayRef<TypeIndex> TypeSourceToDest, 413 SmallVectorImpl<TypeIndex> &SourceToDest, 414 const CVTypeArray &Ids) { 415 TypeStreamMerger M(SourceToDest); 416 return M.mergeIdRecords(Dest, TypeSourceToDest, Ids); 417 } 418 419 Error llvm::codeview::mergeTypeAndIdRecords( 420 MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, 421 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes) { 422 TypeStreamMerger M(SourceToDest); 423 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes); 424 } 425 426 Error llvm::codeview::mergeTypeAndIdRecords( 427 GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, 428 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes, 429 ArrayRef<GloballyHashedType> Hashes) { 430 TypeStreamMerger M(SourceToDest); 431 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes); 432 } 433 434 Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest, 435 SmallVectorImpl<TypeIndex> &SourceToDest, 436 const CVTypeArray &Types, 437 ArrayRef<GloballyHashedType> Hashes) { 438 TypeStreamMerger M(SourceToDest); 439 return M.mergeTypeRecords(Dest, Types, Hashes); 440 } 441 442 Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest, 443 ArrayRef<TypeIndex> Types, 444 SmallVectorImpl<TypeIndex> &SourceToDest, 445 const CVTypeArray &Ids, 446 ArrayRef<GloballyHashedType> Hashes) { 447 TypeStreamMerger M(SourceToDest); 448 return M.mergeIdRecords(Dest, Types, Ids, Hashes); 449 } 450