//===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/Support/Error.h" using namespace llvm; using namespace llvm::codeview; static inline size_t slotForIndex(TypeIndex Idx) { assert(!Idx.isSimple() && "simple type indices have no slots"); return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex; } namespace { /// Implementation of CodeView type stream merging. /// /// A CodeView type stream is a series of records that reference each other /// through type indices. A type index is either "simple", meaning it is less /// than 0x1000 and refers to a builtin type, or it is complex, meaning it /// refers to a prior type record in the current stream. The type index of a /// record is equal to the number of records before it in the stream plus /// 0x1000. /// /// Type records are only allowed to use type indices smaller than their own, so /// a type stream is effectively a topologically sorted DAG. Cycles occurring in /// the type graph of the source program are resolved with forward declarations /// of composite types. This class implements the following type stream merging /// algorithm, which relies on this DAG structure: /// /// - Begin with a new empty stream, and a new empty hash table that maps from /// type record contents to new type index. /// - For each new type stream, maintain a map from source type index to /// destination type index. /// - For each record, copy it and rewrite its type indices to be valid in the /// destination type stream. /// - If the new type record is not already present in the destination stream /// hash table, append it to the destination type stream, assign it the next /// type index, and update the two hash tables. /// - If the type record already exists in the destination stream, discard it /// and update the type index map to forward the source type index to the /// existing destination type index. /// /// As an additional complication, type stream merging actually produces two /// streams: an item (or IPI) stream and a type stream, as this is what is /// actually stored in the final PDB. We choose which records go where by /// looking at the record kind. class TypeStreamMerger { public: explicit TypeStreamMerger(SmallVectorImpl &SourceToDest) : IndexMap(SourceToDest) { // When dealing with precompiled headers objects, all data in SourceToDest // belongs to the precompiled headers object, and is assumed to be already // remapped to the target PDB. Any forthcoming type that will be merged in // might potentially back-reference this data. We also don't want to resolve // twice the types in the precompiled object. CurIndex += SourceToDest.size(); } static const TypeIndex Untranslated; // Local hashing entry points Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, const CVTypeArray &IdsAndTypes, Optional &S); Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef TypeSourceToDest, const CVTypeArray &Ids); Error mergeTypeRecords(MergingTypeTableBuilder &Dest, const CVTypeArray &Types); // Global hashing entry points Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, const CVTypeArray &IdsAndTypes, ArrayRef Hashes, Optional &S); Error mergeIdRecords(GlobalTypeTableBuilder &Dest, ArrayRef TypeSourceToDest, const CVTypeArray &Ids, ArrayRef Hashes); Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types, ArrayRef Hashes, Optional &S); private: Error doit(const CVTypeArray &Types); Error remapAllTypes(const CVTypeArray &Types); Error remapType(const CVType &Type); void addMapping(TypeIndex Idx); inline bool remapTypeIndex(TypeIndex &Idx) { // If we're mapping a pure index stream, then IndexMap only contains // mappings from OldIdStream -> NewIdStream, in which case we will need to // use the special mapping from OldTypeStream -> NewTypeStream which was // computed externally. Regardless, we use this special map if and only if // we are doing an id-only mapping. if (!hasTypeStream()) return remapIndex(Idx, TypeLookup); assert(TypeLookup.empty()); return remapIndex(Idx, IndexMap); } inline bool remapItemIndex(TypeIndex &Idx) { assert(hasIdStream()); return remapIndex(Idx, IndexMap); } bool hasTypeStream() const { return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream); } bool hasIdStream() const { return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream); } ArrayRef remapIndices(const CVType &OriginalType, MutableArrayRef Storage); inline bool remapIndex(TypeIndex &Idx, ArrayRef Map) { if (LLVM_LIKELY(remapIndexSimple(Idx, Map))) return true; return remapIndexFallback(Idx, Map); } inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef Map) const { // Simple types are unchanged. if (Idx.isSimple()) return true; // Check if this type index refers to a record we've already translated // successfully. If it refers to a type later in the stream or a record we // had to defer, defer it until later pass. unsigned MapPos = slotForIndex(Idx); if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated)) return false; Idx = Map[MapPos]; return true; } bool remapIndexFallback(TypeIndex &Idx, ArrayRef Map); Error errorCorruptRecord() const { return llvm::make_error(cv_error_code::corrupt_record); } Expected shouldRemapType(const CVType &Type); Optional LastError; bool UseGlobalHashes = false; bool IsSecondPass = false; unsigned NumBadIndices = 0; TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; MergingTypeTableBuilder *DestIdStream = nullptr; MergingTypeTableBuilder *DestTypeStream = nullptr; GlobalTypeTableBuilder *DestGlobalIdStream = nullptr; GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr; ArrayRef GlobalHashes; // If we're only mapping id records, this array contains the mapping for // type records. ArrayRef TypeLookup; /// Map from source type index to destination type index. Indexed by source /// type index minus 0x1000. SmallVectorImpl &IndexMap; /// Temporary storage that we use to copy a record's data while re-writing /// its type indices. SmallVector RemapStorage; Optional PCHSignature; }; } // end anonymous namespace const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); void TypeStreamMerger::addMapping(TypeIndex Idx) { if (!IsSecondPass) { assert(IndexMap.size() == slotForIndex(CurIndex) && "visitKnownRecord should add one index map entry"); IndexMap.push_back(Idx); } else { assert(slotForIndex(CurIndex) < IndexMap.size()); IndexMap[slotForIndex(CurIndex)] = Idx; } } bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx, ArrayRef Map) { size_t MapPos = slotForIndex(Idx); // If this is the second pass and this index isn't in the map, then it points // outside the current type stream, and this is a corrupt record. if (IsSecondPass && MapPos >= Map.size()) { // FIXME: Print a more useful error. We can give the current record and the // index that we think its pointing to. if (LastError) LastError = joinErrors(std::move(*LastError), errorCorruptRecord()); else LastError = errorCorruptRecord(); } ++NumBadIndices; // This type index is invalid. Remap this to "not translated by cvpack", // and return failure. Idx = Untranslated; return false; } // Local hashing entry points Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest, const CVTypeArray &Types) { DestTypeStream = &Dest; UseGlobalHashes = false; return doit(Types); } Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef TypeSourceToDest, const CVTypeArray &Ids) { DestIdStream = &Dest; TypeLookup = TypeSourceToDest; UseGlobalHashes = false; return doit(Ids); } Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, const CVTypeArray &IdsAndTypes, Optional &S) { DestIdStream = &DestIds; DestTypeStream = &DestTypes; UseGlobalHashes = false; auto Err = doit(IdsAndTypes); S = PCHSignature; return Err; } // Global hashing entry points Error TypeStreamMerger::mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types, ArrayRef Hashes, Optional &S) { DestGlobalTypeStream = &Dest; UseGlobalHashes = true; GlobalHashes = Hashes; auto Err = doit(Types); S = PCHSignature; return Err; } Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest, ArrayRef TypeSourceToDest, const CVTypeArray &Ids, ArrayRef Hashes) { DestGlobalIdStream = &Dest; TypeLookup = TypeSourceToDest; UseGlobalHashes = true; GlobalHashes = Hashes; return doit(Ids); } Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, const CVTypeArray &IdsAndTypes, ArrayRef Hashes, Optional &S) { DestGlobalIdStream = &DestIds; DestGlobalTypeStream = &DestTypes; UseGlobalHashes = true; GlobalHashes = Hashes; auto Err = doit(IdsAndTypes); S = PCHSignature; return Err; } Error TypeStreamMerger::doit(const CVTypeArray &Types) { if (auto EC = remapAllTypes(Types)) return EC; // If we found bad indices but no other errors, try doing another pass and see // if we can resolve the indices that weren't in the map on the first pass. // This may require multiple passes, but we should always make progress. MASM // is the only known CodeView producer that makes type streams that aren't // topologically sorted. The standard library contains MASM-produced objects, // so this is important to handle correctly, but we don't have to be too // efficient. MASM type streams are usually very small. while (!LastError && NumBadIndices > 0) { unsigned BadIndicesRemaining = NumBadIndices; IsSecondPass = true; NumBadIndices = 0; CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); if (auto EC = remapAllTypes(Types)) return EC; assert(NumBadIndices <= BadIndicesRemaining && "second pass found more bad indices"); if (!LastError && NumBadIndices == BadIndicesRemaining) { return llvm::make_error( cv_error_code::corrupt_record, "Input type graph contains cycles"); } } if (LastError) return std::move(*LastError); return Error::success(); } Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) { BinaryStreamRef Stream = Types.getUnderlyingStream(); ArrayRef Buffer; cantFail(Stream.readBytes(0, Stream.getLength(), Buffer)); return forEachCodeViewRecord( Buffer, [this](const CVType &T) { return remapType(T); }); } Error TypeStreamMerger::remapType(const CVType &Type) { auto R = shouldRemapType(Type); if (!R) return R.takeError(); TypeIndex DestIdx = Untranslated; if (*R) { auto DoSerialize = [this, Type](MutableArrayRef Storage) -> ArrayRef { return remapIndices(Type, Storage); }; unsigned AlignedSize = alignTo(Type.RecordData.size(), 4); if (LLVM_LIKELY(UseGlobalHashes)) { GlobalTypeTableBuilder &Dest = isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize); } else { MergingTypeTableBuilder &Dest = isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; RemapStorage.resize(AlignedSize); ArrayRef Result = DoSerialize(RemapStorage); if (!Result.empty()) DestIdx = Dest.insertRecordBytes(Result); } } addMapping(DestIdx); ++CurIndex; assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) && "visitKnownRecord should add one index map entry"); return Error::success(); } ArrayRef TypeStreamMerger::remapIndices(const CVType &OriginalType, MutableArrayRef Storage) { unsigned Align = OriginalType.RecordData.size() & 3; assert(Storage.size() == alignTo(OriginalType.RecordData.size(), 4) && "The storage buffer size is not a multiple of 4 bytes which will " "cause misalignment in the output TPI stream!"); SmallVector Refs; discoverTypeIndices(OriginalType.RecordData, Refs); if (Refs.empty() && Align == 0) return OriginalType.RecordData; ::memcpy(Storage.data(), OriginalType.RecordData.data(), OriginalType.RecordData.size()); uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix); for (auto &Ref : Refs) { TypeIndex *DestTIs = reinterpret_cast(DestContent + Ref.Offset); for (size_t I = 0; I < Ref.Count; ++I) { TypeIndex &TI = DestTIs[I]; bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI) : remapTypeIndex(TI); if (LLVM_UNLIKELY(!Success)) return {}; } } if (Align > 0) { RecordPrefix *StorageHeader = reinterpret_cast(Storage.data()); StorageHeader->RecordLen += 4 - Align; DestContent = Storage.data() + OriginalType.RecordData.size(); for (; Align < 4; ++Align) *DestContent++ = LF_PAD4 - Align; } return Storage; } Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest, SmallVectorImpl &SourceToDest, const CVTypeArray &Types) { TypeStreamMerger M(SourceToDest); return M.mergeTypeRecords(Dest, Types); } Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef TypeSourceToDest, SmallVectorImpl &SourceToDest, const CVTypeArray &Ids) { TypeStreamMerger M(SourceToDest); return M.mergeIdRecords(Dest, TypeSourceToDest, Ids); } Error llvm::codeview::mergeTypeAndIdRecords( MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, SmallVectorImpl &SourceToDest, const CVTypeArray &IdsAndTypes, Optional &PCHSignature) { TypeStreamMerger M(SourceToDest); return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHSignature); } Error llvm::codeview::mergeTypeAndIdRecords( GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, SmallVectorImpl &SourceToDest, const CVTypeArray &IdsAndTypes, ArrayRef Hashes, Optional &PCHSignature) { TypeStreamMerger M(SourceToDest); return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes, PCHSignature); } Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest, SmallVectorImpl &SourceToDest, const CVTypeArray &Types, ArrayRef Hashes, Optional &PCHSignature) { TypeStreamMerger M(SourceToDest); return M.mergeTypeRecords(Dest, Types, Hashes, PCHSignature); } Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest, ArrayRef Types, SmallVectorImpl &SourceToDest, const CVTypeArray &Ids, ArrayRef Hashes) { TypeStreamMerger M(SourceToDest); return M.mergeIdRecords(Dest, Types, Ids, Hashes); } Expected TypeStreamMerger::shouldRemapType(const CVType &Type) { // For object files containing precompiled types, we need to extract the // signature, through EndPrecompRecord. This is done here for performance // reasons, to avoid re-parsing the Types stream. if (Type.kind() == LF_ENDPRECOMP) { EndPrecompRecord EP; if (auto EC = TypeDeserializer::deserializeAs(const_cast(Type), EP)) return joinErrors(std::move(EC), errorCorruptRecord()); if (PCHSignature.hasValue()) return errorCorruptRecord(); PCHSignature.emplace(EP.getSignature()); return false; } return true; }