1 //===- TypeSerializer.cpp ---------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/DebugInfo/CodeView/TypeSerializer.h"
12 #include "llvm/ADT/DenseSet.h"
13 #include "llvm/Support/BinaryStreamWriter.h"
18 using namespace llvm::codeview;
24 unsigned Size; // FIXME: Go to uint16_t?
28 /// Wrapper around a pointer to a HashedType. Hash and equality operations are
29 /// based on data in the pointee.
30 struct HashedTypePtr {
31 HashedTypePtr() = default;
32 HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {}
// Non-owning. nullptr doubles as the DenseMap empty key and (HashedType*)1 as
// the tombstone key; see the DenseMapInfo specialization below.
33 HashedType *Ptr = nullptr;
// DenseMapInfo for HashedTypePtr: the key is a pointer, but hashing and
// equality look through to the pointee's precomputed hash and record bytes.
38 template <> struct DenseMapInfo<HashedTypePtr> {
39 static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); }
40 static inline HashedTypePtr getTombstoneKey() {
// Address 1 is never a real HashedType allocation, so it is a safe sentinel.
41 return HashedTypePtr(reinterpret_cast<HashedType *>(1));
43 static unsigned getHashValue(HashedTypePtr Val) {
// Sentinel keys must never be hashed. The pointee's hash was computed when
// the record was inserted, so the bytes need not be rehashed here.
44 assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr);
47 static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) {
48 HashedType *LHS = LHSP.Ptr;
49 HashedType *RHS = RHSP.Ptr;
// Comparisons against the empty/tombstone sentinels reduce to pointer
// identity; the sentinels have no pointee to dereference.
50 if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr)
// Cheap rejects first (hash, then size), full byte comparison only as a
// tiebreaker.
52 if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size)
54 return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0;
59 /// Private implementation so that we don't leak our DenseMap instantiations to
61 class llvm::codeview::TypeHasher {
63 /// Storage for type record provided by the caller. Records will outlive the
64 /// hasher object, so they should be allocated here.
65 BumpPtrAllocator &RecordStorage;
67 /// Storage for hash keys. These only need to live as long as the hashing
69 BumpPtrAllocator KeyStorage;
71 /// Hash table. We really want a DenseMap<ArrayRef<uint8_t>, TypeIndex> here,
72 /// but DenseMap is inefficient when the keys are long (like type records)
73 /// because it recomputes the hash value of every key when it grows. This
74 /// value type stores the hash out of line in KeyStorage, so that table
75 /// entries are small and easy to rehash.
76 DenseSet<HashedTypePtr> HashedRecords;
79 TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {}
// Drops all table entries. KeyStorage is deliberately left alone: keys are
// bump-allocated and only reclaimed when the hasher is destroyed.
81 void reset() { HashedRecords.clear(); }
83 /// Takes the bytes of type record, inserts them into the hash table, saves
84 /// them, and returns a pointer to an identical stable type record along with
85 /// its type index in the destination stream.
86 /// Note: Record is taken by reference and re-pointed at the stable copy.
86 TypeIndex getOrCreateRecord(ArrayRef<uint8_t> &Record, TypeIndex TI);
// Hash the record bytes and look them up; on a miss, copy both the key and
// the bytes into stable storage. Either way Record is updated to reference
// the stable copy and the record's (existing or newly assigned) TypeIndex is
// returned.
89 TypeIndex TypeHasher::getOrCreateRecord(ArrayRef<uint8_t> &Record,
91 assert(Record.size() < UINT32_MAX && "Record too big");
92 assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
94 // Compute the hash up front so we can store it in the key.
95 HashedType TempHashedType = {hash_value(Record), Record.data(),
96 unsigned(Record.size()), TI};
// Insert a pointer to a stack temporary first; only a successful (new)
// insertion pays for a heap copy of the key, patched into the slot below.
97 auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType));
98 HashedType *&Hashed = Result.first->Ptr;
101 // This was a new type record. We need stable storage for both the key and
102 // the record. The record should outlive the hashing operation.
103 Hashed = KeyStorage.Allocate<HashedType>();
104 *Hashed = TempHashedType;
106 uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size());
107 memcpy(Stable, Record.data(), Record.size());
108 Hashed->Data = Stable;
109 assert(Hashed->Size == Record.size());
112 // Update the caller's copy of Record to point at the stable copy.
113 Record = ArrayRef<uint8_t>(Hashed->Data, Hashed->Size);
114 return Hashed->Index;
// The index the next inserted record will receive. SeenRecords is dense and
// in insertion order, so its size is exactly the next array index.
117 TypeIndex TypeSerializer::nextTypeIndex() const {
118 return TypeIndex::fromArrayIndex(SeenRecords.size());
// True while the record currently being serialized is an LF_FIELDLIST.
121 bool TypeSerializer::isInFieldList() const {
122 return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST;
// Bytes of the member subrecord currently being written: everything written
// so far minus the completed subrecords of the current field-list segment.
125 MutableArrayRef<uint8_t> TypeSerializer::getCurrentSubRecordData() {
126 assert(isInFieldList());
127 return getCurrentRecordData().drop_front(CurrentSegment.length());
// All bytes written into RecordBuffer so far for the in-progress record.
130 MutableArrayRef<uint8_t> TypeSerializer::getCurrentRecordData() {
131 return MutableArrayRef<uint8_t>(RecordBuffer).take_front(Writer.getOffset());
// Writes a RecordPrefix with the given kind and a placeholder length of 0.
// The real length is patched in once the record is complete (see
// visitTypeEndGetIndex).
134 Error TypeSerializer::writeRecordPrefix(TypeLeafKind Kind) {
136 Prefix.RecordKind = Kind;
137 Prefix.RecordLen = 0;
138 if (auto EC = Writer.writeObject(Prefix))
140 return Error::success();
// Pads Record to a 4-byte boundary by appending LF_PADn bytes (each pad byte
// encodes the remaining distance to the boundary) and returns a view widened
// to include the padding.
// NOTE(review): the already-aligned (Align == 0) early-out is not visible in
// this excerpt — confirm upstream before relying on behavior for aligned
// input.
143 Expected<MutableArrayRef<uint8_t>>
144 TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) {
145 uint32_t Align = Record.size() % 4;
149 int PaddingBytes = 4 - Align;
150 int N = PaddingBytes;
151 while (PaddingBytes > 0) {
// LF_PAD0 + k marks "k bytes left until the next aligned record boundary".
152 uint8_t Pad = static_cast<uint8_t>(LF_PAD0 + PaddingBytes);
153 if (auto EC = Writer.writeInteger(Pad))
154 return std::move(EC);
157 return MutableArrayRef<uint8_t>(Record.data(), Record.size() + N);
// Constructs the serializer over caller-provided bump storage. Hashing (and
// therefore deduplication of inserted records) is optional.
160 TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage, bool Hash)
161 : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2),
162 Stream(RecordBuffer, llvm::support::little), Writer(Stream),
164 // RecordBuffer needs to be able to hold enough data so that if we are 1
165 // byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes,
166 // we won't overflow.
// Only allocate a TypeHasher when deduplication was requested.
168 Hasher = make_unique<TypeHasher>(Storage);
// Defaulted out-of-line so the unique_ptr<TypeHasher> member's deleter sees
// the complete TypeHasher type defined above (it is incomplete in the header).
171 TypeSerializer::~TypeSerializer() = default;
// Read-only view of every record inserted so far, in type-index order.
173 ArrayRef<ArrayRef<uint8_t>> TypeSerializer::records() const {
// Returns the serializer to its initial state so it can be reused. Bump
// allocated record storage is owned by the caller and is not released here.
177 void TypeSerializer::reset() {
181 CurrentSegment = RecordSegment();
182 FieldListSegments.clear();
// Inserts a fully serialized record. With a hasher, the record is
// deduplicated; without one, it is always copied and appended. In both cases
// Record is updated in place to reference stable storage and the record's
// TypeIndex is returned.
188 TypeIndex TypeSerializer::insertRecordBytes(ArrayRef<uint8_t> &Record) {
189 assert(!TypeKind.hasValue() && "Already in a type mapping!");
190 assert(Writer.getOffset() == 0 && "Stream has data already!");
// Hashing path: only a genuinely new record (assigned index == next index)
// is appended to SeenRecords; duplicates reuse the earlier index.
193 TypeIndex ActualTI = Hasher->getOrCreateRecord(Record, nextTypeIndex());
194 if (nextTypeIndex() == ActualTI)
195 SeenRecords.push_back(Record);
// Non-hashing path: copy to stable storage and append unconditionally.
199 TypeIndex NewTI = nextTypeIndex();
200 uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size());
201 memcpy(Stable, Record.data(), Record.size());
202 Record = ArrayRef<uint8_t>(Stable, Record.size());
203 SeenRecords.push_back(Record);
// Inserts a record whose embedded type indices may need remapping. With no
// mappings the original bytes are inserted directly; otherwise the bytes are
// copied to temporary storage, each mapped index is patched, and the patched
// copy is inserted (and deduplicated) like any other record.
207 TypeIndex TypeSerializer::insertRecord(const RemappedType &Record) {
208 assert(!TypeKind.hasValue() && "Already in a type mapping!");
209 assert(Writer.getOffset() == 0 && "Stream has data already!");
212 ArrayRef<uint8_t> OriginalData = Record.OriginalRecord.RecordData;
213 if (Record.Mappings.empty()) {
214 // This record did not remap any type indices. Just write it.
215 return insertRecordBytes(OriginalData);
218 // At least one type index was remapped. Before we can hash it we have to
219 // copy the full record bytes, re-write each type index, then hash the copy.
220 // We do this in temporary storage since only the DenseMap can decide whether
221 // this record already exists, and if it does we don't want the memory to
223 RemapStorage.resize(OriginalData.size());
224 ::memcpy(&RemapStorage[0], OriginalData.data(), OriginalData.size());
225 uint8_t *ContentBegin = RemapStorage.data() + sizeof(RecordPrefix);
226 for (const auto &M : Record.Mappings) {
227 // First 4 bytes of every record are the record prefix, but the mapping
228 // offset is relative to the content which starts after.
229 *(TypeIndex *)(ContentBegin + M.first) = M.second;
231 auto RemapRef = makeArrayRef(RemapStorage);
232 return insertRecordBytes(RemapRef);
// Begins serialization of a new top-level type record: writes a prefix with a
// placeholder length (patched at visitTypeEnd time), records the kind, and
// forwards to the underlying mapping visitor.
235 Error TypeSerializer::visitTypeBegin(CVType &Record) {
236 assert(!TypeKind.hasValue() && "Already in a type mapping!");
237 assert(Writer.getOffset() == 0 && "Stream has data already!");
239 if (auto EC = writeRecordPrefix(Record.kind()))
242 TypeKind = Record.kind();
243 if (auto EC = Mapping.visitTypeBegin(Record))
246 return Error::success();
// Finishes the in-progress record: pads it, patches the length into the
// prefix, inserts it (deduplicated) into stable storage, then stitches any
// overflow field-list segments together via LF_INDEX continuations. Returns
// the TypeIndex of the first segment, i.e. the record callers refer to.
249 Expected<TypeIndex> TypeSerializer::visitTypeEndGetIndex(CVType &Record) {
250 assert(TypeKind.hasValue() && "Not in a type mapping!");
251 if (auto EC = Mapping.visitTypeEnd(Record))
252 return std::move(EC);
254 // Update the record's length and fill out the CVType members to point to
255 // the stable memory holding the record's data.
256 auto ThisRecordData = getCurrentRecordData();
257 auto ExpectedData = addPadding(ThisRecordData);
259 return ExpectedData.takeError();
260 ThisRecordData = *ExpectedData;
262 RecordPrefix *Prefix =
263 reinterpret_cast<RecordPrefix *>(ThisRecordData.data());
// RecordLen does not include the 2-byte length field itself.
264 Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t);
266 Record.Type = *TypeKind;
267 Record.RecordData = ThisRecordData;
269 // insertRecordBytes assumes we're not in a mapping, so do this first.
273 TypeIndex InsertedTypeIndex = insertRecordBytes(Record.RecordData);
275 // Write out each additional segment in reverse order, and update each
276 // record's continuation index to point to the previous one.
277 for (auto X : reverse(FieldListSegments)) {
// The continuation TypeIndex is the last 4 bytes of each saved segment and
// was written as the 0xB0C0B0C0 placeholder in visitMemberEnd.
278 auto CIBytes = X.take_back(sizeof(uint32_t));
279 support::ulittle32_t *CI =
280 reinterpret_cast<support::ulittle32_t *>(CIBytes.data());
281 assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder");
282 *CI = InsertedTypeIndex.getIndex();
283 InsertedTypeIndex = insertRecordBytes(X);
286 FieldListSegments.clear();
287 CurrentSegment.SubRecords.clear();
289 return InsertedTypeIndex;
// Same as visitTypeEndGetIndex, but discards the resulting index; only the
// error (if any) is propagated.
292 Error TypeSerializer::visitTypeEnd(CVType &Record) {
293 auto ExpectedIndex = visitTypeEndGetIndex(Record);
295 return ExpectedIndex.takeError();
296 return Error::success();
// Begins a member subrecord inside a field list; records the member kind and
// forwards to the underlying mapping visitor.
299 Error TypeSerializer::visitMemberBegin(CVMemberRecord &Record) {
300 assert(isInFieldList() && "Not in a field list!");
301 assert(!MemberKind.hasValue() && "Already in a member record!");
302 MemberKind = Record.Kind;
304 if (auto EC = Mapping.visitMemberBegin(Record))
307 return Error::success();
// Ends a member subrecord. If the field list segment has grown too large to
// fit in one CodeView record, all completed subrecords are spilled to a saved
// segment ending in an LF_INDEX continuation (with a placeholder TypeIndex
// patched later in visitTypeEndGetIndex), and the overflowing subrecord is
// moved to the start of a fresh segment.
310 Error TypeSerializer::visitMemberEnd(CVMemberRecord &Record) {
311 if (auto EC = Mapping.visitMemberEnd(Record))
314 // Check if this subrecord makes the current segment not fit in 64K minus
315 // the space for a continuation record (8 bytes). If the segment does not
316 // fit, insert a continuation record.
317 if (Writer.getOffset() > MaxRecordLength - ContinuationLength) {
318 MutableArrayRef<uint8_t> Data = getCurrentRecordData();
319 SubRecord LastSubRecord = CurrentSegment.SubRecords.back();
// Everything except the just-finished subrecord is spilled; the last
// subrecord is carried over into the new segment.
320 uint32_t CopySize = CurrentSegment.length() - LastSubRecord.Size;
321 auto CopyData = Data.take_front(CopySize);
322 auto LeftOverData = Data.drop_front(CopySize);
323 assert(LastSubRecord.Size == LeftOverData.size());
325 // Allocate stable storage for the record and copy the old record plus
326 // continuation over.
327 uint16_t LengthWithSize = CopySize + ContinuationLength;
328 assert(LengthWithSize <= MaxRecordLength);
329 RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(CopyData.data());
// RecordLen excludes the 2-byte length field itself.
330 Prefix->RecordLen = LengthWithSize - sizeof(uint16_t);
332 uint8_t *SegmentBytes = RecordStorage.Allocate<uint8_t>(LengthWithSize);
333 auto SavedSegment = MutableArrayRef<uint8_t>(SegmentBytes, LengthWithSize);
334 MutableBinaryByteStream CS(SavedSegment, llvm::support::little);
335 BinaryStreamWriter CW(CS);
336 if (auto EC = CW.writeBytes(CopyData))
// Continuation record: LF_INDEX leaf, 2 bytes of padding, then a 4-byte
// placeholder TypeIndex that visitTypeEndGetIndex patches to the index of
// the following segment.
338 if (auto EC = CW.writeEnum(TypeLeafKind::LF_INDEX))
340 if (auto EC = CW.writeInteger<uint16_t>(0))
342 if (auto EC = CW.writeInteger<uint32_t>(0xB0C0B0C0))
344 FieldListSegments.push_back(SavedSegment);
346 // Write a new placeholder record prefix to mark the start of this new
349 if (auto EC = writeRecordPrefix(TypeLeafKind::LF_FIELDLIST))
352 // Then move over the subrecord that overflowed the old segment to the
353 // beginning of this segment. Note that we have to use memmove here
354 // instead of Writer.writeBytes(), because the new and old locations
356 ::memmove(Stream.data().data() + sizeof(RecordPrefix), LeftOverData.data(),
357 LeftOverData.size());
358 // And point the segment writer at the end of that subrecord.
359 Writer.setOffset(LeftOverData.size() + sizeof(RecordPrefix));
361 CurrentSegment.SubRecords.clear();
362 CurrentSegment.SubRecords.push_back(LastSubRecord);
365 // Update the CVMemberRecord since we may have shifted around or gotten
367 Record.Data = getCurrentSubRecordData();
370 return Error::success();