1 //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file
11 /// provides two different array implementations.
13 /// VarStreamArray - Arrays of variable length records. The user specifies
14 /// an Extractor type that can extract a record from a given offset and
15 /// return the number of bytes consumed by the record.
17 /// FixedStreamArray - Arrays of fixed length records. This is similar in
18 /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the
19 /// elements of the array need not be laid out in contiguous memory.
22 #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H
23 #define LLVM_SUPPORT_BINARYSTREAMARRAY_H
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/iterator.h"
27 #include "llvm/Support/Alignment.h"
28 #include "llvm/Support/BinaryStreamRef.h"
29 #include "llvm/Support/Error.h"
35 /// VarStreamArrayExtractor is intended to be specialized to provide customized
36 /// extraction logic. On input it receives a BinaryStreamRef pointing to the
37 /// beginning of the next record, but where the length of the record is not yet
38 /// known. Upon completion, it should return an appropriate Error instance if
39 /// a record could not be extracted, or if one could be extracted it should
40 /// return success and set Len to the number of bytes this record occupied in
41 /// the underlying stream, and it should fill out the fields of the value type
42 /// Item appropriately to represent the current record.
44 /// You can specialize this template for your own custom value types to avoid
45 /// having to specify a second template argument to VarStreamArray (documented
/// below).
47 template <typename T> struct VarStreamArrayExtractor {
// Primary template: it declares nothing but a deleted call operator, so
// invoking the extractor for a type T that has no explicit specialization
// is rejected at compile time.
48 // Method intentionally deleted. You must provide an explicit specialization
49 // with the following method implemented.
//
// Shape of the call operator a specialization has to provide:
//   Stream - stream reference positioned at the start of the next record.
//   Len    - out: number of bytes the record occupies in the stream.
//   Item   - out: the value describing the extracted record.
// The returned Error reports whether extraction succeeded.
50 Error operator()(BinaryStreamRef Stream, uint32_t &Len,
51 T &Item) const = delete;
54 /// VarStreamArray represents an array of variable length records backed by a
55 /// stream. This could be a contiguous sequence of bytes in memory, it could
56 /// be a file on disk, or it could be a PDB stream where bytes are stored as
57 /// discontiguous blocks in a file. Usually it is desirable to treat arrays
58 /// as contiguous blocks of memory, but doing so with large PDB files, for
59 /// example, could mean allocating huge amounts of memory just to allow
60 /// re-ordering of stream data to be contiguous before iterating over it. By
61 /// abstracting this out, we need not duplicate this memory, and we can
62 /// iterate over arrays in arbitrarily formatted streams. Elements are parsed
63 /// lazily on iteration, so there is no upfront cost associated with building
64 /// or copying a VarStreamArray, no matter how large it may be.
66 /// You create a VarStreamArray by specifying a ValueType and an Extractor type.
67 /// If you do not specify an Extractor type, you are expected to specialize
68 /// VarStreamArrayExtractor<T> for your ValueType.
70 /// By default an Extractor is default constructed in the class, but in some
71 /// cases you might find it useful for an Extractor to maintain state across
72 /// extractions. In this case you can provide your own Extractor through a
73 /// secondary constructor. The following examples show various ways of
74 /// creating a VarStreamArray.
76 /// // Will use VarStreamArrayExtractor<MyType> as the extractor.
77 /// VarStreamArray<MyType> MyTypeArray;
79 /// // Will use a default-constructed MyExtractor as the extractor.
80 /// VarStreamArray<MyType, MyExtractor> MyTypeArray2;
82 /// // Will use the specific instance of MyExtractor provided.
83 /// // MyExtractor need not be default-constructible in this case.
84 /// MyExtractor E(SomeContext);
85 /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
// Forward declaration: VarStreamArray names this iterator type (friend
// declaration, Iterator typedef) before the iterator class itself is defined.
88 template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
90 template <typename ValueType,
91 typename Extractor = VarStreamArrayExtractor<ValueType>>
92 class VarStreamArray {
// The iterator is constructed from this array and reads Array.Stream
// directly, hence the friendship.
93 friend class VarStreamArrayIterator<ValueType, Extractor>;
// Iterator type returned by begin(), end() and at().
96 typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
/// Default constructor; every member takes its default state.
98 VarStreamArray() = default;
/// Creates an array whose extractor member is initialized from \p E. The
/// stream is left in its default state.
100 explicit VarStreamArray(const Extractor &E) : E(E) {}
/// Creates an array over \p Stream. \p Skew is stored and later used by
/// begin() as the stream offset of the first record.
102 explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
103 : Stream(Stream), Skew(Skew) {}
/// Creates an array over \p Stream with an explicitly supplied extractor
/// \p E and an optional \p Skew.
105 VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
106 : Stream(Stream), E(E), Skew(Skew) {}
/// Returns an iterator positioned at stream offset Skew, i.e. at the first
/// record of the array.
///
/// \param HadError optional flag; it is forwarded to the iterator, which
/// keeps it in its HadError member. NOTE(review): the statement that writes
/// through the pointer is not visible in this excerpt -- presumably the flag
/// is set when extraction fails; confirm against the full file.
///
/// Fix: the parameter used to be ignored (a literal nullptr was passed to
/// the iterator), so a caller-supplied flag could never be reached.
108 Iterator begin(bool *HadError = nullptr) const {
109 return Iterator(*this, E, Skew, HadError);
/// True when the underlying stream reference reports itself as valid.
112 bool valid() const { return Stream.valid(); }
/// Stream offset at which begin() starts.
114 uint32_t skew() const { return Skew; }
/// Returns the end iterator: it carries the extractor but no array.
115 Iterator end() const { return Iterator(E); }
/// True when the underlying stream has length zero. Skew is not taken into
/// account.
117 bool empty() const { return Stream.getLength() == 0; }
/// Returns a new array built from Stream.slice(0, End), the same extractor
/// and \p Begin as its skew. \p Begin must not be smaller than the current
/// skew (asserted).
119 VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
120 uint32_t End) const {
121 assert(Begin >= Skew);
122 // We should never cut off the beginning of the stream since it might be
123 // skewed, meaning the initial bytes are important.
124 BinaryStreamRef NewStream = Stream.slice(0, End);
125 return {NewStream, E, Begin};
128 /// Given an offset into the array's underlying stream, return an
129 /// iterator to the record at that offset. This is considered unsafe
130 /// since the behavior is undefined if \p Offset does not refer to the
131 /// beginning of a valid record.
132 Iterator at(uint32_t Offset) const {
// No error flag is attached to iterators created through at().
133 return Iterator(*this, E, Offset, nullptr);
/// Read-only and mutable access to the extractor held by this array.
136 const Extractor &getExtractor() const { return E; }
137 Extractor &getExtractor() { return E; }
/// Returns the stream reference this array reads from (by value).
139 BinaryStreamRef getUnderlyingStream() const { return Stream; }
/// Replaces the underlying stream and, optionally, the skew.
/// NOTE(review): the statements of this body are not visible in this
/// excerpt; presumably Stream and Skew are assigned from the arguments --
/// confirm against the full file.
140 void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) {
/// Skips the first record by adding the value of length() called on that
/// record to Skew. Requires ValueType to provide length(); extracts the
/// first record in order to ask for it.
145 void drop_front() { Skew += begin()->length(); }
// Stream the records are extracted from.
148 BinaryStreamRef Stream;
/// Forward iterator over the records of a VarStreamArray. The extractor is
/// run on the remaining stream to obtain the current record (ThisValue) and
/// its length (ThisLen). An iterator whose Array pointer is null is treated
/// as an end iterator by operator==.
153 template <typename ValueType, typename Extractor>
154 class VarStreamArrayIterator
155 : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
156 std::forward_iterator_tag, const ValueType> {
// Shorthands for this iterator type and for the array type it walks.
157 typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
158 typedef VarStreamArray<ValueType, Extractor> ArrayType;
/// Creates an iterator over \p Array whose current record starts at stream
/// offset \p Offset. Extract is initialized from \p E, and \p HadError (may
/// be null) is stored in the member of the same name.
161 VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
162 uint32_t Offset, bool *HadError)
163 : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
164 Array(&Array), AbsOffset(Offset), HadError(HadError) {
// No bytes remain past Offset. NOTE(review): the statement guarded by this
// check, and the branch structure around the extraction below, are elided
// from this excerpt.
165 if (IterRef.getLength() == 0)
// Decode the record at the current position; ThisLen and ThisValue are the
// extractor's output arguments.
168 auto EC = Extract(IterRef, ThisLen, ThisValue);
// The Error is consumed here rather than handed back to the caller.
170 consumeError(std::move(EC));
/// Default-constructed iterator; Array is null.
176 VarStreamArrayIterator() = default;
/// Creates an iterator that has an extractor but no array.
/// VarStreamArray::end() uses this constructor.
177 explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
178 ~VarStreamArrayIterator() = default;
/// Equality. If both iterators refer to an array, they are asserted to refer
/// to the same one and compare equal when their IterRef members are equal.
/// NOTE(review): the return statements for the two remaining cases are
/// elided from this excerpt; the comments below name those cases.
180 bool operator==(const IterType &R) const {
181 if (Array && R.Array) {
182 // Both have a valid array, make sure they're same.
183 assert(Array == R.Array);
184 return IterRef == R.IterRef;
187 // Both iterators are at the end.
188 if (!Array && !R.Array)
191 // One is not at the end and one is.
/// Access to the current record. Asserts that the iterator refers to an
/// array and has not recorded an error. NOTE(review): the return statement
/// is elided; presumably it returns ThisValue -- confirm.
195 const ValueType &operator*() const {
196 assert(Array && !HasError);
/// Advances the iterator by \p N records. For each step the current record
/// is skipped (AbsOffset and IterRef both move forward by ThisLen) and, if
/// bytes remain, the next record is extracted.
200 IterType &operator+=(unsigned N) {
201 for (unsigned I = 0; I < N; ++I) {
202 // We are done with the current record, discard it so that we are
203 // positioned at the next record.
204 AbsOffset += ThisLen;
205 IterRef = IterRef.drop_front(ThisLen);
206 if (IterRef.getLength() == 0) {
207 // There is nothing after the current record, we must make this an end
// iterator.
211 // There is some data after the current record.
212 auto EC = Extract(IterRef, ThisLen, ThisValue);
// As in the constructor, the Error itself is consumed, not propagated.
214 consumeError(std::move(EC));
216 } else if (ThisLen == 0) {
217 // An empty record? Make this an end iterator.
/// Stream offset of the current record: starts at the constructor's Offset
/// and grows by ThisLen on every step.
225 uint32_t offset() const { return AbsOffset; }
/// Value of ThisLen, the length output of the most recent extraction.
226 uint32_t getRecordLength() const { return ThisLen; }
// Only touch the caller's flag when one was supplied. NOTE(review): the
// enclosing function header and the guarded statement are elided from this
// excerpt.
236 if (HadError != nullptr)
// Remaining stream, beginning at the current record.
241 BinaryStreamRef IterRef;
// Array being iterated; null means "end iterator" (see operator==).
243 const ArrayType *Array{nullptr};
// Stream offset of the current record.
245 uint32_t AbsOffset{0};
// Checked by operator*; false by default.
246 bool HasError{false};
// Optional caller-supplied flag passed to the constructor; may be null.
247 bool *HadError{nullptr};
// Forward declaration: FixedStreamArray names this iterator type (friend
// declaration, Iterator typedef, begin()/end()) before it is defined.
250 template <typename T> class FixedStreamArrayIterator;
252 /// FixedStreamArray is similar to VarStreamArray, except with each record
253 /// having a fixed-length. As with VarStreamArray, there is no upfront
254 /// cost associated with building or copying a FixedStreamArray, as the
255 /// memory for each element is not read from the backing stream until that
256 /// element is iterated.
257 template <typename T> class FixedStreamArray {
// Companion iterator type returned by begin() and end().
258 friend class FixedStreamArrayIterator<T>;
261 typedef FixedStreamArrayIterator<T> Iterator;
/// Default constructor; the stream member is default-constructed.
263 FixedStreamArray() = default;
/// Wraps \p Stream, whose length must be a whole multiple of sizeof(T).
/// The requirement is checked by assertion only.
264 explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) {
265 assert(Stream.getLength() % sizeof(T) == 0);
/// Two arrays are equal when their stream references compare equal.
268 bool operator==(const FixedStreamArray<T> &Other) const {
269 return Stream == Other.Stream;
/// Negation of operator==.
272 bool operator!=(const FixedStreamArray<T> &Other) const {
273 return !(*this == Other);
// Copy construction and copy assignment are explicitly defaulted.
276 FixedStreamArray(const FixedStreamArray &) = default;
277 FixedStreamArray &operator=(const FixedStreamArray &) = default;
/// Returns the element at \p Index, which must be less than size()
/// (asserted). sizeof(T) bytes are requested from the stream at byte offset
/// Index * sizeof(T) and reinterpreted in place as a T.
279 const T &operator[](uint32_t Index) const {
280 assert(Index < size());
281 uint32_t Off = Index * sizeof(T);
282 ArrayRef<uint8_t> Data;
283 if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) {
284 assert(false && "Unexpected failure reading from stream");
285 // This should never happen since we asserted that the stream length was
286 // an exact multiple of the element size.
287 consumeError(std::move(EC));
// The bytes are not copied: the returned reference points into the buffer
// handed back by readBytes, so it must be suitably aligned for T.
289 assert(isAddrAligned(Align::Of<T>(), Data.data()));
290 return *reinterpret_cast<const T *>(Data.data());
/// Number of elements: stream length divided by sizeof(T).
293 uint32_t size() const { return Stream.getLength() / sizeof(T); }
/// True when size() is zero.
295 bool empty() const { return size() == 0; }
/// Iterator referring to index 0.
297 FixedStreamArrayIterator<T> begin() const {
298 return FixedStreamArrayIterator<T>(*this, 0);
/// Iterator referring to index size(), i.e. one past the last element.
301 FixedStreamArrayIterator<T> end() const {
302 return FixedStreamArrayIterator<T>(*this, size());
/// First element, obtained by dereferencing begin().
305 const T &front() const { return *begin(); }
/// Last element. NOTE(review): the return statement is elided from this
/// excerpt; presumably the iterator is stepped back once from end() and
/// dereferenced -- confirm.
306 const T &back() const {
307 FixedStreamArrayIterator<T> I = end();
/// Returns the stream reference this array reads from (by value).
311 BinaryStreamRef getUnderlyingStream() const { return Stream; }
// Stream the fixed-size elements are read from.
314 BinaryStreamRef Stream;
/// Random-access iterator over a FixedStreamArray. It holds its own
/// FixedStreamArray object (by value) together with an element index.
317 template <typename T>
318 class FixedStreamArrayIterator
319 : public iterator_facade_base<FixedStreamArrayIterator<T>,
320 std::random_access_iterator_tag, const T> {
/// Creates an iterator referring to element \p Index of \p Array.
323 FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
324 : Array(Array), Index(Index) {}
/// Copy constructor: copies the array object and the index.
326 FixedStreamArrayIterator(const FixedStreamArrayIterator<T> &Other)
327 : Array(Other.Array), Index(Other.Index) {}
/// Copy assignment. NOTE(review): the body is elided from this excerpt;
/// presumably it mirrors the copy constructor -- confirm.
328 FixedStreamArrayIterator<T> &
329 operator=(const FixedStreamArrayIterator<T> &Other) {
/// Dereference: returns element Index of the array. The const and non-const
/// overloads are identical and both return a const reference.
335 const T &operator*() const { return Array[Index]; }
336 const T &operator*() { return Array[Index]; }
/// Iterators are equal when they have the same index and their arrays
/// compare equal. Comparing iterators of different arrays trips the
/// assertion.
338 bool operator==(const FixedStreamArrayIterator<T> &R) const {
339 assert(Array == R.Array);
340 return (Index == R.Index) && (Array == R.Array);
/// Advances by \p N elements. NOTE(review): the body is elided from this
/// excerpt.
343 FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
/// Moves back by \p N elements. The assertion requires Index >= N, i.e. the
/// iterator may not be moved before index 0. NOTE(review): the remaining
/// statements of the body are elided from this excerpt.
348 FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
349 assert(std::ptrdiff_t(Index) >= N);
/// Distance in elements from \p R to this iterator. Both must refer to equal
/// arrays and \p R must not be ahead of this iterator (both asserted).
354 std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
355 assert(Array == R.Array);
356 assert(Index >= R.Index);
357 return Index - R.Index;
/// Orders iterators of the same array by index.
360 bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
361 assert(Array == RHS.Array);
362 return Index < RHS.Index;
// Held by value: the iterator keeps its own FixedStreamArray object.
366 FixedStreamArray<T> Array;
372 #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H