1 //===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector --*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the SparseBitVector class. See the doxygen comment for
10 // SparseBitVector for more details on the algorithm used.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_ADT_SPARSEBITVECTOR_H
15 #define LLVM_ADT_SPARSEBITVECTOR_H
17 #include "llvm/Support/ErrorHandling.h"
18 #include "llvm/Support/MathExtras.h"
19 #include "llvm/Support/raw_ostream.h"
28 /// SparseBitVector is an implementation of a bitvector that is sparse by only
29 /// storing the elements that have non-zero bits set. In order to make this
30 /// fast for the most common cases, SparseBitVector is implemented as a linked
31 /// list of SparseBitVectorElements. We maintain a pointer to the last
32 /// SparseBitVectorElement accessed (in the form of a list iterator), in order
33 /// to make multiple in-order test/set constant time after the first one is
34 /// executed. Note that using vectors to store SparseBitVectorElements does
35 /// not work out very well because it causes insertion in the middle to take
36 /// enormous amounts of time with a large amount of bits. Other structures that
37 /// have better worst cases for insertion in the middle (various balanced trees,
38 /// etc) do not perform as well in practice as a linked list with this iterator
39 /// kept up to date. They are also significantly more memory intensive.
// One fixed-size chunk of a sparse bit vector: an element index plus an array
// of BitWord words that together hold ElementSize bits.
41 template <unsigned ElementSize = 128> struct SparseBitVectorElement {
// Storage word type, and the type returned by count().
43 using BitWord = unsigned long;
44 using size_type = unsigned;
// Bits per storage word, words per element (rounded up), bits per element.
46 BITWORD_SIZE = sizeof(BitWord) * CHAR_BIT,
47 BITWORDS_PER_ELEMENT = (ElementSize + BITWORD_SIZE - 1) / BITWORD_SIZE,
48 BITS_PER_ELEMENT = ElementSize
52 // Index of Element in terms of where first bit starts.
53 unsigned ElementIndex;
// Raw bit storage, BITWORDS_PER_ELEMENT words long.
54 BitWord Bits[BITWORDS_PER_ELEMENT];
// Default constructor: zero-fills every word of Bits.
// NOTE(review): other lines of this definition are not visible in this
// excerpt, so whether ElementIndex is initialized here cannot be confirmed.
56 SparseBitVectorElement() {
58 memset(&Bits[0], 0, sizeof (BitWord) * BITWORDS_PER_ELEMENT);
// Constructs an element from an index; explicit, so an unsigned value is never
// implicitly converted into an element. Zero-fills every word of Bits.
// NOTE(review): presumably Idx is stored into ElementIndex on a line that is
// not visible in this excerpt -- confirm.
62 explicit SparseBitVectorElement(unsigned Idx) {
64 memset(&Bits[0], 0, sizeof (BitWord) * BITWORDS_PER_ELEMENT);
// Equality: compares ElementIndex first, then the Bits arrays word by word.
68 bool operator==(const SparseBitVectorElement &RHS) const {
69 if (ElementIndex != RHS.ElementIndex)
71 for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i)
72 if (Bits[i] != RHS.Bits[i])
// Inequality, defined as the negation of operator==.
77 bool operator!=(const SparseBitVectorElement &RHS) const {
78 return !(*this == RHS);
81 // Return the bits that make up word Idx in our element.
// Idx is a word index and must be below BITWORDS_PER_ELEMENT (asserted).
82 BitWord word(unsigned Idx) const {
83 assert(Idx < BITWORDS_PER_ELEMENT);
// Accessor for this element's index.
// NOTE(review): the body is not visible in this excerpt; presumably it returns
// ElementIndex -- confirm.
87 unsigned index() const {
92 for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i)
// Set bit Idx within this element. Idx selects the word (Idx / BITWORD_SIZE)
// and the bit inside that word (Idx % BITWORD_SIZE); no range check is done.
// The mask is built from BitWord(1) rather than the signed literal 1L so that
// selecting the top bit of a word never shifts into a sign bit.
98 void set(unsigned Idx) {
99 Bits[Idx / BITWORD_SIZE] |= BitWord(1) << (Idx % BITWORD_SIZE);
// Reads the current state of bit Idx via test() before any modification.
// NOTE(review): the remainder of the body (setting the bit and forming the
// return value) is not visible in this excerpt.
102 bool test_and_set(unsigned Idx) {
103 bool old = test(Idx);
// Clear bit Idx within this element. Idx selects the word (Idx / BITWORD_SIZE)
// and the bit inside that word (Idx % BITWORD_SIZE); no range check is done.
// The mask is built from BitWord(1) rather than the signed literal 1L so that
// the complement is computed in the unsigned storage type and the top bit of a
// word never requires shifting into a sign bit.
111 void reset(unsigned Idx) {
112 Bits[Idx / BITWORD_SIZE] &= ~(BitWord(1) << (Idx % BITWORD_SIZE));
// Return true if bit Idx within this element is set. Idx selects the word
// (Idx / BITWORD_SIZE) and the bit inside it (Idx % BITWORD_SIZE).
// The mask is built from BitWord(1) rather than the signed literal 1L so that
// testing the top bit of a word never shifts into a sign bit.
115 bool test(unsigned Idx) const {
116 return Bits[Idx / BITWORD_SIZE] & (BitWord(1) << (Idx % BITWORD_SIZE));
// Population count of the element: accumulates countPopulation() over every
// word of Bits.
119 size_type count() const {
120 unsigned NumBits = 0;
121 for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i)
122 NumBits += countPopulation(Bits[i]);
126 /// find_first - Returns the index of the first set bit.
/// Scans words from lowest to highest; the result is the word offset plus the
/// trailing-zero count of the word. An element with no set bits is treated as
/// a programming error (llvm_unreachable).
127 int find_first() const {
128 for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i)
130 return i * BITWORD_SIZE + countTrailingZeros(Bits[i]);
131 llvm_unreachable("Illegal empty element");
134 /// find_last - Returns the index of the last set bit.
/// Scans words from highest to lowest (Idx counts down as I counts up); the
/// result is the top bit position of the word minus its leading-zero count.
/// An element with no set bits is treated as a programming error.
135 int find_last() const {
136 for (unsigned I = 0; I < BITWORDS_PER_ELEMENT; ++I) {
137 unsigned Idx = BITWORDS_PER_ELEMENT - I - 1;
139 return Idx * BITWORD_SIZE + BITWORD_SIZE -
140 countLeadingZeros(Bits[Idx]) - 1;
142 llvm_unreachable("Illegal empty element");
145 /// find_next - Returns the index of the next set bit starting from the
146 /// "Curr" bit. Returns -1 if the next set bit is not found.
/// Curr is a bit position inside this element; positions at or beyond
/// BITS_PER_ELEMENT are handled by the guard below. The search first looks at
/// the word containing Curr (with lower bits masked off) and then at each
/// following word.
147 int find_next(unsigned Curr) const {
148 if (Curr >= BITS_PER_ELEMENT)
151 unsigned WordPos = Curr / BITWORD_SIZE;
152 unsigned BitPos = Curr % BITWORD_SIZE;
153 BitWord Copy = Bits[WordPos];
// A valid word index is strictly below BITWORDS_PER_ELEMENT; the previous
// "<=" bound accepted the one-past-the-end index and so could never catch an
// out-of-range read of Bits.
154 assert(WordPos < BITWORDS_PER_ELEMENT
155 && "Word Position outside of element");
157 // Mask off previous bits.
158 Copy &= ~0UL << BitPos;
161 return WordPos * BITWORD_SIZE + countTrailingZeros(Copy);
163 // Check subsequent words.
164 for (unsigned i = WordPos+1; i < BITWORDS_PER_ELEMENT; ++i)
166 return i * BITWORD_SIZE + countTrailingZeros(Bits[i]);
170 // Union this element with RHS and return true if this one changed.
// The previous word value is only captured while no change has been seen yet;
// once `changed` is true the comparison is skipped for the remaining words.
171 bool unionWith(const SparseBitVectorElement &RHS) {
172 bool changed = false;
173 for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) {
174 BitWord old = changed ? 0 : Bits[i];
176 Bits[i] |= RHS.Bits[i];
177 if (!changed && old != Bits[i])
183 // Return true if we have any bits in common with RHS
// Checks the bitwise AND of corresponding words; neither element is modified.
184 bool intersects(const SparseBitVectorElement &RHS) const {
185 for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) {
186 if (RHS.Bits[i] & Bits[i])
192 // Intersect this Element with RHS and return true if this one changed.
193 // BecameZero is set to true if this element became all-zero bits.
// Each word is ANDed in place with the matching RHS word. The previous word
// value is only captured while no change has been seen yet.
// NOTE(review): the declaration and updates of `allzero` are not visible in
// this excerpt; only its final copy into BecameZero is.
194 bool intersectWith(const SparseBitVectorElement &RHS,
196 bool changed = false;
200 for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) {
201 BitWord old = changed ? 0 : Bits[i];
203 Bits[i] &= RHS.Bits[i];
207 if (!changed && old != Bits[i])
210 BecameZero = allzero;
214 // Intersect this Element with the complement of RHS and return true if this
215 // one changed. BecameZero is set to true if this element became all-zero bits.
// Each word is ANDed in place with the inverted RHS word, i.e. every bit set
// in RHS is cleared here. The previous word value is only captured while no
// change has been seen yet.
// NOTE(review): the declaration and updates of `allzero` are not visible in
// this excerpt; only its final copy into BecameZero is.
217 bool intersectWithComplement(const SparseBitVectorElement &RHS,
219 bool changed = false;
223 for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) {
224 BitWord old = changed ? 0 : Bits[i];
226 Bits[i] &= ~RHS.Bits[i];
230 if (!changed && old != Bits[i])
233 BecameZero = allzero;
237 // Three argument version of intersectWithComplement that intersects
238 // RHS1 & ~RHS2 into this element
// Every word of Bits is overwritten (not merged) with RHS1 & ~RHS2, so the
// previous contents of this element do not influence the result.
// NOTE(review): the third parameter and the `allzero` bookkeeping are not
// visible in this excerpt; only the final copy into BecameZero is.
239 void intersectWithComplement(const SparseBitVectorElement &RHS1,
240 const SparseBitVectorElement &RHS2,
245 for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) {
246 Bits[i] = RHS1.Bits[i] & ~RHS2.Bits[i];
250 BecameZero = allzero;
// Sparse bit vector stored as a std::list of SparseBitVectorElement chunks,
// plus a cached iterator to the most recently used chunk.
254 template <unsigned ElementSize = 128>
255 class SparseBitVector {
// List type holding the elements, and its mutable/const iterator types.
256 using ElementList = std::list<SparseBitVectorElement<ElementSize>>;
257 using ElementListIter = typename ElementList::iterator;
258 using ElementListConstIter = typename ElementList::const_iterator;
// Re-export of the element's word width in bits.
260 BITWORD_SIZE = SparseBitVectorElement<ElementSize>::BITWORD_SIZE
263 ElementList Elements;
264 // Pointer to our current Element. This has no visible effect on the external
265 // state of a SparseBitVector, it's just used to improve performance in the
266 // common case of testing/modifying bits with similar indices.
// Declared mutable so that const lookups can still update the cache.
267 mutable ElementListIter CurrElementIter;
269 // This is like std::lower_bound, except we do linear searching from the
// cached CurrElementIter position, walking backwards or forwards depending on
// whether the cached element's index is above or below ElementIndex. The
// cache is updated to the iterator the search ends on.
271 ElementListIter FindLowerBoundImpl(unsigned ElementIndex) const {
273 // We cache a non-const iterator so we're forced to resort to const_cast to
274 // get the begin/end in the case where 'this' is const. To avoid duplication
275 // of code with the only difference being whether the const cast is present
276 // 'this' is always const in this particular function and we sort out the
277 // difference in FindLowerBound and FindLowerBoundConst.
278 ElementListIter Begin =
279 const_cast<SparseBitVector<ElementSize> *>(this)->Elements.begin();
280 ElementListIter End =
281 const_cast<SparseBitVector<ElementSize> *>(this)->Elements.end();
// Nothing to search: reset the cache to begin() (== end()) and return it.
283 if (Elements.empty()) {
284 CurrElementIter = Begin;
285 return CurrElementIter;
288 // Make sure our current iterator is valid.
// NOTE(review): the statement executed when the cache equals End is not
// visible in this excerpt.
289 if (CurrElementIter == End)
292 // Search from our current iterator, either backwards or forwards,
293 // depending on what element we are looking for.
294 ElementListIter ElementIter = CurrElementIter;
295 if (CurrElementIter->index() == ElementIndex) {
297 } else if (CurrElementIter->index() > ElementIndex) {
// Cached element is past the target: scan towards Begin.
298 while (ElementIter != Begin
299 && ElementIter->index() > ElementIndex)
// Cached element is before the target: scan towards End.
302 while (ElementIter != End &&
303 ElementIter->index() < ElementIndex)
306 CurrElementIter = ElementIter;
// Const-facing wrapper: forwards to FindLowerBoundImpl and returns the result
// as a const iterator.
309 ElementListConstIter FindLowerBoundConst(unsigned ElementIndex) const {
310 return FindLowerBoundImpl(ElementIndex);
// Mutable-facing wrapper: forwards to FindLowerBoundImpl and returns the
// mutable iterator unchanged.
312 ElementListIter FindLowerBound(unsigned ElementIndex) {
313 return FindLowerBoundImpl(ElementIndex);
316 // Iterator to walk set bits in the bitmap. This iterator is a lot uglier
317 // than it would be, in order to be efficient.
// NOTE(review): several member declarations (the fields referred to below as
// AtEnd, BitNumber and WordNumber) and a number of statements are not visible
// in this excerpt; comments here describe only the visible lines.
318 class SparseBitVectorIterator {
// Bit vector being iterated; null for a default-constructed iterator.
322 const SparseBitVector<ElementSize> *BitVector = nullptr;
324 // Current element inside of bitmap.
325 ElementListConstIter Iter;
327 // Current bit number inside of our bitmap.
330 // Current word number inside of our element.
333 // Current bits from the element.
334 typename SparseBitVectorElement<ElementSize>::BitWord Bits;
336 // Move our iterator to the first non-zero bit in the bitmap.
337 void AdvanceToFirstNonZero() {
340 if (BitVector->Elements.empty()) {
// Start at the first element; BitNumber begins at that element's base bit.
344 Iter = BitVector->Elements.begin();
345 BitNumber = Iter->index() * ElementSize;
346 unsigned BitPos = Iter->find_first();
// Cache the word containing the current bit, shifted so that the current bit
// is in the lowest position.
348 WordNumber = (BitNumber % ElementSize) / BITWORD_SIZE;
349 Bits = Iter->word(WordNumber);
350 Bits >>= BitPos % BITWORD_SIZE;
353 // Move our iterator to the next non-zero bit.
354 void AdvanceToNextNonZero() {
// Skip over zero bits remaining in the cached word.
358 while (Bits && !(Bits & 1)) {
363 // See if we ran out of Bits in this word.
365 int NextSetBitNumber = Iter->find_next(BitNumber % ElementSize) ;
366 // If we ran out of set bits in this element, move to next element.
367 if (NextSetBitNumber == -1 || (BitNumber % ElementSize == 0)) {
371 // We may run out of elements in the bitmap.
372 if (Iter == BitVector->Elements.end()) {
376 // Set up for next non-zero word in bitmap.
377 BitNumber = Iter->index() * ElementSize;
378 NextSetBitNumber = Iter->find_first();
379 BitNumber += NextSetBitNumber;
380 WordNumber = (BitNumber % ElementSize) / BITWORD_SIZE;
381 Bits = Iter->word(WordNumber);
382 Bits >>= NextSetBitNumber % BITWORD_SIZE;
// Same element, later word: reload the cached word and recompute BitNumber.
384 WordNumber = (NextSetBitNumber % ElementSize) / BITWORD_SIZE;
385 Bits = Iter->word(WordNumber);
386 Bits >>= NextSetBitNumber % BITWORD_SIZE;
387 BitNumber = Iter->index() * ElementSize;
388 BitNumber += NextSetBitNumber;
394 SparseBitVectorIterator() = default;
// Builds an iterator over *RHS; `end` selects an end iterator instead of one
// positioned on the first set bit.
396 SparseBitVectorIterator(const SparseBitVector<ElementSize> *RHS,
397 bool end = false):BitVector(RHS) {
398 Iter = BitVector->Elements.begin();
403 AdvanceToFirstNonZero();
// Pre-increment: advances to the next set bit.
407 inline SparseBitVectorIterator& operator++() {
410 AdvanceToNextNonZero();
// Post-increment: keeps a copy of the current state before advancing.
415 inline SparseBitVectorIterator operator++(int) {
416 SparseBitVectorIterator tmp = *this;
421 // Return the current set bit number.
422 unsigned operator*() const {
426 bool operator==(const SparseBitVectorIterator &RHS) const {
427 // If they are both at the end, ignore the rest of the fields.
428 if (AtEnd && RHS.AtEnd)
430 // Otherwise they are the same if they have the same bit number and
// the same end state. Note that BitVector itself is not compared here.
432 return AtEnd == RHS.AtEnd && RHS.BitNumber == BitNumber;
435 bool operator!=(const SparseBitVectorIterator &RHS) const {
436 return !(*this == RHS);
// Iterator type exposed by begin()/end().
441 using iterator = SparseBitVectorIterator;
// Default constructor: empty element list, cache pointing at begin().
443 SparseBitVector() : Elements(), CurrElementIter(Elements.begin()) {}
// Copy constructor: copies the element list and points the cache at the
// beginning of the new list (RHS's cached iterator is not copied).
445 SparseBitVector(const SparseBitVector &RHS)
446 : Elements(RHS.Elements), CurrElementIter(Elements.begin()) {}
// Move constructor: takes over RHS's element list and points the cache at the
// beginning of this list. RHS.CurrElementIter is left untouched.
447 SparseBitVector(SparseBitVector &&RHS)
448 : Elements(std::move(RHS.Elements)), CurrElementIter(Elements.begin()) {}
// Copy assignment: copies RHS's element list and resets the cache to begin().
// NOTE(review): lines between the signature and the copy (e.g. any
// self-assignment check) and the return statement are not visible here.
456 SparseBitVector& operator=(const SparseBitVector& RHS) {
460 Elements = RHS.Elements;
461 CurrElementIter = Elements.begin();
// Move assignment: takes over RHS's element list and resets the cache to
// begin(). RHS.CurrElementIter is left untouched.
464 SparseBitVector &operator=(SparseBitVector &&RHS) {
465 Elements = std::move(RHS.Elements);
466 CurrElementIter = Elements.begin();
470 // Test, Reset, and Set a bit in the bitmap.
// test: looks up the element for Idx / ElementSize and, when such an element
// exists, reports the state of bit Idx % ElementSize inside it. The lookup may
// update the (mutable) cached iterator even though this member is const.
471 bool test(unsigned Idx) const {
472 if (Elements.empty())
475 unsigned ElementIndex = Idx / ElementSize;
476 ElementListConstIter ElementIter = FindLowerBoundConst(ElementIndex);
478 // If we can't find an element that is supposed to contain this bit, there
479 // is nothing more to do.
480 if (ElementIter == Elements.end() ||
481 ElementIter->index() != ElementIndex)
483 return ElementIter->test(Idx % ElementSize);
// Clears bit Idx. If no element covers Idx there is nothing to clear; if the
// element becomes empty after the clear it is erased from the list.
// NOTE(review): erasing invalidates iterators to that element; the line
// between the empty() check and the erase (original line 502) is not visible
// here, so how CurrElementIter is kept valid cannot be confirmed from this
// excerpt.
486 void reset(unsigned Idx) {
487 if (Elements.empty())
490 unsigned ElementIndex = Idx / ElementSize;
491 ElementListIter ElementIter = FindLowerBound(ElementIndex);
493 // If we can't find an element that is supposed to contain this bit, there
494 // is nothing more to do.
495 if (ElementIter == Elements.end() ||
496 ElementIter->index() != ElementIndex)
498 ElementIter->reset(Idx % ElementSize);
500 // When the element is zeroed out, delete it.
501 if (ElementIter->empty()) {
503 Elements.erase(ElementIter);
// Sets bit Idx, creating the covering element if it does not exist yet. The
// cached iterator is updated to the element that was found or inserted.
507 void set(unsigned Idx) {
508 unsigned ElementIndex = Idx / ElementSize;
509 ElementListIter ElementIter;
// Empty vector: the new element simply becomes the only list entry.
510 if (Elements.empty()) {
511 ElementIter = Elements.emplace(Elements.end(), ElementIndex);
513 ElementIter = FindLowerBound(ElementIndex);
515 if (ElementIter == Elements.end() ||
516 ElementIter->index() != ElementIndex) {
517 // We may have hit the beginning of our SparseBitVector, in which case,
518 // we may need to insert right after this element, which requires moving
519 // the current iterator forward one, because insert does insert before.
// NOTE(review): the statement guarded by this condition is not visible in
// this excerpt; per the comment above it presumably advances ElementIter.
520 if (ElementIter != Elements.end() &&
521 ElementIter->index() < ElementIndex)
523 ElementIter = Elements.emplace(ElementIter, ElementIndex);
526 CurrElementIter = ElementIter;
528 ElementIter->set(Idx % ElementSize);
// Reads the current state of bit Idx via test() before any modification.
// NOTE(review): the remainder of the body (setting the bit and forming the
// return value) is not visible in this excerpt.
531 bool test_and_set(unsigned Idx) {
532 bool old = test(Idx);
// Inequality, defined as the negation of operator==.
540 bool operator!=(const SparseBitVector &RHS) const {
541 return !(*this == RHS);
// Equality: walks both element lists in lockstep comparing elements, then
// requires that both lists were exhausted together (same number of elements).
// The cached iterators are not part of the comparison.
544 bool operator==(const SparseBitVector &RHS) const {
545 ElementListConstIter Iter1 = Elements.begin();
546 ElementListConstIter Iter2 = RHS.Elements.begin();
548 for (; Iter1 != Elements.end() && Iter2 != RHS.Elements.end();
550 if (*Iter1 != *Iter2)
553 return Iter1 == Elements.end() && Iter2 == RHS.Elements.end();
556 // Union our bitmap with the RHS and return true if we changed.
// Merges the two index-ordered element lists: RHS elements with no
// counterpart here are copied in before Iter1, matching elements are unioned
// in place. The cached iterator is reset to begin() afterwards.
// NOTE(review): iterator advancement, the remaining branch and any early
// exits are on lines not visible in this excerpt.
557 bool operator|=(const SparseBitVector &RHS) {
561 bool changed = false;
562 ElementListIter Iter1 = Elements.begin();
563 ElementListConstIter Iter2 = RHS.Elements.begin();
565 // If RHS is empty, we are done
566 if (RHS.Elements.empty())
569 while (Iter2 != RHS.Elements.end()) {
570 if (Iter1 == Elements.end() || Iter1->index() > Iter2->index()) {
571 Elements.insert(Iter1, *Iter2);
574 } else if (Iter1->index() == Iter2->index()) {
575 changed |= Iter1->unionWith(*Iter2);
582 CurrElementIter = Elements.begin();
586 // Intersect our bitmap with the RHS and return true if ours changed.
// Walks both index-ordered element lists: matching elements are intersected
// in place, elements are erased through a temporary iterator copy (IterTmp) so
// that Iter1 is never left pointing at an erased node, and whatever remains in
// this list once RHS is exhausted is erased as well. The cached iterator is
// reset to begin() on the visible exit paths.
// NOTE(review): iterator advancement, the conditions guarding the erases and
// the return statements are on lines not visible in this excerpt.
587 bool operator&=(const SparseBitVector &RHS) {
591 bool changed = false;
592 ElementListIter Iter1 = Elements.begin();
593 ElementListConstIter Iter2 = RHS.Elements.begin();
595 // Check if both bitmaps are empty.
596 if (Elements.empty() && RHS.Elements.empty())
599 // Loop through, intersecting as we go, erasing elements when necessary.
600 while (Iter2 != RHS.Elements.end()) {
601 if (Iter1 == Elements.end()) {
602 CurrElementIter = Elements.begin();
606 if (Iter1->index() > Iter2->index()) {
608 } else if (Iter1->index() == Iter2->index()) {
610 changed |= Iter1->intersectWith(*Iter2, BecameZero);
612 ElementListIter IterTmp = Iter1;
614 Elements.erase(IterTmp);
620 ElementListIter IterTmp = Iter1;
622 Elements.erase(IterTmp);
// RHS ran out first: nothing left here can survive the intersection.
626 if (Iter1 != Elements.end()) {
627 Elements.erase(Iter1, Elements.end());
630 CurrElementIter = Elements.begin();
634 // Intersect our bitmap with the complement of the RHS and return true
// if ours changed (i.e. remove from this bitmap every bit that is set in RHS).
// Only elements whose index matches an RHS element are touched; an element is
// erased through a temporary iterator copy (IterTmp). The cached iterator is
// reset to begin() on the visible exit paths.
// NOTE(review): iterator advancement, the condition guarding the erase and
// the return statements are on lines not visible in this excerpt.
636 bool intersectWithComplement(const SparseBitVector &RHS) {
645 bool changed = false;
646 ElementListIter Iter1 = Elements.begin();
647 ElementListConstIter Iter2 = RHS.Elements.begin();
649 // If either our bitmap or RHS is empty, we are done
650 if (Elements.empty() || RHS.Elements.empty())
653 // Loop through, intersecting as we go, erasing elements when necessary.
654 while (Iter2 != RHS.Elements.end()) {
655 if (Iter1 == Elements.end()) {
656 CurrElementIter = Elements.begin();
660 if (Iter1->index() > Iter2->index()) {
662 } else if (Iter1->index() == Iter2->index()) {
664 changed |= Iter1->intersectWithComplement(*Iter2, BecameZero);
666 ElementListIter IterTmp = Iter1;
668 Elements.erase(IterTmp);
677 CurrElementIter = Elements.begin();
// Pointer convenience overload: removes from this bitmap every bit set in
// *RHS by forwarding to the reference overload, and returns its result.
// RHS must be non-null (it is dereferenced unconditionally).
// This overload is intentionally non-const: it forwards to a mutating,
// non-const member, so a const-qualified version could never be instantiated.
681 bool intersectWithComplement(const SparseBitVector<ElementSize> *RHS) {
682 return intersectWithComplement(*RHS);
685 // Three argument version of intersectWithComplement.
686 // Result of RHS1 & ~RHS2 is stored into this bitmap.
// Aliasing is handled up front: one visible branch forwards to the
// one-argument overload with RHS2, and when `this` is RHS2 a copy of RHS2 is
// made before recursing. Otherwise the result is built by appending elements
// to Elements while walking RHS1 and RHS2 in index order; RHS1 elements with
// no RHS2 counterpart are copied unchanged, including the tail of RHS1.
// NOTE(review): the first aliasing condition, the clearing of Elements,
// iterator advancement and the handling of BecameZero are on lines not
// visible in this excerpt.
687 void intersectWithComplement(const SparseBitVector<ElementSize> &RHS1,
688 const SparseBitVector<ElementSize> &RHS2)
691 intersectWithComplement(RHS2);
693 } else if (this == &RHS2) {
694 SparseBitVector RHS2Copy(RHS2);
695 intersectWithComplement(RHS1, RHS2Copy);
700 CurrElementIter = Elements.begin();
701 ElementListConstIter Iter1 = RHS1.Elements.begin();
702 ElementListConstIter Iter2 = RHS2.Elements.begin();
704 // If RHS1 is empty, we are done
705 // If RHS2 is empty, we still have to copy RHS1
706 if (RHS1.Elements.empty())
709 // Loop through, intersecting as we go, erasing elements when necessary.
710 while (Iter2 != RHS2.Elements.end()) {
711 if (Iter1 == RHS1.Elements.end())
714 if (Iter1->index() > Iter2->index()) {
716 } else if (Iter1->index() == Iter2->index()) {
717 bool BecameZero = false;
// Append a fresh element with the shared index, then fill it with
// RHS1 & ~RHS2 for this chunk.
718 Elements.emplace_back(Iter1->index());
719 Elements.back().intersectWithComplement(*Iter1, *Iter2, BecameZero);
725 Elements.push_back(*Iter1++);
729 // copy the remaining elements
730 std::copy(Iter1, RHS1.Elements.end(), std::back_inserter(Elements));
// Pointer convenience overload of the three-argument form; both pointers are
// dereferenced unconditionally, so neither may be null.
733 void intersectWithComplement(const SparseBitVector<ElementSize> *RHS1,
734 const SparseBitVector<ElementSize> *RHS2) {
735 intersectWithComplement(*RHS1, *RHS2);
// Pointer convenience overload; RHS is dereferenced unconditionally, so it
// must not be null.
738 bool intersects(const SparseBitVector<ElementSize> *RHS) const {
739 return intersects(*RHS);
742 // Return true if we share any bits in common with RHS
// Walks both index-ordered element lists and tests elements with equal index
// for common bits; neither bitmap is modified.
// NOTE(review): iterator advancement and the return statements are on lines
// not visible in this excerpt.
743 bool intersects(const SparseBitVector<ElementSize> &RHS) const {
744 ElementListConstIter Iter1 = Elements.begin();
745 ElementListConstIter Iter2 = RHS.Elements.begin();
747 // Check if both bitmaps are empty.
748 if (Elements.empty() && RHS.Elements.empty())
751 // Loop through, intersecting stopping when we hit bits in common.
752 while (Iter2 != RHS.Elements.end()) {
753 if (Iter1 == Elements.end())
756 if (Iter1->index() > Iter2->index()) {
758 } else if (Iter1->index() == Iter2->index()) {
759 if (Iter1->intersects(*Iter2))
770 // Return true iff all bits set in RHS are also set in this SparseBitVector.
// Works on a copy of *this and compares the outcome against RHS, so neither
// operand is modified.
// NOTE(review): the statement between the copy and the comparison is not
// visible in this excerpt. Assuming it intersects Result with RHS, the
// comparison holds exactly when RHS is a subset of *this -- which matches the
// name `contains` but is the reverse of the previous wording of the comment
// above. Confirm against the full source.
772 bool contains(const SparseBitVector<ElementSize> &RHS) const {
773 SparseBitVector<ElementSize> Result(*this);
775 return (Result == RHS);
778 // Return the first set bit in the bitmap. Return -1 if no bits are set.
// The answer comes from the first list element: its base bit number
// (index * ElementSize) plus the position of its first set bit.
779 int find_first() const {
780 if (Elements.empty())
782 const SparseBitVectorElement<ElementSize> &First = *(Elements.begin());
783 return (First.index() * ElementSize) + First.find_first();
786 // Return the last set bit in the bitmap. Return -1 if no bits are set.
// The answer comes from the last list element: its base bit number
// (index * ElementSize) plus the position of its last set bit.
787 int find_last() const {
788 if (Elements.empty())
790 const SparseBitVectorElement<ElementSize> &Last = *(Elements.rbegin());
791 return (Last.index() * ElementSize) + Last.find_last();
794 // Return true if the SparseBitVector is empty
796 return Elements.empty();
// Total number of set bits: accumulates count() over every element in the
// list.
799 unsigned count() const {
800 unsigned BitCount = 0;
801 for (ElementListConstIter Iter = Elements.begin();
802 Iter != Elements.end();
804 BitCount += Iter->count();
// Iterator over the set bits of this bitmap, constructed in its default
// (non-end) mode.
809 iterator begin() const {
810 return iterator(this);
// End iterator for this bitmap (constructed with end == true).
813 iterator end() const {
814 return iterator(this, true);
818 // Convenience functions to allow Or and And without dereferencing in the user
// code: overloads taking one operand by pointer.
// This overload: reference on the left, pointer on the right.
// NOTE(review): the body is not visible in this excerpt.
821 template <unsigned ElementSize>
822 inline bool operator |=(SparseBitVector<ElementSize> &LHS,
823 const SparseBitVector<ElementSize> *RHS) {
// Union with a pointer on the left: forwards to the member operator|= of
// *LHS and returns its result. LHS must not be null.
827 template <unsigned ElementSize>
828 inline bool operator |=(SparseBitVector<ElementSize> *LHS,
829 const SparseBitVector<ElementSize> &RHS) {
830 return LHS->operator|=(RHS);
// Intersection with a pointer on the left: forwards to the member operator&=
// of *LHS and returns its result. LHS must not be null.
833 template <unsigned ElementSize>
834 inline bool operator &=(SparseBitVector<ElementSize> *LHS,
835 const SparseBitVector<ElementSize> &RHS) {
836 return LHS->operator&=(RHS);
// Intersection with a reference on the left and a pointer on the right.
// NOTE(review): the body is not visible in this excerpt.
839 template <unsigned ElementSize>
840 inline bool operator &=(SparseBitVector<ElementSize> &LHS,
841 const SparseBitVector<ElementSize> *RHS) {
845 // Convenience functions for infix union, intersection, difference operators.
// Infix union: starts from a copy of LHS so that neither operand is modified.
// NOTE(review): the statements that combine Result with RHS and return it are
// not visible in this excerpt.
847 template <unsigned ElementSize>
848 inline SparseBitVector<ElementSize>
849 operator|(const SparseBitVector<ElementSize> &LHS,
850 const SparseBitVector<ElementSize> &RHS) {
851 SparseBitVector<ElementSize> Result(LHS);
// Infix intersection: starts from a copy of LHS so that neither operand is
// modified.
// NOTE(review): the statements that combine Result with RHS and return it are
// not visible in this excerpt.
856 template <unsigned ElementSize>
857 inline SparseBitVector<ElementSize>
858 operator&(const SparseBitVector<ElementSize> &LHS,
859 const SparseBitVector<ElementSize> &RHS) {
860 SparseBitVector<ElementSize> Result(LHS);
// Infix difference: builds LHS & ~RHS into a fresh bitmap using the
// three-argument intersectWithComplement; neither operand is modified.
865 template <unsigned ElementSize>
866 inline SparseBitVector<ElementSize>
867 operator-(const SparseBitVector<ElementSize> &LHS,
868 const SparseBitVector<ElementSize> &RHS) {
869 SparseBitVector<ElementSize> Result;
870 Result.intersectWithComplement(LHS, RHS);
874 // Dump a SparseBitVector to a stream
// Iterates over the set bits of LHS; the loop below starts by advancing past
// the first bit, which suggests the first bit is handled separately.
// NOTE(review): the statements that write to `out` and the declaration of
// `be` are on lines not visible in this excerpt.
875 template <unsigned ElementSize>
876 void dump(const SparseBitVector<ElementSize> &LHS, raw_ostream &out) {
879 typename SparseBitVector<ElementSize>::iterator bi = LHS.begin(),
883 for (++bi; bi != be; ++bi) {
890 } // end namespace llvm
892 #endif // LLVM_ADT_SPARSEBITVECTOR_H