llvm.org GIT mirror llvm / dd9d38d
Introduce a SmallDenseMap container that re-uses the existing DenseMap implementation. This type includes an inline bucket array which is used initially. Once it is exceeded, an array of 64 buckets is allocated on the heap. The bucket count grows from there as needed. Some highlights of this implementation: - The inline buffer is very carefully aligned, and so supports types with alignment constraints. - It works hard to avoid aliasing issues. - Supports types with non-trivial constructors, destructors, copy construction, etc. It works reasonably hard to minimize copies and unnecessary initialization. The most common initialization is to set keys to the empty key, and so that should be fast if at all possible. This class has a performance / space trade-off. It tries to optimize for relatively small maps, and so packs the inline bucket array densely into the object. It will be marginally slower than a normal DenseMap in a few use patterns, so it isn't appropriate everywhere. The unit tests for DenseMap have been generalized a bit to support running over different map implementations in addition to different key/value types. They've then been automatically extended to cover the new container through the magic of GoogleTest's typed tests. All of this is still a bit rough though. I'm going to be cleaning up some aspects of the implementation, documenting things better, and adding tests which include non-trivial types. As soon as I'm comfortable with the correctness, I plan to switch existing users of SmallMap over to this class as it is already more correct w.r.t. construction and destruction of objects in the map. Thanks to Benjamin Kramer for all the reviews of this and the lead-up patches. That said, more review on this would really be appreciated. As I've noted a few times, I'm quite surprised how hard it is to get the semantics for a hashtable-based map container with a small buffer optimization correct. 
=] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158638 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 8 years ago
2 changed file(s) with 336 addition(s) and 45 deletion(s). Raw diff Collapse all Expand all
1414 #define LLVM_ADT_DENSEMAP_H
1515
1616 #include "llvm/Support/Compiler.h"
17 #include "llvm/Support/AlignOf.h"
1718 #include "llvm/Support/MathExtras.h"
1819 #include "llvm/Support/PointerLikeTypeTraits.h"
1920 #include "llvm/Support/type_traits.h"
2324 #include
2425 #include
2526 #include
27 #include
2628 #include
2729 #include
2830
9698
9799 /// count - Return true if the specified key is in the map.
98100 bool count(const KeyT &Val) const {
99 BucketT *TheBucket;
101 const BucketT *TheBucket;
100102 return LookupBucketFor(Val, TheBucket);
101103 }
102104
107109 return end();
108110 }
109111 const_iterator find(const KeyT &Val) const {
110 BucketT *TheBucket;
112 const BucketT *TheBucket;
111113 if (LookupBucketFor(Val, TheBucket))
112114 return const_iterator(TheBucket, getBucketsEnd(), true);
113115 return end();
127129 }
128130 template
129131 const_iterator find_as(const LookupKeyT &Val) const {
130 BucketT *TheBucket;
132 const BucketT *TheBucket;
131133 if (LookupBucketFor(Val, TheBucket))
132134 return const_iterator(TheBucket, getBucketsEnd(), true);
133135 return end();
136138 /// lookup - Return the entry for the specified key, or a default
137139 /// constructed value if no such entry exists.
138140 ValueT lookup(const KeyT &Val) const {
139 BucketT *TheBucket;
141 const BucketT *TheBucket;
140142 if (LookupBucketFor(Val, TheBucket))
141143 return TheBucket->second;
142144 return ValueT();
346348 void decrementNumTombstones() {
347349 setNumTombstones(getNumTombstones() - 1);
348350 }
349 BucketT *getBuckets() const {
351 const BucketT *getBuckets() const {
350352 return static_cast(this)->getBuckets();
353 }
354 BucketT *getBuckets() {
355 return static_cast(this)->getBuckets();
351356 }
352357 unsigned getNumBuckets() const {
353358 return static_cast(this)->getNumBuckets();
354359 }
355 BucketT *getBucketsEnd() const {
360 BucketT *getBucketsEnd() {
361 return getBuckets() + getNumBuckets();
362 }
363 const BucketT *getBucketsEnd() const {
356364 return getBuckets() + getNumBuckets();
357365 }
358366
404412 // table completely filled with tombstones, no lookup would ever succeed,
405413 // causing infinite loops in lookup.
406414 unsigned NewNumEntries = getNumEntries() + 1;
407 if (NewNumEntries*4 >= getNumBuckets()*3) {
408 this->grow(getNumBuckets() * 2);
415 unsigned NumBuckets = getNumBuckets();
416 if (NewNumEntries*4 >= NumBuckets*3) {
417 this->grow(NumBuckets * 2);
409418 LookupBucketFor(Key, TheBucket);
410 }
411 if (getNumBuckets()-(NewNumEntries+getNumTombstones()) < getNumBuckets()/8) {
412 this->grow(getNumBuckets());
419 NumBuckets = getNumBuckets();
420 }
421 if (NumBuckets-(NewNumEntries+getNumTombstones()) <= NumBuckets/8) {
422 this->grow(NumBuckets);
413423 LookupBucketFor(Key, TheBucket);
414424 }
415425
429439 /// true, otherwise it returns a bucket with an empty marker or tombstone and
430440 /// returns false.
431441 template
432 bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) const {
442 bool LookupBucketFor(const LookupKeyT &Val,
443 const BucketT *&FoundBucket) const {
433444 unsigned BucketNo = getHashValue(Val);
434445 unsigned ProbeAmt = 1;
435 BucketT *BucketsPtr = getBuckets();
446 const BucketT *BucketsPtr = getBuckets();
436447
437448 if (getNumBuckets() == 0) {
438449 FoundBucket = 0;
440451 }
441452
442453 // FoundTombstone - Keep track of whether we find a tombstone while probing.
443 BucketT *FoundTombstone = 0;
454 const BucketT *FoundTombstone = 0;
444455 const KeyT EmptyKey = getEmptyKey();
445456 const KeyT TombstoneKey = getTombstoneKey();
446457 assert(!KeyInfoT::isEqual(Val, EmptyKey) &&
448459 "Empty/Tombstone value shouldn't be inserted into map!");
449460
450461 while (1) {
451 BucketT *ThisBucket = BucketsPtr + (BucketNo & (getNumBuckets()-1));
462 const BucketT *ThisBucket = BucketsPtr + (BucketNo & (getNumBuckets()-1));
452463 // Found Val's bucket? If so, return it.
453464 if (KeyInfoT::isEqual(Val, ThisBucket->first)) {
454465 FoundBucket = ThisBucket;
474485 // probing.
475486 BucketNo += ProbeAmt++;
476487 }
488 }
489
490 template
491 bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) {
492 const BucketT *ConstFoundBucket = FoundBucket;
493 bool Result = const_cast(this)
494 ->LookupBucketFor(Val, ConstFoundBucket);
495 FoundBucket = const_cast(ConstFoundBucket);
496 return Result;
477497 }
478498
479499 public:
637657
638658 Buckets = static_cast(operator new(sizeof(BucketT) * NumBuckets));
639659 return true;
660 }
661 };
662
663 template
664 unsigned InlineBuckets = 4,
665 typename KeyInfoT = DenseMapInfo >
666 class SmallDenseMap
667 : public DenseMapBase,
668 KeyT, ValueT, KeyInfoT> {
669 // Lift some types from the dependent base class into this class for
670 // simplicity of referring to them.
671 typedef DenseMapBase BaseT;
672 typedef typename BaseT::BucketT BucketT;
673 friend class DenseMapBase;
674
675 unsigned Small : 1;
676 unsigned NumEntries : 31;
677 unsigned NumTombstones;
678
679 struct LargeRep {
680 BucketT *Buckets;
681 unsigned NumBuckets;
682 };
683
684 /// A "union" of an inline bucket array and the struct representing
685 /// a large bucket. This union will be discriminated by the 'Small' bit.
686 typename AlignedCharArray::union_type
687 storage;
688
689 public:
690 explicit SmallDenseMap(unsigned NumInitBuckets = 0) {
691 init(NumInitBuckets);
692 }
693
694 SmallDenseMap(const SmallDenseMap &other) {
695 init(0);
696 copyFrom(other);
697 }
698
699 #if LLVM_USE_RVALUE_REFERENCES
700 SmallDenseMap(SmallDenseMap &&other) {
701 init(0);
702 swap(other);
703 }
704 #endif
705
706 template
707 SmallDenseMap(const InputIt &I, const InputIt &E) {
708 init(NextPowerOf2(std::distance(I, E)));
709 this->insert(I, E);
710 }
711
712 ~SmallDenseMap() {
713 this->destroyAll();
714 deallocateBuckets();
715 }
716
717 void swap(SmallDenseMap& RHS) {
718 std::swap(NumEntries, RHS.NumEntries);
719 std::swap(NumTombstones, RHS.NumTombstones);
720 if (Small && RHS.Small) {
721 for (unsigned i = 0, e = InlineBuckets; i != e; ++i)
722 std::swap(getInlineBuckets()[i], RHS.getInlineBuckes()[i]);
723 return;
724 }
725 if (!Small && !RHS.Small) {
726 std::swap(getLargeRep()->Buckets, RHS.getLargeRep()->Buckets);
727 std::swap(getLargeRep()->NumBuckets, RHS.getLargeRep()->NumBuckets);
728 return;
729 }
730
731 SmallDenseMap &SmallSide = Small ? *this : RHS;
732 SmallDenseMap &LargeSide = Small ? RHS : *this;
733
734 // First stash the large side's rep and move the small side across.
735 LargeRep TmpRep = llvm_move(*LargeSide.getLargeRep());
736 LargeSide.getLargeRep()->~LargeRep();
737 LargeSide.Small = true;
738 // This is similar to the standard move-from-old-buckets, but the bucket
739 // count hasn't actually rotate in this case. So we have to carefully
740 // move construct the keys and values into their new locations, but there
741 // is no need to re-hash things.
742 const KeyT EmptyKey = this->getEmptyKey();
743 const KeyT TombstoneKey = this->getTombstoneKey();
744 for (unsigned i = 0, e = InlineBuckets; i != e; ++i) {
745 BucketT *NewB = &LargeSide.getInlineBuckets()[i],
746 *OldB = &SmallSide.getInlineBuckets()[i];
747 new (&NewB->first) KeyT(llvm_move(OldB->first));
748 NewB->first.~KeyT();
749 if (!KeyInfoT::isEqual(NewB->first, EmptyKey) &&
750 !KeyInfoT::isEqual(NewB->first, TombstoneKey)) {
751 new (&NewB->second) ValueT(llvm_move(OldB->second));
752 OldB->second.~ValueT();
753 }
754 }
755
756 // The hard part of moving the small buckets across is done, just move
757 // the TmpRep into its new home.
758 SmallSide.Small = false;
759 new (SmallSide.getLargeRep()) LargeRep(llvm_move(TmpRep));
760 }
761
762 SmallDenseMap& operator=(const SmallDenseMap& other) {
763 copyFrom(other);
764 return *this;
765 }
766
767 #if LLVM_USE_RVALUE_REFERENCES
768 SmallDenseMap& operator=(SmallDenseMap &&other) {
769 this->destroyAll();
770 deallocateBuckets();
771 init(0);
772 swap(other);
773 return *this;
774 }
775 #endif
776
777 void copyFrom(const SmallDenseMap& other) {
778 this->destroyAll();
779 deallocateBuckets();
780 Small = true;
781 if (other.getNumBuckets() > InlineBuckets) {
782 Small = false;
783 allocateBuckets(other.getNumBuckets());
784 }
785 this->BaseT::copyFrom(other);
786 }
787
788 void init(unsigned InitBuckets) {
789 Small = true;
790 if (InitBuckets > InlineBuckets) {
791 Small = false;
792 new (getLargeRep()) LargeRep(allocateBuckets(InitBuckets));
793 }
794 this->BaseT::initEmpty();
795 }
796
797 void grow(unsigned AtLeast) {
798 if (AtLeast > InlineBuckets)
799 AtLeast = std::max(64, NextPowerOf2(AtLeast));
800
801 if (Small) {
802 if (AtLeast <= InlineBuckets)
803 return; // Nothing to do.
804
805 // First grow an allocated bucket array in another map and move our
806 // entries into it.
807 // FIXME: This is wasteful, we don't need the inline buffer here, and we
808 // certainly don't need to initialize it to empty.
809 SmallDenseMap TmpMap;
810 TmpMap.Small = false;
811 new (TmpMap.getLargeRep()) LargeRep(allocateBuckets(AtLeast));
812 TmpMap.moveFromOldBuckets(getInlineBuckets(),
813 getInlineBuckets()+InlineBuckets);
814
815 // Now steal the innards back into this map, and arrange for the
816 // temporary map to be cleanly torn down.
817 assert(NumEntries == TmpMap.NumEntries);
818 Small = false;
819 NumTombstones = llvm_move(TmpMap.NumTombstones);
820 new (getLargeRep()) LargeRep(llvm_move(*TmpMap.getLargeRep()));
821 TmpMap.getLargeRep()->~LargeRep();
822 TmpMap.Small = true;
823 return;
824 }
825
826 LargeRep OldRep = llvm_move(*getLargeRep());
827 getLargeRep()->~LargeRep();
828 if (AtLeast <= InlineBuckets) {
829 Small = true;
830 } else {
831 new (getLargeRep()) LargeRep(allocateBuckets(AtLeast));
832 }
833
834 this->moveFromOldBuckets(OldRep.Buckets, OldRep.Buckets+OldRep.NumBuckets);
835
836 // Free the old table.
837 operator delete(OldRep.Buckets);
838 }
839
840 void shrink_and_clear() {
841 unsigned OldSize = this->size();
842 this->destroyAll();
843
844 // Reduce the number of buckets.
845 unsigned NewNumBuckets = 0;
846 if (OldSize) {
847 NewNumBuckets = 1 << (Log2_32_Ceil(OldSize) + 1);
848 if (NewNumBuckets > InlineBuckets && NewNumBuckets < 64u)
849 NewNumBuckets = 64;
850 }
851 if ((Small && NewNumBuckets <= InlineBuckets) ||
852 (!Small && NewNumBuckets == getLargeRep()->NumBuckets)) {
853 this->BaseT::initEmpty();
854 return;
855 }
856
857 deallocateBuckets();
858 init(NewNumBuckets);
859 }
860
861 private:
862 unsigned getNumEntries() const {
863 return NumEntries;
864 }
865 void setNumEntries(unsigned Num) {
866 assert(Num < INT_MAX && "Cannot support more than INT_MAX entries");
867 NumEntries = Num;
868 }
869
870 unsigned getNumTombstones() const {
871 return NumTombstones;
872 }
873 void setNumTombstones(unsigned Num) {
874 NumTombstones = Num;
875 }
876
877 const BucketT *getInlineBuckets() const {
878 assert(Small);
879 // Note that this cast does not violate aliasing rules as we assert that
880 // the memory's dynamic type is the small, inline bucket buffer, and the
881 // 'storage.buffer' static type is 'char *'.
882 return reinterpret_cast(storage.buffer);
883 }
884 BucketT *getInlineBuckets() {
885 return const_cast(
886 const_cast(this)->getInlineBuckets());
887 }
888 const LargeRep *getLargeRep() const {
889 assert(!Small);
890 // Note, same rule about aliasing as with getInlineBuckets.
891 return reinterpret_cast(storage.buffer);
892 }
893 LargeRep *getLargeRep() {
894 return const_cast(
895 const_cast(this)->getLargeRep());
896 }
897
898 const BucketT *getBuckets() const {
899 return Small ? getInlineBuckets() : getLargeRep()->Buckets;
900 }
901 BucketT *getBuckets() {
902 return const_cast(
903 const_cast(this)->getBuckets());
904 }
905 unsigned getNumBuckets() const {
906 return Small ? InlineBuckets : getLargeRep()->NumBuckets;
907 }
908
909 void deallocateBuckets() {
910 if (Small)
911 return;
912
913 operator delete(getLargeRep()->Buckets);
914 getLargeRep()->~LargeRep();
915 }
916
917 LargeRep allocateBuckets(unsigned Num) {
918 assert(Num > InlineBuckets && "Must allocate more buckets than are inline");
919 LargeRep Rep = {
920 static_cast(operator new(sizeof(BucketT) * Num)), Num
921 };
922 return Rep;
640923 }
641924 };
642925
1414
1515 namespace {
1616
17 // Test fixture
18 template
19 class DenseMapTest : public testing::Test {
20 protected:
21 T Map;
22
23 typename T::key_type getKey(int i = 0);
24 typename T::mapped_type getValue(int i = 0);
25 };
26
27 template <>
28 uint32_t DenseMapTest >::getKey(int i) {
29 return i;
30 }
31
32 template <>
33 uint32_t DenseMapTest >::getValue(int i) {
34 return 42 + i;
35 }
36
37 template <>
38 uint32_t *DenseMapTest >::getKey(int i) {
// Key helper for maps keyed by uint32_t: the key for index i is i itself.
// The unnamed pointer parameter only selects this overload.
uint32_t getTestKey(int i, uint32_t *) { return static_cast<uint32_t>(i); }
// Value helper for maps holding uint32_t: the value for index i is 42 + i.
// The unnamed pointer parameter only selects this overload.
uint32_t getTestValue(int i, uint32_t *) {
  return static_cast<uint32_t>(42 + i);
}
19
// Key helper for maps keyed by uint32_t*: returns the address of slot i in a
// function-local static array, so equal indices give equal pointers and
// different indices give different pointers. The unnamed parameter only
// selects this overload.
uint32_t *getTestKey(int i, uint32_t **) {
  static uint32_t dummy_arr1[8192];
  // Reject negative indices too; they would index before the array.
  assert(i >= 0 && i < 8192 && "Only support 8192 dummy keys.");
  return &dummy_arr1[i];
}
43
44 template <>
45 uint32_t *DenseMapTest >::getValue(int i) {
46 static uint32_t dummy_arr2[8192];
47 assert(i < 8192 && "Only support 8192 dummy values.");
48 return &dummy_arr2[i];
49 }
// Value helper for maps holding uint32_t*: returns the address of slot i in
// a function-local static array, so equal indices give equal pointers and
// different indices give different pointers. The unnamed parameter only
// selects this overload.
uint32_t *getTestValue(int i, uint32_t **) {
  static uint32_t dummy_arr2[8192];
  // Reject negative indices too; they would index before the array.
  assert(i >= 0 && i < 8192 && "Only support 8192 dummy values.");
  return &dummy_arr2[i];
}
30
31 // Test fixture, with helper functions implemented by forwarding to global
32 // function overloads selected by component types of the type parameter. This
33 // allows all of the map implementations to be tested with shared
34 // implementations of helper routines.
35 template
36 class DenseMapTest : public ::testing::Test {
37 protected:
38 T Map;
39
40 static typename T::key_type *const dummy_key_ptr;
41 static typename T::mapped_type *const dummy_value_ptr;
42
43 typename T::key_type getKey(int i = 0) {
44 return getTestKey(i, dummy_key_ptr);
45 }
46 typename T::mapped_type getValue(int i = 0) {
47 return getTestValue(i, dummy_value_ptr);
48 }
49 };
50
51 template
52 typename T::key_type *const DenseMapTest::dummy_key_ptr = 0;
53 template
54 typename T::mapped_type *const DenseMapTest::dummy_value_ptr = 0;
5055
5156 // Register these types for testing.
5257 typedef ::testing::Types,
53 DenseMap > DenseMapTestTypes;
58 DenseMap,
59 SmallDenseMap,
60 SmallDenseMap
61 > DenseMapTestTypes;
5462 TYPED_TEST_CASE(DenseMapTest, DenseMapTestTypes);
5563
5664 // Empty map tests