#pragma once

#include "Array.h"
#include <type_traits> // std::aligned_storage, std::is_pod
#include <iterator>    // std::bidirectional_iterator_tag
#include <new>         // placement new

NS_BF_BEGIN;

template <typename TKey, typename TAlloc>
class HashSet : public TAlloc
{
public:
	typedef int int_cosize;
	typedef TKey key_type;

public:
	struct Entry
	{
		typename std::aligned_storage<sizeof(TKey), alignof(TKey)>::type mKey;
		int_cosize mNext;	// Index of next entry, -1 if last
		int32 mHashCode;	// Lower 31 bits of hash code, -1 if unused
	};

public:
	int_cosize* mBuckets;
	Entry* mEntries;
	int_cosize mAllocSize;
	int_cosize mCount;
	int_cosize mFreeList;
	int_cosize mFreeCount;

public:
	struct iterator
	{
	public:
		typedef std::bidirectional_iterator_tag iterator_category;

	public:
		HashSet* mDictionary;
		int_cosize mIdx;
		int_cosize mCurrentIdx;

	protected:
		// Advances to the next used entry, or past the end if there is none
		void MoveNext()
		{
			while ((uintptr)mIdx < (uintptr)mDictionary->mCount)
			{
				if (mDictionary->mEntries[mIdx].mHashCode >= 0)
				{
					mCurrentIdx = mIdx;
					mIdx++;
					return;
				}
				mIdx++;
			}

			mIdx = mDictionary->mCount + 1;
			mCurrentIdx = -1;
		}

	public:
		iterator()
		{
			mDictionary = NULL;
			mIdx = 0;
			mCurrentIdx = -1;
		}

		iterator(HashSet* dict, int idx)
		{
			mDictionary = dict;
			mIdx = idx;
			mCurrentIdx = -1;
		}

		iterator& operator++()
		{
			MoveNext();
			return *this;
		}

		iterator operator++(int)
		{
			auto prevVal = *this;
			MoveNext();
			return prevVal;
		}

		bool operator!=(const iterator& itr) const
		{
			return (itr.mDictionary != mDictionary) || (itr.mIdx != mIdx);
		}

		bool operator==(const iterator& itr) const
		{
			return (itr.mDictionary == mDictionary) && (itr.mIdx == mIdx);
		}

		TKey& operator*()
		{
			return *(TKey*)&mDictionary->mEntries[mCurrentIdx].mKey;
		}

		TKey* operator->()
		{
			return (TKey*)&mDictionary->mEntries[mCurrentIdx].mKey;
		}
	};

	// Enumerates all entries whose stored hash code equals a given hash code
	struct HashSelector
	{
		HashSet* mDictionary;
		int32 mHashCode;

		HashSelector(HashSet* dictionary, int32 hashCode)
		{
			mDictionary = dictionary;
			mHashCode = hashCode;
		}

		struct iterator
		{
		public:
			typedef std::bidirectional_iterator_tag iterator_category;

		public:
			HashSelector* mHashSelector;
			int_cosize mIdx;
			int_cosize mCurrentIdx;

		protected:
			// Follows the bucket chain to the next entry with a matching hash code
			void MoveNext()
			{
				auto dictionary = mHashSelector->mDictionary;
				while (mIdx >= 0)
				{
					if (dictionary->mEntries[mIdx].mHashCode == mHashSelector->mHashCode)
					{
						mCurrentIdx = mIdx;
						mIdx = dictionary->mEntries[mIdx].mNext;
						return;
					}
					else
					{
						mIdx = dictionary->mEntries[mIdx].mNext;
					}
				}

				mIdx = -1;
				mCurrentIdx = -1;
			}

		public:
			iterator()
			{
				mHashSelector = NULL;
				mIdx = 0;
				mCurrentIdx = -1;
			}

			iterator(HashSelector* hashSelector, int idx)
			{
				mHashSelector = hashSelector;
				mIdx = idx;
				mCurrentIdx = -1;
			}

			iterator& operator++()
			{
				MoveNext();
				return *this;
			}

			iterator operator++(int)
			{
				auto prevVal = *this;
				MoveNext();
				return prevVal;
			}

			/*iterator& operator--()
			{
				mPtr--;
				return *this;
			}

			iterator operator--(int)
			{
				auto prevVal = *this;
				mPtr--;
				return prevVal;
			}*/

			bool operator!=(const iterator& itr) const
			{
				return (itr.mHashSelector != mHashSelector) || (itr.mCurrentIdx != mCurrentIdx);
			}

			bool operator==(const iterator& itr) const
			{
				return (itr.mHashSelector == mHashSelector) && (itr.mCurrentIdx == mCurrentIdx);
			}

			TKey& operator*()
			{
				return *(TKey*)&mHashSelector->mDictionary->mEntries[mCurrentIdx].mKey;
			}

			TKey* operator->()
			{
				return (TKey*)&mHashSelector->mDictionary->mEntries[mCurrentIdx].mKey;
			}
		};

		iterator begin()
		{
			int_cosize hashCode = (int_cosize)(mHashCode & 0x7FFFFFFF);
			iterator itr(this, mDictionary->mBuckets[hashCode % mDictionary->mAllocSize]);
			++itr;
			return itr;
		}

		iterator end()
		{
			return iterator(this, -1);
		}
	};

private:
	int_cosize GetPrimeish(int_cosize min)
	{
		// This is a minimal effort to help address-aligned data
		return (min | 1);
	}

	int_cosize ExpandSize(int_cosize oldSize)
	{
		int_cosize newSize = 2 * oldSize;

		// Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
		// Note that this check works even when mAllocSize overflowed thanks to the (uint) cast
		/*if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
		{
			Contract.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
			return MaxPrimeArrayLength;
		}*/

		return GetPrimeish(newSize);
	}

	void Resize()
	{
		Resize(ExpandSize(mCount), false);
	}

	void Resize(intptr newSize, bool forceNewHashCodes)
	{
		BF_ASSERT(newSize >= mAllocSize);
		Entry* newEntries;
		int_cosize* newBuckets;
		AllocData(newSize, newEntries, newBuckets);

		for (int_cosize i = 0; i < newSize; i++)
			newBuckets[i] = -1;
		for (int i = 0; i < mCount; i++)
		{
			auto& newEntry = newEntries[i];
			auto& oldEntry = mEntries[i];
			newEntry.mHashCode = oldEntry.mHashCode;
			newEntry.mNext = oldEntry.mNext;
			new (&newEntry.mKey) TKey(std::move(*(TKey*)&oldEntry.mKey));
		}
		for (int i = mCount; i < newSize; i++)
		{
			newEntries[i].mHashCode = -1;
		}
		if (forceNewHashCodes)
		{
			for (int_cosize i = 0; i < mCount; i++)
			{
				if (newEntries[i].mHashCode != -1)
				{
					newEntries[i].mHashCode = (int_cosize)BeefHash<TKey>()(*(TKey*)&newEntries[i].mKey) & 0x7FFFFFFF;
				}
			}
		}
		for (int_cosize i = 0; i < mCount; i++)
		{
			if (newEntries[i].mHashCode >= 0)
			{
				int_cosize bucket = (int_cosize)(newEntries[i].mHashCode % newSize);
				newEntries[i].mNext = newBuckets[bucket];
				newBuckets[bucket] = i;
			}
		}

		DeleteData();

		mBuckets = newBuckets;
		mEntries = newEntries;
		mAllocSize = (int_cosize)newSize;
	}

	int FindEntry(const TKey& key)
	{
		if (mBuckets != NULL)
		{
			int_cosize hashCode = (int_cosize)BeefHash<TKey>()(key) & 0x7FFFFFFF;
			for (int_cosize i = mBuckets[hashCode % mAllocSize]; i >= 0; i = mEntries[i].mNext)
			{
				if (mEntries[i].mHashCode == hashCode && (*(TKey*)&mEntries[i].mKey == key))
					return i;
			}
		}
		return -1;
	}

	template <typename TOther>
	int FindEntryWith(const TOther& key)
	{
		if (mBuckets != NULL)
		{
			int_cosize hashCode = (int_cosize)BeefHash<TOther>()(key) & 0x7FFFFFFF;
			for (int_cosize i = mBuckets[hashCode % mAllocSize]; i >= 0; i = mEntries[i].mNext)
			{
				if (mEntries[i].mHashCode == hashCode && (*(TKey*)&mEntries[i].mKey == key))
					return i;
			}
		}
		return -1;
	}

	void Initialize(intptr capacity)
	{
		int_cosize size = GetPrimeish((int_cosize)capacity);
		AllocData(size, mEntries, mBuckets);
		mAllocSize = size;

		for (int_cosize i = 0; i < (int_cosize)mAllocSize; i++)
			mBuckets[i] = -1;

		mFreeList = -1;
	}

	bool Insert(const TKey& key, bool add, TKey** keyPtr)
	{
		if (mBuckets == NULL)
			Initialize(0);

		int_cosize hashCode = (int_cosize)BeefHash<TKey>()(key) & 0x7FFFFFFF;
		int_cosize targetBucket = hashCode % (int_cosize)mAllocSize;
		for (int_cosize i = mBuckets[targetBucket]; i >= 0; i = mEntries[i].mNext)
		{
			if ((mEntries[i].mHashCode == hashCode) && (*(TKey*)&mEntries[i].mKey == key))
			{
				if (add)
				{
					BF_FATAL("Duplicate key");
					//ThrowUnimplemented();
					//ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
				}
				//entries[i].value = value;
				//mVersion++;
				if (keyPtr != NULL)
					*keyPtr = (TKey*)&mEntries[i].mKey;
				return false;
			}
		}
		int_cosize index;
		if (mFreeCount > 0)
		{
			index = mFreeList;
			mFreeList = mEntries[index].mNext;
			mFreeCount--;
		}
		else
		{
			if (mCount == mAllocSize)
			{
				Resize();
				targetBucket = hashCode % (int_cosize)mAllocSize;
			}
			index = mCount;
			mCount++;
		}

		mEntries[index].mHashCode = hashCode;
		mEntries[index].mNext = mBuckets[targetBucket];
		new (&mEntries[index].mKey) TKey(key);
		mBuckets[targetBucket] = index;
		//mVersion++;

		if (keyPtr != NULL)
			*keyPtr = (TKey*)&mEntries[index].mKey;
		return true;
	}
	// Unlinks entry 'i' from its bucket chain and pushes it onto the free list
	void RemoveIdx(int_cosize bucket, int_cosize i, int_cosize last)
	{
		if (last < 0)
		{
			mBuckets[bucket] = mEntries[i].mNext;
		}
		else
		{
			mEntries[last].mNext = mEntries[i].mNext;
		}
		mEntries[i].mHashCode = -1;
		mEntries[i].mNext = mFreeList;
		((TKey*)&mEntries[i].mKey)->~TKey();
		mFreeList = i;
		mFreeCount++;
	}

public:
	HashSet()
	{
		mBuckets = NULL;
		mEntries = NULL;
		mAllocSize = 0;
		mCount = 0;
		mFreeList = 0;
		mFreeCount = 0;
	}

	HashSet(const HashSet& val)
	{
		mAllocSize = val.mAllocSize;
		mCount = val.mCount;
		if (mAllocSize == 0)
		{
			mBuckets = NULL;
			mEntries = NULL;
		}
		else
		{
			AllocData(mAllocSize, mEntries, mBuckets);
			for (int_cosize i = 0; i < mAllocSize; i++)
				mBuckets[i] = val.mBuckets[i];
			for (int i = 0; i < mCount; i++)
			{
				auto& newEntry = mEntries[i];
				auto& oldEntry = val.mEntries[i];
				newEntry.mHashCode = oldEntry.mHashCode;
				newEntry.mNext = oldEntry.mNext;
				new (&newEntry.mKey) TKey(*(TKey*)&oldEntry.mKey);
			}
			for (int i = mCount; i < mAllocSize; i++)
			{
				mEntries[i].mHashCode = -1;
			}
		}
		mFreeCount = val.mFreeCount;
		mFreeList = val.mFreeList;
	}

	HashSet(HashSet&& val)
	{
		mAllocSize = val.mAllocSize;
		mCount = val.mCount;
		mBuckets = val.mBuckets;
		mEntries = val.mEntries;
		mFreeCount = val.mFreeCount;
		mFreeList = val.mFreeList;
	}

	// Entries and buckets share a single allocation: entries first, buckets after
	void AllocData(intptr size, Entry*& outEntries, int_cosize*& outBuckets)
	{
		uint8* data = (uint8*)this->rawAllocate(size * (sizeof(Entry) + sizeof(int_cosize)));
		outEntries = (Entry*)data;
		outBuckets = (int_cosize*)(data + size * sizeof(Entry));
	}

	void DeleteData()
	{
		this->rawDeallocate(mEntries);
	}

	~HashSet()
	{
		if (!std::is_pod<TKey>::value)
		{
			for (int_cosize i = 0; i < mCount; i++)
			{
				if (mEntries[i].mHashCode != -1)
					((TKey*)&mEntries[i].mKey)->~TKey();
			}
		}
		DeleteData();
	}

	HashSet& operator=(const HashSet& rhs)
	{
		Clear();
		for (auto& val : rhs)
			Add(val);
		return *this;
	}

	intptr GetCount() const
	{
		return mCount - mFreeCount;
	}

	intptr size() const
	{
		return mCount - mFreeCount;
	}

	bool IsEmpty() const
	{
		return mCount - mFreeCount == 0;
	}

	void Reserve(intptr size)
	{
		if (size > mAllocSize)
			Resize(size, false);
	}

	bool Add(const TKey& key)
	{
		if (!Insert(key, false, NULL))
			return false;
		return true;
	}

	bool TryAdd(const TKey& key, TKey** keyPtr)
	{
		if (!Insert(key, false, keyPtr))
			return false;
		return true;
	}

	bool TryGet(const TKey& key, TKey** keyPtr)
	{
		int idx = FindEntry(key);
		if (idx == -1)
			return false;
		*keyPtr = (TKey*)&mEntries[idx].mKey;
		return true;
	}

	template <typename TOther>
	bool TryGetWith(const TOther& key, TKey** keyPtr)
	{
		int idx = FindEntryWith(key);
		if (idx == -1)
			return false;
		*keyPtr = (TKey*)&mEntries[idx].mKey;
		return true;
	}

	bool Remove(const TKey& key)
	{
		if (mBuckets != NULL)
		{
			int_cosize hashCode = (int_cosize)BeefHash<TKey>()(key) & 0x7FFFFFFF;
			int_cosize bucket = hashCode % (int_cosize)mAllocSize;
			int_cosize last = -1;
			for (int_cosize i = mBuckets[bucket]; i >= 0; last = i, i = mEntries[i].mNext)
			{
				if ((mEntries[i].mHashCode == hashCode) && (*(TKey*)&mEntries[i].mKey == key))
				{
					RemoveIdx(bucket, i, last);
					return true;
				}
			}
		}
		return false;
	}

	iterator Remove(const iterator& itr)
	{
		iterator nextItr = itr;
		++nextItr;

		auto& entry = mEntries[itr.mCurrentIdx];
		BF_ASSERT(entry.mHashCode >= 0);

		// We have to iterate through to mCurrentIdx so we can get 'last'
		int_cosize hashCode = entry.mHashCode;
		int_cosize bucket = hashCode % (int_cosize)mAllocSize;
		int_cosize last = -1;
		for (int_cosize i = mBuckets[bucket]; i >= 0; last = i, i = mEntries[i].mNext)
		{
			if ((mEntries[i].mHashCode == hashCode) && (i == itr.mCurrentIdx))
			{
				RemoveIdx(bucket, i, last);
				return nextItr;
			}
		}

		BF_FATAL("not found");
		return nextItr;
	}

	void Clear()
	{
		if (mCount > 0)
		{
			for (int_cosize i = 0; i < mAllocSize; i++)
				mBuckets[i] = -1;

			if (!std::is_pod<TKey>::value)
			{
				for (int_cosize i = 0; i < mCount; i++)
				{
					if (mEntries[i].mHashCode != -1)
						((TKey*)&mEntries[i].mKey)->~TKey();
				}
			}
			for (int_cosize i = 0; i < mCount; i++)
			{
				mEntries[i].mHashCode = -1;
			}
			mFreeList = -1;
			mCount = 0;
			mFreeCount = 0;
		}
	}

	bool Contains(const TKey& key)
	{
		return FindEntry(key) >= 0;
	}

	template <typename TOther>
	bool ContainsWith(const TOther& key)
	{
		return FindEntryWith(key) != -1;
	}

	// Note: there's no const_iterator
	iterator begin() const
	{
		iterator itr((HashSet*)this, 0);
		++itr;
		return itr;
	}

	iterator end() const
	{
		return iterator((HashSet*)this, mCount + 1);
	}

	HashSelector SelectHashes(size_t hashCode)
	{
		return HashSelector(this, (int32)hashCode);
	}
};

NS_BF_END;
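
// --------------------------------------------------------------------------
// Example usage (illustrative sketch only, not part of the original header).
// Assumptions: a BeefHash specialization exists for the key type, and the
// allocator template argument exposes rawAllocate/rawDeallocate as used by
// AllocData/DeleteData above. 'ExampleRawAllocator' is a hypothetical
// stand-in for whatever allocator the codebase normally supplies.
//
//   struct ExampleRawAllocator
//   {
//       void* rawAllocate(intptr size) { return malloc(size); }
//       void rawDeallocate(void* ptr) { free(ptr); }
//   };
//
//   void HashSetExample()
//   {
//       HashSet<int, ExampleRawAllocator> set;
//
//       set.Add(42);                  // true: newly inserted
//       bool added = set.Add(42);     // false: key already present
//
//       int* keyPtr = NULL;
//       set.TryAdd(7, &keyPtr);       // keyPtr points at the stored key slot
//
//       if (set.Contains(42))
//       {
//           // ...
//       }
//
//       for (auto& key : set)         // visits live entries only
//           (void)key;
//
//       set.Remove(42);
//       set.Clear();
//   }
// --------------------------------------------------------------------------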