1
0
Fork 0
mirror of https://github.com/beefytech/Beef.git synced 2025-06-08 19:48:20 +02:00
Beef/BeefySysLib/util/HashSet.h
2024-12-29 11:02:17 -08:00

656 lines
13 KiB
C++

#pragma once
#include "Array.h"
#include <unordered_map>
NS_BF_BEGIN;
template <typename TKey, typename TAlloc = AllocatorCLib >
class HashSet : public TAlloc
{
public:
typedef int int_cosize;
typedef TKey key_type;
public:
struct Entry
{
typename std::aligned_storage<sizeof(TKey), alignof(TKey)>::type mKey;
int_cosize mNext; // Index of next entry, -1 if last
int32 mHashCode; // Lower 31 bits of hash code, -1 if unused
};
public:
int_cosize* mBuckets;
Entry* mEntries;
int_cosize mAllocSize;
int_cosize mCount;
int_cosize mFreeList;
int_cosize mFreeCount;
public:
struct iterator
{
public:
typedef std::bidirectional_iterator_tag iterator_category;
public:
HashSet* mDictionary;
int_cosize mIdx;
int_cosize mCurrentIdx;
protected:
void MoveNext()
{
while ((uintptr)mIdx < (uintptr)mDictionary->mCount)
{
if (mDictionary->mEntries[mIdx].mHashCode >= 0)
{
mCurrentIdx = mIdx;
mIdx++;
return;
}
mIdx++;
}
mIdx = mDictionary->mCount + 1;
mCurrentIdx = -1;
}
public:
iterator()
{
mDictionary = NULL;
mIdx = 0;
mCurrentIdx = -1;
}
iterator(HashSet* dict, int idx)
{
mDictionary = dict;
mIdx = idx;
mCurrentIdx = -1;
}
iterator& operator++()
{
MoveNext();
return *this;
}
iterator operator++(int)
{
auto prevVal = *this;
MoveNext();
return prevVal;
}
bool operator!=(const iterator& itr) const
{
return (itr.mDictionary != mDictionary) || (itr.mIdx != mIdx);
}
bool operator==(const iterator& itr) const
{
return (itr.mDictionary == mDictionary) && (itr.mIdx == mIdx);
}
TKey& operator*()
{
return *(TKey*)&mDictionary->mEntries[mCurrentIdx].mKey;
}
TKey* operator->()
{
return (TKey*)&mDictionary->mEntries[mCurrentIdx].mKey;
}
};
struct HashSelector
{
HashSet* mDictionary;
int32 mHashCode;
HashSelector(HashSet* dictionary, int32 hashCode)
{
mDictionary = dictionary;
mHashCode = hashCode;
}
struct iterator
{
public:
typedef std::bidirectional_iterator_tag iterator_category;
public:
HashSelector* mHashSelector;
int_cosize mIdx;
int_cosize mCurrentIdx;
protected:
void MoveNext()
{
auto dictionary = mHashSelector->mDictionary;
while (mIdx >= 0)
{
if (dictionary->mEntries[mIdx].mHashCode == mHashSelector->mHashCode)
{
mCurrentIdx = mIdx;
mIdx = dictionary->mEntries[mIdx].mNext;
return;
}
else
{
mIdx = dictionary->mEntries[mIdx].mNext;
}
}
mIdx = -1;
mCurrentIdx = -1;
}
public:
iterator()
{
mHashSelector = NULL;
mIdx = 0;
mCurrentIdx = -1;
}
iterator(HashSelector* hashSelector, int idx)
{
mHashSelector = hashSelector;
mIdx = idx;
mCurrentIdx = -1;
}
iterator& operator++()
{
MoveNext();
return *this;
}
iterator operator++(int)
{
auto prevVal = *this;
MoveNext();
return prevVal;
}
/*iterator& operator--()
{
mPtr--;
return *this;
}
iterator operator--(int)
{
auto prevVal = *this;
mPtr--;
return prevVal;
}*/
bool operator!=(const iterator& itr) const
{
return (itr.mHashSelector != mHashSelector) || (itr.mCurrentIdx != mCurrentIdx );
}
bool operator==(const iterator& itr) const
{
return (itr.mHashSelector == mHashSelector) && (itr.mCurrentIdx == mCurrentIdx );
}
TKey& operator*()
{
return *(TKey*)&mHashSelector->mDictionary->mEntries[mCurrentIdx].mKey;
}
TKey* operator->()
{
return (TKey*)&mHashSelector->mDictionary->mEntries[mCurrentIdx].mKey;
}
};
iterator begin()
{
int_cosize hashCode = (int_cosize)(mHashCode & 0x7FFFFFFF);
iterator itr(this, mDictionary->mBuckets[hashCode % mDictionary->mAllocSize]);
++itr;
return itr;
}
iterator end()
{
return iterator(this, -1);
}
};
private:
int_cosize GetPrimeish(int_cosize min)
{
// This is a minimal effort to help address-aligned dataa
return (min | 1);
}
int_cosize ExpandSize(int_cosize oldSize)
{
int_cosize newSize = 2 * oldSize;
// Allow the hashtables to grow to maximum possible size (~2G elements) before encoutering capacity overflow.
// Note that this check works even when mAllocSize overflowed thanks to the (uint) cast
/*if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
{
Contract.Assert( MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
return MaxPrimeArrayLength;
}*/
return GetPrimeish(newSize);
}
void Resize()
{
Resize(ExpandSize(mCount), false);
}
void Resize(intptr newSize, bool forceNewHashCodes)
{
BF_ASSERT(newSize >= mAllocSize);
Entry* newEntries;
int_cosize* newBuckets;
AllocData(newSize, newEntries, newBuckets);
for (int_cosize i = 0; i < newSize; i++)
newBuckets[i] = -1;
for (int i = 0; i < mCount; i++)
{
auto& newEntry = newEntries[i];
auto& oldEntry = mEntries[i];
newEntry.mHashCode = oldEntry.mHashCode;
newEntry.mNext = oldEntry.mNext;
new (&newEntry.mKey) TKey(std::move(*(TKey*)&oldEntry.mKey));
}
for (int i = mCount; i < newSize; i++)
{
newEntries[i].mHashCode = -1;
}
if (forceNewHashCodes)
{
for (int_cosize i = 0; i < mCount; i++)
{
if (newEntries[i].mHashCode != -1)
{
newEntries[i].mHashCode = (int_cosize)BeefHash<TKey>()(*(TKey*)&newEntries[i].mKey) & 0x7FFFFFFF;
}
}
}
for (int_cosize i = 0; i < mCount; i++)
{
if (newEntries[i].mHashCode >= 0)
{
int_cosize bucket = (int_cosize)(newEntries[i].mHashCode % newSize);
newEntries[i].mNext = newBuckets[bucket];
newBuckets[bucket] = i;
}
}
DeleteData();
mBuckets = newBuckets;
mEntries = newEntries;
mAllocSize = (int_cosize)newSize;
}
int FindEntry(const TKey& key)
{
if (mBuckets != NULL)
{
int_cosize hashCode = (int_cosize)BeefHash<TKey>()(key) & 0x7FFFFFFF;
for (int_cosize i = mBuckets[hashCode % mAllocSize]; i >= 0; i = mEntries[i].mNext)
{
if (mEntries[i].mHashCode == hashCode && (*(TKey*)&mEntries[i].mKey == key)) return i;
}
}
return -1;
}
template <typename TOther>
int FindEntryWith(const TOther& key)
{
if (mBuckets != NULL)
{
int_cosize hashCode = (int_cosize)BeefHash<TOther>()(key) & 0x7FFFFFFF;
for (int_cosize i = mBuckets[hashCode % mAllocSize]; i >= 0; i = mEntries[i].mNext)
{
if (mEntries[i].mHashCode == hashCode && (*(TKey*)&mEntries[i].mKey == key)) return i;
}
}
return -1;
}
void Initialize(intptr capacity)
{
int_cosize size = GetPrimeish((int_cosize)capacity);
AllocData(size, mEntries, mBuckets);
mAllocSize = size;
for (int_cosize i = 0; i < (int_cosize)mAllocSize; i++) mBuckets[i] = -1;
mFreeList = -1;
}
bool Insert(const TKey& key, bool add, TKey** keyPtr)
{
if (mBuckets == NULL) Initialize(0);
int_cosize hashCode = (int_cosize)BeefHash<TKey>()(key) & 0x7FFFFFFF;
int_cosize targetBucket = hashCode % (int_cosize)mAllocSize;
for (int_cosize i = mBuckets[targetBucket]; i >= 0; i = mEntries[i].mNext)
{
if ((mEntries[i].mHashCode == hashCode) && (*(TKey*)&mEntries[i].mKey == key))
{
if (add)
{
BF_FATAL("Duplicate key");
//ThrowUnimplemented();
//ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
}
//entries[i].value = value;
//mVersion++;
if (keyPtr != NULL)
*keyPtr = (TKey*)&mEntries[i].mKey;
return false;
}
}
int_cosize index;
if (mFreeCount > 0)
{
index = mFreeList;
mFreeList = mEntries[index].mNext;
mFreeCount--;
}
else
{
if (mCount == mAllocSize)
{
Resize();
targetBucket = hashCode % (int_cosize)mAllocSize;
}
index = mCount;
mCount++;
}
mEntries[index].mHashCode = hashCode;
mEntries[index].mNext = mBuckets[targetBucket];
new (&mEntries[index].mKey) TKey(key);
mBuckets[targetBucket] = index;
//mVersion++;
if (keyPtr != NULL)
*keyPtr = (TKey*)&mEntries[index].mKey;
return true;
}
void RemoveIdx(int_cosize bucket, int_cosize i, int_cosize last)
{
if (last < 0)
{
mBuckets[bucket] = mEntries[i].mNext;
}
else
{
mEntries[last].mNext = mEntries[i].mNext;
}
mEntries[i].mHashCode = -1;
mEntries[i].mNext = mFreeList;
((TKey*)&mEntries[i].mKey)->~TKey();
mFreeList = i;
mFreeCount++;
}
public:
HashSet()
{
mBuckets = NULL;
mEntries = NULL;
mAllocSize = 0;
mCount = 0;
mFreeList = 0;
mFreeCount = 0;
}
HashSet(const HashSet& val)
{
mAllocSize = val.mAllocSize;
mCount = val.mCount;
if (mAllocSize == 0)
{
mBuckets = NULL;
mEntries = NULL;
}
else
{
AllocData(mAllocSize, mEntries, mBuckets);
for (int_cosize i = 0; i < mAllocSize; i++)
mBuckets[i] = val.mBuckets[i];
for (int i = 0; i < mCount; i++)
{
auto& newEntry = mEntries[i];
auto& oldEntry = val.mEntries[i];
newEntry.mHashCode = oldEntry.mHashCode;
newEntry.mNext = oldEntry.mNext;
new (&newEntry.mKey) TKey(*(TKey*)&oldEntry.mKey);
}
for (int i = mCount; i < mAllocSize; i++)
{
mEntries[i].mHashCode = -1;
}
}
mFreeCount = val.mFreeCount;
mFreeList = val.mFreeList;
}
HashSet(HashSet&& val)
{
mAllocSize = val.mAllocSize;
mCount = val.mCount;
mBuckets = val.mBuckets;
mEntries = val.mEntries;
mFreeCount = val.mFreeCount;
mFreeList = val.mFreeList;
}
void AllocData(intptr size, Entry*& outEntries, int_cosize*& outBuckets)
{
uint8* data = (uint8*)this->rawAllocate(size * (sizeof(Entry) + sizeof(int_cosize)));
outEntries = (Entry*)data;
outBuckets = (int_cosize*)(data + size * sizeof(Entry));
}
void DeleteData()
{
this->rawDeallocate(mEntries);
}
~HashSet()
{
if (!std::is_pod<TKey>::value)
{
for (int_cosize i = 0; i < mCount; i++)
{
if (mEntries[i].mHashCode != -1)
((TKey*)&mEntries[i].mKey)->~TKey();
}
}
DeleteData();
}
HashSet& operator=(const HashSet& rhs)
{
Clear();
for (auto& val : rhs)
Add(val);
return *this;
}
intptr GetCount() const
{
return mCount - mFreeCount;
}
intptr size() const
{
return mCount - mFreeCount;
}
bool IsEmpty() const
{
return mCount - mFreeCount == 0;
}
void Reserve(intptr size)
{
if (size > mAllocSize)
Resize(size, false);
}
bool Add(const TKey& key)
{
if (!Insert(key, false, NULL))
return false;
return true;
}
bool TryAdd(const TKey& key, TKey** keyPtr)
{
if (!Insert(key, false, keyPtr))
return false;
return true;
}
bool TryGet(const TKey& key, TKey** keyPtr)
{
int idx = FindEntry(key);
if (idx == -1)
return false;
*keyPtr = (TKey*)&mEntries[idx].mKey;
return true;
}
template <typename TOther>
bool TryGetWith(const TOther& key, TKey** keyPtr)
{
int idx = FindEntryWith(key);
if (idx == -1)
return false;
*keyPtr = (TKey*)&mEntries[idx].mKey;
return true;
}
bool Remove(const TKey& key)
{
if (mBuckets != NULL)
{
int_cosize hashCode = (int_cosize)BeefHash<TKey>()(key) & 0x7FFFFFFF;
int_cosize bucket = hashCode % (int_cosize)mAllocSize;
int_cosize last = -1;
for (int_cosize i = mBuckets[bucket]; i >= 0; last = i, i = mEntries[i].mNext)
{
if ((mEntries[i].mHashCode == hashCode) && (*(TKey*)&mEntries[i].mKey == key))
{
RemoveIdx(bucket, i, last);
return true;
}
}
}
return false;
}
iterator Remove(const iterator& itr)
{
iterator nextItr = itr;
++nextItr;
auto& entry = mEntries[itr.mCurrentIdx];
BF_ASSERT(entry.mHashCode >= 0);
// We have to iterate through to mCurrentIdx so we can get 'last'
int_cosize hashCode = entry.mHashCode;
int_cosize bucket = hashCode % (int_cosize)mAllocSize;
int_cosize last = -1;
for (int_cosize i = mBuckets[bucket]; i >= 0; last = i, i = mEntries[i].mNext)
{
if ((mEntries[i].mHashCode == hashCode) && (i == itr.mCurrentIdx))
{
RemoveIdx(bucket, i, last);
return nextItr;
}
}
BF_FATAL("not found");
return nextItr;
}
void Clear()
{
if (mCount > 0)
{
for (int_cosize i = 0; i < mAllocSize; i++) mBuckets[i] = -1;
if (!std::is_pod<TKey>::value)
{
for (int_cosize i = 0; i < mCount; i++)
{
if (mEntries[i].mHashCode != -1)
((TKey*)&mEntries[i].mKey)->~TKey();
}
}
for (int_cosize i = 0; i < mCount; i++)
{
mEntries[i].mHashCode = -1;
}
mFreeList = -1;
mCount = 0;
mFreeCount = 0;
}
}
bool Contains(const TKey& key)
{
return FindEntry(key) >= 0;
}
template <typename TOther>
bool ContainsWith(const TOther& key)
{
return FindEntryWith(key) != -1;
}
// Note: there's no const_iterator
iterator begin() const
{
iterator itr((HashSet*)this, 0);
++itr;
return itr;
}
iterator end() const
{
return iterator((HashSet*)this, mCount + 1);
}
HashSelector SelectHashes(size_t hashCode)
{
return HashSelector(this, (int32)hashCode);
}
};
NS_BF_END;