mirror of
https://github.com/beefytech/Beef.git
synced 2025-07-04 23:36:00 +02:00
Merge pull request #1277 from aharabada/FuzzyAutoComplete
Fuzzy string matching for autocomplete
This commit is contained in:
commit
634dd7e509
17 changed files with 656 additions and 97 deletions
|
@ -508,3 +508,11 @@ Path = "X86Target.h"
|
|||
[[ProjectFolder.Items]]
|
||||
Type = "Source"
|
||||
Path = "X86XmmInfo.cpp"
|
||||
|
||||
[[ProjectFolder.Items]]
|
||||
Type = "Folder"
|
||||
Name = "third_party"
|
||||
|
||||
[[ProjectFolder.Items.Items]]
|
||||
Type = "Source"
|
||||
Path = "third_party/FtsFuzzyMatch.h"
|
||||
|
|
|
@ -6,6 +6,9 @@
|
|||
#include "BfFixits.h"
|
||||
#include "BfResolvedTypeUtils.h"
|
||||
|
||||
#define FTS_FUZZY_MATCH_IMPLEMENTATION
|
||||
#include "../third_party/FtsFuzzyMatch.h"
|
||||
|
||||
#pragma warning(disable:4996)
|
||||
|
||||
using namespace llvm;
|
||||
|
@ -16,6 +19,7 @@ AutoCompleteBase::AutoCompleteBase()
|
|||
{
|
||||
mIsGetDefinition = false;
|
||||
mIsAutoComplete = true;
|
||||
mDoFuzzyAutoComplete = false;
|
||||
mInsertStartIdx = -1;
|
||||
mInsertEndIdx = -1;
|
||||
}
|
||||
|
@ -25,22 +29,70 @@ AutoCompleteBase::~AutoCompleteBase()
|
|||
Clear();
|
||||
}
|
||||
|
||||
AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry, const StringImpl& filter)
|
||||
// Publishes the match-index buffer produced by DoesFilterMatch on `entry`.
//
// matches: buffer of match indices. A first element of UINT8_MAX is the
//          sentinel for "no match data".
// entry:   receives mMatches / mMatchesLength. mMatches aliases `matches`,
//          it is not copied here.
//
// Note: entry.mMatchesLength should be the amount of unicode-codepoints in the filter
inline void UpdateEntryMatchindices(uint8* matches, AutoCompleteEntry& entry)
{
	// Sentinel present: there is nothing to expose
	if (matches[0] == UINT8_MAX)
	{
		entry.mMatches = nullptr;
		entry.mMatchesLength = 0;
		return;
	}

	// Slot 0 may legitimately hold index 0, so the scan for the terminating
	// zero starts at slot 1. The count is capped at UINT8_MAX.
	uint8 matchCount = 1;
	while ((matchCount != UINT8_MAX) && (matches[matchCount] != 0))
		matchCount++;

	entry.mMatchesLength = matchCount;
	entry.mMatches = matches;
}
|
||||
|
||||
AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry, const char* filter)
|
||||
// Adds `entry` to the result set if its display text passes `filter`.
//
// Convenience overload for StringImpl filters. It forwards to the
// `const char*` overload so the filtering / match-index bookkeeping lives in
// exactly one place instead of being duplicated line for line.
//
// Returns the stored entry, or NULL when the entry was filtered out.
AutoCompleteEntry* AutoCompleteBase::AddEntry(AutoCompleteEntry& entry, const StringImpl& filter)
{
	return AddEntry(entry, filter.c_str());
}
|
||||
|
||||
// Adds `entry` to the result set if its display text passes `filter`.
//
// entry:  candidate entry. Its mScore is written by DoesFilterMatch. Its
//         mMatches / mMatchesLength are set only for the duration of the
//         inner AddEntry call and are reset before returning.
// filter: text to match entry.mDisplay against.
//
// Returns the stored entry, or NULL when the filter does not match or
// entry.mNamePrefixCount is negative.
AutoCompleteEntry* AutoCompleteBase::AddEntry(AutoCompleteEntry& entry, const char* filter)
{
	uint8 matches[256];

	// Pre-seed the "no match data" sentinel. DoesFilterMatch enters its
	// mIsGetDefinition branch before it writes matches[0] itself, so without
	// this UpdateEntryMatchindices could read uninitialized stack memory.
	matches[0] = UINT8_MAX;

	if (!DoesFilterMatch(entry.mDisplay, filter, entry.mScore, matches, (int)sizeof(matches)) || (entry.mNamePrefixCount < 0))
		return NULL;

	UpdateEntryMatchindices(matches, entry);

	auto result = AddEntry(entry);

	// Reset matches because the array will be invalid after return
	entry.mMatches = nullptr;
	entry.mMatchesLength = 0;

	return result;
}
|
||||
|
||||
AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry)
|
||||
{
|
||||
{
|
||||
if (mEntriesSet.mAllocSize == 0)
|
||||
{
|
||||
mEntriesSet.Reserve(128);
|
||||
|
@ -55,13 +107,16 @@ AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry)
|
|||
int size = (int)strlen(display) + 1;
|
||||
insertedEntry->mDisplay = (char*)mAlloc.AllocBytes(size);
|
||||
memcpy((char*)insertedEntry->mDisplay, display, size);
|
||||
|
||||
insertedEntry->mMatches = (uint8*)mAlloc.AllocBytes(insertedEntry->mMatchesLength);
|
||||
memcpy((char*)insertedEntry->mMatches, entry.mMatches, insertedEntry->mMatchesLength);
|
||||
}
|
||||
|
||||
return insertedEntry;
|
||||
}
|
||||
|
||||
bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter)
|
||||
{
|
||||
bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter, int& score, uint8* matches, int maxMatches)
|
||||
{
|
||||
if (mIsGetDefinition)
|
||||
{
|
||||
int entryLen = (int)strlen(entry);
|
||||
|
@ -73,59 +128,71 @@ bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter)
|
|||
if (!mIsAutoComplete)
|
||||
return false;
|
||||
|
||||
if (filter[0] == 0)
|
||||
matches[0] = UINT8_MAX;
|
||||
|
||||
if (filter[0] == '\0')
|
||||
return true;
|
||||
|
||||
int filterLen = (int)strlen(filter);
|
||||
int entryLen = (int)strlen(entry);
|
||||
|
||||
bool hasUnderscore = false;
|
||||
bool checkInitials = filterLen > 1;
|
||||
for (int i = 0; i < (int)filterLen; i++)
|
||||
{
|
||||
char c = filter[i];
|
||||
if (c == '_')
|
||||
hasUnderscore = true;
|
||||
else if (islower((uint8)filter[i]))
|
||||
checkInitials = false;
|
||||
}
|
||||
|
||||
if (hasUnderscore)
|
||||
return strnicmp(filter, entry, filterLen) == 0;
|
||||
|
||||
char initialStr[256];
|
||||
char* initialStrP = initialStr;
|
||||
|
||||
//String initialStr;
|
||||
bool prevWasUnderscore = false;
|
||||
|
||||
for (int entryIdx = 0; entryIdx < entryLen; entryIdx++)
|
||||
{
|
||||
char entryC = entry[entryIdx];
|
||||
|
||||
if (entryC == '_')
|
||||
{
|
||||
prevWasUnderscore = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((entryIdx == 0) || (prevWasUnderscore) || (isupper((uint8)entryC) || (isdigit((uint8)entryC))))
|
||||
{
|
||||
if (strnicmp(filter, entry + entryIdx, filterLen) == 0)
|
||||
return true;
|
||||
if (checkInitials)
|
||||
*(initialStrP++) = entryC;
|
||||
}
|
||||
prevWasUnderscore = false;
|
||||
|
||||
if (filterLen == 1)
|
||||
break; // Don't check inners for single-character case
|
||||
}
|
||||
|
||||
if (!checkInitials)
|
||||
if (filterLen > entryLen)
|
||||
return false;
|
||||
*(initialStrP++) = 0;
|
||||
return strnicmp(filter, initialStr, filterLen) == 0;
|
||||
|
||||
if (mDoFuzzyAutoComplete)
|
||||
{
|
||||
return fts::fuzzy_match(filter, entry, score, matches, maxMatches);
|
||||
}
|
||||
else
|
||||
{
|
||||
bool hasUnderscore = false;
|
||||
bool checkInitials = filterLen > 1;
|
||||
for (int i = 0; i < (int)filterLen; i++)
|
||||
{
|
||||
char c = filter[i];
|
||||
if (c == '_')
|
||||
hasUnderscore = true;
|
||||
else if (islower((uint8)filter[i]))
|
||||
checkInitials = false;
|
||||
}
|
||||
|
||||
if (hasUnderscore)
|
||||
return strnicmp(filter, entry, filterLen) == 0;
|
||||
|
||||
char initialStr[256];
|
||||
char* initialStrP = initialStr;
|
||||
|
||||
//String initialStr;
|
||||
bool prevWasUnderscore = false;
|
||||
|
||||
for (int entryIdx = 0; entryIdx < entryLen; entryIdx++)
|
||||
{
|
||||
char entryC = entry[entryIdx];
|
||||
|
||||
if (entryC == '_')
|
||||
{
|
||||
prevWasUnderscore = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((entryIdx == 0) || (prevWasUnderscore) || (isupper((uint8)entryC) || (isdigit((uint8)entryC))))
|
||||
{
|
||||
if (strnicmp(filter, entry + entryIdx, filterLen) == 0)
|
||||
return true;
|
||||
if (checkInitials)
|
||||
*(initialStrP++) = entryC;
|
||||
}
|
||||
prevWasUnderscore = false;
|
||||
|
||||
if (filterLen == 1)
|
||||
break; // Don't check inners for single-character case
|
||||
}
|
||||
|
||||
if (!checkInitials)
|
||||
return false;
|
||||
*(initialStrP++) = 0;
|
||||
return strnicmp(filter, initialStr, filterLen) == 0;
|
||||
}
|
||||
}
|
||||
|
||||
void AutoCompleteBase::Clear()
|
||||
|
@ -137,7 +204,7 @@ void AutoCompleteBase::Clear()
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
BfAutoComplete::BfAutoComplete(BfResolveType resolveType)
|
||||
BfAutoComplete::BfAutoComplete(BfResolveType resolveType, bool doFuzzyAutoComplete)
|
||||
{
|
||||
mResolveType = resolveType;
|
||||
mModule = NULL;
|
||||
|
@ -154,6 +221,8 @@ BfAutoComplete::BfAutoComplete(BfResolveType resolveType)
|
|||
(resolveType == BfResolveType_GoToDefinition);
|
||||
mIsAutoComplete = (resolveType == BfResolveType_Autocomplete);
|
||||
|
||||
mDoFuzzyAutoComplete = doFuzzyAutoComplete;
|
||||
|
||||
mGetDefinitionNode = NULL;
|
||||
mShowAttributeProperties = NULL;
|
||||
mIdentifierUsed = NULL;
|
||||
|
@ -550,7 +619,9 @@ void BfAutoComplete::AddTypeDef(BfTypeDef* typeDef, const StringImpl& filter, bo
|
|||
return;
|
||||
}
|
||||
|
||||
if (!DoesFilterMatch(name.c_str(), filter.c_str()))
|
||||
int score;
|
||||
uint8 matches[256];
|
||||
if (!DoesFilterMatch(name.c_str(), filter.c_str(), score, matches, sizeof(matches)))
|
||||
return;
|
||||
|
||||
auto type = mModule->ResolveTypeDef(typeDef, BfPopulateType_Declaration);
|
||||
|
@ -1128,8 +1199,10 @@ void BfAutoComplete::AddExtensionMethods(BfTypeInstance* targetType, BfTypeInsta
|
|||
if (methodInstance == NULL)
|
||||
continue;
|
||||
|
||||
int score;
|
||||
uint8 matches[256];
|
||||
// Do filter match first- may be cheaper than generic validation
|
||||
if (!DoesFilterMatch(methodDef->mName.c_str(), filter.c_str()))
|
||||
if (!DoesFilterMatch(methodDef->mName.c_str(), filter.c_str(), score, matches, sizeof(matches)))
|
||||
continue;
|
||||
|
||||
auto thisType = methodInstance->GetParamType(0);
|
||||
|
|
|
@ -16,11 +16,16 @@ public:
|
|||
const char* mDisplay;
|
||||
const char* mDocumentation;
|
||||
int8 mNamePrefixCount;
|
||||
int mScore;
|
||||
uint8* mMatches;
|
||||
uint8 mMatchesLength;
|
||||
|
||||
public:
|
||||
AutoCompleteEntry()
|
||||
{
|
||||
mNamePrefixCount = 0;
|
||||
mMatches = nullptr;
|
||||
mMatchesLength = 0;
|
||||
}
|
||||
|
||||
AutoCompleteEntry(const char* entryType, const char* display)
|
||||
|
@ -29,6 +34,9 @@ public:
|
|||
mDisplay = display;
|
||||
mDocumentation = NULL;
|
||||
mNamePrefixCount = 0;
|
||||
mScore = 0;
|
||||
mMatches = nullptr;
|
||||
mMatchesLength = 0;
|
||||
}
|
||||
|
||||
AutoCompleteEntry(const char* entryType, const StringImpl& display)
|
||||
|
@ -37,6 +45,9 @@ public:
|
|||
mDisplay = display.c_str();
|
||||
mDocumentation = NULL;
|
||||
mNamePrefixCount = 0;
|
||||
mScore = 0;
|
||||
mMatches = nullptr;
|
||||
mMatchesLength = 0;
|
||||
}
|
||||
|
||||
AutoCompleteEntry(const char* entryType, const StringImpl& display, int namePrefixCount)
|
||||
|
@ -45,8 +56,11 @@ public:
|
|||
mDisplay = display.c_str();
|
||||
mDocumentation = NULL;
|
||||
mNamePrefixCount = (int8)namePrefixCount;
|
||||
mScore = 0;
|
||||
mMatches = nullptr;
|
||||
mMatchesLength = 0;
|
||||
}
|
||||
|
||||
|
||||
bool operator==(const AutoCompleteEntry& other) const
|
||||
{
|
||||
return strcmp(mDisplay, other.mDisplay) == 0;
|
||||
|
@ -97,12 +111,13 @@ public:
|
|||
|
||||
bool mIsGetDefinition;
|
||||
bool mIsAutoComplete;
|
||||
bool mDoFuzzyAutoComplete;
|
||||
int mInsertStartIdx;
|
||||
int mInsertEndIdx;
|
||||
|
||||
bool DoesFilterMatch(const char* entry, const char* filter);
|
||||
AutoCompleteEntry* AddEntry(const AutoCompleteEntry& entry, const StringImpl& filter);
|
||||
AutoCompleteEntry* AddEntry(const AutoCompleteEntry& entry, const char* filter);
|
||||
bool DoesFilterMatch(const char* entry, const char* filter, int& score, uint8* matches, int maxMatches);
|
||||
AutoCompleteEntry* AddEntry(AutoCompleteEntry& entry, const StringImpl& filter);
|
||||
AutoCompleteEntry* AddEntry(AutoCompleteEntry& entry, const char* filter);
|
||||
AutoCompleteEntry* AddEntry(const AutoCompleteEntry& entry);
|
||||
|
||||
AutoCompleteBase();
|
||||
|
@ -226,7 +241,7 @@ public:
|
|||
String ConstantToString(BfIRConstHolder* constHolder, BfIRValue id);
|
||||
|
||||
public:
|
||||
BfAutoComplete(BfResolveType resolveType = BfResolveType_Autocomplete);
|
||||
BfAutoComplete(BfResolveType resolveType = BfResolveType_Autocomplete, bool doFuzzyAutoComplete = false);
|
||||
~BfAutoComplete();
|
||||
|
||||
void SetModule(BfModule* module);
|
||||
|
|
|
@ -8026,9 +8026,13 @@ void BfCompiler::GenerateAutocompleteInfo()
|
|||
{
|
||||
entries.Add(&entry);
|
||||
}
|
||||
|
||||
std::sort(entries.begin(), entries.end(), [](AutoCompleteEntry* lhs, AutoCompleteEntry* rhs)
|
||||
{
|
||||
return stricmp(lhs->mDisplay, rhs->mDisplay) < 0;
|
||||
if (lhs->mScore == rhs->mScore)
|
||||
return stricmp(lhs->mDisplay, rhs->mDisplay) < 0;
|
||||
|
||||
return lhs->mScore > rhs->mScore;
|
||||
});
|
||||
|
||||
String docString;
|
||||
|
@ -8043,6 +8047,25 @@ void BfCompiler::GenerateAutocompleteInfo()
|
|||
autoCompleteResultString += '@';
|
||||
autoCompleteResultString += String(entry->mDisplay);
|
||||
|
||||
if (entry->mMatchesLength > 0)
|
||||
{
|
||||
autoCompleteResultString += "\x02";
|
||||
for (int i = 0; i < entry->mMatchesLength; i++)
|
||||
{
|
||||
int match = entry->mMatches[i];
|
||||
|
||||
// Need max 3 chars (largest Hex (FF) + '\0')
|
||||
char buffer[3];
|
||||
|
||||
_itoa_s(match, buffer, 16);
|
||||
|
||||
autoCompleteResultString += String(buffer);
|
||||
autoCompleteResultString += ",";
|
||||
}
|
||||
|
||||
autoCompleteResultString += "X";
|
||||
}
|
||||
|
||||
if (entry->mDocumentation != NULL)
|
||||
{
|
||||
autoCompleteResultString += '\x03';
|
||||
|
|
|
@ -2071,7 +2071,7 @@ void BfModule::UpdateCEEmit(CeEmitContext* ceEmitContext, BfTypeInstance* typeIn
|
|||
{
|
||||
for (int ifaceTypeId : ceEmitContext->mInterfaces)
|
||||
typeInstance->mCeTypeInfo->mPendingInterfaces.Add(ifaceTypeId);
|
||||
|
||||
|
||||
if (ceEmitContext->mEmitData.IsEmpty())
|
||||
return;
|
||||
|
||||
|
|
|
@ -3898,13 +3898,13 @@ BF_EXPORT const char* BF_CALLTYPE BfParser_GetDebugExpressionAt(BfParser* bfPars
|
|||
return outString.c_str();
|
||||
}
|
||||
|
||||
BF_EXPORT BfResolvePassData* BF_CALLTYPE BfParser_CreateResolvePassData(BfParser* bfParser, BfResolveType resolveType)
|
||||
// Allocates a BfResolvePassData for `bfParser` and `resolveType`.
//
// A BfAutoComplete is attached only when a parser is supplied and its
// ParserFlag_Autocomplete flag is set. `doFuzzyAutoComplete` is forwarded to
// that BfAutoComplete.
BF_EXPORT BfResolvePassData* BF_CALLTYPE BfParser_CreateResolvePassData(BfParser* bfParser, BfResolveType resolveType, bool doFuzzyAutoComplete)
{
	BfResolvePassData* passData = new BfResolvePassData();
	passData->mResolveType = resolveType;
	passData->mParser = bfParser;

	const bool wantsAutoComplete = (bfParser != NULL) && ((bfParser->mParserFlags & ParserFlag_Autocomplete) != 0);
	if (wantsAutoComplete)
	{
		passData->mAutoComplete = new BfAutoComplete(resolveType, doFuzzyAutoComplete);
	}

	return passData;
}
|
||||
|
||||
|
|
|
@ -400,6 +400,7 @@
|
|||
<ClInclude Include="Compiler\BfUtil.h" />
|
||||
<ClInclude Include="Compiler\BfVarDeclChecker.h" />
|
||||
<ClInclude Include="Compiler\CeMachine.h" />
|
||||
<ClInclude Include="third_party\FtsFuzzyMatch.h" />
|
||||
<ClInclude Include="Compiler\MemReporter.h" />
|
||||
<ClInclude Include="DbgMiniDump.h" />
|
||||
<ClInclude Include="Debugger.h" />
|
||||
|
|
|
@ -24,6 +24,9 @@
|
|||
<Filter Include="Beef">
|
||||
<UniqueIdentifier>{83b97406-2f83-49ad-bbbc-3ff70ecda6bb}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="third_party">
|
||||
<UniqueIdentifier>{d36777f2-b326-4a8c-84a3-5c2f39153f75}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Compiler\BfAst.cpp">
|
||||
|
@ -399,5 +402,8 @@
|
|||
<ClInclude Include="Compiler\CeMachine.h">
|
||||
<Filter>Compiler</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="third_party\FtsFuzzyMatch.h">
|
||||
<Filter>third_party</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -183,7 +183,7 @@ namespace Tests
|
|||
mStr.AppendF($"{name} {val}\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
interface ISerializable
|
||||
{
|
||||
void Serialize(SerializationContext ctx);
|
||||
|
|
256
IDEHelper/third_party/FtsFuzzyMatch.h
vendored
Normal file
256
IDEHelper/third_party/FtsFuzzyMatch.h
vendored
Normal file
|
@ -0,0 +1,256 @@
|
|||
// LICENSE
|
||||
//
|
||||
// This software is dual-licensed to the public domain and under the following
|
||||
// license: you are granted a perpetual, irrevocable license to copy, modify,
|
||||
// publish, and distribute this file as you see fit.
|
||||
//
|
||||
// VERSION
|
||||
// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
|
||||
// 0.1.0 (2016-03-28) Initial release
|
||||
//
|
||||
// AUTHOR
|
||||
// Forrest Smith
|
||||
//
|
||||
// NOTES
|
||||
// Compiling
|
||||
// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
|
||||
//
|
||||
// fuzzy_match_simple(...)
|
||||
// Returns true if each character in pattern is found sequentially within str
|
||||
//
|
||||
// fuzzy_match(...)
|
||||
// Returns true if pattern is found AND calculates a score.
|
||||
// Performs exhaustive search via recursion to find all possible matches and match with highest score.
|
||||
// Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
|
||||
// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
|
||||
// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
|
||||
// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
|
||||
|
||||
|
||||
#ifndef FTS_FUZZY_MATCH_H
|
||||
#define FTS_FUZZY_MATCH_H
|
||||
|
||||
|
||||
#include <cstdint> // uint8_t
|
||||
#include <ctype.h> // ::tolower, ::toupper
|
||||
#include <cstring> // memcpy
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include "BeefySysLib/util/UTF8.h"
|
||||
#include "BeefySysLib/third_party/utf8proc/utf8proc.h"
|
||||
|
||||
// Public interface
|
||||
namespace fts {
|
||||
static bool fuzzy_match_simple(char const* pattern, char const* str);
|
||||
static bool fuzzy_match(char const* pattern, char const* str, int& outScore);
|
||||
static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches);
|
||||
}
|
||||
|
||||
BF_EXPORT bool BF_CALLTYPE fts_fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches);
|
||||
|
||||
#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
|
||||
namespace fts {
|
||||
|
||||
// Forward declarations for "private" implementation
|
||||
namespace fuzzy_internal {
|
||||
static bool fuzzy_match_recursive(const char* pattern, const char* str, int& outScore, const char* strBegin,
|
||||
uint8_t const* srcMatches, uint8_t* newMatches, int maxMatches, int nextMatch,
|
||||
int& recursionCount, int recursionLimit);
|
||||
}
|
||||
|
||||
// Public interface
|
||||
// Returns true if every character of `pattern` occurs in `str` in the same
// order (not necessarily adjacent). Comparison is byte-wise and
// case-insensitive via tolower(). An empty pattern matches any string.
static bool fuzzy_match_simple(char const* pattern, char const* str) {
    while (*pattern != '\0' && *str != '\0') {
        // tolower() requires a value representable as unsigned char (or EOF);
        // passing a negative plain char (any non-ASCII UTF-8 byte) is undefined.
        const unsigned char patternByte = static_cast<unsigned char>(*pattern);
        const unsigned char strByte = static_cast<unsigned char>(*str);

        if (tolower(patternByte) == tolower(strByte))
            ++pattern;
        ++str;
    }

    // Matched only if the whole pattern was consumed
    return *pattern == '\0';
}
|
||||
|
||||
static bool fuzzy_match(char const* pattern, char const* str, int& outScore) {
|
||||
|
||||
uint8_t matches[256];
|
||||
return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
|
||||
}
|
||||
|
||||
static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches) {
|
||||
int recursionCount = 0;
|
||||
int recursionLimit = 10;
|
||||
|
||||
return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, maxMatches, 0, recursionCount, recursionLimit);
|
||||
}
|
||||
|
||||
// True when utf8proc classifies code point `c` as category Ll.
bool IsLower(uint32 c)
{
    const auto category = utf8proc_category(c);
    return category == UTF8PROC_CATEGORY_LL;
}
|
||||
|
||||
// True when utf8proc classifies code point `c` as category Lu.
bool IsUpper(uint32 c)
{
    const auto category = utf8proc_category(c);
    return category == UTF8PROC_CATEGORY_LU;
}
|
||||
|
||||
// Private implementation
|
||||
static bool fuzzy_internal::fuzzy_match_recursive(const char* pattern, const char* str, int& outScore,
|
||||
const char* strBegin, uint8_t const* srcMatches, uint8_t* matches, int maxMatches,
|
||||
int nextMatch, int& recursionCount, int recursionLimit)
|
||||
{
|
||||
// Count recursions
|
||||
++recursionCount;
|
||||
if (recursionCount >= recursionLimit)
|
||||
return false;
|
||||
|
||||
// Detect end of strings
|
||||
if (*pattern == '\0' || *str == '\0')
|
||||
return false;
|
||||
|
||||
// Recursion params
|
||||
bool recursiveMatch = false;
|
||||
uint8_t bestRecursiveMatches[256];
|
||||
int bestRecursiveScore = 0;
|
||||
|
||||
// Loop through pattern and str looking for a match
|
||||
bool first_match = true;
|
||||
while (*pattern != '\0' && *str != '\0') {
|
||||
|
||||
int patternOffset = 0;
|
||||
uint32 patternChar = Beefy::u8_nextchar((char*)pattern, &patternOffset);
|
||||
int strOffset = 0;
|
||||
uint32 strChar = Beefy::u8_nextchar((char*)str, &strOffset);
|
||||
|
||||
// TODO: tolower only works for A-Z
|
||||
// Found match
|
||||
if (utf8proc_tolower(patternChar) == utf8proc_tolower(strChar)) {
|
||||
|
||||
// Supplied matches buffer was too short
|
||||
if (nextMatch >= maxMatches)
|
||||
return false;
|
||||
|
||||
// "Copy-on-Write" srcMatches into matches
|
||||
if (first_match && srcMatches) {
|
||||
memcpy(matches, srcMatches, nextMatch);
|
||||
first_match = false;
|
||||
}
|
||||
|
||||
// Recursive call that "skips" this match
|
||||
uint8_t recursiveMatches[256];
|
||||
int recursiveScore;
|
||||
if (fuzzy_match_recursive(pattern, str + strOffset, recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {
|
||||
|
||||
// Pick best recursive score
|
||||
if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
|
||||
memcpy(bestRecursiveMatches, recursiveMatches, 256);
|
||||
bestRecursiveScore = recursiveScore;
|
||||
}
|
||||
recursiveMatch = true;
|
||||
}
|
||||
|
||||
// Advance
|
||||
matches[nextMatch++] = (uint8_t)(str - strBegin);
|
||||
// Clear the next char so that we know which match is the last one
|
||||
matches[nextMatch + 1] = 0;
|
||||
pattern += patternOffset;
|
||||
}
|
||||
str += strOffset;
|
||||
}
|
||||
|
||||
// Determine if full pattern was matched
|
||||
bool matched = *pattern == '\0' ? true : false;
|
||||
|
||||
// Calculate score
|
||||
if (matched) {
|
||||
const int sequential_bonus = 15; // bonus for adjacent matches
|
||||
const int separator_bonus = 30; // bonus if match occurs after a separator
|
||||
const int camel_bonus = 30; // bonus if match is uppercase and prev is lower
|
||||
const int first_letter_bonus = 15; // bonus if the first letter is matched
|
||||
|
||||
const int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match
|
||||
const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
|
||||
const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
|
||||
|
||||
// Iterate str to end
|
||||
while (*str != '\0')
|
||||
++str;
|
||||
|
||||
// Initialize score
|
||||
outScore = 100;
|
||||
|
||||
// Apply leading letter penalty
|
||||
int penalty = leading_letter_penalty * matches[0];
|
||||
if (penalty < max_leading_letter_penalty)
|
||||
penalty = max_leading_letter_penalty;
|
||||
outScore += penalty;
|
||||
|
||||
// Apply unmatched penalty
|
||||
int unmatched = (int)(str - strBegin) - nextMatch;
|
||||
outScore += unmatched_letter_penalty * unmatched;
|
||||
|
||||
// Apply ordering bonuses
|
||||
for (int i = 0; i < nextMatch; ++i) {
|
||||
uint8_t currIdx = matches[i];
|
||||
|
||||
int currOffset = currIdx;
|
||||
uint32 curr = Beefy::u8_nextchar((char*)strBegin, &currOffset);
|
||||
|
||||
if (i > 0) {
|
||||
uint8_t prevIdx = matches[i - 1];
|
||||
|
||||
int offsetPrevidx = prevIdx;
|
||||
Beefy::u8_inc((char*)strBegin, &offsetPrevidx);
|
||||
|
||||
// Sequential
|
||||
if (currIdx == offsetPrevidx)
|
||||
outScore += sequential_bonus;
|
||||
}
|
||||
|
||||
// Check for bonuses based on neighbor character value
|
||||
if (currIdx > 0) {
|
||||
int neighborOffset = currIdx;
|
||||
Beefy::u8_dec((char*)strBegin, &neighborOffset);
|
||||
uint32 neighbor = Beefy::u8_nextchar((char*)strBegin, &neighborOffset);
|
||||
|
||||
// Camel case
|
||||
if (IsLower(neighbor) && IsUpper(curr))
|
||||
outScore += camel_bonus;
|
||||
|
||||
// Separator
|
||||
bool neighborSeparator = neighbor == '_' || neighbor == ' ';
|
||||
if (neighborSeparator)
|
||||
outScore += separator_bonus;
|
||||
}
|
||||
else {
|
||||
// First letter
|
||||
outScore += first_letter_bonus;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return best result
|
||||
if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
|
||||
// Recursive score is better than "this"
|
||||
memcpy(matches, bestRecursiveMatches, maxMatches);
|
||||
outScore = bestRecursiveScore;
|
||||
return true;
|
||||
}
|
||||
else if (matched) {
|
||||
// "this" score is better than recursive
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
// no match
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} // namespace fts
|
||||
|
||||
// Exported entry point: forwards all arguments unchanged to fts::fuzzy_match
// and returns its result.
BF_EXPORT bool BF_CALLTYPE fts_fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches)
{
    const bool isMatch = fts::fuzzy_match(pattern, str, outScore, matches, maxMatches);
    return isMatch;
}
|
||||
|
||||
#endif // FTS_FUZZY_MATCH_IMPLEMENTATION
|
||||
|
||||
#endif // FTS_FUZZY_MATCH_H
|
Loading…
Add table
Add a link
Reference in a new issue