1
0
Fork 0
mirror of https://github.com/beefytech/Beef.git synced 2025-06-09 12:02:21 +02:00

Basic fuzzy search for autocomplete

This commit is contained in:
Simon Lübeß 2021-12-08 22:08:57 +01:00
parent 195c705a46
commit f9f53eb97b
7 changed files with 347 additions and 20 deletions

View file

@ -382,6 +382,7 @@ namespace IDE.ui
public String mEntryInsert;
public String mDocumentation;
public Image mIcon;
public List<uint8> mMatchIndices;
public float Y
{
@ -401,7 +402,31 @@ namespace IDE.ui
g.Draw(mIcon, 0, 0);
g.SetFont(IDEApp.sApp.mCodeFont);
g.DrawString(mEntryDisplay, GS!(20), 0);
float offset = GS!(20);
// TODO(FUZZY): this is not unicode compatible
for(int i < mEntryDisplay.Length)
{
char8 c = mEntryDisplay[i];
if(mMatchIndices.Contains((uint8)i))
{
g.PushColor(.Blue);
}
else
{
g.PushColor(.White);
}
g.DrawString(.(&c, 1), offset, 0);
offset += IDEApp.sApp.mCodeFont.GetWidth(.(&c, 1));
g.PopColor();
}
//g.DrawString(mEntryDisplay, GS!(20), 0);
}
}
@ -603,7 +628,7 @@ namespace IDE.ui
}*/
}
public void AddEntry(StringView entryType, StringView entryDisplay, Image icon, StringView entryInsert = default, StringView documentation = default)
public void AddEntry(StringView entryType, StringView entryDisplay, Image icon, StringView entryInsert = default, StringView documentation = default, List<uint8> matchIndices = null)
{
var entryWidget = new:mAlloc EntryWidget();
entryWidget.mAutoCompleteListWidget = this;
@ -614,6 +639,9 @@ namespace IDE.ui
if (!documentation.IsEmpty)
entryWidget.mDocumentation = new:mAlloc String(documentation);
entryWidget.mIcon = icon;
// TODO(FUZZY): There may be a better way
if (matchIndices != null && !matchIndices.IsEmpty)
entryWidget.mMatchIndices = new:mAlloc List<uint8>(matchIndices.GetEnumerator());
UpdateEntry(entryWidget, mEntryList.Count);
mEntryList.Add(entryWidget);
@ -1981,9 +2009,9 @@ namespace IDE.ui
InvokeWidget oldInvokeWidget = null;
String selectString = null;
List<uint8> matchIndices = new:ScopedAlloc! .(256);
for (var entryView in info.Split('\n'))
{
Image entryIcon = null;
StringView entryType = StringView(entryView);
int tabPos = entryType.IndexOf('\t');
@ -1993,13 +2021,34 @@ namespace IDE.ui
entryDisplay = StringView(entryView, tabPos + 1);
entryType = StringView(entryType, 0, tabPos);
}
StringView matches = default;
int matchesPos = entryDisplay.IndexOf('\x02');
matchIndices.Clear();
if (matchesPos != -1)
{
matches = StringView(entryDisplay, matchesPos + 1);
entryDisplay = StringView(entryDisplay, 0, matchesPos);
for(var sub in matches.Split(','))
{
if(sub == "X")
break;
var result = int64.Parse(sub, .HexNumber);
Debug.Assert((result case .Ok(let value)) && value <= uint8.MaxValue);
// TODO(FUZZY): we could save start and length instead of single chars
matchIndices.Add((uint8)result.Value);
}
}
StringView documentation = default;
int docPos = entryDisplay.IndexOf('\x03');
int docPos = matches.IndexOf('\x03');
if (docPos != -1)
{
documentation = StringView(entryDisplay, docPos + 1);
entryDisplay = StringView(entryDisplay, 0, docPos);
documentation = StringView(matches, docPos + 1);
matches = StringView(matches, 0, docPos);
}
StringView entryInsert = default;
@ -2128,7 +2177,7 @@ namespace IDE.ui
if (!mInvokeOnly)
{
mIsFixit |= entryType == "fixit";
mAutoCompleteListWidget.AddEntry(entryType, entryDisplay, entryIcon, entryInsert, documentation);
mAutoCompleteListWidget.AddEntry(entryType, entryDisplay, entryIcon, entryInsert, documentation, matchIndices);
}
}
}

View file

@ -6,6 +6,9 @@
#include "BfFixits.h"
#include "BfResolvedTypeUtils.h"
#define FTS_FUZZY_MATCH_IMPLEMENTATION
#include "FtsFuzzyMatch.h"
#pragma warning(disable:4996)
using namespace llvm;
@ -25,16 +28,16 @@ AutoCompleteBase::~AutoCompleteBase()
Clear();
}
AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry, const StringImpl& filter)
AutoCompleteEntry* AutoCompleteBase::AddEntry(AutoCompleteEntry& entry, const StringImpl& filter)
{
if ((!DoesFilterMatch(entry.mDisplay, filter.c_str())) || (entry.mNamePrefixCount < 0))
if ((!DoesFilterMatch(entry.mDisplay, filter.c_str(), entry.mScore, entry.mMatches, sizeof(entry.mMatches))) || (entry.mNamePrefixCount < 0))
return NULL;
return AddEntry(entry);
}
AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry, const char* filter)
AutoCompleteEntry* AutoCompleteBase::AddEntry(AutoCompleteEntry& entry, const char* filter)
{
if ((!DoesFilterMatch(entry.mDisplay, filter)) || (entry.mNamePrefixCount < 0))
if ((!DoesFilterMatch(entry.mDisplay, filter, entry.mScore, entry.mMatches, sizeof(entry.mMatches))) || (entry.mNamePrefixCount < 0))
return NULL;
return AddEntry(entry);
}
@ -60,7 +63,7 @@ AutoCompleteEntry* AutoCompleteBase::AddEntry(const AutoCompleteEntry& entry)
return insertedEntry;
}
bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter)
bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter, int& score, uint8* matches, int maxMatches)
{
if (mIsGetDefinition)
{
@ -73,12 +76,28 @@ bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter)
if (!mIsAutoComplete)
return false;
if (filter[0] == 0)
if (filter[0] == '\0')
{
// Kinda dirty
matches[0] = UINT8_MAX;
matches[1] = 0;
return true;
}
int filterLen = (int)strlen(filter);
int entryLen = (int)strlen(entry);
if (filterLen > entryLen)
{
// Kinda dirty
matches[0] = UINT8_MAX;
matches[1] = 0;
return false;
}
// TODO: also do matches (but probably optimize them)
return fts::fuzzy_match(filter, entry, score, matches, maxMatches);
/*
bool hasUnderscore = false;
bool checkInitials = filterLen > 1;
for (int i = 0; i < (int)filterLen; i++)
@ -126,6 +145,7 @@ bool AutoCompleteBase::DoesFilterMatch(const char* entry, const char* filter)
return false;
*(initialStrP++) = 0;
return strnicmp(filter, initialStr, filterLen) == 0;
*/
}
void AutoCompleteBase::Clear()
@ -550,7 +570,9 @@ void BfAutoComplete::AddTypeDef(BfTypeDef* typeDef, const StringImpl& filter, bo
return;
}
if (!DoesFilterMatch(name.c_str(), filter.c_str()))
int score;
uint8 matches[256];
if (!DoesFilterMatch(name.c_str(), filter.c_str(), score, matches, sizeof(matches)))
return;
auto type = mModule->ResolveTypeDef(typeDef, BfPopulateType_Declaration);
@ -1128,8 +1150,10 @@ void BfAutoComplete::AddExtensionMethods(BfTypeInstance* targetType, BfTypeInsta
if (methodInstance == NULL)
continue;
int score;
uint8 matches[256];
// Do filter match first- may be cheaper than generic validation
if (!DoesFilterMatch(methodDef->mName.c_str(), filter.c_str()))
if (!DoesFilterMatch(methodDef->mName.c_str(), filter.c_str(), score, matches, sizeof(matches)))
continue;
auto thisType = methodInstance->GetParamType(0);

View file

@ -16,6 +16,8 @@ public:
const char* mDisplay;
const char* mDocumentation;
int8 mNamePrefixCount;
int mScore;
uint8 mMatches[256];
public:
AutoCompleteEntry()
@ -29,6 +31,7 @@ public:
mDisplay = display;
mDocumentation = NULL;
mNamePrefixCount = 0;
mScore = 0;
}
AutoCompleteEntry(const char* entryType, const StringImpl& display)
@ -37,6 +40,7 @@ public:
mDisplay = display.c_str();
mDocumentation = NULL;
mNamePrefixCount = 0;
mScore = 0;
}
AutoCompleteEntry(const char* entryType, const StringImpl& display, int namePrefixCount)
@ -45,6 +49,7 @@ public:
mDisplay = display.c_str();
mDocumentation = NULL;
mNamePrefixCount = (int8)namePrefixCount;
mScore = 0;
}
bool operator==(const AutoCompleteEntry& other) const
@ -100,9 +105,9 @@ public:
int mInsertStartIdx;
int mInsertEndIdx;
bool DoesFilterMatch(const char* entry, const char* filter);
AutoCompleteEntry* AddEntry(const AutoCompleteEntry& entry, const StringImpl& filter);
AutoCompleteEntry* AddEntry(const AutoCompleteEntry& entry, const char* filter);
bool DoesFilterMatch(const char* entry, const char* filter, int& score, uint8* matches, int maxMatches);
AutoCompleteEntry* AddEntry(AutoCompleteEntry& entry, const StringImpl& filter);
AutoCompleteEntry* AddEntry(AutoCompleteEntry& entry, const char* filter);
AutoCompleteEntry* AddEntry(const AutoCompleteEntry& entry);
AutoCompleteBase();

View file

@ -8007,7 +8007,9 @@ void BfCompiler::GenerateAutocompleteInfo()
}
std::sort(entries.begin(), entries.end(), [](AutoCompleteEntry* lhs, AutoCompleteEntry* rhs)
{
return stricmp(lhs->mDisplay, rhs->mDisplay) < 0;
// TODO(FUZZY): SORT BY Score
return lhs->mScore > rhs->mScore;
//return stricmp(lhs->mDisplay, rhs->mDisplay) < 0;
});
String docString;
@ -8022,6 +8024,28 @@ void BfCompiler::GenerateAutocompleteInfo()
autoCompleteResultString += '@';
autoCompleteResultString += String(entry->mDisplay);
// TODO(FUZZY): OUTPUT
// TODO(FUZZY): this is not really efficient
autoCompleteResultString += "\x02";
for (int i = 0; i < 256; i++)
{
int match = entry->mMatches[i];
// no more matches after this
if (match == 0 && i != 0)
break;
// Need max 3 chars (largest Hex (FF) + '\0')
char buffer[3];
_itoa_s(match, buffer, 16);
autoCompleteResultString += String(buffer);
autoCompleteResultString += ",";
}
autoCompleteResultString += "X";
if (entry->mDocumentation != NULL)
{
autoCompleteResultString += '\x03';

View file

@ -0,0 +1,223 @@
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
//
// VERSION
// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
// 0.1.0 (2016-03-28) Initial release
//
// AUTHOR
// Forrest Smith
//
// NOTES
// Compiling
// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
//
// fuzzy_match_simple(...)
// Returns true if each character in pattern is found sequentially within str
//
// fuzzy_match(...)
// Returns true if pattern is found AND calculates a score.
// Performs exhaustive search via recursion to find all possible matches and match with highest score.
// Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
// Uses uint8_t for match indices (offsets into str). Therefore patterns are limited to 256 characters, and match offsets past 255 in str cannot be represented.
// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
#ifndef FTS_FUZZY_MATCH_H
#define FTS_FUZZY_MATCH_H
#include <cstdint> // uint8_t
#include <ctype.h> // ::tolower, ::toupper
#include <cstring> // memcpy
#include <cstdio>
// Public interface
// Prototypes for the fuzzy matcher. Everything is declared `static`, and the
// definitions further down are only compiled when
// FTS_FUZZY_MATCH_IMPLEMENTATION is defined before this header is included.
namespace fts {
// Returns true when every character of `pattern` occurs in `str` in the same
// order, compared through tolower(). No score is produced. An empty pattern
// returns true.
static bool fuzzy_match_simple(char const* pattern, char const* str);
// Scored match that discards the match positions: forwards to the overload
// below with an internal 256-entry buffer. `outScore` is assigned only when
// the function returns true.
static bool fuzzy_match(char const* pattern, char const* str, int& outScore);
// Scored match. On success `matches` receives the offsets into `str` of the
// matched characters (stored as uint8_t) and `outScore` the score of the best
// candidate found. `maxMatches` is the capacity of `matches`. Returns false
// for an empty pattern or empty `str`, and when no match is found within the
// internal recursion limit of 10.
static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches);
}
#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
namespace fts {

    // "Private" implementation details
    namespace fuzzy_internal {

        // Capacity of the scratch buffers used while exploring alternative matches.
        static const int kScratchMatches = 256;

        // The <ctype.h> functions are undefined for negative arguments other than
        // EOF, and plain `char` may be signed. Non-ASCII bytes therefore have to
        // be routed through `unsigned char` before classification.
        static inline int fold_case(char c) { return ::tolower(static_cast<unsigned char>(c)); }
        static inline bool is_lower(char c) { return ::islower(static_cast<unsigned char>(c)) != 0; }
        static inline bool is_upper(char c) { return ::isupper(static_cast<unsigned char>(c)) != 0; }

        static bool fuzzy_match_recursive(const char* pattern, const char* str, int& outScore, const char* strBegin,
            uint8_t const* srcMatches, uint8_t* matches, int maxMatches, int nextMatch,
            int& recursionCount, int recursionLimit);
    }

    // Public interface

    // Returns true when every character of `pattern` occurs in `str` in the same
    // order, ignoring case. No score is computed. An empty pattern matches.
    static bool fuzzy_match_simple(char const* pattern, char const* str) {
        while (*pattern != '\0' && *str != '\0') {
            if (fuzzy_internal::fold_case(*pattern) == fuzzy_internal::fold_case(*str))
                ++pattern;
            ++str;
        }

        return *pattern == '\0';
    }

    // Scored fuzzy match.
    //   pattern    - characters to look for, in order, case-insensitively
    //   str        - candidate string
    //   outScore   - assigned the score of the best candidate; only written when
    //                the function returns true
    //   matches    - receives the offsets into `str` of the matched characters.
    //                When fewer than `maxMatches` characters matched, the entry
    //                after the last offset is set to 0 as a terminator.
    //   maxMatches - capacity of `matches`
    // Returns false for an empty pattern or empty `str`, when no match is found
    // within the recursion limit, or when `matches` is too small.
    static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches) {
        // Without a usable output buffer the first matched character could not be
        // recorded anyway, so bail out instead of dereferencing a null pointer.
        if (pattern == nullptr || str == nullptr || matches == nullptr || maxMatches <= 0)
            return false;

        int recursionCount = 0;
        int recursionLimit = 10;

        return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, maxMatches, 0, recursionCount, recursionLimit);
    }

    // Scored fuzzy match for callers that do not need the match positions.
    static bool fuzzy_match(char const* pattern, char const* str, int& outScore) {
        uint8_t matches[fuzzy_internal::kScratchMatches];
        return fuzzy_match(pattern, str, outScore, matches, (int)sizeof(matches));
    }

    // Private implementation
    namespace fuzzy_internal {

        // Matches `pattern` against `str` greedily and, at every matched character,
        // also recurses into the alternative that skips that character. The
        // candidate with the highest score wins.
        //   strBegin       - start of the whole candidate string; offsets stored in
        //                    `matches` are relative to it
        //   srcMatches     - offsets already chosen by the caller (may be null);
        //                    the first `nextMatch` of them are copied into
        //                    `matches` on the first match found at this level
        //   matches        - output buffer of capacity `maxMatches`
        //   nextMatch      - number of offsets already chosen
        //   recursionCount - shared call counter; once it reaches `recursionLimit`
        //                    further calls return false
        // NOTE: offsets are stored as uint8_t, so a matched character at an offset
        // above 255 is recorded modulo 256.
        static bool fuzzy_match_recursive(const char* pattern, const char* str, int& outScore,
            const char* strBegin, uint8_t const* srcMatches, uint8_t* matches, int maxMatches,
            int nextMatch, int& recursionCount, int recursionLimit)
        {
            // Count recursions
            ++recursionCount;
            if (recursionCount >= recursionLimit)
                return false;

            // Detect end of strings
            if (*pattern == '\0' || *str == '\0')
                return false;

            // Recursion params
            bool recursiveMatch = false;
            uint8_t bestRecursiveMatches[kScratchMatches];
            int bestRecursiveScore = 0;

            // Loop through pattern and str looking for a match
            bool first_match = true;
            while (*pattern != '\0' && *str != '\0') {

                // Found match
                if (fold_case(*pattern) == fold_case(*str)) {

                    // Supplied matches buffer was too short
                    if (nextMatch >= maxMatches)
                        return false;

                    // "Copy-on-Write" srcMatches into matches
                    if (first_match && srcMatches) {
                        std::memcpy(matches, srcMatches, static_cast<std::size_t>(nextMatch));
                        first_match = false;
                    }

                    // Recursive call that "skips" this match
                    uint8_t recursiveMatches[kScratchMatches];
                    int recursiveScore = 0;
                    if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, kScratchMatches, nextMatch, recursionCount, recursionLimit)) {

                        // Pick best recursive score
                        if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
                            std::memcpy(bestRecursiveMatches, recursiveMatches, sizeof(bestRecursiveMatches));
                            bestRecursiveScore = recursiveScore;
                        }
                        recursiveMatch = true;
                    }

                    // Advance
                    matches[nextMatch++] = (uint8_t)(str - strBegin);

                    // Terminate the list directly behind the offset just stored so
                    // that readers know which match is the last one. `nextMatch`
                    // has already been advanced, so it is the slot to clear; skip
                    // the write when the buffer is exactly full.
                    if (nextMatch < maxMatches)
                        matches[nextMatch] = 0;

                    ++pattern;
                }
                ++str;
            }

            // Determine if full pattern was matched
            const bool matched = (*pattern == '\0');

            // Calculate score
            if (matched) {
                const int sequential_bonus = 15;            // bonus for adjacent matches
                const int separator_bonus = 30;             // bonus if match occurs after a separator
                const int camel_bonus = 30;                 // bonus if match is uppercase and prev is lower
                const int first_letter_bonus = 15;          // bonus if the first letter is matched

                const int leading_letter_penalty = -5;      // penalty applied for every letter in str before the first match
                const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
                const int unmatched_letter_penalty = -1;    // penalty for every letter that isn't matched

                // Iterate str to end
                while (*str != '\0')
                    ++str;

                // Initialize score
                outScore = 100;

                // Apply leading letter penalty
                int penalty = leading_letter_penalty * matches[0];
                if (penalty < max_leading_letter_penalty)
                    penalty = max_leading_letter_penalty;
                outScore += penalty;

                // Apply unmatched penalty
                int unmatched = (int)(str - strBegin) - nextMatch;
                outScore += unmatched_letter_penalty * unmatched;

                // Apply ordering bonuses
                for (int i = 0; i < nextMatch; ++i) {
                    uint8_t currIdx = matches[i];

                    if (i > 0) {
                        uint8_t prevIdx = matches[i - 1];

                        // Sequential
                        if (currIdx == (prevIdx + 1))
                            outScore += sequential_bonus;
                    }

                    // Check for bonuses based on neighbor character value
                    if (currIdx > 0) {
                        // Camel case
                        char neighbor = strBegin[currIdx - 1];
                        char curr = strBegin[currIdx];
                        if (is_lower(neighbor) && is_upper(curr))
                            outScore += camel_bonus;

                        // Separator
                        bool neighborSeparator = neighbor == '_' || neighbor == ' ';
                        if (neighborSeparator)
                            outScore += separator_bonus;
                    }
                    else {
                        // First letter
                        outScore += first_letter_bonus;
                    }
                }
            }

            // Return best result
            if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
                // Recursive score is better than "this". Never copy more than the
                // scratch buffer holds, even if the caller's buffer is larger.
                const int copyCount = (maxMatches < kScratchMatches) ? maxMatches : kScratchMatches;
                std::memcpy(matches, bestRecursiveMatches, static_cast<std::size_t>(copyCount));
                outScore = bestRecursiveScore;
                return true;
            }
            else if (matched) {
                // "this" score is better than recursive
                return true;
            }
            else {
                // no match
                return false;
            }
        }
    } // namespace fuzzy_internal
} // namespace fts
#endif // FTS_FUZZY_MATCH_IMPLEMENTATION
#endif // FTS_FUZZY_MATCH_H

View file

@ -400,6 +400,7 @@
<ClInclude Include="Compiler\BfUtil.h" />
<ClInclude Include="Compiler\BfVarDeclChecker.h" />
<ClInclude Include="Compiler\CeMachine.h" />
<ClInclude Include="Compiler\FtsFuzzyMatch.h" />
<ClInclude Include="Compiler\MemReporter.h" />
<ClInclude Include="DbgMiniDump.h" />
<ClInclude Include="Debugger.h" />

View file

@ -399,5 +399,6 @@
<ClInclude Include="Compiler\CeMachine.h">
<Filter>Compiler</Filter>
</ClInclude>
<ClInclude Include="Compiler\FtsFuzzyMatch.h" />
</ItemGroup>
</Project>