diff --git a/BingusApi/Controllers/FaqController.cs b/BingusApi/Controllers/FaqController.cs index 43b4725..f2cd04f 100644 --- a/BingusApi/Controllers/FaqController.cs +++ b/BingusApi/Controllers/FaqController.cs @@ -1,7 +1,8 @@ +using BingusLib.Config; using BingusLib.FaqHandling; using BingusLib.HNSW; +using HNSW.Net; using Microsoft.AspNetCore.Mvc; -using static BingusLib.FaqHandling.FaqHandler; namespace BingusApi.Controllers; @@ -16,12 +17,16 @@ public class FaqController : ControllerBase private static readonly int MaxLength = 4000; private readonly FaqHandler _faqHandler; + private readonly BingusConfig _bingusConfig; private readonly FaqConfig _faqConfig; + private readonly FaqDict? _faqDict; - public FaqController(FaqHandler faqHandler, FaqConfig faqConfig) + public FaqController(FaqHandler faqHandler, BingusConfig bingusConfig, FaqConfig faqConfig) { _faqHandler = faqHandler; + _bingusConfig = bingusConfig; _faqConfig = faqConfig; + _faqDict = bingusConfig.UseQ2A ? new FaqDict(faqConfig) : null; } private static FaqEntry GetEntry(ILazyItem item) @@ -43,18 +48,29 @@ public IEnumerable Search(string question, int responseCount = // Actually query a larger set amount to reduce duplicates in the response, // but one result will never have duplicates - var results = _faqHandler.Search(question, responseCount > 1 ? SearchAmount : 1); + var searchAmount = _bingusConfig.UseQ2A + ? responseCount + : (responseCount > 1 ? SearchAmount : 1); + var results = _faqHandler.Search(question, searchAmount); + + IEnumerable, float>.KNNSearchResult> filteredResults = + results; + + // Only consider duplicates if Q2Q, there will only be one for Q2A + if (!_bingusConfig.UseQ2A) + { + // Group the duplicates + // Select the highest relevance entry for each duplicate group + filteredResults = filteredResults + .GroupBy(result => GetEntry(result.Item).Answer) + .Select(groupedResults => + groupedResults.MinBy(result => result.Distance) ?? groupedResults.First() + ); + } - // Format the entry JSON - // Group the duplicates - // Select the highest relevance entry for each duplicate group // Sort the entries by relevance // Take only the requested number of results - var responses = results - .GroupBy(result => GetEntry(result.Item).Answer) - .Select(groupedResults => - groupedResults.MinBy(result => result.Distance) ?? groupedResults.First() - ) + var response = filteredResults .OrderByDescending(result => -result.Distance) .Take(responseCount) .Select(result => @@ -69,7 +85,24 @@ public IEnumerable Search(string question, int responseCount = }; }); - return responses; + var dictAnswer = _faqDict?.Search(question); + if (dictAnswer != null) + { + response = response + .Where(result => result.Text != dictAnswer.Answer) + .Prepend( + new FaqEntryResponse() + { + Relevance = 100f, + MatchedQuestion = dictAnswer.Question, + Title = dictAnswer.Title, + Text = dictAnswer.Answer, + } + ) + .Take(responseCount); + } + + return response; } [HttpGet(template: "Config", Name = "Config")] diff --git a/BingusApi/Program.cs b/BingusApi/Program.cs index e98d114..103d476 100644 --- a/BingusApi/Program.cs +++ b/BingusApi/Program.cs @@ -62,7 +62,7 @@ string GetConfig(string fileName) ); // Initialize Bingus library dependencies -builder.Services.AddSingleton(); +builder.Services.AddHttpClient(); builder.Services.AddSingleton(sp => sp.GetRequiredService().GetSentenceEncoder(sp)); builder.Services.AddSingleton(CosineDistance.SIMDForUnits); builder.Services.AddSingleton(sp => new SeededRandom( @@ -104,9 +104,8 @@ string GetConfig(string fileName) app.MapControllers(); // Load FAQ -var useQ2A = app.Services.GetService()?.UseQ2A ?? false; -var faqConf = app.Services.GetRequiredService(); +var useQ2A = app.Services.GetRequiredService().UseQ2A; app.Services.GetRequiredService() - .AddItems(useQ2A ? faqConf.AnswerEntryEnumerator() : faqConf.QaEntryEnumerator(), useQ2A); + .AddItems(app.Services.GetRequiredService(), useQ2A); app.Run(); diff --git a/BingusLib/FaqHandling/FaqDict.cs b/BingusLib/FaqHandling/FaqDict.cs new file mode 100644 index 0000000..69be6f7 --- /dev/null +++ b/BingusLib/FaqHandling/FaqDict.cs @@ -0,0 +1,30 @@ +namespace BingusLib.FaqHandling +{ + public class FaqDict + { + private readonly Dictionary _faqDict = []; + + public FaqDict(FaqConfig faqConfig) + : this(faqConfig.QaEntryEnumerator()) { } + + public FaqDict(IEnumerable<(string title, string question, string answer)> tqaMapping) + { + foreach (var (title, question, answer) in tqaMapping) + { + _faqDict[CleanQuery(question)] = new FaqEntry() + { + Title = title, + Question = question, + Answer = answer, + }; + } + } + + private static string CleanQuery(string query) => query.Trim().ToLowerInvariant(); + + public FaqEntry? Search(string query) + { + return _faqDict.TryGetValue(CleanQuery(query), out var entry) ? entry : null; + } + } +} diff --git a/BingusLib/FaqHandling/FaqEntry.cs b/BingusLib/FaqHandling/FaqEntry.cs new file mode 100644 index 0000000..64e7f97 --- /dev/null +++ b/BingusLib/FaqHandling/FaqEntry.cs @@ -0,0 +1,12 @@ +using MathNet.Numerics.LinearAlgebra; + +namespace BingusLib.FaqHandling +{ + public record class FaqEntry + { + public string Title { get; set; } = ""; + public string Question { get; set; } = ""; + public string Answer { get; set; } = ""; + public Vector? Vector { get; set; } + } +} diff --git a/BingusLib/FaqHandling/FaqHandler.cs b/BingusLib/FaqHandling/FaqHandler.cs index 49a6731..ad57fda 100644 --- a/BingusLib/FaqHandling/FaqHandler.cs +++ b/BingusLib/FaqHandling/FaqHandler.cs @@ -6,16 +6,8 @@ namespace BingusLib.FaqHandling { - public record FaqHandler + public class FaqHandler { - public record FaqEntry - { - public string Title { get; set; } = ""; - public string Question { get; set; } = ""; - public string Answer { get; set; } = ""; - public Vector? Vector { get; set; } - } - private readonly ILogger? _logger; private readonly IEmbeddingStore? _embeddingStore; @@ -41,9 +33,14 @@ public FaqHandler( _hnswHandler = new(distanceFunction, randomProvider, parameters); } + public void AddItems(FaqConfig faq, bool useQ2A = true) + { + AddItems(useQ2A ? faq.AnswerEntryEnumerator() : faq.QaEntryEnumerator(), useQ2A); + } + public void AddItems( IEnumerable<(string title, string question, string answer)> tqaMapping, - bool useQ2A = false + bool useQ2A = true ) { var hnswItems = new List>(); diff --git a/BingusLib/HNSW/LazyItem.cs b/BingusLib/HNSW/LazyItem.cs index ded08f9..22aee7a 100644 --- a/BingusLib/HNSW/LazyItem.cs +++ b/BingusLib/HNSW/LazyItem.cs @@ -1,6 +1,6 @@ namespace BingusLib.HNSW { - public class LazyItem : ILazyItem + public record class LazyItem : ILazyItem { private readonly Func _getValue; public TItem Value => _getValue(); diff --git a/BingusLib/HNSW/LazyItemValue.cs b/BingusLib/HNSW/LazyItemValue.cs index dce404c..36723f7 100644 --- a/BingusLib/HNSW/LazyItemValue.cs +++ b/BingusLib/HNSW/LazyItemValue.cs @@ -1,6 +1,6 @@ namespace BingusLib.HNSW { - public readonly struct LazyItemValue : ILazyItem + public readonly record struct LazyItemValue : ILazyItem { private readonly Func _getValue; public TItem Value => _getValue(); diff --git a/BingusLib/HNSW/LazyKeyItem.cs b/BingusLib/HNSW/LazyKeyItem.cs index c75884d..86aa7fc 100644 --- a/BingusLib/HNSW/LazyKeyItem.cs +++ b/BingusLib/HNSW/LazyKeyItem.cs @@ -3,7 +3,7 @@ namespace BingusLib.HNSW { - public class LazyKeyItem : LazyItem + public record class LazyKeyItem : LazyItem { private readonly TKey _key; public TKey Key => _key;