From 159849b53a6cfae5b31953a726f3414a4589956c Mon Sep 17 00:00:00 2001 From: Butterscotch! Date: Thu, 17 Oct 2024 23:28:56 -0400 Subject: [PATCH 1/5] Handle Q2A better in API --- BingusApi/Controllers/FaqController.cs | 37 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/BingusApi/Controllers/FaqController.cs b/BingusApi/Controllers/FaqController.cs index 43b4725..b4c18f8 100644 --- a/BingusApi/Controllers/FaqController.cs +++ b/BingusApi/Controllers/FaqController.cs @@ -1,5 +1,7 @@ +using BingusLib.Config; using BingusLib.FaqHandling; using BingusLib.HNSW; +using HNSW.Net; using Microsoft.AspNetCore.Mvc; using static BingusLib.FaqHandling.FaqHandler; @@ -16,11 +18,13 @@ public class FaqController : ControllerBase private static readonly int MaxLength = 4000; private readonly FaqHandler _faqHandler; + private readonly BingusConfig _bingusConfig; private readonly FaqConfig _faqConfig; - public FaqController(FaqHandler faqHandler, FaqConfig faqConfig) + public FaqController(FaqHandler faqHandler, BingusConfig bingusConfig, FaqConfig faqConfig) { _faqHandler = faqHandler; + _bingusConfig = bingusConfig; _faqConfig = faqConfig; } @@ -43,18 +47,28 @@ public IEnumerable Search(string question, int responseCount = // Actually query a larger set amount to reduce duplicates in the response, // but one result will never have duplicates - var results = _faqHandler.Search(question, responseCount > 1 ? SearchAmount : 1); + var searchAmount = _bingusConfig.UseQ2A + ? responseCount + : (responseCount > 1 ? SearchAmount : 1); + var results = _faqHandler.Search(question, searchAmount); + + IEnumerable, float>.KNNSearchResult> response = results; + + // Only consider duplicates if Q2Q, there will only be one for Q2A + if (!_bingusConfig.UseQ2A) + { + // Group the duplicates + // Select the highest relevance entry for each duplicate group + response = response + .GroupBy(result => GetEntry(result.Item).Answer) + .Select(groupedResults => + groupedResults.MinBy(result => result.Distance) ?? groupedResults.First() + ); + } - // Format the entry JSON - // Group the duplicates - // Select the highest relevance entry for each duplicate group // Sort the entries by relevance // Take only the requested number of results - var responses = results - .GroupBy(result => GetEntry(result.Item).Answer) - .Select(groupedResults => - groupedResults.MinBy(result => result.Distance) ?? groupedResults.First() - ) + return response .OrderByDescending(result => -result.Distance) .Take(responseCount) .Select(result => @@ -68,8 +82,7 @@ public IEnumerable Search(string question, int responseCount = Text = entry.Answer, }; }); - - return responses; + ; } [HttpGet(template: "Config", Name = "Config")] From f225d6e4d4f69986b683c03bdd2b4990e59a4e10 Mon Sep 17 00:00:00 2001 From: Butterscotch! Date: Thu, 17 Oct 2024 23:36:27 -0400 Subject: [PATCH 2/5] Fix typing --- BingusLib/FaqHandling/FaqHandler.cs | 4 ++-- BingusLib/HNSW/LazyItem.cs | 2 +- BingusLib/HNSW/LazyItemValue.cs | 2 +- BingusLib/HNSW/LazyKeyItem.cs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/BingusLib/FaqHandling/FaqHandler.cs b/BingusLib/FaqHandling/FaqHandler.cs index 49a6731..d810d44 100644 --- a/BingusLib/FaqHandling/FaqHandler.cs +++ b/BingusLib/FaqHandling/FaqHandler.cs @@ -6,9 +6,9 @@ namespace BingusLib.FaqHandling { - public record FaqHandler + public class FaqHandler { - public record FaqEntry + public record class FaqEntry { public string Title { get; set; } = ""; public string Question { get; set; } = ""; diff --git a/BingusLib/HNSW/LazyItem.cs b/BingusLib/HNSW/LazyItem.cs index ded08f9..22aee7a 100644 --- a/BingusLib/HNSW/LazyItem.cs +++ b/BingusLib/HNSW/LazyItem.cs @@ -1,6 +1,6 @@ namespace BingusLib.HNSW { - public class LazyItem : ILazyItem + public record class LazyItem : ILazyItem { private readonly Func _getValue; public TItem Value => _getValue(); diff --git a/BingusLib/HNSW/LazyItemValue.cs b/BingusLib/HNSW/LazyItemValue.cs index dce404c..36723f7 100644 --- a/BingusLib/HNSW/LazyItemValue.cs +++ b/BingusLib/HNSW/LazyItemValue.cs @@ -1,6 +1,6 @@ namespace BingusLib.HNSW { - public readonly struct LazyItemValue : ILazyItem + public readonly record struct LazyItemValue : ILazyItem { private readonly Func _getValue; public TItem Value => _getValue(); diff --git a/BingusLib/HNSW/LazyKeyItem.cs b/BingusLib/HNSW/LazyKeyItem.cs index c75884d..86aa7fc 100644 --- a/BingusLib/HNSW/LazyKeyItem.cs +++ b/BingusLib/HNSW/LazyKeyItem.cs @@ -3,7 +3,7 @@ namespace BingusLib.HNSW { - public class LazyKeyItem : LazyItem + public record class LazyKeyItem : LazyItem { private readonly TKey _key; public TKey Key => _key; From 3ec52ffa83d2ae9f2716d245e7ab9f567afb20b0 Mon Sep 17 00:00:00 2001 From: Butterscotch! Date: Fri, 18 Oct 2024 00:11:14 -0400 Subject: [PATCH 3/5] Add FaqDict for simple queries --- BingusApi/Controllers/FaqController.cs | 40 ++++++++++++++++++++++---- BingusApi/Program.cs | 2 +- BingusLib/FaqHandling/FaqDict.cs | 27 +++++++++++++++++ BingusLib/FaqHandling/FaqEntry.cs | 12 ++++++++ BingusLib/FaqHandling/FaqHandler.cs | 8 ------ 5 files changed, 74 insertions(+), 15 deletions(-) create mode 100644 BingusLib/FaqHandling/FaqDict.cs create mode 100644 BingusLib/FaqHandling/FaqEntry.cs diff --git a/BingusApi/Controllers/FaqController.cs b/BingusApi/Controllers/FaqController.cs index b4c18f8..16a6f4e 100644 --- a/BingusApi/Controllers/FaqController.cs +++ b/BingusApi/Controllers/FaqController.cs @@ -3,7 +3,6 @@ using BingusLib.HNSW; using HNSW.Net; using Microsoft.AspNetCore.Mvc; -using static BingusLib.FaqHandling.FaqHandler; namespace BingusApi.Controllers; @@ -20,12 +19,19 @@ public class FaqController : ControllerBase private readonly FaqHandler _faqHandler; private readonly BingusConfig _bingusConfig; private readonly FaqConfig _faqConfig; + private readonly FaqDict? _faqDict; - public FaqController(FaqHandler faqHandler, BingusConfig bingusConfig, FaqConfig faqConfig) + public FaqController( + FaqHandler faqHandler, + BingusConfig bingusConfig, + FaqConfig faqConfig, + FaqDict? faqDict = null + ) { _faqHandler = faqHandler; _bingusConfig = bingusConfig; _faqConfig = faqConfig; + _faqDict = faqDict; } private static FaqEntry GetEntry(ILazyItem item) @@ -52,14 +58,15 @@ public IEnumerable Search(string question, int responseCount = : (responseCount > 1 ? SearchAmount : 1); var results = _faqHandler.Search(question, searchAmount); - IEnumerable, float>.KNNSearchResult> response = results; + IEnumerable, float>.KNNSearchResult> filteredResults = + results; // Only consider duplicates if Q2Q, there will only be one for Q2A if (!_bingusConfig.UseQ2A) { // Group the duplicates // Select the highest relevance entry for each duplicate group - response = response + filteredResults = filteredResults .GroupBy(result => GetEntry(result.Item).Answer) .Select(groupedResults => groupedResults.MinBy(result => result.Distance) ?? groupedResults.First() @@ -68,7 +75,7 @@ public IEnumerable Search(string question, int responseCount = // Sort the entries by relevance // Take only the requested number of results - return response + var response = filteredResults .OrderByDescending(result => -result.Distance) .Take(responseCount) .Select(result => @@ -82,7 +89,28 @@ public IEnumerable Search(string question, int responseCount = Text = entry.Answer, }; }); - ; + + if (_faqDict != null) + { + var dictAnswer = _faqDict.Search(question); + if (dictAnswer != null) + { + response = response + .Where(result => result.Text != dictAnswer.Answer) + .Prepend( + new FaqEntryResponse() + { + Relevance = 100f, + MatchedQuestion = dictAnswer.Question, + Title = dictAnswer.Title, + Text = dictAnswer.Answer, + } + ) + .Take(responseCount); + } + } + + return response; } [HttpGet(template: "Config", Name = "Config")] diff --git a/BingusApi/Program.cs b/BingusApi/Program.cs index e98d114..723f4ae 100644 --- a/BingusApi/Program.cs +++ b/BingusApi/Program.cs @@ -62,7 +62,7 @@ string GetConfig(string fileName) ); // Initialize Bingus library dependencies -builder.Services.AddSingleton(); +builder.Services.AddHttpClient(); builder.Services.AddSingleton(sp => sp.GetRequiredService().GetSentenceEncoder(sp)); builder.Services.AddSingleton(CosineDistance.SIMDForUnits); builder.Services.AddSingleton(sp => new SeededRandom( diff --git a/BingusLib/FaqHandling/FaqDict.cs b/BingusLib/FaqHandling/FaqDict.cs new file mode 100644 index 0000000..ddaa5b2 --- /dev/null +++ b/BingusLib/FaqHandling/FaqDict.cs @@ -0,0 +1,27 @@ +namespace BingusLib.FaqHandling +{ + public class FaqDict + { + private readonly Dictionary _faqDict = []; + + public FaqDict(IEnumerable<(string title, string question, string answer)> tqaMapping) + { + foreach (var (title, question, answer) in tqaMapping) + { + _faqDict[CleanQuery(question)] = new FaqEntry() + { + Title = title, + Question = question, + Answer = answer, + }; + } + } + + private static string CleanQuery(string query) => query.Trim().ToLowerInvariant(); + + public FaqEntry? Search(string query) + { + return _faqDict.TryGetValue(CleanQuery(query), out var entry) ? entry : null; + } + } +} diff --git a/BingusLib/FaqHandling/FaqEntry.cs b/BingusLib/FaqHandling/FaqEntry.cs new file mode 100644 index 0000000..64e7f97 --- /dev/null +++ b/BingusLib/FaqHandling/FaqEntry.cs @@ -0,0 +1,12 @@ +using MathNet.Numerics.LinearAlgebra; + +namespace BingusLib.FaqHandling +{ + public record class FaqEntry + { + public string Title { get; set; } = ""; + public string Question { get; set; } = ""; + public string Answer { get; set; } = ""; + public Vector? Vector { get; set; } + } +} diff --git a/BingusLib/FaqHandling/FaqHandler.cs b/BingusLib/FaqHandling/FaqHandler.cs index d810d44..343598a 100644 --- a/BingusLib/FaqHandling/FaqHandler.cs +++ b/BingusLib/FaqHandling/FaqHandler.cs @@ -8,14 +8,6 @@ namespace BingusLib.FaqHandling { public class FaqHandler { - public record class FaqEntry - { - public string Title { get; set; } = ""; - public string Question { get; set; } = ""; - public string Answer { get; set; } = ""; - public Vector? Vector { get; set; } - } - private readonly ILogger? _logger; private readonly IEmbeddingStore? _embeddingStore; From 6a478f1396f386af7936793771afb7afb62c491a Mon Sep 17 00:00:00 2001 From: Butterscotch! Date: Fri, 18 Oct 2024 00:32:08 -0400 Subject: [PATCH 4/5] Initialize Q2Q dictionary if in Q2A mode --- BingusApi/Controllers/FaqController.cs | 9 ++------- BingusApi/Program.cs | 5 ++--- BingusLib/FaqHandling/FaqDict.cs | 3 +++ BingusLib/FaqHandling/FaqHandler.cs | 7 ++++++- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/BingusApi/Controllers/FaqController.cs b/BingusApi/Controllers/FaqController.cs index 16a6f4e..d9944f3 100644 --- a/BingusApi/Controllers/FaqController.cs +++ b/BingusApi/Controllers/FaqController.cs @@ -21,17 +21,12 @@ public class FaqController : ControllerBase private readonly FaqConfig _faqConfig; private readonly FaqDict? _faqDict; - public FaqController( - FaqHandler faqHandler, - BingusConfig bingusConfig, - FaqConfig faqConfig, - FaqDict? faqDict = null - ) + public FaqController(FaqHandler faqHandler, BingusConfig bingusConfig, FaqConfig faqConfig) { _faqHandler = faqHandler; _bingusConfig = bingusConfig; _faqConfig = faqConfig; - _faqDict = faqDict; + _faqDict = bingusConfig.UseQ2A ? new FaqDict(faqConfig) : null; } private static FaqEntry GetEntry(ILazyItem item) diff --git a/BingusApi/Program.cs b/BingusApi/Program.cs index 723f4ae..103d476 100644 --- a/BingusApi/Program.cs +++ b/BingusApi/Program.cs @@ -104,9 +104,8 @@ string GetConfig(string fileName) app.MapControllers(); // Load FAQ -var useQ2A = app.Services.GetService()?.UseQ2A ?? false; -var faqConf = app.Services.GetRequiredService(); +var useQ2A = app.Services.GetRequiredService().UseQ2A; app.Services.GetRequiredService() - .AddItems(useQ2A ? faqConf.AnswerEntryEnumerator() : faqConf.QaEntryEnumerator(), useQ2A); + .AddItems(app.Services.GetRequiredService(), useQ2A); app.Run(); diff --git a/BingusLib/FaqHandling/FaqDict.cs b/BingusLib/FaqHandling/FaqDict.cs index ddaa5b2..69be6f7 100644 --- a/BingusLib/FaqHandling/FaqDict.cs +++ b/BingusLib/FaqHandling/FaqDict.cs @@ -4,6 +4,9 @@ public class FaqDict { private readonly Dictionary _faqDict = []; + public FaqDict(FaqConfig faqConfig) + : this(faqConfig.QaEntryEnumerator()) { } + public FaqDict(IEnumerable<(string title, string question, string answer)> tqaMapping) { foreach (var (title, question, answer) in tqaMapping) diff --git a/BingusLib/FaqHandling/FaqHandler.cs b/BingusLib/FaqHandling/FaqHandler.cs index 343598a..ad57fda 100644 --- a/BingusLib/FaqHandling/FaqHandler.cs +++ b/BingusLib/FaqHandling/FaqHandler.cs @@ -33,9 +33,14 @@ public FaqHandler( _hnswHandler = new(distanceFunction, randomProvider, parameters); } + public void AddItems(FaqConfig faq, bool useQ2A = true) + { + AddItems(useQ2A ? faq.AnswerEntryEnumerator() : faq.QaEntryEnumerator(), useQ2A); + } + public void AddItems( IEnumerable<(string title, string question, string answer)> tqaMapping, - bool useQ2A = false + bool useQ2A = true ) { var hnswItems = new List>(); From 50dd21e02307704a8621bfe2e80284fc775e9f56 Mon Sep 17 00:00:00 2001 From: Butterscotch! Date: Fri, 18 Oct 2024 00:44:07 -0400 Subject: [PATCH 5/5] Simplify FaqDict usage --- BingusApi/Controllers/FaqController.cs | 31 ++++++++++++-------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/BingusApi/Controllers/FaqController.cs b/BingusApi/Controllers/FaqController.cs index d9944f3..f2cd04f 100644 --- a/BingusApi/Controllers/FaqController.cs +++ b/BingusApi/Controllers/FaqController.cs @@ -85,24 +85,21 @@ public IEnumerable Search(string question, int responseCount = }; }); - if (_faqDict != null) + var dictAnswer = _faqDict?.Search(question); + if (dictAnswer != null) { - var dictAnswer = _faqDict.Search(question); - if (dictAnswer != null) - { - response = response - .Where(result => result.Text != dictAnswer.Answer) - .Prepend( - new FaqEntryResponse() - { - Relevance = 100f, - MatchedQuestion = dictAnswer.Question, - Title = dictAnswer.Title, - Text = dictAnswer.Answer, - } - ) - .Take(responseCount); - } + response = response + .Where(result => result.Text != dictAnswer.Answer) + .Prepend( + new FaqEntryResponse() + { + Relevance = 100f, + MatchedQuestion = dictAnswer.Question, + Title = dictAnswer.Title, + Text = dictAnswer.Answer, + } + ) + .Take(responseCount); } return response;