From 9c370ef0907179891e4f812d1a666148bd354734 Mon Sep 17 00:00:00 2001
From: Angular2guy
Date: Tue, 14 Nov 2023 21:31:54 +0100
Subject: [PATCH] feat: return best matching document

---
 .../usecase/mapping/DocumentMapper.java       | 14 ++++++++++---
 .../usecase/service/DocumentService.java      | 20 ++++++++++++++-----
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/backend/src/main/java/ch/xxx/aidoclibchat/usecase/mapping/DocumentMapper.java b/backend/src/main/java/ch/xxx/aidoclibchat/usecase/mapping/DocumentMapper.java
index 897bdfc..a4af413 100644
--- a/backend/src/main/java/ch/xxx/aidoclibchat/usecase/mapping/DocumentMapper.java
+++ b/backend/src/main/java/ch/xxx/aidoclibchat/usecase/mapping/DocumentMapper.java
@@ -50,17 +50,25 @@ public Document toEntity(DocumentDto dto) {
 	}
 
 	public DocumentDto toDto(Document entity) {
+		return this.toDto(entity, false);
+	}
+
+	private DocumentDto toDto(Document entity, boolean noContent) {
 		var dto = new DocumentDto();
-		dto.setDocumentContent(entity.getDocumentContent());
+		dto.setDocumentContent(noContent ? null : entity.getDocumentContent());
 		dto.setDocumentName(entity.getDocumentName());
 		dto.setDocumentType(entity.getDocumentType());
 		dto.setId(entity.getId());
 		return dto;
 	}
-	
+
+	public DocumentDto toDtoNoContent(Document entity) {
+		return this.toDto(entity, true);
+	}
+
 	public DocumentSearchDto toDto(AiResult aiResult) {
 		var dto = new DocumentSearchDto();
-		dto.setDocuments(aiResult.documents().stream().map(myDoc -> this.toDto(myDoc)).toList());
+		dto.setDocuments(aiResult.documents().stream().map(myDoc -> this.toDtoNoContent(myDoc)).toList());
 		dto.setResultStrings(aiResult.generations().stream().map(myGen -> myGen.getText()).toList());
 		dto.setSearchString(aiResult.searchString());
 		return dto;
diff --git a/backend/src/main/java/ch/xxx/aidoclibchat/usecase/service/DocumentService.java b/backend/src/main/java/ch/xxx/aidoclibchat/usecase/service/DocumentService.java
index 3a15766..8388f94 100644
--- a/backend/src/main/java/ch/xxx/aidoclibchat/usecase/service/DocumentService.java
+++ b/backend/src/main/java/ch/xxx/aidoclibchat/usecase/service/DocumentService.java
@@ -46,6 +46,7 @@ public class DocumentService {
 
 	private static final Logger LOGGER = LoggerFactory.getLogger(DocumentService.class);
 	private static final String ID = "id";
+	private static final String DISTANCE = "distance";
 	private final DocumentRepository documentRepository;
 	private final DocumentVsRepository documentVsRepository;
 	private final AiClient aiClient;
@@ -83,8 +84,16 @@ record TikaDocumentAndContent(org.springframework.ai.document.Document document,
 	public AiResult queryDocuments(String query) {
 		var similarDocuments = this.documentVsRepository.retrieve(query);
 		LOGGER.info("Documents: {}", similarDocuments.size());
-		Message systemMessage = this.getSystemMessage(similarDocuments,
-				(similarDocuments.size() <= 0 ? 2000 : Math.floorDiv(2000, similarDocuments.size())));
+		var mostSimilar = similarDocuments.stream()
+				.sorted((myDocA, myDocB) -> ((Float) myDocA.getMetadata().get(DISTANCE))
+						.compareTo(((Float) myDocB.getMetadata().get(DISTANCE))))
+				.findFirst();
+		var documentChunks = mostSimilar.stream()
+				.flatMap(mySimilar -> similarDocuments.stream()
+						.filter(mySimilar1 -> mySimilar1.getMetadata().get(ID).equals(mySimilar.getMetadata().get(ID))))
+				.toList();
+		Message systemMessage = this.getSystemMessage(documentChunks,
+				(documentChunks.size() <= 0 ? 2000 : Math.floorDiv(2000, documentChunks.size())));
 		UserMessage userMessage = new UserMessage(query);
 		Prompt prompt = new Prompt(List.of(systemMessage, userMessage));
 		LocalDateTime start = LocalDateTime.now();
@@ -92,9 +101,10 @@ public AiResult queryDocuments(String query) {
 		LOGGER.info("AI response time: {}ms",
 				ZonedDateTime.of(LocalDateTime.now(), ZoneId.systemDefault()).toInstant().toEpochMilli()
 						- ZonedDateTime.of(start, ZoneId.systemDefault()).toInstant().toEpochMilli());
-		var documents = response.getGenerations().stream().map(myGen -> myGen.getInfo().get(ID))
-				.filter(myId -> (myId instanceof Long)).map(myId -> this.documentRepository.findById((Long) myId))
-				.filter(Optional::isPresent).map(Optional::get).toList();
+		var documents = mostSimilar.stream().map(myGen -> myGen.getMetadata().get(ID))
+				.map(myId -> (myId instanceof Integer ? Integer.valueOf((Integer) myId).longValue() : (Long) myId))
+				.map(myId -> this.documentRepository.findById(myId)).filter(Optional::isPresent).map(Optional::get)
+				.toList();
 		return new AiResult(query, response.getGenerations(), documents);
 	}
 