diff --git a/src/modules/aiTools/ai-tools.service.ts b/src/modules/aiTools/ai-tools.service.ts index 1dde8c8..75612fa 100644 --- a/src/modules/aiTools/ai-tools.service.ts +++ b/src/modules/aiTools/ai-tools.service.ts @@ -247,8 +247,10 @@ export class AiToolsService { }); await unlink(csvFilePath) fs.writeFileSync(csvFilePath, response.data); + return response.status }catch(error){ console.log('error', error) + return error.response.status } } diff --git a/src/modules/embeddings/embeddings.controller.ts b/src/modules/embeddings/embeddings.controller.ts index a37eaed..7745c02 100644 --- a/src/modules/embeddings/embeddings.controller.ts +++ b/src/modules/embeddings/embeddings.controller.ts @@ -44,21 +44,21 @@ export class EmbeddingsController { } } - @Post() - async createOrUpdate( - @Body() createFeedbackDto: CreateDocumentDto | CreateDocumentDto[] - ): Promise { - if (!Array.isArray(createFeedbackDto)) { - createFeedbackDto = [createFeedbackDto]; - } - return this.embeddingsService.createOrUpdate(createFeedbackDto); - } + // @Post() + // async createOrUpdate( + // @Body() createFeedbackDto: CreateDocumentDto | CreateDocumentDto[] + // ): Promise { + // if (!Array.isArray(createFeedbackDto)) { + // createFeedbackDto = [createFeedbackDto]; + // } + // return this.embeddingsService.createOrUpdate(createFeedbackDto); + // } @Post("/searchSimilar") async findByCriteria( @Body() searchQueryDto: SearchQueryDto ): Promise { - return this.embeddingsService.findByCriteria(searchQueryDto); + return this.embeddingsService.findByCriteria2(searchQueryDto); } @Delete(':id') diff --git a/src/modules/embeddings/embeddings.dto.ts b/src/modules/embeddings/embeddings.dto.ts index 45bc4b2..47e8090 100644 --- a/src/modules/embeddings/embeddings.dto.ts +++ b/src/modules/embeddings/embeddings.dto.ts @@ -26,6 +26,9 @@ export class SearchQueryDto { message: "Max matched documents need to be defined to limit search results", }) matchCount: number; + + @IsOptional() + searchVia?: string } class Pagination { diff --git a/src/modules/embeddings/embeddings.service.ts b/src/modules/embeddings/embeddings.service.ts index e389a06..ba6864c 100644 --- a/src/modules/embeddings/embeddings.service.ts +++ b/src/modules/embeddings/embeddings.service.ts @@ -26,7 +26,7 @@ export class EmbeddingsService { async findAll(page: number, perPage: number) : Promise{ // using raw sql inorder to get embeddings. const documents:DocumentWithEmbedding[] = await this.prisma.$queryRaw` - SELECT id, content, tags, CAST(embedding AS TEXT) + SELECT id,"chunkId", content, heading, summary, tags, CAST("contentEmbedding" AS TEXT), CAST("headingEmbedding" AS TEXT), CAST("summaryEmbedding" AS TEXT) FROM document ORDER BY id OFFSET ${(page - 1) * perPage} @@ -132,10 +132,45 @@ export class EmbeddingsService { return results; } + async findByCriteria2(searchQueryDto: SearchQueryDto, searchVia: string = 'summaryEmbedding', type = ""): Promise { + const embedding: any = ( + await this.aiToolsService.getEmbedding(searchQueryDto.query) + )[0]; + let query_embedding = `[${embedding + .map((x) => `${x}`) + .join(",")}]` + let similarity_threshold = searchQueryDto.similarityThreshold + let match_count = searchQueryDto.matchCount + + const results = await this.prisma + .$queryRawUnsafe(` + SELECT + document.id as id, + document.content AS content, + document.heading AS heading, + document.summary AS summary, + document.tags as tags, + 1 - (document."${searchVia}" <=> '${query_embedding}') as similarity, + document."metaData" as "metaData", + document."chunkId" as "chunkId", + document.type as type + FROM + document + WHERE + 1 - (document."${searchVia}" <=> '${query_embedding}') > ${similarity_threshold} + ${type? `AND document.type = '${type}'`:''} + ORDER BY + document."${searchVia}" <=> '${query_embedding}' + LIMIT ${match_count};` + ); + + return results; + } + async findOne(id: number): Promise { try { const document:DocumentWithEmbedding[] = await this.prisma.$queryRaw` - SELECT id, content, tags, CAST(embedding AS TEXT) + SELECT id, content, heading, summary, tags, CAST("contentEmbedding" AS TEXT), CAST("headingEmbedding" AS TEXT), CAST("summaryEmbedding" AS TEXT) FROM document where id = ${parseInt(`${id}`)} `; return document[0]; @@ -156,20 +191,30 @@ export class EmbeddingsService { } async getWithFilters(getDocumentsDto: GetDocumentsDto): Promise { + const searchVia = getDocumentsDto.filter.searchVia || 'contentEmbedding' const page = getDocumentsDto.pagination.page || 1; const perPage = getDocumentsDto.pagination.perPage || 10; const embedding: any = ( await this.aiToolsService.getEmbedding(getDocumentsDto.filter.query) )[0]; + let query_embedding = `[${embedding + .map((x) => `${x}`) + .join(",")}]` + let similarity_threshold = getDocumentsDto.filter.similarityThreshold + let match_count = getDocumentsDto.filter.matchCount let result = await this.prisma .$queryRawUnsafe(` WITH matched_docs AS ( - SELECT id, similarity - FROM match_documents( - query_embedding := '[${embedding.map((x) => `${x}`).join(",")}]', - similarity_threshold := ${getDocumentsDto.filter.similarityThreshold}, - match_count := ${getDocumentsDto.filter.matchCount} - ) + SELECT + document.id as id, + 1 - (document."${searchVia}" <=> '${query_embedding}') as similarity + FROM + document + WHERE + 1 - (document."${searchVia}" <=> '${query_embedding}') > ${similarity_threshold} + ORDER BY + document."${searchVia}" <=> '${query_embedding}' + LIMIT ${match_count} ), total_count AS ( SELECT COUNT(*) AS count @@ -189,9 +234,14 @@ export class EmbeddingsService { json_build_object( 'id', doc.id, 'content', doc.content, + 'heading', doc.heading, + 'summary', doc.summary, 'tags', doc.tags, - 'embedding', CAST(doc.embedding AS TEXT), - 'similarity', matched_docs.similarity + 'contentEmbedding', CAST(doc."contentEmbedding" AS TEXT), + 'headingEmbedding', CAST(doc."headingEmbedding" AS TEXT), + 'summaryEmbedding', CAST(doc."summaryEmbedding" AS TEXT), + 'similarity', matched_docs.similarity, + 'chunkId', doc."chunkId" ) ORDER BY matched_docs.similarity DESC ) ) AS result diff --git a/src/modules/pdf/pdf.controller.ts b/src/modules/pdf/pdf.controller.ts index cda78ed..2a94377 100644 --- a/src/modules/pdf/pdf.controller.ts +++ b/src/modules/pdf/pdf.controller.ts @@ -98,63 +98,118 @@ export class PDFController { }) ) async addData(@UploadedFile() file: Express.Multer.File){ - await this.aiToolsService.getCSVFromChunks(file.filename) + let startTime = Date.now() + const csvFilePath = path.join(__dirname, `../../../files/${file.filename}`); + let contentEmbedStatus = await this.aiToolsService.getCSVFromChunks(file.filename) + let timeTakenForContentEmbedding = Date.now() - startTime; + if(contentEmbedStatus != 200) { + await unlink(csvFilePath) + return { + contentEmbedStatus, + timeTakenForContentEmbedding, + message: `Failed with status code ${contentEmbedStatus} while embedding content chunks.` + } + } await this.pdfService.replacePDFHeader(",embeddings",",contentEmbedding",file.filename) await this.pdfService.replacePDFHeader("content,heading,","ignore1,content,",file.filename) - await this.aiToolsService.getCSVFromChunks(file.filename) + let headingEmbedStatus = await this.aiToolsService.getCSVFromChunks(file.filename) + let timeTakenForHeadingEmbedding = Date.now() - startTime; + if(headingEmbedStatus != 200){ + await unlink(csvFilePath) + return { + contentEmbedStatus, + timeTakenForContentEmbedding, + headingEmbedStatus, + timeTakenForHeadingEmbedding, + message: `Failed with status code ${headingEmbedStatus} while embedding heading chunks.` + } + } await this.pdfService.replacePDFHeader(",embeddings",",headingEmbedding",file.filename) await this.pdfService.replacePDFHeader("ignore1,content,summary,","ignore1,ignore2,content,",file.filename) - await this.aiToolsService.getCSVFromChunks(file.filename) + let summaryEmbedStatus = await this.aiToolsService.getCSVFromChunks(file.filename) + let timeTakenForSummaryEmbedding = Date.now() - startTime; + if(summaryEmbedStatus != 200){ + await unlink(csvFilePath) + return { + contentEmbedStatus, + timeTakenForContentEmbedding, + headingEmbedStatus, + timeTakenForHeadingEmbedding, + summaryEmbedStatus, + timeTakenForSummaryEmbedding, + message: `Failed with status code ${summaryEmbedStatus} while embedding summary chunks.` + } + } await this.pdfService.replacePDFHeader(",embeddings",",summaryEmbedding",file.filename) - await this.pdfService.replacePDFHeader("ignore1,ignore2,content,","content,heading,summary,",file.filename) - const csvFilePath = path.join(__dirname, `../../../files/${file.filename}`); + await this.pdfService.replacePDFHeader("ignore1,ignore2,content,","content,heading,summary,",file.filename) let data = await this.pdfService.processCSV(csvFilePath) await unlink(csvFilePath) - for(let i=0;i `${x}`) .join(",")}]', - "summaryEmbedding" = '[${JSON.parse(data[i].summaryEmbedding) - .map((x) => `${x}`) - .join(",")}]' - WHERE "chunkId" = ${parseInt(data[i].chunkId)}` - ); + "headingEmbedding" = '[${JSON.parse(data[i].headingEmbedding) + .map((x) => `${x}`) + .join(",")}]', + "summaryEmbedding" = '[${JSON.parse(data[i].summaryEmbedding) + .map((x) => `${x}`) + .join(",")}]' + WHERE "chunkId" = ${parseInt(data[i].chunkId)}` + ); + } + return { + contentEmbedStatus, + timeTakenForContentEmbedding, + headingEmbedStatus, + timeTakenForHeadingEmbedding, + summaryEmbedStatus, + timeTakenForSummaryEmbedding, + message: `Document uploaded successfully.` + } + } catch (error) { + return { + contentEmbedStatus, + timeTakenForContentEmbedding, + headingEmbedStatus, + timeTakenForHeadingEmbedding, + summaryEmbedStatus, + timeTakenForSummaryEmbedding, + message: `${error.message}` + } } - return "Document uploaded successfully." + } @Post('create-or-update-employee')