diff --git a/Cargo.lock b/Cargo.lock
index ec703bf..bce7bf4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1944,7 +1944,7 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 
 [[package]]
 name = "letsearch"
-version = "0.1.12"
+version = "0.1.13"
 dependencies = [
  "actix-web",
  "anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index fa6754f..7def68a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "letsearch"
-version = "0.1.12"
+version = "0.1.13"
 edition = "2021"
 
 [dependencies]
diff --git a/README.md b/README.md
index 055cc9d..5e2dbec 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,26 @@ With its built-in support for ONNX inference (llama.cpp and GGUF support coming
 - Convert and bring your own models.
 - Upload and/or download prebuilt collections on HuggingFace Hub easily (coming soon).
 
+## 😕 Why does it exist?
+
+Building RAG (Retrieval-Augmented Generation) or semantic search applications often involves dealing with the complexities of vector operations, embedding management, and infrastructure setup. `letsearch` was created to eliminate these burdens and streamline the process of building and serving vector indexes.
+
+### Key Benefits
+
+- **No More Vector Ops Hassle**
+  Focus on your application logic without worrying about the intricacies of vector indexing, storage, or retrieval.
+
+- **Simplified Collection Management**
+  Easily create, manage, and share collections of embeddings, whether from JSONL, Parquet, or even HuggingFace datasets.
+
+- **From Experimentation to Production in No Time**
+  Drastically reduce the time required to go from prototyping your RAG or search workflows to serving requests in production.
+
+- **Say Goodbye to Boilerplate**
+  Avoid repetitive setup and integration code. `letsearch` provides a single, ready-to-run binary to embed, index, and search your documents. This is particularly useful for serverless cloud jobs and local AI applications.
+
+By combining these advantages with built-in support for ONNX models and plans for multimodal / multibackend capabilities, `letsearch` is your go-to tool for making documents AI-ready in record time.
+
 ## 🏎️ Quickstart
 
 1. Download the latest prebuilt binary from [releases](https://github.com/monatis/letsearch/releases).
@@ -36,19 +56,50 @@ Wuhu! Now you already know how to use letsearch! 🙋 It's that simple.
 
 ⚠️ **Note**: letsearch is at a early stage of development, so rapid changes in the API should be expected.
 
-## 🧮 Models
+## 🚧 Indexing documents
 
-- To see the models currently available on HuggingFace Hub, run:
+```sh
+./letsearch index --collection-name test1 --index-columns context hf://datasets/neural-bridge/rag-dataset-1200/**/*.parquet
+```
+
+With a single CLI command, you:
+
+- downloaded `.parquet` files from [a HF dataset repository](https://huggingface.co/datasets/neural-bridge/rag-dataset-1200/).
+- downloaded [a model from HF Hub](https://huggingface.co/mys/minilm).
+- imported your documents to the DB.
+- embedded texts in the column `context`.
+- built a vector index.
+
+You can use local or `hf://` paths to import your documents in `.jsonl` or `.parquet` files.
+Regular paths and/or glob patterns are supported.
+
+Run:
 
 ```sh
-./letsearch list-models
+./letsearch index --help
 ```
 
-To convert your own models to a format that you can use with letsearch, see [this script](./scripts/export_to_onnx.py).
+for more usage tips.
 
 ## 🔍 Search
 
-Se [this](./scripts/test.py) for a dead simple request example. A full Python client is on the way.
+Use the same binary to serve your index:
+
+```sh
+./letsearch serve -c test1
+```
+
+Then, it's quite easy to make search requests with [letsearch-client](https://github.com/monatis/letsearch-client).
+
+## 🧮 Models
+
+- To see the models currently available on HuggingFace Hub, run:
+
+```sh
+./letsearch list-models
+```
+
+To convert your own models to a format that you can use with letsearch, see [letsearch-client](https://github.com/monatis/letsearch-client).
 
 ## 🧭 roadmap
diff --git a/scripts/export_to_onnx.py b/scripts/export_to_onnx.py
deleted file mode 100644
index 8289e53..0000000
--- a/scripts/export_to_onnx.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import os
-import glob
-import json
-import argparse
-from transformers import AutoTokenizer, AutoModel, AutoConfig, PreTrainedTokenizerFast
-import torch
-import onnx
-import onnxruntime as ort
-from onnxruntime.quantization import quantize_dynamic, QuantType, quant_pre_process
-import torch
-
-
-ap = argparse.ArgumentParser(
-    description="Export SentenceTransformers Models to ONNX for use with letsearch"
-)
-ap.add_argument("-m", "--model", required=True, help="Model to export")
-ap.add_argument("-o", "--output", required=True, help="Where to save the ONNX model")
-ap.add_argument(
-    "-d",
-    "--description",
-    required=False,
-    default="",
-    help="Description to add to the metadata file",
-)
-args = ap.parse_args()
-model_path = args.model
-output_path = args.output
-
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-tokenizer.save_pretrained(output_path)
-required_files = glob.glob(f"{output_path}/**")
-required_files = [os.path.basename(path) for path in required_files]
-
-device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
-model = AutoModel.from_pretrained(
-    model_path,
-)
-model.to(device)
-model.eval()
-
-onnx_path = f"{output_path}/model-f32.onnx"
-onnx_f16_path = f"{output_path}/model-f16.onnx"
-onnx_infer_path = f"{output_path}/model-infer.onnx"
-onnx_int8_path = f"{output_path}/model-i8.onnx"
-
-
-dummy_model_input = tokenizer("Using BERT with ONNX Runtime!", return_tensors="pt").to(
-    device
-)
-inputs = tuple(dummy_model_input.values())
-input_names = tuple(dummy_model_input.keys())
-dynamic_axes = {
-    input_name: {0: "batch_size", 1: "sequence"} for input_name in input_names
-}
-dynamic_axes["last_hidden_state"] = {0: "batch_size", 1: "sequence"}
-dynamic_axes["pooler_output"] = {0: "batch_size", 1: "sequence"}
-
-torch.onnx.export(
-    model,
-    inputs,
-    onnx_path,
-    input_names=input_names,
-    output_names=("last_hidden_state", "pooler_output"),
-    dynamic_axes=dynamic_axes,
-    do_constant_folding=True,
-    opset_version=14,
-    artifacts_dir="./artifacts",
-    external_data=False,
-)
-
-print(f"saved f32 model to {onnx_path}")
-
-model.half()
-torch.onnx.export(
-    model,
-    inputs,
-    onnx_f16_path,
-    input_names=input_names,
-    output_names=("last_hidden_state", "pooler_output"),
-    dynamic_axes=dynamic_axes,
-    do_constant_folding=True,
-    opset_version=14,
-    artifacts_dir="./artifacts",
-    external_data=False,
-)
-print(f"Saved f16 model to {onnx_f16_path}")
-
-
-quant_pre_process(onnx_path, onnx_infer_path, auto_merge=True)
-
-quantize_dynamic(
-    model_input=onnx_infer_path,
-    model_output=onnx_int8_path,
-    weight_type=QuantType.QInt8,
-)
-
-print(f"Saved i8 model to {onnx_int8_path}")
-
-metadata = {
-    "letsearch_version": 1,
-    "converted_from": model_path,
-    "description": args.description,
-    "variants": [
-        {"variant": "f32", "path": "model-f32.onnx"},
-        {"variant": "f16", "path": "model-f16.onnx"},
-        {"variant": "i8", "path": "model-i8.onnx"},
-    ],
-}
-
-with open(f"{output_path}/metadata.json", "w") as f:
-    f.write(json.dumps(metadata))
-
-readme = """---
-license: mit
-tags:
-- letsearch
-- rag
-- embedding
-- semantic-search
-- onnx
----
-## Overview
-This is a letsearch-compatible text embedding model.
-## Usage
-See [letsearch](https://github.com/monatis/letsearch)."""
-
-with open(f"{output_path}/README.md", "w") as f:
-    f.write(readme)
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
deleted file mode 100644
index fd902fd..0000000
--- a/scripts/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-torch
-torchvision
-transformers
-onnx
-onnxruntime
diff --git a/scripts/test.py b/scripts/test.py
deleted file mode 100644
index 34a69db..0000000
--- a/scripts/test.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import httpx
-import time
-
-ragdb_base_url = "http://localhost:7898"
-
-
-def url_for(endpoint: str):
-    return "{}{}".format(ragdb_base_url, endpoint)
-
-
-def test_search(collection_name, column_name, query, limit):
-    client = httpx.Client()
-    # warmup
-    _ = client.get(url_for("")).json()
-    results = client.post(
-        url_for(f"/collections/{collection_name}/search"),
-        json={"column_name": column_name, "query": query, "limit": limit},
-    ).json()
-
-    return results
-
-
-if __name__ == "__main__":
-    import argparse
-
-    ap = argparse.ArgumentParser()
-    ap.add_argument("-r", "--test-reuse", action="store_true")
-    ap.add_argument("-s", "--test-search", action="store_true")
-    args = ap.parse_args()
-
-    if args.test_search:
-        results = test_search(
-            collection_name="test2",
-            column_name="passage",
-            query="When was Abraham Lincoln born?",
-            limit=5,
-        )
-        print(results)
diff --git a/src/main.rs b/src/main.rs
index ec67863..094fc7c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -13,7 +13,7 @@ use std::io::Write;
 #[derive(Parser, Debug)]
 #[command(
     name = "letsearch",
-    version = "0.1.12",
+    version = "0.1.13",
     author = "yusufsarigoz@gmail.com",
     about = "Single binary to embed, index, serve and search your documents",
     subcommand_required = true,
diff --git a/src/serve.rs b/src/serve.rs
index cb4ec18..185960f 100644
--- a/src/serve.rs
+++ b/src/serve.rs
@@ -51,7 +51,6 @@ struct QueryRequest {
 struct HelthcheckResponse {
     version: String,
     status: String,
-    collections: Vec,
 }
 
 #[derive(Serialize)]
@@ -70,15 +69,12 @@ struct SearchResultsResponse {
     results: Vec,
 }
 
-async fn healthcheck(manager: web::Data>) -> impl Responder {
+async fn healthcheck() -> impl Responder {
     let start = Instant::now();
-    let manager_guard = manager.read().await;
-    let collections = manager_guard.get_collections().await;
     let response = SuccessResponse::new(
         HelthcheckResponse {
-            version: "0.1.0".to_string(),
+            version: "0.1.13".to_string(),
             status: "ok".to_string(),
-            collections: collections,
         },
         start,
     );
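
Since this change removes `scripts/test.py` (the old "dead simple request example") and points users at [letsearch-client](https://github.com/monatis/letsearch-client) instead, here is a minimal sketch of the raw request that the removed script demonstrated, against a server started with `./letsearch serve -c test1`. The port (`7898`) comes from the deleted script; the collection name (`test1`) and column (`context`) are assumptions borrowed from the README's indexing example.

```python
# Minimal sketch of a raw search request; letsearch-client wraps this same endpoint.
# Port, collection name, and column name are assumptions taken from the deleted
# scripts/test.py and the README's indexing example -- adjust for your setup.
import httpx

base_url = "http://localhost:7898"

with httpx.Client() as client:
    # Healthcheck: after this change it returns only `version` and `status`.
    print(client.get(base_url).json())

    # Vector search over the indexed `context` column of collection `test1`.
    results = client.post(
        f"{base_url}/collections/test1/search",
        json={"column_name": "context", "query": "When was Abraham Lincoln born?", "limit": 5},
    ).json()
    print(results)
```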