Skip to content

Commit

Permalink
Update to latest gRPC proto API with tokenize additions
Browse files Browse the repository at this point in the history
  • Loading branch information
njhill committed Mar 29, 2024
1 parent 291ca82 commit 8620421
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
target_path := "vllm/entrypoints/grpc/pb"
gen-protos:
# Compile protos
pip install grpcio-tools==1.62.0 mypy-protobuf==3.5.0 'types-protobuf>=3.20.4'
pip install grpcio-tools==1.62.1 mypy-protobuf==3.5.0 'types-protobuf>=3.20.4'
mkdir -p $(target_path)
python -m grpc_tools.protoc -Iproto --python_out=$(target_path) \
--grpc_python_out=$(target_path) --mypy_out=$(target_path) proto/generation.proto
Expand Down
17 changes: 14 additions & 3 deletions proto/generation.proto
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,11 @@ message TokenInfo {
message BatchedTokenizeRequest {
string model_id = 1;
repeated TokenizeRequest requests = 2;
bool return_tokens = 3; //TBD
bool return_tokens = 3;
bool return_offsets = 4;

// Zero means don't truncate.
uint32 truncate_input_tokens = 5;
}

message BatchedTokenizeResponse {
Expand All @@ -209,10 +213,17 @@ message TokenizeRequest {
}

message TokenizeResponse {
message Offset {
uint32 start = 1;
uint32 end = 2;
}

uint32 token_count = 1;
repeated string tokens = 2; // if include_tokens = true

// We'll possibly add more later
// if return_tokens = true
repeated string tokens = 2;
// if return_offsets = true
repeated Offset offsets = 3;
}


Expand Down
8 changes: 8 additions & 0 deletions vllm/entrypoints/grpc/grpc_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,14 @@ async def _validate_prompt_and_tokenize(
@log_rpc_handler_errors
async def Tokenize(self, request: BatchedTokenizeRequest,
context: ServicerContext) -> BatchedTokenizeResponse:
#TODO implement return_offsets and truncate_input_tokens support
if request.return_offsets:
await context.abort(StatusCode.INVALID_ARGUMENT,
"return_offsets not yet supported")
if request.truncate_input_tokens:
await context.abort(StatusCode.INVALID_ARGUMENT,
"truncate_input_tokens not yet supported")

responses: List[TokenizeResponse] = []

#TODO maybe parallelize, also move convert_ids_to_tokens
Expand Down

0 comments on commit 8620421

Please sign in to comment.