Skip to content

Commit

Permalink
Update to latest gRPC proto API with tokenize additions
Browse files Browse the repository at this point in the history
  • Loading branch information
njhill committed Mar 29, 2024
1 parent 291ca82 commit 8620421
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
target_path := "vllm/entrypoints/grpc/pb"
gen-protos:
# Compile protos
pip install grpcio-tools==1.62.0 mypy-protobuf==3.5.0 'types-protobuf>=3.20.4'
pip install grpcio-tools==1.62.1 mypy-protobuf==3.5.0 'types-protobuf>=3.20.4'
mkdir -p $(target_path)
python -m grpc_tools.protoc -Iproto --python_out=$(target_path) \
--grpc_python_out=$(target_path) --mypy_out=$(target_path) proto/generation.proto
Expand Down
17 changes: 14 additions & 3 deletions proto/generation.proto
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,11 @@ message TokenInfo {
message BatchedTokenizeRequest {
string model_id = 1;
repeated TokenizeRequest requests = 2;
bool return_tokens = 3; //TBD
bool return_tokens = 3;
bool return_offsets = 4;

// Zero means don't truncate.
uint32 truncate_input_tokens = 5;
}

message BatchedTokenizeResponse {
Expand All @@ -209,10 +213,17 @@ message TokenizeRequest {
}

message TokenizeResponse {
message Offset {
uint32 start = 1;
uint32 end = 2;
}

uint32 token_count = 1;
repeated string tokens = 2; // if include_tokens = true

// We'll possibly add more later
// if return_tokens = true
repeated string tokens = 2;
// if return_offsets = true
repeated Offset offsets = 3;
}


Expand Down
8 changes: 8 additions & 0 deletions vllm/entrypoints/grpc/grpc_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,14 @@ async def _validate_prompt_and_tokenize(
@log_rpc_handler_errors
async def Tokenize(self, request: BatchedTokenizeRequest,
context: ServicerContext) -> BatchedTokenizeResponse:
#TODO implement return_offsets and truncate_input_tokens support
if request.return_offsets:
await context.abort(StatusCode.INVALID_ARGUMENT,
"return_offsets not yet supported")
if request.truncate_input_tokens:
await context.abort(StatusCode.INVALID_ARGUMENT,
"truncate_input_tokens not yet supported")

responses: List[TokenizeResponse] = []

#TODO maybe parallelize, also move convert_ids_to_tokens
Expand Down

0 comments on commit 8620421

Please sign in to comment.