Skip to content

Commit

Permalink
fix: Handle case where server drops connection. Add special tokens to tokenizer count.
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugoch committed Oct 10, 2024
1 parent b7c3347 commit 0af45e1
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ pub async fn run(run_config: RunConfiguration, stop_sender: Sender<()>) -> anyho
report = benchmark.run() => {
match report {
Ok(results) => {
info!("Throughput is {requests_throughput} req/s",requests_throughput = results.get_results()[0].successful_request_rate().unwrap());
let report = benchmark.get_report();
let path = format!("results/{}_{}.json",run_config.tokenizer_name.replace("/","_").replace(".","_"), chrono::Utc::now().format("%Y-%m-%d-%H-%M-%S"));
let path=Path::new(&path);
Expand Down
7 changes: 6 additions & 1 deletion src/requests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ impl TextGenerationBackend for OpenAITextGenerationBackend {
// we need to count the number of tokens generated as each delta chunk may contain multiple tokens
// that's the case with vLLM chunked prefill or speculative decoding
let num_tokens =
self.tokenizer.encode(content.clone(), false).unwrap().len() as u64;
self.tokenizer.encode(content.clone(), true).unwrap().len() as u64;
if num_tokens > 1 {
warn!(
"Generated more than one token: {num_tokens}",
Expand Down Expand Up @@ -242,6 +242,11 @@ impl TextGenerationBackend for OpenAITextGenerationBackend {
// server sent no data
aggregated_response.fail();
}
if aggregated_response.end_time.is_none() {
// server closed the connection before we received the final response
warn!("Connection closed before completion");
aggregated_response.fail();
}
}
}
es.close();
Expand Down

0 comments on commit 0af45e1

Please sign in to comment.