Skip to content

Commit

Permalink
fix: Handle case where server drops connection. Add special tokens to tokenizer count.
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugoch committed Oct 10, 2024
1 parent b7c3347 commit 0af45e1
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ pub async fn run(run_config: RunConfiguration, stop_sender: Sender<()>) -> anyho
report = benchmark.run() => {
match report {
Ok(results) => {
info!("Throughput is {requests_throughput} req/s",requests_throughput = results.get_results()[0].successful_request_rate().unwrap());
let report = benchmark.get_report();
let path = format!("results/{}_{}.json",run_config.tokenizer_name.replace("/","_").replace(".","_"), chrono::Utc::now().format("%Y-%m-%d-%H-%M-%S"));
let path=Path::new(&path);
Expand Down
7 changes: 6 additions & 1 deletion src/requests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ impl TextGenerationBackend for OpenAITextGenerationBackend {
// we need to count the number of tokens generated as each delta chunk may contain multiple tokens
// that's the case with vLLM chunked prefill or speculative decoding
let num_tokens =
self.tokenizer.encode(content.clone(), false).unwrap().len() as u64;
self.tokenizer.encode(content.clone(), true).unwrap().len() as u64;
if num_tokens > 1 {
warn!(
"Generated more than one token: {num_tokens}",
Expand Down Expand Up @@ -242,6 +242,11 @@ impl TextGenerationBackend for OpenAITextGenerationBackend {
// server sent no data
aggregated_response.fail();
}
if aggregated_response.end_time.is_none() {
// server closed the connection before we received the final response
warn!("Connection closed before completion");
aggregated_response.fail();
}
}
}
es.close();
Expand Down

0 comments on commit 0af45e1

Please sign in to comment.