From cf9404762403e3f73a1ffc3801b30abe37a8014f Mon Sep 17 00:00:00 2001 From: Anastasiya Pronina Date: Wed, 15 Jan 2025 13:52:35 +0000 Subject: [PATCH] Pass pad_token_id as a model compilation parameter --- src/cpp/src/llm_pipeline_static.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index cffeedfc75..0cc2fa8fcb 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -754,6 +754,7 @@ void StatefulLLMPipeline::updateStatefulConfig( KVAxesPosition axes = get_kv_axes(model_desc.type); update_config(pipeline_config, {"NPUW_LLM_BATCH_DIM", axes.batch}); update_config(pipeline_config, {"NPUW_LLM_SEQ_LEN_DIM", axes.seq_len}); + pipeline_config["NPUW_LLM_PAD_TOKEN_ID"] = m_tokenizer.get_pad_token_id(); update_config(pipeline_config, {"NPUW_LLM_MAX_PROMPT_LEN", kMaxPromptLen}); update_config(pipeline_config, {"NPUW_LLM_MIN_RESPONSE_LEN", kMinResponseLen});