From cf9404762403e3f73a1ffc3801b30abe37a8014f Mon Sep 17 00:00:00 2001
From: Anastasiya Pronina <anastasiya.pronina@intel.com>
Date: Wed, 15 Jan 2025 13:52:35 +0000
Subject: [PATCH] Pass pad_token_id as a model compilation parameter

---
 src/cpp/src/llm_pipeline_static.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp
index cffeedfc75..0cc2fa8fcb 100644
--- a/src/cpp/src/llm_pipeline_static.cpp
+++ b/src/cpp/src/llm_pipeline_static.cpp
@@ -754,6 +754,7 @@ void StatefulLLMPipeline::updateStatefulConfig(
     KVAxesPosition axes = get_kv_axes(model_desc.type);
     update_config(pipeline_config, {"NPUW_LLM_BATCH_DIM", axes.batch});
     update_config(pipeline_config, {"NPUW_LLM_SEQ_LEN_DIM", axes.seq_len});
+    pipeline_config["NPUW_LLM_PAD_TOKEN_ID"] = m_tokenizer.get_pad_token_id();
 
     update_config(pipeline_config, {"NPUW_LLM_MAX_PROMPT_LEN", kMaxPromptLen});
     update_config(pipeline_config, {"NPUW_LLM_MIN_RESPONSE_LEN", kMinResponseLen});