diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index 5072740240e2a5..a8224c2e363f62 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -530,6 +530,7 @@ void program::init_graph() {
         node->get_output_layouts();
         if (node->is_type()) {
             _config.set_property(ov::intel_gpu::use_onednn(true));
+            _config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
         }
     }
     // Perform initial shape_of subgraphs markup
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index f98ffd0128bf6a..597b13a750e49a 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -238,10 +238,12 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     auto context_impl = get_context_impl(context);
     auto device_id = ov::DeviceIDParser{context_impl->get_device_name()}.get_device_id();

-    OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id);
+    OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id);

     ExecutionConfig config = m_configs_map.at(device_id);
     config.set_user_property(orig_config);
+    if (model->has_rt_info("runtime_options"))
+        config.apply_rt_info(context_impl->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model));
     config.apply_user_properties(context_impl->get_engine().get_device_info());

     set_cache_info(model, config);
diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
index c78bde1b92f5ad..2376b8fba10a33 100644
--- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp
+++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -264,7 +264,7 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
         }
     }

-    if (!is_set_by_user(ov::hint::kv_cache_precision) || get_property(ov::hint::kv_cache_precision) == ov::element::undefined) {
+    if (!is_set_by_user(ov::hint::kv_cache_precision)) {
         if (info.supports_immad) { // MFDNN-11755
             set_property(ov::hint::kv_cache_precision(get_property(ov::hint::inference_precision)));
         } else {
@@ -274,7 +274,8 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
     }

     // Enable dynamic quantization by default for non-systolic platforms
-    if (get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) {
+    if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) &&
+        get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) {
         set_property(ov::hint::dynamic_quantization_group_size(32));
     }
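
Note (not part of the diff): a minimal sketch of how the changed defaults behave from the application side, assuming the public OpenVINO 2.x C++ API; the model path and the property values below are placeholders. Defaults embedded in the model under the "runtime_options" rt_info key are consumed by the new apply_rt_info() call, while properties passed explicitly by the user still take precedence because of the added is_set_by_user() checks.

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path

    // Optional: ship a default with the model itself; the plugin now reads the
    // "runtime_options" rt_info section via ExecutionConfig::apply_rt_info().
    model->set_rt_info(ov::element::u8, "runtime_options", ov::hint::kv_cache_precision.name());

    // Explicit user properties override both rt_info and plugin defaults:
    // with the is_set_by_user() checks, a group size of 0 keeps dynamic
    // quantization disabled even on non-systolic platforms, and an explicitly
    // chosen kv_cache_precision is no longer replaced by the plugin default.
    auto compiled = core.compile_model(model, "GPU",
                                       ov::hint::kv_cache_precision(ov::element::f16),
                                       ov::hint::dynamic_quantization_group_size(0));
    return 0;
}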