diff --git a/samples/cpp/image_generation/CMakeLists.txt b/samples/cpp/image_generation/CMakeLists.txt
index 004b305088..c8c93618ca 100644
--- a/samples/cpp/image_generation/CMakeLists.txt
+++ b/samples/cpp/image_generation/CMakeLists.txt
@@ -28,6 +28,24 @@ install(TARGETS text2image
         COMPONENT samples_bin
         EXCLUDE_FROM_ALL)
 
+# create text2image sample executable with concurrency
+
+add_executable(text2image_concurrency text2image_concurrency.cpp imwrite.cpp)
+
+target_include_directories(text2image_concurrency PRIVATE ${CMAKE_BINARY_DIR} "${CMAKE_CURRENT_SOURCE_DIR}")
+target_link_libraries(text2image_concurrency PRIVATE openvino::genai)
+
+set_target_properties(text2image_concurrency PROPERTIES
+    COMPILE_PDB_NAME text2image_concurrency
+    # Ensure out of box LC_RPATH on macOS with SIP
+    INSTALL_RPATH_USE_LINK_PATH ON)
+
+install(TARGETS text2image_concurrency
+        RUNTIME DESTINATION samples_bin/
+        COMPONENT samples_bin
+        EXCLUDE_FROM_ALL)
+
 # create LoRA sample executable
 add_executable(lora_text2image lora_text2image.cpp imwrite.cpp)
diff --git a/samples/cpp/image_generation/text2image_concurrency.cpp b/samples/cpp/image_generation/text2image_concurrency.cpp
new file mode 100644
index 0000000000..091daedca1
--- /dev/null
+++ b/samples/cpp/image_generation/text2image_concurrency.cpp
@@ -0,0 +1,90 @@
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "openvino/genai/image_generation/text2image_pipeline.hpp"
+
+#include "imwrite.hpp"
+
+#include <future>
+#include <iostream>
+#include <thread>
+
+void runPipeline(std::string prompt, std::filesystem::path root_dir, ov::genai::CLIPTextModel& text_encoder, ov::genai::UNet2DConditionModel& unet, ov::genai::AutoencoderKL& vae, std::promise<ov::Tensor>& Tensor_prm) {
+    std::cout << "create pipeline " << prompt << std::endl;
+    auto scheduler = ov::genai::Scheduler::from_config(root_dir / "scheduler/scheduler_config.json");
+    auto pipe2 = ov::genai::Text2ImagePipeline::stable_diffusion(scheduler, text_encoder, unet, vae);
+    std::cout << "start generate " << prompt << std::endl;
+    try {
+        ov::Tensor image = pipe2.generate(prompt,
+            ov::genai::width(512),
+            ov::genai::height(512),
+            ov::genai::guidance_scale(0.75f),
+            ov::genai::num_inference_steps(10));
+        Tensor_prm.set_value(image);
+        std::cout << "finished generate" << std::endl;
+    } catch (const std::exception& error) {
+        // make the future in main() ready even on failure, otherwise it blocks forever
+        Tensor_prm.set_exception(std::current_exception());
+        try {
+            std::cerr << error.what() << '\n';
+        } catch (const std::ios_base::failure&) {}
+    } catch (...) {
+        Tensor_prm.set_exception(std::current_exception());
+        try {
+            std::cerr << "Non-exception object thrown\n";
+        } catch (const std::ios_base::failure&) {}
+    }
+}
+
+int32_t main(int32_t argc, char* argv[]) try {
+    OPENVINO_ASSERT(argc == 2, "Usage: ", argv[0], " <MODELS_DIR>");
+
+    const std::string models_path = argv[1];
+    std::filesystem::path root_dir = models_path;
+    const std::string device = "CPU";  // GPU can be used as well
+
+    auto text_encoder = ov::genai::CLIPTextModel(root_dir / "text_encoder");
+    text_encoder.compile(device);
+    auto unet = ov::genai::UNet2DConditionModel(root_dir / "unet");
+    if (device == "NPU") {
+        // The max_position_embeddings config from text encoder will be used as a parameter to unet reshape.
+        int max_position_embeddings = text_encoder.get_config().max_position_embeddings;
+        unet.reshape(1, 512, 512, max_position_embeddings);
+    }
+    unet.compile(device);
+
+    auto vae = ov::genai::AutoencoderKL(root_dir / "vae_decoder");
+    vae.compile(device);
+    std::cout << "models loaded" << std::endl;
+
+    std::promise<ov::Tensor> Tensor1_prm;
+    std::promise<ov::Tensor> Tensor2_prm;
+
+    std::thread t1(&runPipeline, std::string("a bucket of red roses"), root_dir, std::ref(text_encoder), std::ref(unet), std::ref(vae), std::ref(Tensor1_prm));
+    std::thread t2(&runPipeline, std::string("a glass of water on a wooden table"), root_dir, std::ref(text_encoder), std::ref(unet), std::ref(vae), std::ref(Tensor2_prm));
+
+    std::cout << "threads started" << std::endl;
+    std::future<ov::Tensor> T1_ftr = Tensor1_prm.get_future();
+    std::future<ov::Tensor> T2_ftr = Tensor2_prm.get_future();
+
+    ov::Tensor image1 = T1_ftr.get();
+    ov::Tensor image2 = T2_ftr.get();
+    t1.join();
+    t2.join();
+
+    // writes `num_images_per_prompt` images by pattern name
+    imwrite("image1_%d.bmp", image1, true);
+    imwrite("image2_%d.bmp", image2, true);
+
+    return EXIT_SUCCESS;
+} catch (const std::exception& error) {
+    try {
+        std::cerr << error.what() << '\n';
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+} catch (...) {
+    try {
+        std::cerr << "Non-exception object thrown\n";
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+}
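
The sample hard-codes two prompts and two promise/future pairs. The same pattern generalizes to N prompts with one thread and one `std::promise<ov::Tensor>` per prompt. A minimal sketch, reusing `runPipeline`, `root_dir` and the three shared models from the sample above (the prompt list is illustrative, not part of the PR):

```cpp
// Sketch only: one promise/future pair and one thread per prompt.
std::vector<std::string> prompts = {"a bucket of red roses", "a glass of water on a wooden table"};
std::vector<std::promise<ov::Tensor>> promises(prompts.size());  // pre-sized: std::ref targets must not move
std::vector<std::future<ov::Tensor>> futures;
std::vector<std::thread> threads;
for (size_t i = 0; i < prompts.size(); ++i) {
    futures.push_back(promises[i].get_future());
    threads.emplace_back(&runPipeline, prompts[i], root_dir,
                         std::ref(text_encoder), std::ref(unet), std::ref(vae), std::ref(promises[i]));
}
for (size_t i = 0; i < prompts.size(); ++i) {
    ov::Tensor image = futures[i].get();  // blocks until thread i sets the promise
    imwrite("image" + std::to_string(i + 1) + "_%d.bmp", image, true);
}
for (auto& t : threads)
    t.join();
```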
diff --git a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp
index d48661d899..1a3ac31b06 100644
--- a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp
@@ -29,6 +29,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL {
         std::vector<size_t> block_out_channels = { 64 };
 
         explicit Config(const std::filesystem::path& config_path);
+        Config() = default;
     };
 
     explicit AutoencoderKL(const std::filesystem::path& vae_decoder_path);
@@ -140,6 +141,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL {
     Config m_config;
 
     ov::InferRequest m_encoder_request, m_decoder_request;
+    std::shared_ptr<ov::CompiledModel> encoder_compiled_model, decoder_compiled_model;
     std::shared_ptr<ov::Model> m_encoder_model = nullptr, m_decoder_model = nullptr;
 };
diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp
index a3b9ebbd88..755abc220c 100644
--- a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp
@@ -25,6 +25,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel {
         size_t num_hidden_layers = 12;
 
         explicit Config(const std::filesystem::path& config_path);
+        Config() = default;
     };
 
     explicit CLIPTextModel(const std::filesystem::path& root_dir);
@@ -92,6 +93,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel {
     Config m_config;
     AdapterController m_adapter_controller;
     ov::InferRequest m_request;
+    std::shared_ptr<ov::CompiledModel> compiled_model;
     std::shared_ptr<ov::Model> m_model;
 
     Tokenizer m_clip_tokenizer;
diff --git a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp
index 4acfd2ce9b..557882a3d1 100644
--- a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp
@@ -28,6 +28,7 @@ class OPENVINO_GENAI_EXPORTS UNet2DConditionModel {
         int time_cond_proj_dim = -1;
 
         explicit Config(const std::filesystem::path& config_path);
+        Config() = default;
     };
 
     explicit UNet2DConditionModel(const std::filesystem::path& root_dir);
@@ -95,6 +96,7 @@ class OPENVINO_GENAI_EXPORTS UNet2DConditionModel {
         return guidance_scale > 1.0f && m_config.time_cond_proj_dim < 0;
     }
 
+private:
     class UNetInference;
     std::shared_ptr<UNetInference> m_impl;
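
The three header changes follow one pattern: each wrapper gains a `Config() = default;` (the explicit path-taking constructor suppresses the implicit default constructor, which the new copy constructors below rely on) and a `std::shared_ptr<ov::CompiledModel>` member so that copies can share one compiled model. A sketch of what this enables, assuming the models were compiled as in the sample above:

```cpp
// Each copy shares the underlying ov::CompiledModel but owns a fresh
// ov::InferRequest, so separate threads never contend on one request.
ov::genai::CLIPTextModel text_encoder_copy = text_encoder;
ov::genai::UNet2DConditionModel unet_copy = unet;
ov::genai::AutoencoderKL vae_copy = vae;
```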
diff --git a/src/cpp/src/image_generation/models/autoencoder_kl.cpp b/src/cpp/src/image_generation/models/autoencoder_kl.cpp
index ab8b87a13e..6cfb94bbb6 100644
--- a/src/cpp/src/image_generation/models/autoencoder_kl.cpp
+++ b/src/cpp/src/image_generation/models/autoencoder_kl.cpp
@@ -175,7 +175,19 @@ AutoencoderKL::AutoencoderKL(const std::string& vae_encoder_model,
     }
 }
 
-AutoencoderKL::AutoencoderKL(const AutoencoderKL&) = default;
+AutoencoderKL::AutoencoderKL(const AutoencoderKL& original_model) {
+    m_config = original_model.m_config;
+    m_encoder_model = original_model.m_encoder_model;
+    m_decoder_model = original_model.m_decoder_model;
+    encoder_compiled_model = original_model.encoder_compiled_model;
+    decoder_compiled_model = original_model.decoder_compiled_model;
+    if (encoder_compiled_model) {
+        m_encoder_request = encoder_compiled_model->create_infer_request();
+    }
+    if (decoder_compiled_model) {
+        m_decoder_request = decoder_compiled_model->create_infer_request();
+    }
+}
 
 AutoencoderKL& AutoencoderKL::reshape(int batch_size, int height, int width) {
     OPENVINO_ASSERT(m_decoder_model, "Model has been already compiled. Cannot reshape already compiled model");
@@ -207,16 +219,16 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMap& properties) {
     ov::Core core = utils::singleton_core();
 
     if (m_encoder_model) {
-        ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, properties);
-        ov::genai::utils::print_compiled_model_properties(encoder_compiled_model, "Auto encoder KL encoder model");
-        m_encoder_request = encoder_compiled_model.create_infer_request();
+        encoder_compiled_model = std::make_shared<ov::CompiledModel>(core.compile_model(m_encoder_model, device, properties));
+        ov::genai::utils::print_compiled_model_properties(*encoder_compiled_model, "Auto encoder KL encoder model");
+        m_encoder_request = encoder_compiled_model->create_infer_request();
         // release the original model
         m_encoder_model.reset();
     }
 
-    ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, properties);
-    ov::genai::utils::print_compiled_model_properties(decoder_compiled_model, "Auto encoder KL decoder model");
-    m_decoder_request = decoder_compiled_model.create_infer_request();
+    decoder_compiled_model = std::make_shared<ov::CompiledModel>(core.compile_model(m_decoder_model, device, properties));
+    ov::genai::utils::print_compiled_model_properties(*decoder_compiled_model, "Auto encoder KL decoder model");
+    m_decoder_request = decoder_compiled_model->create_infer_request();
 
     // release the original model
     m_decoder_model.reset();
diff --git a/src/cpp/src/image_generation/models/clip_text_model.cpp b/src/cpp/src/image_generation/models/clip_text_model.cpp
index c49bd5f000..20e37ce477 100644
--- a/src/cpp/src/image_generation/models/clip_text_model.cpp
+++ b/src/cpp/src/image_generation/models/clip_text_model.cpp
@@ -65,7 +65,17 @@ CLIPTextModel::CLIPTextModel(const std::string& model,
     compile(device, properties);
 }
 
-CLIPTextModel::CLIPTextModel(const CLIPTextModel&) = default;
+CLIPTextModel::CLIPTextModel(const CLIPTextModel& origin_model) {
+    m_config = origin_model.m_config;
+    m_adapter_controller = origin_model.m_adapter_controller;
+    m_model = origin_model.m_model;
+    m_clip_tokenizer = origin_model.m_clip_tokenizer;
+    compiled_model = origin_model.compiled_model;
+    if (compiled_model) {
+        // the copy shares the compiled model but owns its own infer request
+        m_request = compiled_model->create_infer_request();
+    }
+}
 
 const CLIPTextModel::Config& CLIPTextModel::get_config() const {
     return m_config;
@@ -86,17 +96,16 @@ CLIPTextModel& CLIPTextModel::reshape(int batch_size) {
 CLIPTextModel& CLIPTextModel::compile(const std::string& device, const ov::AnyMap& properties) {
     OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
     ov::Core core = utils::singleton_core();
-    ov::CompiledModel compiled_model;
     std::optional<AdapterConfig> adapters;
     if (auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) {
         adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("lora_te"));
         m_adapter_controller = AdapterController(m_model, *adapters, device);
-        compiled_model = core.compile_model(m_model, device, *filtered_properties);
+        compiled_model = std::make_shared<ov::CompiledModel>(core.compile_model(m_model, device, *filtered_properties));
     } else {
-        compiled_model = core.compile_model(m_model, device, properties);
+        compiled_model = std::make_shared<ov::CompiledModel>(core.compile_model(m_model, device, properties));
     }
-    ov::genai::utils::print_compiled_model_properties(compiled_model, "Clip Text model");
-    m_request = compiled_model.create_infer_request();
+    ov::genai::utils::print_compiled_model_properties(*compiled_model, "Clip Text model");
+    m_request = compiled_model->create_infer_request();
 
     // release the original model
     m_model.reset();
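
Both copy constructors rely on the same OpenVINO property: an `ov::CompiledModel` can be shared across threads, and each thread creates its own `ov::InferRequest` from it. A stand-alone sketch of that pattern (the model path and device are placeholders):

```cpp
#include <openvino/openvino.hpp>

#include <memory>
#include <thread>

int main() {
    ov::Core core;
    // Compile once; the resulting ov::CompiledModel is safe to share between threads.
    auto compiled = std::make_shared<ov::CompiledModel>(
        core.compile_model("model.xml", "CPU"));  // placeholder path and device

    auto worker = [compiled] {
        // One infer request per thread; requests themselves are never shared.
        ov::InferRequest request = compiled->create_infer_request();
        // ... set input tensors, request.infer(), read output tensors ...
    };

    std::thread t1(worker), t2(worker);
    t1.join();
    t2.join();
}
```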
diff --git a/src/cpp/src/image_generation/models/unet2d_condition_model.cpp b/src/cpp/src/image_generation/models/unet2d_condition_model.cpp
index ef35709761..a0dd7f1f56 100644
--- a/src/cpp/src/image_generation/models/unet2d_condition_model.cpp
+++ b/src/cpp/src/image_generation/models/unet2d_condition_model.cpp
@@ -59,7 +59,20 @@ UNet2DConditionModel::UNet2DConditionModel(const std::string& model,
     compile(device, properties);
 }
 
-UNet2DConditionModel::UNet2DConditionModel(const UNet2DConditionModel&) = default;
+UNet2DConditionModel::UNet2DConditionModel(const UNet2DConditionModel& original_model) {
+    m_config = original_model.m_config;
+    m_adapter_controller = original_model.m_adapter_controller;
+    m_model = original_model.m_model;
+    m_vae_scale_factor = original_model.m_vae_scale_factor;
+    if (original_model.m_impl) {
+        // dispatch on the dynamic type of the pointee, not on the shared_ptr handle
+        if (typeid(*original_model.m_impl) == typeid(UNet2DConditionModel::UNetInferenceStaticBS1)) {
+            m_impl = std::make_shared<UNet2DConditionModel::UNetInferenceStaticBS1>(original_model.m_impl->get_compiled_model());
+        } else {
+            m_impl = std::make_shared<UNet2DConditionModel::UNetInferenceDynamic>(original_model.m_impl->get_compiled_model());
+        }
+    }
+}
 
 const UNet2DConditionModel::Config& UNet2DConditionModel::get_config() const {
     return m_config;
diff --git a/src/cpp/src/image_generation/models/unet_inference.hpp b/src/cpp/src/image_generation/models/unet_inference.hpp
index ae928aac30..cf7dc03d6b 100644
--- a/src/cpp/src/image_generation/models/unet_inference.hpp
+++ b/src/cpp/src/image_generation/models/unet_inference.hpp
@@ -15,6 +15,7 @@ class UNet2DConditionModel::UNetInference {
     virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) = 0;
     virtual void set_adapters(AdapterController& adapter_controller, const AdapterConfig& adapters) = 0;
    virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) = 0;
+    virtual std::shared_ptr<ov::CompiledModel> get_compiled_model() = 0;
 
     // utility function to resize model given optional dimensions.
     static void reshape(std::shared_ptr<ov::Model> model,
@@ -62,6 +63,8 @@ class UNet2DConditionModel::UNetInference {
         model->reshape(name_to_shape);
     }
+    UNetInference(const UNetInference&) = default;
+    UNetInference() = default;
 };
 
 } // namespace genai
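
The UNet copy constructor above applies `typeid` to the pointee (`*original_model.m_impl`) rather than to the `shared_ptr` member: `typeid` on the smart pointer itself yields the static type of the handle and can never distinguish the two `UNetInference` subclasses. A self-contained illustration:

```cpp
#include <memory>
#include <typeinfo>

struct Base { virtual ~Base() = default; };  // polymorphic, so typeid(*p) is evaluated at runtime
struct Derived : Base {};

int main() {
    std::shared_ptr<Base> p = std::make_shared<Derived>();
    bool handle  = typeid(p)  == typeid(std::shared_ptr<Derived>);  // false: static type of the handle
    bool pointee = typeid(*p) == typeid(Derived);                   // true: dynamic type of the pointee
    return (!handle && pointee) ? 0 : 1;  // exits with 0
}
```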
diff --git a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp
index dd265e3eca..de14f34d43 100644
--- a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp
+++ b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp
@@ -12,10 +12,14 @@ namespace genai {
 
 class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel::UNetInference {
 public:
-    virtual void compile(std::shared_ptr<ov::Model> model, const std::string& device, const ov::AnyMap& properties) override {
-        ov::CompiledModel compiled_model = utils::singleton_core().compile_model(model, device, properties);
-        ov::genai::utils::print_compiled_model_properties(compiled_model, "UNet 2D Condition dynamic model");
-        m_request = compiled_model.create_infer_request();
+
+    virtual void compile(std::shared_ptr<ov::Model> model, const std::string& device, const ov::AnyMap& properties) override
+    {
+        ov::Core core = utils::singleton_core();
+
+        compiled_model = std::make_shared<ov::CompiledModel>(core.compile_model(model, device, properties));
+        ov::genai::utils::print_compiled_model_properties(*compiled_model, "UNet 2D Condition dynamic model");
+        m_request = compiled_model->create_infer_request();
     }
 
     virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) override {
@@ -30,17 +34,29 @@ class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel::UNetInference {
     virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) override {
         OPENVINO_ASSERT(m_request, "UNet model must be compiled first. Cannot infer non-compiled model");
 
         m_request.set_tensor("sample", sample);
         m_request.set_tensor("timestep", timestep);
 
         m_request.infer();
 
         return m_request.get_output_tensor();
     }
 
+    UNetInferenceDynamic(std::shared_ptr<ov::CompiledModel> origin_compiled_model) {
+        compiled_model = origin_compiled_model;
+        m_request = compiled_model->create_infer_request();
+    }
+
+    UNetInferenceDynamic() = default;
+
+    std::shared_ptr<ov::CompiledModel> get_compiled_model() override {
+        return compiled_model;
+    }
+
 private:
     ov::InferRequest m_request;
+    std::shared_ptr<ov::CompiledModel> compiled_model;
 };
 
 } // namespace genai
Cannot infer non-compiled model"); - m_request.set_tensor("sample", sample); m_request.set_tensor("timestep", timestep); - + ov::CompiledModel test = m_request.get_compiled_model(); + ov::genai::utils::print_compiled_model_properties(test, "UNet 2D Condition TEST"); m_request.infer(); - return m_request.get_output_tensor(); } + UNetInferenceDynamic(std::shared_ptr origin_compiled_model){ + compiled_model = origin_compiled_model; + m_request = compiled_model->create_infer_request(); + } + + UNetInferenceDynamic() = default; + + std::shared_ptr get_compiled_model(){ + return compiled_model; + } + private: ov::InferRequest m_request; + std::shared_ptr compiled_model; }; } // namespace genai diff --git a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp index f63a8ea237..ef6f4f8fff 100644 --- a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp @@ -13,6 +13,17 @@ namespace genai { // Static Batch-Size 1 variant of UNetInference class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel::UNetInference { public: + UNetInferenceStaticBS1() = default; + + UNetInferenceStaticBS1(const std::shared_ptr & origin_compiled_model){ + OPENVINO_ASSERT(origin_compiled_model, "Source model must be compiled first"); + compiled_model = origin_compiled_model; + m_native_batch_size = compiled_model->input("sample").get_shape()[0]; + for (int i = 0; i < m_native_batch_size; i++) { + m_requests[i] = compiled_model->create_infer_request(); + } + } + virtual void compile(std::shared_ptr model, const std::string& device, const ov::AnyMap& properties) override { @@ -39,11 +50,12 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel UNetInference::reshape(model, 1); ov::Core core = utils::singleton_core(); - ov::CompiledModel compiled_model = core.compile_model(model, device, properties); - ov::genai::utils::print_compiled_model_properties(compiled_model, "UNet 2D Condition batch-1 model"); + compiled_model = std::make_shared(core.compile_model(model, device, properties)); + ov::genai::utils::print_compiled_model_properties(*compiled_model, "UNet 2D Condition batch-1 model"); - for (int i = 0; i < m_native_batch_size; i++) { - m_requests[i] = compiled_model.create_infer_request(); + for (int i = 0; i < m_native_batch_size; i++) + { + m_requests[i] = compiled_model->create_infer_request(); } } @@ -135,10 +147,15 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel return out_sample; } + std::shared_ptr get_compiled_model(){ + return compiled_model; + } private: + std::shared_ptr compiled_model; std::vector m_requests; size_t m_native_batch_size = 0; + }; } // namespace genai diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index 52faae02e9..d90baf8c89 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -386,6 +386,12 @@ void print_compiled_model_properties(ov::CompiledModel& compiled_Model, const ch std::cout << " " << cfg << ": " << prop.as() << std::endl; } } + for (const auto& input : compiled_Model.inputs()) { + std::cout << "Input name: " << input.get_any_name() << ", shape: " << input.get_partial_shape().to_string() << std::endl; + } + for (const auto& out : compiled_Model.outputs()) { + std::cout << "Output name: " << out.get_any_name() << ", shape: " << out.get_partial_shape().to_string() << std::endl; + } ov::Core core; std::vector 
diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp
index 52faae02e9..d90baf8c89 100644
--- a/src/cpp/src/utils.cpp
+++ b/src/cpp/src/utils.cpp
@@ -386,6 +386,12 @@ void print_compiled_model_properties(ov::CompiledModel& compiled_Model, const char* model_title) {
             std::cout << "  " << cfg << ": " << prop.as<std::string>() << std::endl;
         }
     }
+    for (const auto& input : compiled_Model.inputs()) {
+        std::cout << "Input name: " << input.get_any_name() << ", shape: " << input.get_partial_shape().to_string() << std::endl;
+    }
+    for (const auto& out : compiled_Model.outputs()) {
+        std::cout << "Output name: " << out.get_any_name() << ", shape: " << out.get_partial_shape().to_string() << std::endl;
+    }
 
     ov::Core core;
     std::vector<std::string> exeTargets;