Concurrency in stable-diffusion image generation #1475
base: master
@@ -0,0 +1,88 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "openvino/genai/image_generation/text2image_pipeline.hpp"

#include "imwrite.hpp"
#include <thread>
#include <future>

void runPipeline(std::string prompt, std::filesystem::path root_dir, ov::genai::CLIPTextModel & text_encoder, ov::genai::UNet2DConditionModel & unet, ov::genai::AutoencoderKL & vae, std::promise<ov::Tensor> & Tensor_prm) {
    std::cout << "create pipeline " << prompt << std::endl;
    auto scheduler = ov::genai::Scheduler::from_config(root_dir / "scheduler/scheduler_config.json");
    auto pipe2 = ov::genai::Text2ImagePipeline::stable_diffusion(scheduler, text_encoder, unet, vae);
    std::cout << "start generate " << prompt << std::endl;
    try {
        ov::Tensor image = pipe2.generate(prompt,
            ov::genai::width(512),
            ov::genai::height(512),
            ov::genai::guidance_scale(0.75f),
            ov::genai::num_inference_steps(10));
        Tensor_prm.set_value(image);
        std::cout << "finished generate" << std::endl;
    }
    catch (const std::exception& error) {
        try {
            std::cerr << error.what() << '\n';
        } catch (const std::ios_base::failure&) {}
    } catch (...) {
        try {
            std::cerr << "Non-exception object thrown\n";
        } catch (const std::ios_base::failure&) {}
    }
}

int32_t main(int32_t argc, char* argv[]) try {
    OPENVINO_ASSERT(argc == 2, "Usage: ", argv[0], " <MODEL_DIR>");

    const std::string models_path = argv[1];
    std::filesystem::path root_dir = models_path;
    const std::string device = "CPU";  // GPU can be used as well
    auto scheduler = ov::genai::Scheduler::from_config(root_dir / "scheduler/scheduler_config.json");
    auto text_encoder = ov::genai::CLIPTextModel(root_dir / "text_encoder");
    text_encoder.compile("CPU");
    auto unet = ov::genai::UNet2DConditionModel(root_dir / "unet");
    if (device == "NPU") {
        // The max_position_embeddings config from the text encoder is used as a parameter for the UNet reshape.
        int max_position_embeddings = text_encoder.get_config().max_position_embeddings;
        unet.reshape(1, 512, 512, max_position_embeddings);
    }
    unet.compile("CPU");

    auto vae = ov::genai::AutoencoderKL(root_dir / "vae_decoder");
    vae.compile("CPU");
    std::cout << "models loaded" << std::endl;

    std::promise<ov::Tensor> Tensor1_prm;
    std::promise<ov::Tensor> Tensor2_prm;

    std::thread t1(&runPipeline, std::string("a bucket of red roses"), root_dir, std::ref(text_encoder), std::ref(unet), std::ref(vae), std::ref(Tensor1_prm));
    std::thread t2(&runPipeline, std::string("a glass of water on a wooden table"), root_dir, std::ref(text_encoder), std::ref(unet), std::ref(vae), std::ref(Tensor2_prm));

    std::cout << "threads started" << std::endl;
    std::future<ov::Tensor> T1_ftr = Tensor1_prm.get_future();
    std::future<ov::Tensor> T2_ftr = Tensor2_prm.get_future();

    ov::Tensor image1 = T1_ftr.get();
    ov::Tensor image2 = T2_ftr.get();
    t1.join();
    t2.join();
    // writes `num_images_per_prompt` images by pattern name
    imwrite("image1_%d.bmp", image1, true);
    imwrite("image2_%d.bmp", image2, true);

    return EXIT_SUCCESS;
} catch (const std::exception& error) {
    try {
        std::cerr << error.what() << '\n';
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
} catch (...) {
    try {
        std::cerr << "Non-exception object thrown\n";
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
}
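For completeness, the same fan-out can also be expressed without manually pairing std::thread with std::promise: std::async returns the std::future directly and forwards any exception thrown inside generate() to the corresponding .get() call. This is a sketch only, not part of the PR, and it reuses root_dir, text_encoder, unet and vae from the sample's main() above:

    // Sketch: per-thread scheduler + pipeline exactly as in the sample, but std::async
    // owns both the worker thread and the future; exceptions surface at .get().
    auto generate_async = [&](std::string prompt) {
        return std::async(std::launch::async, [&, prompt]() {
            auto scheduler = ov::genai::Scheduler::from_config(root_dir / "scheduler/scheduler_config.json");
            auto pipe = ov::genai::Text2ImagePipeline::stable_diffusion(scheduler, text_encoder, unet, vae);
            return pipe.generate(prompt,
                                 ov::genai::width(512),
                                 ov::genai::height(512),
                                 ov::genai::num_inference_steps(10));
        });
    };

    std::future<ov::Tensor> f1 = generate_async("a bucket of red roses");
    std::future<ov::Tensor> f2 = generate_async("a glass of water on a wooden table");
    imwrite("image1_%d.bmp", f1.get(), true);
    imwrite("image2_%d.bmp", f2.get(), true);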
@@ -29,6 +29,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL {
        std::vector<size_t> block_out_channels = { 64 };

        explicit Config(const std::filesystem::path& config_path);
        Config() = default;

Review comment: why is it required? I think you can initialize …

    };

    explicit AutoencoderKL(const std::filesystem::path& vae_decoder_path);

@@ -140,6 +141,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL {

    Config m_config;
    ov::InferRequest m_encoder_request, m_decoder_request;
    std::shared_ptr<ov::CompiledModel> encoder_compiled_model, decoder_compiled_model;
    std::shared_ptr<ov::Model> m_encoder_model = nullptr, m_decoder_model = nullptr;
};
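For context on the new members: an ov::CompiledModel can be shared between object copies through a std::shared_ptr, while each copy creates its own ov::InferRequest from it, and independent requests created from one compiled model may run concurrently. A minimal standalone sketch of that pattern (the model path is a placeholder):

    #include <openvino/openvino.hpp>
    #include <memory>

    int main() {
        ov::Core core;
        // Placeholder path: any OpenVINO IR model works for this illustration.
        auto compiled = std::make_shared<ov::CompiledModel>(core.compile_model("model.xml", "CPU"));

        // The compiled model (weights, device state) is shared; each holder creates
        // its own infer request and can run it on its own thread.
        ov::InferRequest request_a = compiled->create_infer_request();
        ov::InferRequest request_b = compiled->create_infer_request();
        return 0;
    }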
@@ -175,7 +175,17 @@ AutoencoderKL::AutoencoderKL(const std::string& vae_encoder_model,
    }
}

AutoencoderKL::AutoencoderKL(const AutoencoderKL&) = default;
AutoencoderKL::AutoencoderKL(const AutoencoderKL& original_model) {
    encoder_compiled_model = original_model.encoder_compiled_model;
    decoder_compiled_model = original_model.decoder_compiled_model;
    m_decoder_request = original_model.decoder_compiled_model->create_infer_request();

Review comment: what if the model is not compiled yet?

    if (m_encoder_model) {
        m_decoder_request = decoder_compiled_model->create_infer_request();
    }
    m_encoder_model = original_model.m_encoder_model;
    m_decoder_model = original_model.m_decoder_model;
    m_config = original_model.m_config;

Review comment: it does not look safe that the copy constructor performs infer request creation. We have code like:

    StableDiffusionPipeline(
        PipelineType pipeline_type,
        const CLIPTextModel& clip_text_model,
        const UNet2DConditionModel& unet,
        const AutoencoderKL& vae)
        : StableDiffusionPipeline(pipeline_type) {
        m_clip_text_encoder = std::make_shared<CLIPTextModel>(clip_text_model); // LEADS TO RE-CREATION OF REQUEST
        m_unet = std::make_shared<UNet2DConditionModel>(unet); // LEADS TO RE-CREATION OF REQUEST
        m_vae = std::make_shared<AutoencoderKL>(vae); // LEADS TO RE-CREATION OF REQUEST

        const bool is_lcm = m_unet->get_config().time_cond_proj_dim > 0;
        const char * const pipeline_name = is_lcm ? "LatentConsistencyModelPipeline" : "StableDiffusionPipeline";
        initialize_generation_config(pipeline_name);
    }

which means the inference request will be re-created, while we don't have such a goal.

}

AutoencoderKL& AutoencoderKL::reshape(int batch_size, int height, int width) {
    OPENVINO_ASSERT(m_decoder_model, "Model has been already compiled. Cannot reshape already compiled model");

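One way to address both review comments above, sketched here with a hypothetical ensure_decoder_request() helper and a new bool member (neither exists in this PR): let the copy constructor copy only the shared pointers, and create the infer request lazily behind an explicit check that compile() has been called.

    // Sketch: the copy shares the compiled models but creates no infer requests.
    AutoencoderKL::AutoencoderKL(const AutoencoderKL& other)
        : m_config(other.m_config),
          encoder_compiled_model(other.encoder_compiled_model),
          decoder_compiled_model(other.decoder_compiled_model),
          m_encoder_model(other.m_encoder_model),
          m_decoder_model(other.m_decoder_model) {
        // No create_infer_request() here: an uncompiled source has nothing to create a
        // request from, and copies made only to be stored (see the StableDiffusionPipeline
        // constructor quoted above) do not need a request at all.
    }

    // Hypothetical helper called by decode() before the first inference;
    // m_decoder_request_created would be a new bool member added for this purpose.
    void AutoencoderKL::ensure_decoder_request() {
        if (!m_decoder_request_created) {
            OPENVINO_ASSERT(decoder_compiled_model, "AutoencoderKL: call compile() before running inference");
            m_decoder_request = decoder_compiled_model->create_infer_request();
            m_decoder_request_created = true;
        }
    }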
@@ -207,16 +217,16 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMap& properties) {
    ov::Core core = utils::singleton_core();

    if (m_encoder_model) {
        ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, properties);
        ov::genai::utils::print_compiled_model_properties(encoder_compiled_model, "Auto encoder KL encoder model");
        m_encoder_request = encoder_compiled_model.create_infer_request();
        encoder_compiled_model = std::make_shared<ov::CompiledModel>(core.compile_model(m_encoder_model, device, properties));
        ov::genai::utils::print_compiled_model_properties(*encoder_compiled_model, "Auto encoder KL encoder model");
        m_encoder_request = encoder_compiled_model->create_infer_request();
        // release the original model
        m_encoder_model.reset();
    }

    ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, properties);
    ov::genai::utils::print_compiled_model_properties(decoder_compiled_model, "Auto encoder KL decoder model");
    m_decoder_request = decoder_compiled_model.create_infer_request();
    decoder_compiled_model = std::make_shared<ov::CompiledModel>(core.compile_model(m_decoder_model, device, properties));
    ov::genai::utils::print_compiled_model_properties(*decoder_compiled_model, "Auto encoder KL decoder model");
    m_decoder_request = decoder_compiled_model->create_infer_request();
    // release the original model
    m_decoder_model.reset();

Review comment: the problem with such an approach is that it will be hard to apply LoRA adapters here in the generic case. E.g. SD 1.5 has a simple LoRA configuration, while FLUX and other more complex models require code like this: https://github.com/openvinotoolkit/openvino.genai/pull/1602/files
An alternative approach is to have an API like: […]
In this case all the complexity with LoRA is hidden inside, and even clients can use the same API (e.g. generate different images with different LoRAs / alphas in parallel).
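The API snippet the reviewer refers to was not captured above. Purely as a hypothetical illustration of the idea, assuming the Adapter / adapters() LoRA properties behave as in the GenAI LoRA samples, using an illustrative clone() name for a cheap per-thread pipeline copy (not an existing method in this PR), and taking models_path from the sample's main():

    // Hypothetical sketch: LoRA handling stays inside the pipeline, and every worker
    // calls the same generate() API with its own alpha.
    ov::genai::Adapter lora("soulcard.safetensors");  // placeholder adapter file
    ov::genai::Text2ImagePipeline pipe(models_path, "CPU", ov::genai::adapters(lora, 0.6f));

    auto generate_with_alpha = [&](const std::string& prompt, float alpha) {
        auto local = pipe.clone();  // illustrative: per-thread copy sharing compiled models
        return local.generate(prompt,
                              ov::genai::adapters(lora, alpha),
                              ov::genai::num_inference_steps(20));
    };

    auto f1 = std::async(std::launch::async, generate_with_alpha, "a bucket of red roses", 0.6f);
    auto f2 = std::async(std::launch::async, generate_with_alpha, "a bucket of red roses", 0.9f);
    ov::Tensor image_alpha_06 = f1.get();
    ov::Tensor image_alpha_09 = f2.get();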