Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Image Generation] Image2Image for FLUX #1621

Merged
merged 3 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion SUPPORTED_MODELS.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
<tr>
<td><code>Flux</code></td>
<td>Supported</td>
<td>Not supported</td>
<td>Supported</td>
<td>Not supported</td>
<td>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ class OPENVINO_GENAI_EXPORTS Image2ImagePipeline {
const UNet2DConditionModel& unet,
const AutoencoderKL& vae);

// creates Flux pipeline from building blocks
static Image2ImagePipeline flux(
ilya-lavrenov marked this conversation as resolved.
Show resolved Hide resolved
const std::shared_ptr<Scheduler>& scheduler,
const CLIPTextModel& clip_text_model,
const T5EncoderModel t5_encoder_model,
const FluxTransformer2DModel& transformer,
const AutoencoderKL& vae);

ImageGenerationConfig get_generation_config() const;
void set_generation_config(const ImageGenerationConfig& generation_config);

Expand Down
150 changes: 85 additions & 65 deletions src/cpp/src/image_generation/flux_pipeline.hpp

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions src/cpp/src/image_generation/image2image_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "image_generation/stable_diffusion_pipeline.hpp"
#include "image_generation/stable_diffusion_xl_pipeline.hpp"
#include "image_generation/flux_pipeline.hpp"

#include "utils.hpp"

Expand All @@ -22,6 +23,8 @@ Image2ImagePipeline::Image2ImagePipeline(const std::filesystem::path& root_dir)
m_impl = std::make_shared<StableDiffusionPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir);
} else if (class_name == "StableDiffusionXLPipeline") {
m_impl = std::make_shared<StableDiffusionXLPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir);
} else if (class_name == "FluxPipeline") {
m_impl = std::make_shared<FluxPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir);
} else {
OPENVINO_THROW("Unsupported image to image generation pipeline '", class_name, "'");
}
Expand All @@ -34,6 +37,8 @@ Image2ImagePipeline::Image2ImagePipeline(const std::filesystem::path& root_dir,
m_impl = std::make_shared<StableDiffusionPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir, device, properties);
} else if (class_name == "StableDiffusionXLPipeline") {
m_impl = std::make_shared<StableDiffusionXLPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir, device, properties);
} else if (class_name == "FluxPipeline") {
m_impl = std::make_shared<FluxPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir, device, properties);
} else {
OPENVINO_THROW("Unsupported image to image generation pipeline '", class_name, "'");
}
Expand All @@ -44,6 +49,8 @@ Image2ImagePipeline::Image2ImagePipeline(const InpaintingPipeline& pipe) {
m_impl = std::make_shared<StableDiffusionXLPipeline>(PipelineType::IMAGE_2_IMAGE, *stable_diffusion_xl);
} else if (auto stable_diffusion = std::dynamic_pointer_cast<StableDiffusionPipeline>(pipe.m_impl); stable_diffusion != nullptr) {
m_impl = std::make_shared<StableDiffusionPipeline>(PipelineType::IMAGE_2_IMAGE, *stable_diffusion);
} else if (auto flux = std::dynamic_pointer_cast<FluxPipeline>(pipe.m_impl); flux != nullptr) {
m_impl = std::make_shared<FluxPipeline>(PipelineType::IMAGE_2_IMAGE, *flux);
} else {
OPENVINO_ASSERT("Cannot convert specified InpaintingPipeline to Image2ImagePipeline");
}
Expand Down Expand Up @@ -94,6 +101,20 @@ Image2ImagePipeline Image2ImagePipeline::stable_diffusion_xl(
return Image2ImagePipeline(impl);
}

/// Assembles an image-to-image Flux pipeline from already constructed building blocks.
///
/// @param scheduler         Scheduler installed on the new pipeline; checked with `assert` to be non-null.
/// @param clip_text_model   CLIP text encoder forwarded to the FluxPipeline constructor.
/// @param t5_encoder_model  T5 text encoder forwarded to the FluxPipeline constructor.
/// @param transformer       Flux transformer forwarded to the FluxPipeline constructor.
/// @param vae               VAE forwarded to the FluxPipeline constructor.
/// @return Image2ImagePipeline wrapping the newly created FluxPipeline.
Image2ImagePipeline Image2ImagePipeline::flux(const std::shared_ptr<Scheduler>& scheduler,
                                              const CLIPTextModel& clip_text_model,
                                              const T5EncoderModel t5_encoder_model,
                                              const FluxTransformer2DModel& transformer,
                                              const AutoencoderKL& vae) {
    // The pipeline is built in image-to-image mode first; the scheduler is attached afterwards.
    std::shared_ptr<FluxPipeline> flux_impl = std::make_shared<FluxPipeline>(PipelineType::IMAGE_2_IMAGE,
                                                                             clip_text_model,
                                                                             t5_encoder_model,
                                                                             transformer,
                                                                             vae);

    // Debug-only check: `assert` is compiled out when NDEBUG is defined.
    assert(scheduler != nullptr);
    flux_impl->set_scheduler(scheduler);

    return Image2ImagePipeline(flux_impl);
}

/// Returns the generation config reported by the underlying pipeline implementation.
ImageGenerationConfig Image2ImagePipeline::get_generation_config() const {
    ImageGenerationConfig current_config = m_impl->get_generation_config();
    return current_config;
}
Expand Down
2 changes: 2 additions & 0 deletions src/cpp/src/image_generation/image_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ IImageProcessor::IImageProcessor(const std::string& device) :
}

ov::Tensor IImageProcessor::execute(ov::Tensor image) {
OPENVINO_ASSERT(m_request, "ImageProcessor model must be compiled first. Cannot infer non-compiled model");
m_request.set_input_tensor(image);
m_request.infer();
return m_request.get_output_tensor();
Expand Down Expand Up @@ -124,6 +125,7 @@ ImageResizer::ImageResizer(const std::string& device, ov::element::Type type, ov
}

ov::Tensor ImageResizer::execute(ov::Tensor image, int64_t dst_height, int64_t dst_width) {
OPENVINO_ASSERT(m_request, "ImageResizer model must be compiled first. Cannot infer non-compiled model");
ov::Tensor target_spatial_tensor(ov::element::i64, ov::Shape{2});
target_spatial_tensor.data<int64_t>()[0] = dst_height;
target_spatial_tensor.data<int64_t>()[1] = dst_width;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ std::map<std::string, ov::Tensor> EulerAncestralDiscreteScheduler::step(ov::Tens
return {{"latent", prev_sample}, {"denoised", pred_original_sample}};
}

size_t EulerAncestralDiscreteScheduler::_index_for_timestep(int64_t timestep) const{
size_t EulerAncestralDiscreteScheduler::_index_for_timestep(int64_t timestep) const {
for (size_t i = 0; i < m_schedule_timesteps.size(); ++i) {
if (timestep == m_schedule_timesteps[i]) {
return i;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,43 @@ void FlowMatchEulerDiscreteScheduler::add_noise(ov::Tensor init_latent, ov::Tens
OPENVINO_THROW("Not implemented");
}

/// Finds the position of `timestep` inside the schedule timesteps.
///
/// When `m_schedule_timesteps` is empty it is first filled with a copy of `m_timesteps`.
/// The lookup uses exact floating-point equality and returns the first match.
///
/// @param timestep Timestep value to search for.
/// @return Index of the first schedule entry equal to `timestep`.
/// @throws Raises via OPENVINO_THROW when no entry matches.
size_t FlowMatchEulerDiscreteScheduler::_index_for_timestep(float timestep) {
    // Lazily snapshot the current timesteps on first use.
    if (m_schedule_timesteps.empty())
        m_schedule_timesteps = m_timesteps;

    size_t position = 0;
    for (const float candidate : m_schedule_timesteps) {
        if (candidate == timestep)
            return position;
        ++position;
    }

    OPENVINO_THROW("Failed to find index for timestep ", timestep);
}

/// Blends `noise` into `sample` in place: sample[i] = sigma * noise[i] + (1 - sigma) * sample[i].
///
/// Sigma is read from `m_sigmas` at an index selected as follows:
///  - no begin index set      -> index looked up from `timestep` via `_index_for_timestep`;
///  - step index already set  -> the current step index;
///  - otherwise               -> the begin index.
///
/// @param sample   Tensor accessed as float data; overwritten with the blended values.
/// @param timestep Timestep used to look up sigma. Only consulted when no begin index is set;
///                 the value -1 is treated as "not computed yet".
/// @param noise    Tensor accessed as float data; must hold at least as many elements as `sample`.
/// @throws Raises via OPENVINO_ASSERT / OPENVINO_THROW when the timestep is required but is -1 or
///         not part of the schedule, when the selected index is outside `m_sigmas`, or when
///         `noise` is shorter than `sample`.
void FlowMatchEulerDiscreteScheduler::scale_noise(ov::Tensor sample, float timestep, ov::Tensor noise) {
    // m_begin_index and m_step_index are size_t, so the "-1" sentinel is the maximum size_t value.
    constexpr size_t unset_index = static_cast<size_t>(-1);

    size_t sigma_index;
    if (m_begin_index == unset_index) {
        // This is the only branch that reads `timestep`, so this is where it must be a real value.
        // Requiring `timestep == -1` up front would reject every computed timestep and make this
        // lookup search for -1.
        OPENVINO_ASSERT(timestep != -1, "Timestep is not computed yet");
        sigma_index = _index_for_timestep(timestep);
    } else if (m_step_index != unset_index) {
        sigma_index = m_step_index;
    } else {
        sigma_index = m_begin_index;
    }

    OPENVINO_ASSERT(sigma_index < m_sigmas.size(),
                    "Sigma index ", sigma_index, " is out of range, number of sigmas is ", m_sigmas.size());
    const float sigma = m_sigmas[sigma_index];

    const size_t element_count = sample.get_size();
    // The loop below reads `element_count` values from `noise`.
    OPENVINO_ASSERT(noise.get_size() >= element_count,
                    "Noise tensor has ", noise.get_size(), " elements, but sample requires ", element_count);

    float* sample_data = sample.data<float>();
    const float* noise_data = noise.data<float>();

    for (size_t i = 0; i < element_count; ++i) {
        sample_data[i] = sigma * noise_data[i] + (1.0f - sigma) * sample_data[i];
    }
}

void FlowMatchEulerDiscreteScheduler::set_timesteps_with_sigma(std::vector<float> sigma, float mu) {
m_timesteps.clear();
m_sigmas.clear();
Expand Down Expand Up @@ -184,5 +221,13 @@ float FlowMatchEulerDiscreteScheduler::calculate_shift(size_t image_seq_len) {
return mu;
}

void FlowMatchEulerDiscreteScheduler::set_begin_index(size_t begin_index) {
m_begin_index = begin_index;
}

/// Returns the currently stored begin index.
size_t FlowMatchEulerDiscreteScheduler::get_begin_index() {
    const size_t stored_begin_index = m_begin_index;
    return stored_begin_index;
}

} // namespace genai
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,27 @@ class FlowMatchEulerDiscreteScheduler : public IScheduler {

void add_noise(ov::Tensor init_latent, ov::Tensor noise, int64_t latent_timestep) const override;

void scale_noise(ov::Tensor sample, float timestep, ov::Tensor noise) override;

float calculate_shift(size_t image_seq_len) override;

void set_begin_index(size_t begin_index) override;

size_t get_begin_index() override;

private:
Config m_config;

std::vector<float> m_sigmas;
std::vector<float> m_timesteps;
std::vector<float> m_timesteps, m_schedule_timesteps;

float m_sigma_min, m_sigma_max;
size_t m_step_index, m_begin_index;
size_t m_num_inference_steps;

void init_step_index();
double sigma_to_t(double simga);
size_t _index_for_timestep(float timestep);
};

} // namespace genai
Expand Down
10 changes: 10 additions & 0 deletions src/cpp/src/image_generation/schedulers/ischeduler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ class IScheduler : public Scheduler {
// Default implementation for schedulers without float timesteps: it always raises via
// OPENVINO_THROW. Schedulers that provide float timesteps are expected to override it.
virtual std::vector<float> get_float_timesteps() const {
OPENVINO_THROW("Scheduler doesn't support float timesteps");
}

// Default implementation of `scale_noise`: it ignores all arguments and always raises via
// OPENVINO_THROW. Schedulers that support noise scaling are expected to override it.
virtual void scale_noise(ov::Tensor sample, float timestep, ov::Tensor noise) {
OPENVINO_THROW("Scheduler doesn't support `scale_noise` method");
}

// Default implementation is a no-op: `begin_index` is silently ignored (note that the default
// `get_begin_index` raises instead). Schedulers that track a begin index override this.
virtual void set_begin_index(size_t begin_index) {};

// Default implementation of `get_begin_index`: it always raises via OPENVINO_THROW.
// Schedulers that track a begin index are expected to override it.
virtual size_t get_begin_index() {
OPENVINO_THROW("Scheduler doesn't support `get_begin_index` method");
}
};

} // namespace genai
Expand Down
3 changes: 3 additions & 0 deletions src/python/openvino_genai/py_openvino_genai.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ class Image2ImagePipeline:
This class is used for generation with image-to-image models.
"""
@staticmethod
def flux(scheduler: Scheduler, clip_text_model: CLIPTextModel, t5_encoder_model: T5EncoderModel, transformer: FluxTransformer2DModel, vae: AutoencoderKL) -> Image2ImagePipeline:
...
@staticmethod
def latent_consistency_model(scheduler: Scheduler, clip_text_model: CLIPTextModel, unet: UNet2DConditionModel, vae: AutoencoderKL) -> Image2ImagePipeline:
...
@staticmethod
Expand Down
1 change: 1 addition & 0 deletions src/python/py_image_generation_pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ void init_image_generation_pipelines(py::module_& m) {
.def_static("stable_diffusion", &ov::genai::Image2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
.def_static("latent_consistency_model", &ov::genai::Image2ImagePipeline::latent_consistency_model, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
.def_static("stable_diffusion_xl", &ov::genai::Image2ImagePipeline::stable_diffusion_xl, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("clip_text_model_with_projection"), py::arg("unet"), py::arg("vae"))
.def_static("flux", &ov::genai::Image2ImagePipeline::flux, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("t5_encoder_model"), py::arg("transformer"), py::arg("vae"))
ilya-lavrenov marked this conversation as resolved.
Show resolved Hide resolved
.def(
"compile",
[](ov::genai::Image2ImagePipeline& pipe,
Expand Down
7 changes: 5 additions & 2 deletions tools/who_what_benchmark/tests/test_cli_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,11 @@ def test_image_model_types(model_id, model_type, backend):
])),
)
def test_image_model_genai(model_id, model_type):
if ("flux" in model_id or "stable-diffusion-3" in model_id) and model_type != "text-to-image":
pytest.skip(reason="FLUX or SD3 are supported as text to image only")
if ("stable-diffusion-3" in model_id) and model_type != "text-to-image":
pytest.skip(reason="SD3 is supported as text to image only")
ilya-lavrenov marked this conversation as resolved.
Show resolved Hide resolved

if ("flux" in model_id) and model_type == "image-inpainting":
pytest.skip(reason="FLUX is not yet supported as image inpainting")

with tempfile.TemporaryDirectory() as temp_dir:
GT_FILE = os.path.join(temp_dir, "gt.csv")
Expand Down
Loading