diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp index f5f8c53309..6dd2b2f910 100644 --- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp +++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp @@ -56,13 +56,13 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline { PipelineMetrics get_metrics() const; - GenerationHandle add_request(uint64_t request_id, std::string prompt, ov::genai::GenerationConfig sampling_params); + GenerationHandle add_request(uint64_t request_id, const std::string& prompt, ov::genai::GenerationConfig sampling_params); void step(); bool has_non_finished_requests(); // more high level interface, which can process multiple prompts in continuous batching manner - std::vector<GenerationResult> generate(const std::vector<std::string>& prompts, std::vector<ov::genai::GenerationConfig> sampling_params); + std::vector<GenerationResult> generate(const std::vector<std::string>& prompts, const std::vector<ov::genai::GenerationConfig>& sampling_params); }; } diff --git a/src/cpp/include/openvino/genai/generation_handle.hpp b/src/cpp/include/openvino/genai/generation_handle.hpp index d0ddbc3a32..5fd0fa5fe2 100644 --- a/src/cpp/include/openvino/genai/generation_handle.hpp +++ b/src/cpp/include/openvino/genai/generation_handle.hpp @@ -47,7 +47,7 @@ class OPENVINO_GENAI_EXPORTS GenerationHandleImpl { public: GenerationHandleImpl(std::shared_ptr<GenerationStream> generation_stream, const ov::genai::GenerationConfig& sampling_params) : - m_generation_stream(generation_stream), + m_generation_stream(std::move(generation_stream)), m_sampling_params(sampling_params) {}; ~GenerationHandleImpl(); diff --git a/src/cpp/src/block_manager.hpp b/src/cpp/src/block_manager.hpp index ab60b7f5ff..489f4e6159 100644 --- a/src/cpp/src/block_manager.hpp +++ b/src/cpp/src/block_manager.hpp @@ -257,7 +257,7 @@ class BlockManager { } bool can_append_slots(SequenceGroup::CPtr seq_group) { - return required_blocks_count(seq_group) <= m_allocator.num_free_blocks(); + return 
required_blocks_count(std::move(seq_group)) <= m_allocator.num_free_blocks(); } size_t required_blocks_count(SequenceGroup::CPtr seq_group) { @@ -336,7 +336,7 @@ class BlockManager { // write information about block forking for later usage in CacheManager copy_blocks_map[last_block->get_index()].push_back(new_block->get_index()); // release `last_block` usage - m_allocator.free(last_block); + m_allocator.free(std::move(last_block)); } else { // nothing to do, because we are the only users of this block } diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp index 55100f3cb4..5418a14242 100644 --- a/src/cpp/src/continuous_batching_pipeline.cpp +++ b/src/cpp/src/continuous_batching_pipeline.cpp @@ -238,7 +238,7 @@ class ContinuousBatchingPipeline::Impl { return !m_awaiting_requests.empty() || !m_requests.empty(); } - std::vector<GenerationResult> generate(const std::vector<std::string> prompts, std::vector<GenerationConfig> sampling_params) { + std::vector<GenerationResult> generate(const std::vector<std::string>& prompts, const std::vector<GenerationConfig>& sampling_params) { OPENVINO_ASSERT(!has_non_finished_requests(), "Generate cannot be called while ContinuousBatchingPipeline is already in running state. 
Use ContinuousBatchingPipeline::add_request"); OPENVINO_ASSERT(prompts.size() == sampling_params.size()); @@ -307,8 +307,8 @@ PipelineMetrics ContinuousBatchingPipeline::get_metrics() const{ return m_impl->get_metrics(); } -GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, std::string prompt, ov::genai::GenerationConfig sampling_params) { - return m_impl->add_request(request_id, prompt, sampling_params); +GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id, const std::string& prompt, ov::genai::GenerationConfig sampling_params) { + return m_impl->add_request(request_id, prompt, std::move(sampling_params)); } void ContinuousBatchingPipeline::step() { @@ -319,6 +319,6 @@ bool ContinuousBatchingPipeline::has_non_finished_requests() { return m_impl->has_non_finished_requests(); } -std::vector<GenerationResult> ContinuousBatchingPipeline::generate(const std::vector<std::string>& prompts, std::vector<ov::genai::GenerationConfig> sampling_params) { +std::vector<GenerationResult> ContinuousBatchingPipeline::generate(const std::vector<std::string>& prompts, const std::vector<ov::genai::GenerationConfig>& sampling_params) { return m_impl->generate(prompts, sampling_params); } \ No newline at end of file diff --git a/src/cpp/src/generation_stream.hpp b/src/cpp/src/generation_stream.hpp index 0d51897e82..57cb7253c9 100644 --- a/src/cpp/src/generation_stream.hpp +++ b/src/cpp/src/generation_stream.hpp @@ -27,7 +27,7 @@ class GenerationStream { } void push(GenerationOutputs outputs) { - m_output_queue.push(outputs); + m_output_queue.push(std::move(outputs)); } // Retriving vector of pairs as we can generate multiple outputs for a single prompt diff --git a/src/cpp/src/model_runner.hpp b/src/cpp/src/model_runner.hpp index 5fb2e0f524..e2a4bc1aa7 100644 --- a/src/cpp/src/model_runner.hpp +++ b/src/cpp/src/model_runner.hpp @@ -19,7 +19,7 @@ class ModelRunner { SchedulerConfig m_scheduler_config; public: ModelRunner(ov::InferRequest request, const SchedulerConfig& scheduler_config) : - m_request(request), + m_request(std::move(request)), 
m_scheduler_config(scheduler_config) { } ov::InferRequest get_infer_request() const { diff --git a/src/cpp/src/sampler.hpp b/src/cpp/src/sampler.hpp index 65c17b4961..1d59a85ac6 100644 --- a/src/cpp/src/sampler.hpp +++ b/src/cpp/src/sampler.hpp @@ -96,7 +96,7 @@ struct Beam { float m_score = -std::numeric_limits<float>::infinity(); Beam(Sequence::Ptr sequence) - : m_sequence(sequence) { } + : m_sequence(std::move(sequence)) { } size_t get_generated_len() const { return m_sequence->get_generated_len(); diff --git a/src/cpp/src/sequence_group.hpp b/src/cpp/src/sequence_group.hpp index 88b86b4484..f5f1bb1db5 100644 --- a/src/cpp/src/sequence_group.hpp +++ b/src/cpp/src/sequence_group.hpp @@ -371,7 +371,7 @@ class SequenceGroup { } Sequence::Ptr fork_sequence(Sequence::CPtr sequence) { - m_sequences.emplace_back(Sequence::fork(sequence, m_next_sequence_id++)); + m_sequences.emplace_back(Sequence::fork(std::move(sequence), m_next_sequence_id++)); return m_sequences.back(); } @@ -433,7 +433,7 @@ class SequenceGroup { output.score = sequence->get_beam_search_score(m_sampling_params); outputs.emplace(sequence->get_grouped_id(), output); } - m_generation_stream->push(outputs); + m_generation_stream->push(std::move(outputs)); } void push_partial_outputs() { @@ -445,7 +445,7 @@ class SequenceGroup { const auto last_gen_token = sequence->get_last_generation_output(); outputs.emplace(sequence->get_grouped_id(), last_gen_token); } - m_generation_stream->push(outputs); + m_generation_stream->push(std::move(outputs)); } void notify_handle() {