Skip to content

Commit

Permalink
[CPU] [ARM] [INT8] FullyConnected (#25171)
Browse files Browse the repository at this point in the history
### Details:
 - *[ARM] [INT8] FullyConnected*

### Tickets:
 - *CVS-149494*

---------

Co-authored-by: Aleksandr Voron <[email protected]>
  • Loading branch information
eshoguli and alvoron authored Dec 18, 2024
1 parent 87370fe commit 9ff5942
Show file tree
Hide file tree
Showing 90 changed files with 1,181 additions and 478 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1897,4 +1897,4 @@ bool NetworkHelper::checkConstantNotInf(const std::shared_ptr<Node> constant_nod
}
} // namespace low_precision
} // namespace pass
} // namespace ov
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ ov::pass::ConvertFCToFCQuantizedLegacy::ConvertFCToFCQuantizedLegacy() {
std::vector<element::Type> weights_types{ov::element::i8};

auto activations_m = pattern::any_input(ov::pass::pattern::type_matches_any(activation_types));
auto weights_m = wrap_type<ov::op::v0::Constant>(ov::pass::pattern::type_matches_any(weights_types));
auto weights_m = pattern::any_input();
auto bias_m = pattern::any_input();

auto fully_connected_m = wrap_type<ov::op::internal::FullyConnected>({activations_m, weights_m, bias_m});
Expand All @@ -43,7 +43,8 @@ ov::pass::ConvertFCToFCQuantizedLegacy::ConvertFCToFCQuantizedLegacy() {
const auto& fc_output_shape = fc_output.get_partial_shape();
const auto& multiply_output_shape = multiply.get_partial_shape();

if (*fc_output_shape.rbegin() != *multiply_output_shape.rbegin()) {
if (*fc_output_shape.rbegin() != *multiply_output_shape.rbegin() ||
!ov::op::util::is_on_constant_path(weights)) {
return false;
}

Expand Down
47 changes: 1 addition & 46 deletions src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "cpu_types.h"
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "nodes/executors/common/common_utils.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "openvino/core/type/element_type.hpp"
#include "utils/cpu_utils.hpp"
Expand All @@ -21,52 +22,6 @@
namespace ov {
namespace intel_cpu {

/**
 * @brief Collects the destination de-quantization scales from the memory arguments.
 *
 * Reads the float buffer stored under ARG_DST_DEQ_SCALE. The number of values read is the
 * product of the scale dims after they are normalized against the rank of the ARG_DST
 * shape (via getNormalizedDimsBySize).
 *
 * @param memory executor memory arguments; ARG_DST must be present whenever
 *               ARG_DST_DEQ_SCALE is present with non-null data.
 * @return an empty vector when ARG_DST_DEQ_SCALE is absent or its data pointer is null;
 *         a single-element vector when all scale values compare equal;
 *         otherwise one value per scale element.
 */
static std::vector<float> getDeQuantizedScales(const MemoryArgs& memory) {
    if (!memory.count(ARG_DST_DEQ_SCALE))
        return {};

    const auto& scalesMemory = memory.at(ARG_DST_DEQ_SCALE);
    const auto* scalesData = static_cast<const float*>(scalesMemory->getData());
    if (!scalesData)
        return {};

    const auto& dstShape = memory.at(ARG_DST)->getShape();
    const auto& dqScalesShape = scalesMemory->getShape();

    const auto scalesDims = getNormalizedDimsBySize(dqScalesShape.getDims(), dstShape.getDims().size());
    const auto scaleSize =
        std::accumulate(scalesDims.begin(), scalesDims.end(), std::size_t(1), std::multiplies<size_t>());

    // The previous implementation filled a vector with 1.0f and multiplied the scales in;
    // the vector was always created with exactly scaleSize elements, so its size assert and
    // broadcast/resize step could never trigger. A direct copy yields the same values.
    std::vector<float> DQScales(scalesData, scalesData + scaleSize);

    // Collapse to a single value when every element compares equal.
    // NOTE(review): for scaleSize == 0 the vector is empty, all_of is vacuously true and
    // resize(1) produces a single 0.0f. This matches the previous behavior; confirm
    // whether returning an empty vector would be preferable for callers.
    const bool uniform = std::all_of(DQScales.begin(), DQScales.end(), [&](float val) {
        return val == DQScales[0];
    });
    if (uniform)
        DQScales.resize(1);

    return DQScales;
}

DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps,
const dnnl::engine& engine,
const VectorDims& outputDims,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@
namespace ov {
namespace intel_cpu {

// Lookup table from the integer executor argument ids (ARG_*) to the matching ACLArgs slot.
// Looked up with .at(), so an id missing from this table throws std::out_of_range.
static const std::unordered_map<int, ACLArgs> argConvert = {
{ARG_SRC_0, ACL_SRC_0},
{ARG_SRC_1, ACL_SRC_1},
{ARG_SRC_2, ACL_SRC_2},
{ARG_BIAS, ACL_BIAS},
{ARG_WEI, ACL_WEI},
{ARG_DST, ACL_DST},
};
// Lookup table from the integer executor argument ids (ARG_*) to the matching ACLArgs slot,
// including the destination de-quantization scale argument.
// Looked up with .at(), so an id missing from this table throws std::out_of_range.
static const std::unordered_map<int, ACLArgs> argConvert = {{ARG_SRC_0, ACL_SRC_0},
{ARG_SRC_1, ACL_SRC_1},
{ARG_SRC_2, ACL_SRC_2},
{ARG_BIAS, ACL_BIAS},
{ARG_WEI, ACL_WEI},
{ARG_DST, ACL_DST},
{ARG_DST_DEQ_SCALE, ACL_DST_DEQ_SCALE}};

using ACLTypes = std::array<arm_compute::DataType, ACLArgs::COUNT_OF_ARGS>;
using ACLLayouts = std::array<arm_compute::DataLayout, ACLArgs::COUNT_OF_ARGS>;
Expand All @@ -39,9 +38,9 @@ static void initACLTensorParams(const MemoryPtr& memoryPtr,
}
}

static std::shared_ptr<arm_compute::TensorInfo> initTensorInfo(const arm_compute::TensorShape& tensorShape,
const arm_compute::DataType& dataType,
const arm_compute::DataLayout& dataLayout) {
std::shared_ptr<arm_compute::TensorInfo> ACLCommonExecutor::initTensorInfo(const arm_compute::TensorShape& tensorShape,
const arm_compute::DataType& dataType,
const arm_compute::DataLayout& dataLayout) {
std::shared_ptr<arm_compute::TensorInfo> aclMemoryInfo = nullptr;
if (dataType != arm_compute::DataType::UNKNOWN) {
aclMemoryInfo = std::make_shared<arm_compute::TensorInfo>(tensorShape, 1, dataType, dataLayout);
Expand Down Expand Up @@ -70,6 +69,9 @@ bool ACLCommonExecutor::update(const MemoryArgs& memory) {
ACLTypes aclDataType{};
ACLLayouts aclDataLayout{};
for (auto& cpu_mem_ptr : memory) {
if (cpu_mem_ptr.second->getSize() == 0) {
continue;
}
const ACLArgs index = argConvert.at(cpu_mem_ptr.first);
initACLTensorParams(cpu_mem_ptr.second,
aclTensorAttrs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
namespace ov {
namespace intel_cpu {

// Slots for the ACL tensor arguments. COUNT_OF_ARGS must stay last: it is used as the
// size of the per-argument std::array aliases (ACLShapes, ACLTypes, ACLLayouts).
enum ACLArgs { ACL_SRC_0, ACL_SRC_1, ACL_SRC_2, ACL_BIAS, ACL_WEI, ACL_DST, COUNT_OF_ARGS };
// Slots for the ACL tensor arguments, including the destination de-quantization scale.
// COUNT_OF_ARGS must stay last: it is used as the size of the per-argument std::array
// aliases (ACLShapes, ACLTypes, ACLLayouts).
enum ACLArgs { ACL_SRC_0, ACL_SRC_1, ACL_SRC_2, ACL_BIAS, ACL_WEI, ACL_DST, ACL_DST_DEQ_SCALE, COUNT_OF_ARGS };

using ACLFunction = std::unique_ptr<arm_compute::IFunction>;
using ACLShapes = std::array<arm_compute::TensorShape, ACLArgs::COUNT_OF_ARGS>;
Expand Down Expand Up @@ -42,6 +42,9 @@ class ACLCommonExecutor : public Executor {

protected:
ACLTensorAttrs aclTensorAttrs;
virtual std::shared_ptr<arm_compute::TensorInfo> initTensorInfo(const arm_compute::TensorShape& tensorShape,
const arm_compute::DataType& dataType,
const arm_compute::DataLayout& dataLayout);

private:
ACLTensors aclMemoryTensors;
Expand Down
Loading

0 comments on commit 9ff5942

Please sign in to comment.