diff --git a/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp b/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp index 5350df32742e9..55a720f200f48 100644 --- a/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp +++ b/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp @@ -347,9 +347,10 @@ bool IoctlHelperXe::setGpuCpuTimes(TimeStampData *pGpuCpuTime, OSTime *osTime) { } void IoctlHelperXe::getTopologyData(size_t nTiles, std::vector<std::bitset<8>> *geomDss, std::vector<std::bitset<8>> *computeDss, - std::vector<std::bitset<8>> *euDss, DrmQueryTopologyData &topologyData, bool &isComputeDssEmpty) { + std::vector<std::bitset<8>> *euDss, std::vector<std::bitset<8>> *l3BanksMask, DrmQueryTopologyData &topologyData, bool &isComputeDssEmpty) { int subSliceCount = 0; int euPerDss = 0; + int l3Banks = 0; for (auto tileId = 0u; tileId < nTiles; tileId++) { @@ -371,9 +372,15 @@ void IoctlHelperXe::getTopologyData(size_t nTiles, std::vector<std::bitset<8>> * euPerDssPerTile += euDss[tileId][byte].count(); } + int l3BanksPerTile = 0; + for (auto byte = 0u; byte < l3BanksMask[tileId].size(); byte++) { + l3BanksPerTile += l3BanksMask[tileId][byte].count(); + } + // pick smallest config subSliceCount = (subSliceCount == 0) ? subSliceCountPerTile : std::min(subSliceCount, subSliceCountPerTile); euPerDss = (euPerDss == 0) ? euPerDssPerTile : std::min(euPerDss, euPerDssPerTile); + l3Banks = (l3Banks == 0) ? 
l3BanksPerTile : std::min(l3Banks, l3BanksPerTile); // pick max config topologyData.maxSubSliceCount = std::max(topologyData.maxSubSliceCount, subSliceCountPerTile); @@ -384,6 +391,7 @@ void IoctlHelperXe::getTopologyData(size_t nTiles, std::vector<std::bitset<8>> * topologyData.subSliceCount = subSliceCount; topologyData.euCount = subSliceCount * euPerDss; topologyData.maxSliceCount = 1; + topologyData.numL3Banks = l3Banks; } void IoctlHelperXe::getTopologyMap(size_t nTiles, std::vector<std::bitset<8>> *dssInfo, TopologyMap &topologyMap) { @@ -420,6 +428,7 @@ bool IoctlHelperXe::getTopologyDataAndMap(const HardwareInfo &hwInfo, DrmQueryTo StackVec<std::vector<std::bitset<8>>, 2> geomDss; StackVec<std::vector<std::bitset<8>>, 2> computeDss; StackVec<std::vector<std::bitset<8>>, 2> euDss; + StackVec<std::vector<std::bitset<8>>, 2> l3Banks; auto topologySize = queryGtTopology.size(); auto dataPtr = queryGtTopology.data(); @@ -428,6 +437,7 @@ bool IoctlHelperXe::getTopologyDataAndMap(const HardwareInfo &hwInfo, DrmQueryTo geomDss.resize(numTiles); computeDss.resize(numTiles); euDss.resize(numTiles); + l3Banks.resize(numTiles); bool receivedDssInfo = false; bool receivedEuPerDssInfo = false; while (topologySize >= sizeof(drm_xe_query_topology_mask)) { @@ -451,6 +461,9 @@ bool IoctlHelperXe::getTopologyDataAndMap(const HardwareInfo &hwInfo, DrmQueryTo fillMask(euDss[tileId], topo); receivedEuPerDssInfo = true; break; + case DRM_XE_TOPO_L3_BANK: + fillMask(l3Banks[tileId], topo); + break; default: xeLog("Unhandle GT Topo type: %d\n", topo->type); } @@ -462,7 +475,7 @@ bool IoctlHelperXe::getTopologyDataAndMap(const HardwareInfo &hwInfo, DrmQueryTo } bool isComputeDssEmpty = false; - getTopologyData(numTiles, geomDss.begin(), computeDss.begin(), euDss.begin(), topologyData, isComputeDssEmpty); + getTopologyData(numTiles, geomDss.begin(), computeDss.begin(), euDss.begin(), l3Banks.begin(), topologyData, isComputeDssEmpty); auto &dssInfo = isComputeDssEmpty ? 
geomDss : computeDss; getTopologyMap(numTiles, dssInfo.begin(), topologyMap); diff --git a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h index 1e1c54e4c401a..0bbffe1076a11 100644 --- a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h +++ b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h @@ -105,7 +105,7 @@ class IoctlHelperXe : public IoctlHelper { bool isWaitBeforeBindRequired(bool bind) const override; std::unique_ptr createEngineInfo(bool isSysmanEnabled) override; std::unique_ptr createMemoryInfo() override; - void getTopologyData(size_t nTiles, std::vector<std::bitset<8>> *geomDss, std::vector<std::bitset<8>> *computeDss, std::vector<std::bitset<8>> *euDss, DrmQueryTopologyData &topologyData, bool &isComputeDssEmpty); + void getTopologyData(size_t nTiles, std::vector<std::bitset<8>> *geomDss, std::vector<std::bitset<8>> *computeDss, std::vector<std::bitset<8>> *euDss, std::vector<std::bitset<8>> *l3BanksMask, DrmQueryTopologyData &topologyData, bool &isComputeDssEmpty); void getTopologyMap(size_t nTiles, std::vector<std::bitset<8>> *dssInfo, TopologyMap &topologyMap); bool setGpuCpuTimes(TimeStampData *pGpuCpuTime, OSTime *osTime) override; diff --git a/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp b/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp index fc68fee8e0536..11eef3a96b0a7 100644 --- a/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp @@ -733,6 +733,31 @@ TEST(IoctlHelperXeTest, givenGeomDssWhenGetTopologyDataAndMapThenResultsAreCorre } } +TEST(IoctlHelperXeTest, givenL3BankMaskInQueryTopolgyWhenGetTopologyDataAndMapThenProperNumberOfL3BankIsSet) { + + auto executionEnvironment = std::make_unique(); + DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]}; + auto &hwInfo = *executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); + auto xeIoctlHelper = std::make_unique(drm); + + xeIoctlHelper->initialize(); + + for (auto gtId = 0u; gtId 
< 3u; gtId++) { + drm.addMockedQueryTopologyData(gtId, DRM_XE_TOPO_DSS_GEOMETRY, 8, {0b11'1111, 0, 0, 0, 0, 0, 0, 0}); + drm.addMockedQueryTopologyData(gtId, DRM_XE_TOPO_DSS_COMPUTE, 8, {0, 0, 0, 0, 0, 0, 0, 0}); + drm.addMockedQueryTopologyData(gtId, DRM_XE_TOPO_EU_PER_DSS, 8, {0b1111'1111, 0b1111'1111, 0, 0, 0, 0, 0, 0}); + drm.addMockedQueryTopologyData(gtId, DRM_XE_TOPO_L3_BANK, 4, {0b1110'1011, 0b1111'1100, 0, 0}); + } + DrmQueryTopologyData topologyData{}; + TopologyMap topologyMap{}; + + auto result = xeIoctlHelper->getTopologyDataAndMap(hwInfo, topologyData, topologyMap); + ASSERT_TRUE(result); + + // verify topology data + EXPECT_EQ(12, topologyData.numL3Banks); +} + TEST(IoctlHelperXeTest, givenUnknownTopologyTypeWhenGetTopologyDataAndMapThenNotRecognizedTopologyIsIgnored) { auto executionEnvironment = std::make_unique(); diff --git a/third_party/uapi/xe/xe_drm.h b/third_party/uapi/xe/xe_drm.h index 6c98c3dfa8909..e9591def3ffcc 100644 --- a/third_party/uapi/xe/xe_drm.h +++ b/third_party/uapi/xe/xe_drm.h @@ -500,6 +500,7 @@ struct drm_xe_query_gt_list { * containing the following in mask: * ``DSS_COMPUTE ff ff ff ff 00 00 00 00`` * means 32 DSS are available for compute. + * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks * - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU) * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: @@ -510,9 +511,10 @@ struct drm_xe_query_topology_mask { /** @gt_id: GT ID the mask is associated with */ __u16 gt_id; -#define DRM_XE_TOPO_DSS_GEOMETRY (1 << 0) -#define DRM_XE_TOPO_DSS_COMPUTE (1 << 1) -#define DRM_XE_TOPO_EU_PER_DSS (1 << 2) +#define DRM_XE_TOPO_DSS_GEOMETRY 1 +#define DRM_XE_TOPO_DSS_COMPUTE 2 +#define DRM_XE_TOPO_L3_BANK 3 +#define DRM_XE_TOPO_EU_PER_DSS 4 /** @type: type of mask */ __u16 type;