From 5cc51ad06b40bc08d9f7d3a7d2ee8db16a66c03d Mon Sep 17 00:00:00 2001
From: JRPan <25518778+JRPan@users.noreply.github.com>
Date: Wed, 23 Oct 2024 20:51:40 -0700
Subject: [PATCH] remove is_graphics_kernel from kernel_info

---
 src/abstract_hardware_model.cc |  2 --
 src/abstract_hardware_model.h  |  1 -
 src/gpgpu-sim/gpu-sim.cc       | 35 ++++++++++++++++++-----------------
 src/gpgpu-sim/shader.cc        | 13 ++++++-------
 4 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/src/abstract_hardware_model.cc b/src/abstract_hardware_model.cc
index 9960e3c91..bc05a8d49 100644
--- a/src/abstract_hardware_model.cc
+++ b/src/abstract_hardware_model.cc
@@ -801,7 +801,6 @@ kernel_info_t::kernel_info_t(dim3 gridDim, dim3 blockDim,
       num_blocks() * entry->gpgpu_ctx->device_runtime->g_TB_launch_latency;

   cache_config_set = false;
-  is_graphic_kernel = false;
 }

 /*A snapshot of the texture mappings needs to be stored in the kernel's info as
@@ -835,7 +834,6 @@ kernel_info_t::kernel_info_t(
   cache_config_set = false;
   m_NameToCudaArray = nameToCudaArray;
   m_NameToTextureInfo = nameToTextureInfo;
-  is_graphic_kernel = false;
 }

 kernel_info_t::~kernel_info_t() {
diff --git a/src/abstract_hardware_model.h b/src/abstract_hardware_model.h
index 6233bae35..041f6cae4 100644
--- a/src/abstract_hardware_model.h
+++ b/src/abstract_hardware_model.h
@@ -378,7 +378,6 @@ class kernel_info_t {
   unsigned m_kernel_TB_latency;  // this used for any CPU-GPU kernel latency and
                                  // counted in the gpu_cycle

-  bool is_graphic_kernel;
 };

 class core_config {
diff --git a/src/gpgpu-sim/gpu-sim.cc b/src/gpgpu-sim/gpu-sim.cc
index 562f6500c..5ba6582bd 100644
--- a/src/gpgpu-sim/gpu-sim.cc
+++ b/src/gpgpu-sim/gpu-sim.cc
@@ -918,17 +918,17 @@ void gpgpu_sim::decrement_kernel_latency() {
 kernel_info_t *gpgpu_sim::select_kernel_inter(unsigned core_id) {
   // Kernel1 -> SM1, Kernel2 -> SM2
   unsigned idx = -1;
-  unsigned graphics_count =
+  unsigned split_at =
       m_config.num_shader() * dynamic_sm_count / concurrent_granularity;
-  if (core_id < graphics_count) {
+  if (core_id < split_at) {
     for (unsigned i = 0; i < m_running_kernels.size(); i++) {
       unsigned id = i;
       // (i + m_last_issued_kernel + 1) % m_config.max_concurrent_kernel;

       if (!m_running_kernels[id]) {
         continue;
       }
-      if (!m_running_kernels[id]->is_graphic_kernel) {
+      if (!is_graphics(m_running_kernels[id]->get_streamID())) {
         // if not graphics
         continue;
       }
@@ -943,7 +943,7 @@ kernel_info_t *gpgpu_sim::select_kernel_inter(unsigned core_id) {
       if (!m_running_kernels[id]) {
         continue;
       }
-      if (m_running_kernels[id]->is_graphic_kernel) {
+      if (is_graphics(m_running_kernels[id]->get_streamID())) {
         // if this graphics
         continue;
       }
@@ -1849,10 +1849,11 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
   unsigned int padded_cta_size = threads_per_cta;
   unsigned int warp_size = m_config->warp_size;
   bool overrided = true;
+  bool is_graphic = m_gpu->is_graphics(k.get_streamID());
   if (padded_cta_size % warp_size)
     padded_cta_size = ((padded_cta_size / warp_size) + 1) * (warp_size);
   if (find_available_hwtid(padded_cta_size, false) == -1) return false;
-  if (!k.is_graphic_kernel) {
+  if (!is_graphic) {
     // these values are used for local memory mapping
     // only compute kernels uses local memory
     // so this is a little hack. May be a issue in the future. FIXME
@@ -1868,12 +1869,12 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
   if (m_gpu->slicer_sampled) {
     graphics_count = m_gpu->dynamic_sm_count;
   } else {
-    if (k.is_graphic_kernel) {
+    if (is_graphic) {
       if (get_cluster_id() >= m_config->num_shader() / 2) {
         return false;
       }
       graphics_count = (get_cluster_id() + 1) * 2;
-    } else if (!k.is_graphic_kernel) {
+    } else if (!is_graphic) {
       if (get_cluster_id() < m_config->num_shader() / 2) {
         return false;
       }
@@ -1896,16 +1897,16 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
       m_gpu->concurrent_granularity;
   bool limited_reg = true;
   bool limited_shmem = true;
-  if ((k.is_graphic_kernel && m_running_compute) ||
-      (!k.is_graphic_kernel && m_running_graphics)) {
+  if ((is_graphic && m_running_compute) ||
+      (!is_graphic && m_running_graphics)) {
     unsigned graphics_cta_size = 0;
     unsigned compute_cta_size = 0;
     const struct gpgpu_ptx_sim_info *kernel_g = NULL;
     const struct gpgpu_ptx_sim_info *kernel_c = NULL;
-    if (k.is_graphic_kernel && m_running_compute) {
+    if (is_graphic && m_running_compute) {
       graphics_cta_size = threads_per_cta;
       compute_cta_size = m_running_compute->threads_per_cta();
-    } else if (!k.is_graphic_kernel && m_running_graphics) {
+    } else if (!is_graphic && m_running_graphics) {
       graphics_cta_size = m_running_graphics->threads_per_cta();
       compute_cta_size = threads_per_cta;
     }
@@ -1920,10 +1921,10 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
     unsigned compute_cta =
         (m_config->n_thread_per_shader - max_graphics_threads) /
         compute_cta_size;
-    if (k.is_graphic_kernel && m_running_compute) {
+    if (is_graphic && m_running_compute) {
       kernel_g = kernel_info;
      kernel_c = ptx_sim_kernel_info(m_running_compute->entry());
-    } else if (!k.is_graphic_kernel && m_running_graphics) {
+    } else if (!is_graphic && m_running_graphics) {
       kernel_g = ptx_sim_kernel_info(m_running_graphics->entry());
       kernel_c = kernel_info;
     }
@@ -1975,7 +1976,7 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
           m_gpu->concurrent_granularity;
     }
   }
-  if (k.is_graphic_kernel) {
+  if (is_graphic) {
     if (m_occupied_graphics_threads + padded_cta_size > max_graphics_threads)
       return false;

@@ -2036,7 +2037,7 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
     m_occupied_shmem += kernel_info->smem;
     m_occupied_regs += used_regs;
     m_occupied_ctas++;
-    if (k.is_graphic_kernel) {
+    if (is_graphic) {
       m_occupied_graphics_threads += padded_cta_size;
       m_occupied_graphics_shmem += kernel_info->smem;
       m_occupied_graphics_regs += used_regs;
@@ -2089,7 +2090,7 @@ void shader_core_ctx::release_shader_resource_1block(unsigned hw_ctaid,
   assert(m_occupied_ctas >= 1);
   m_occupied_ctas--;

-  if (k.is_graphic_kernel) {
+  if (m_gpu->is_graphics(k.get_streamID())) {
     assert(m_occupied_graphics_threads >= padded_cta_size);
     m_occupied_graphics_threads -= padded_cta_size;

@@ -2127,7 +2128,7 @@ void shader_core_ctx::issue_block2core(kernel_info_t &kernel) {
     set_max_cta(kernel);
   else {
     assert(occupy_shader_resource_1block(kernel, true));
-    if (kernel.is_graphic_kernel) {
+    if (m_gpu->is_graphics(kernel.get_streamID())) {
       m_running_graphics = &kernel;
     } else {
       m_running_compute = &kernel;
diff --git a/src/gpgpu-sim/shader.cc b/src/gpgpu-sim/shader.cc
index 4827de224..7f76d5444 100644
--- a/src/gpgpu-sim/shader.cc
+++ b/src/gpgpu-sim/shader.cc
@@ -576,7 +576,7 @@ void shader_core_ctx::init_warps(unsigned cta_id, unsigned start_thread,

     m_warp[i]->init(start_pc, cta_id, ctaid, i, active_threads,
                     m_dynamic_warp_id, kernel.get_streamID(),
-                    kernel.is_graphic_kernel);
+                    m_gpu->is_graphics(kernel.get_streamID()));
     ++m_dynamic_warp_id;
     m_not_completed += n_active;
     ++m_active_warps;
@@ -3033,8 +3033,8 @@ void shader_core_ctx::register_cta_thread_exit(unsigned cta_num,
     release_shader_resource_1block(cta_num, *kernel);
     kernel->dec_running();
     // invalidate vertices
-    if (kernel->is_graphic_kernel &&
-        m_gpu->getShaderCoreConfig()->gpgpu_invalidate_l2) {
+    bool is_graphics = m_gpu->is_graphics(kernel->get_streamID());
+    if (is_graphics && m_gpu->getShaderCoreConfig()->gpgpu_invalidate_l2) {
       unsigned kernel_id = kernel->get_uid();
       for (unsigned vb = 0; vb < m_gpu->vb_addr[kernel_id].size(); vb++) {
         unsigned ctaid = kernelcta_id;
@@ -3047,8 +3047,7 @@ void shader_core_ctx::register_cta_thread_exit(unsigned cta_num,
         unsigned start_addr =
             m_gpu->vb_addr[kernel_id][vb] + ctaid * size_per_cta;
         if (((ctaid + 1) * size_per_cta < vb_size) && size_per_cta != 0) {
-          m_gpu->invalidate_l2_range(start_addr, size_per_cta,
-                                     kernel->is_graphic_kernel);
+          m_gpu->invalidate_l2_range(start_addr, size_per_cta, is_graphics);
         }
       }
     }
@@ -3062,7 +3061,7 @@ void shader_core_ctx::register_cta_thread_exit(unsigned cta_num,
       if (m_kernel == kernel) m_kernel = NULL;
       m_gpu->set_kernel_done(kernel);

-      if (kernel->is_graphic_kernel) {
+      if (is_graphics) {
        m_running_graphics = NULL;
      } else {
        m_running_compute = NULL;
@@ -4640,7 +4639,7 @@ unsigned simt_core_cluster::issue_block2core() {
          // (m_core[core]->get_n_active_cta() <
          // m_config->max_cta(*kernel)) ) {
          m_core[core]->can_issue_1block(*kernel)) {
-      if (kernel->is_graphic_kernel) {
+      if (m_gpu->is_graphics(kernel->get_streamID())) {
        unsigned kernel_id = kernel->get_uid();
        for (unsigned vb = 0; vb < m_gpu->vb_addr[kernel_id].size(); vb++) {
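
Note: this patch assumes gpgpu_sim already exposes an is_graphics(streamID)
predicate; its definition is not part of the diff above. A minimal sketch of
what such a helper could look like is shown below. The class name
gpgpu_sim_sketch, the member m_graphics_streams, and register_graphics_stream
are illustrative assumptions, not the simulator's actual API.

    // Hypothetical sketch only -- not part of this patch.
    #include <set>

    class gpgpu_sim_sketch {
     public:
      // Mark a CUDA stream as carrying graphics work (e.g. when a graphics
      // kernel is launched on that stream).
      void register_graphics_stream(unsigned long long stream_id) {
        m_graphics_streams.insert(stream_id);
      }

      // Stand-in for the removed kernel_info_t::is_graphic_kernel flag:
      // a kernel is treated as graphics iff its stream was registered above.
      bool is_graphics(unsigned long long stream_id) const {
        return m_graphics_streams.count(stream_id) != 0;
      }

     private:
      std::set<unsigned long long> m_graphics_streams;  // stream IDs flagged as graphics
    };

Under that assumption, call sites only need the kernel's stream ID
(kernel.get_streamID()), which is why the per-kernel boolean can be dropped.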