Skip to content

Commit

Permalink
Remove is_graphic_kernel from kernel_info_t
Browse files Browse the repository at this point in the history
  • Loading branch information
JRPan committed Oct 24, 2024
1 parent 591f477 commit 5cc51ad
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 27 deletions.
2 changes: 0 additions & 2 deletions src/abstract_hardware_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,6 @@ kernel_info_t::kernel_info_t(dim3 gridDim, dim3 blockDim,
num_blocks() * entry->gpgpu_ctx->device_runtime->g_TB_launch_latency;

cache_config_set = false;
is_graphic_kernel = false;
}

/*A snapshot of the texture mappings needs to be stored in the kernel's info as
Expand Down Expand Up @@ -835,7 +834,6 @@ kernel_info_t::kernel_info_t(
cache_config_set = false;
m_NameToCudaArray = nameToCudaArray;
m_NameToTextureInfo = nameToTextureInfo;
is_graphic_kernel = false;
}

kernel_info_t::~kernel_info_t() {
Expand Down
1 change: 0 additions & 1 deletion src/abstract_hardware_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,6 @@ class kernel_info_t {

unsigned m_kernel_TB_latency; // this used for any CPU-GPU kernel latency and
// counted in the gpu_cycle
bool is_graphic_kernel;
};

class core_config {
Expand Down
35 changes: 18 additions & 17 deletions src/gpgpu-sim/gpu-sim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -918,17 +918,17 @@ void gpgpu_sim::decrement_kernel_latency() {
kernel_info_t *gpgpu_sim::select_kernel_inter(unsigned core_id) {
// Kernel1 -> SM1, Kernel2 -> SM2
unsigned idx = -1;
unsigned graphics_count =
unsigned split_at =
m_config.num_shader() * dynamic_sm_count / concurrent_granularity;

if (core_id < graphics_count) {
if (core_id < split_at) {
for (unsigned i = 0; i < m_running_kernels.size(); i++) {
unsigned id = i;
// (i + m_last_issued_kernel + 1) % m_config.max_concurrent_kernel;
if (!m_running_kernels[id]) {
continue;
}
if (!m_running_kernels[id]->is_graphic_kernel) {
if (!is_graphics(m_running_kernels[id]->get_streamID())) {
// if not graphics
continue;
}
Expand All @@ -943,7 +943,7 @@ kernel_info_t *gpgpu_sim::select_kernel_inter(unsigned core_id) {
if (!m_running_kernels[id]) {
continue;
}
if (m_running_kernels[id]->is_graphic_kernel) {
if (is_graphics(m_running_kernels[id]->get_streamID())) {
// if this is graphics
continue;
}
Expand Down Expand Up @@ -1849,10 +1849,11 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
unsigned int padded_cta_size = threads_per_cta;
unsigned int warp_size = m_config->warp_size;
bool overrided = true;
bool is_graphic = m_gpu->is_graphics(k.get_streamID());
if (padded_cta_size % warp_size)
padded_cta_size = ((padded_cta_size / warp_size) + 1) * (warp_size);
if (find_available_hwtid(padded_cta_size, false) == -1) return false;
if (!k.is_graphic_kernel) {
if (!is_graphic) {
// these values are used for local memory mapping
// only compute kernels use local memory
// so this is a little hack. May be an issue in the future. FIXME
Expand All @@ -1868,12 +1869,12 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
if (m_gpu->slicer_sampled) {
graphics_count = m_gpu->dynamic_sm_count;
} else {
if (k.is_graphic_kernel) {
if (is_graphic) {
if (get_cluster_id() >= m_config->num_shader() / 2) {
return false;
}
graphics_count = (get_cluster_id() + 1) * 2;
} else if (!k.is_graphic_kernel) {
} else if (!is_graphic) {
if (get_cluster_id() < m_config->num_shader() / 2) {
return false;
}
Expand All @@ -1896,16 +1897,16 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
m_gpu->concurrent_granularity;
bool limited_reg = true;
bool limited_shmem = true;
if ((k.is_graphic_kernel && m_running_compute) ||
(!k.is_graphic_kernel && m_running_graphics)) {
if ((is_graphic && m_running_compute) ||
(!is_graphic && m_running_graphics)) {
unsigned graphics_cta_size = 0;
unsigned compute_cta_size = 0;
const struct gpgpu_ptx_sim_info *kernel_g = NULL;
const struct gpgpu_ptx_sim_info *kernel_c = NULL;
if (k.is_graphic_kernel && m_running_compute) {
if (is_graphic && m_running_compute) {
graphics_cta_size = threads_per_cta;
compute_cta_size = m_running_compute->threads_per_cta();
} else if (!k.is_graphic_kernel && m_running_graphics) {
} else if (!is_graphic && m_running_graphics) {
graphics_cta_size = m_running_graphics->threads_per_cta();
compute_cta_size = threads_per_cta;
}
Expand All @@ -1920,10 +1921,10 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
unsigned compute_cta =
(m_config->n_thread_per_shader - max_graphics_threads) /
compute_cta_size;
if (k.is_graphic_kernel && m_running_compute) {
if (is_graphic && m_running_compute) {
kernel_g = kernel_info;
kernel_c = ptx_sim_kernel_info(m_running_compute->entry());
} else if (!k.is_graphic_kernel && m_running_graphics) {
} else if (!is_graphic && m_running_graphics) {
kernel_g = ptx_sim_kernel_info(m_running_graphics->entry());
kernel_c = kernel_info;
}
Expand Down Expand Up @@ -1975,7 +1976,7 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
m_gpu->concurrent_granularity;
}
}
if (k.is_graphic_kernel) {
if (is_graphic) {
if (m_occupied_graphics_threads + padded_cta_size > max_graphics_threads)
return false;

Expand Down Expand Up @@ -2036,7 +2037,7 @@ bool shader_core_ctx::occupy_shader_resource_1block(kernel_info_t &k,
m_occupied_shmem += kernel_info->smem;
m_occupied_regs += used_regs;
m_occupied_ctas++;
if (k.is_graphic_kernel) {
if (is_graphic) {
m_occupied_graphics_threads += padded_cta_size;
m_occupied_graphics_shmem += kernel_info->smem;
m_occupied_graphics_regs += used_regs;
Expand Down Expand Up @@ -2089,7 +2090,7 @@ void shader_core_ctx::release_shader_resource_1block(unsigned hw_ctaid,

assert(m_occupied_ctas >= 1);
m_occupied_ctas--;
if (k.is_graphic_kernel) {
if (m_gpu->is_graphics(k.get_streamID())) {
assert(m_occupied_graphics_threads >= padded_cta_size);
m_occupied_graphics_threads -= padded_cta_size;

Expand Down Expand Up @@ -2127,7 +2128,7 @@ void shader_core_ctx::issue_block2core(kernel_info_t &kernel) {
set_max_cta(kernel);
else {
assert(occupy_shader_resource_1block(kernel, true));
if (kernel.is_graphic_kernel) {
if (m_gpu->is_graphics(kernel.get_streamID())) {
m_running_graphics = &kernel;
} else {
m_running_compute = &kernel;
Expand Down
13 changes: 6 additions & 7 deletions src/gpgpu-sim/shader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ void shader_core_ctx::init_warps(unsigned cta_id, unsigned start_thread,

m_warp[i]->init(start_pc, cta_id, ctaid, i, active_threads,
m_dynamic_warp_id, kernel.get_streamID(),
kernel.is_graphic_kernel);
m_gpu->is_graphics(kernel.get_streamID()));
++m_dynamic_warp_id;
m_not_completed += n_active;
++m_active_warps;
Expand Down Expand Up @@ -3033,8 +3033,8 @@ void shader_core_ctx::register_cta_thread_exit(unsigned cta_num,
release_shader_resource_1block(cta_num, *kernel);
kernel->dec_running();
// invalidate vertices
if (kernel->is_graphic_kernel &&
m_gpu->getShaderCoreConfig()->gpgpu_invalidate_l2) {
bool is_graphics = m_gpu->is_graphics(kernel->get_streamID());
if (is_graphics && m_gpu->getShaderCoreConfig()->gpgpu_invalidate_l2) {
unsigned kernel_id = kernel->get_uid();
for (unsigned vb = 0; vb < m_gpu->vb_addr[kernel_id].size(); vb++) {
unsigned ctaid = kernelcta_id;
Expand All @@ -3047,8 +3047,7 @@ void shader_core_ctx::register_cta_thread_exit(unsigned cta_num,
unsigned start_addr =
m_gpu->vb_addr[kernel_id][vb] + ctaid * size_per_cta;
if (((ctaid + 1) * size_per_cta < vb_size) && size_per_cta != 0) {
m_gpu->invalidate_l2_range(start_addr, size_per_cta,
kernel->is_graphic_kernel);
m_gpu->invalidate_l2_range(start_addr, size_per_cta, is_graphics);
}
}
}
Expand All @@ -3062,7 +3061,7 @@ void shader_core_ctx::register_cta_thread_exit(unsigned cta_num,

if (m_kernel == kernel) m_kernel = NULL;
m_gpu->set_kernel_done(kernel);
if (kernel->is_graphic_kernel) {
if (is_graphics) {
m_running_graphics = NULL;
} else {
m_running_compute = NULL;
Expand Down Expand Up @@ -4640,7 +4639,7 @@ unsigned simt_core_cluster::issue_block2core() {
// (m_core[core]->get_n_active_cta() <
// m_config->max_cta(*kernel)) ) {
m_core[core]->can_issue_1block(*kernel)) {
if (kernel->is_graphic_kernel) {
if (m_gpu->is_graphics(kernel->get_streamID())) {
unsigned kernel_id = kernel->get_uid();

for (unsigned vb = 0; vb < m_gpu->vb_addr[kernel_id].size(); vb++) {
Expand Down

0 comments on commit 5cc51ad

Please sign in to comment.