Skip to content

Commit

Permalink
Changed demo_cdpSimplePrint.py to use DynamicSourceModule.
Browse files Browse the repository at this point in the history
Using Context.set_limit() now.
This required the limit enum value CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH (defined in CUDA8/v8.0/include/cuda.h[975], available since 3.5), which was missing from pycuda.driver.limit. Added both CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH and CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT (available since the same version) to pycuda.driver.limit.
  • Loading branch information
Lurch authored and Lurch committed Jan 13, 2017
1 parent e990a4e commit e03e9f7
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 11 deletions.
14 changes: 3 additions & 11 deletions examples/demo_cdpSimplePrint.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
import sys, os
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import JitLinkModule
from pycuda.driver import jit_input_type
from pycuda.compiler import DynamicSourceModule

cdpSimplePrint_cu = '''
#include <cstdio>
Expand Down Expand Up @@ -96,11 +95,7 @@ def main(argv):

print("starting Simple Print (CUDA Dynamic Parallelism)")

mod = JitLinkModule()
mod.add_source(cdpSimplePrint_cu, nvcc_options=['-O3', '-rdc=true', '-lcudadevrt'])
mod.add_stdlib('cudadevrt')
mod.link()

mod = DynamicSourceModule(cdpSimplePrint_cu)
cdp_kernel = mod.get_function('cdp_kernel').prepare('iiii').prepared_call

print("***************************************************************************")
Expand All @@ -113,14 +108,11 @@ def main(argv):
num_blocks *= 4
print("+%d" % num_blocks)
sum += num_blocks

print("=%d blocks are launched!!! (%d from the GPU)" % (sum, sum-2))
print("***************************************************************************\n")

# TODO: cudaDeviceSetLimit() is not available on PyCuda, works anyway on my GeForce GTX 980; maybe add that function?
#cudaDeviceSetLimit( cudaLimitDevRuntimeSyncDepth, max_depth )
pycuda.autoinit.context.set_limit(cuda.limit.DEV_RUNTIME_SYNC_DEPTH, max_depth)

# Launch the kernel from the CPU.
print("Launching cdp_kernel() with CUDA Dynamic Parallelism:\n")
cdp_kernel((2,1), (2,1,1), max_depth, 0, 0, -1)

Expand Down
4 changes: 4 additions & 0 deletions src/wrapper/wrap_cudadrv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,10 @@ BOOST_PYTHON_MODULE(_driver)
.value("PRINTF_FIFO_SIZE", CU_LIMIT_PRINTF_FIFO_SIZE)
#if CUDAPP_CUDA_VERSION >= 3020
.value("MALLOC_HEAP_SIZE", CU_LIMIT_MALLOC_HEAP_SIZE)
#endif
#if CUDAPP_CUDA_VERSION >= 3050
.value("DEV_RUNTIME_SYNC_DEPTH", CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH)
.value("DEV_RUNTIME_PENDING_LAUNCH_COUNT", CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT)
#endif
;
#endif
Expand Down

0 comments on commit e03e9f7

Please sign in to comment.