From e03e9f74b667acdcfccd7a05abe568b95a12ac53 Mon Sep 17 00:00:00 2001 From: Lurch Date: Fri, 13 Jan 2017 20:06:44 +0100 Subject: [PATCH] Changed demo_cdpSimplePrint.py to use DynamicSourceModule. Using Context.set_limit() now. Needed a missing limit enum CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH (in CUDA8/v8.0/include/cuda.h[975], available since 3.5), added CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH and CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT (same) to pycuda.driver.limit. --- examples/demo_cdpSimplePrint.py | 14 +++----------- src/wrapper/wrap_cudadrv.cpp | 4 ++++ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/examples/demo_cdpSimplePrint.py b/examples/demo_cdpSimplePrint.py index d00ba750..d5435987 100644 --- a/examples/demo_cdpSimplePrint.py +++ b/examples/demo_cdpSimplePrint.py @@ -20,8 +20,7 @@ import sys, os import pycuda.autoinit import pycuda.driver as cuda -from pycuda.compiler import JitLinkModule -from pycuda.driver import jit_input_type +from pycuda.compiler import DynamicSourceModule cdpSimplePrint_cu = ''' #include @@ -96,11 +95,7 @@ def main(argv): print("starting Simple Print (CUDA Dynamic Parallelism)") - mod = JitLinkModule() - mod.add_source(cdpSimplePrint_cu, nvcc_options=['-O3', '-rdc=true', '-lcudadevrt']) - mod.add_stdlib('cudadevrt') - mod.link() - + mod = DynamicSourceModule(cdpSimplePrint_cu) cdp_kernel = mod.get_function('cdp_kernel').prepare('iiii').prepared_call print("***************************************************************************") @@ -113,14 +108,11 @@ def main(argv): num_blocks *= 4 print("+%d" % num_blocks) sum += num_blocks - print("=%d blocks are launched!!! (%d from the GPU)" % (sum, sum-2)) print("***************************************************************************\n") - # TODO: cudaDeviceSetLimit() is not available on PyCuda, works anyway on my GeForce GTX 980; maybe add that function? - #cudaDeviceSetLimit( cudaLimitDevRuntimeSyncDepth, max_depth ) + pycuda.autoinit.context.set_limit(cuda.limit.DEV_RUNTIME_SYNC_DEPTH, max_depth) - # Launch the kernel from the CPU. print("Launching cdp_kernel() with CUDA Dynamic Parallelism:\n") cdp_kernel((2,1), (2,1,1), max_depth, 0, 0, -1) diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp index 4f126065..76f30df4 100644 --- a/src/wrapper/wrap_cudadrv.cpp +++ b/src/wrapper/wrap_cudadrv.cpp @@ -1010,6 +1010,10 @@ BOOST_PYTHON_MODULE(_driver) .value("PRINTF_FIFO_SIZE", CU_LIMIT_PRINTF_FIFO_SIZE) #if CUDAPP_CUDA_VERSION >= 3020 .value("MALLOC_HEAP_SIZE", CU_LIMIT_MALLOC_HEAP_SIZE) +#endif +#if CUDAPP_CUDA_VERSION >= 3050 + .value("DEV_RUNTIME_SYNC_DEPTH", CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH) + .value("DEV_RUNTIME_PENDING_LAUNCH_COUNT", CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT) #endif ; #endif