Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Mours <pmours@nvidia.com> 2020-02-13 16:25:00 +0300
committer: Patrick Mours <pmours@nvidia.com> 2020-02-13 17:22:26 +0300
commit: 63bde1063f6720320c8206de14ac30a3c74f5cbc (patch)
tree: 942a98fac0440f81546243f12854e9cf0746d787 /intern/cycles/device
parent: 13e5e55f3f4da42e3a648542441a3c35cae6d12b (diff)
Cleanup: Remove some unnecessary OptiX device code
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- intern/cycles/device/device_optix.cpp | 138
1 file changed, 58 insertions, 80 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index adb0f60a006..fc32679e794 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -108,17 +108,30 @@ struct KernelParams {
} \
(void)0
-# define CUDA_GET_BLOCKSIZE(func, w, h) \
- int threads; \
- check_result_cuda_ret( \
- cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
- threads = (int)sqrt((float)threads); \
- int xblocks = ((w) + threads - 1) / threads; \
- int yblocks = ((h) + threads - 1) / threads;
-
-# define CUDA_LAUNCH_KERNEL(func, args) \
- check_result_cuda_ret(cuLaunchKernel( \
- func, xblocks, yblocks, 1, threads, threads, 1, 0, cuda_stream[thread_index], args, 0));
+# define launch_filter_kernel(func_name, w, h, args) \
+ { \
+ CUfunction func; \
+ check_result_cuda_ret(cuModuleGetFunction(&func, cuFilterModule, func_name)); \
+ check_result_cuda_ret(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); \
+ int threads; \
+ check_result_cuda_ret( \
+ cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ threads = (int)sqrt((float)threads); \
+ int xblocks = ((w) + threads - 1) / threads; \
+ int yblocks = ((h) + threads - 1) / threads; \
+ check_result_cuda_ret(cuLaunchKernel(func, \
+ xblocks, \
+ yblocks, \
+ 1, \
+ threads, \
+ threads, \
+ 1, \
+ 0, \
+ cuda_stream[thread_index], \
+ args, \
+ 0)); \
+ } \
+ (void)0
class OptiXDevice : public CUDADevice {
@@ -196,7 +209,7 @@ class OptiXDevice : public CUDADevice {
// Make the CUDA context current
if (!cuContext) {
- return;
+ return; // Do not initialize if CUDA context creation failed already
}
const CUDAContextScope scope(cuContext);
@@ -742,44 +755,30 @@ class OptiXDevice : public CUDADevice {
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
tile_info_mem.copy_to_device();
- CUfunction filter_copy_func;
- check_result_cuda_ret(cuModuleGetFunction(
- &filter_copy_func, cuFilterModule, "kernel_cuda_filter_copy_input"));
- check_result_cuda_ret(cuFuncSetCacheConfig(filter_copy_func, CU_FUNC_CACHE_PREFER_L1));
-
void *args[] = {
&input.device_pointer, &tile_info_mem.device_pointer, &rect.x, &task.pass_stride};
- CUDA_GET_BLOCKSIZE(filter_copy_func, rect_size.x, rect_size.y);
- CUDA_LAUNCH_KERNEL(filter_copy_func, args);
+ launch_filter_kernel("kernel_cuda_filter_copy_input", rect_size.x, rect_size.y, args);
}
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
device_only_memory<float> input_rgb(this, "denoiser input rgb");
- {
- input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 *
- task.denoising.optix_input_passes);
-
- CUfunction convert_to_rgb_func;
- check_result_cuda_ret(cuModuleGetFunction(
- &convert_to_rgb_func, cuFilterModule, "kernel_cuda_filter_convert_to_rgb"));
- check_result_cuda_ret(cuFuncSetCacheConfig(convert_to_rgb_func, CU_FUNC_CACHE_PREFER_L1));
-
- void *args[] = {&input_rgb.device_pointer,
- &input_ptr,
- &rect_size.x,
- &rect_size.y,
- &input_stride,
- &task.pass_stride,
- const_cast<int *>(pass_offset),
- &task.denoising.optix_input_passes,
- &rtile.sample};
- CUDA_GET_BLOCKSIZE(convert_to_rgb_func, rect_size.x, rect_size.y);
- CUDA_LAUNCH_KERNEL(convert_to_rgb_func, args);
-
- input_ptr = input_rgb.device_pointer;
- pixel_stride = 3 * sizeof(float);
- input_stride = rect_size.x * pixel_stride;
- }
+ input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * task.denoising.optix_input_passes);
+
+ void *input_args[] = {&input_rgb.device_pointer,
+ &input_ptr,
+ &rect_size.x,
+ &rect_size.y,
+ &input_stride,
+ &task.pass_stride,
+ const_cast<int *>(pass_offset),
+ &task.denoising.optix_input_passes,
+ &rtile.sample};
+ launch_filter_kernel(
+ "kernel_cuda_filter_convert_to_rgb", rect_size.x, rect_size.y, input_args);
+
+ input_ptr = input_rgb.device_pointer;
+ pixel_stride = 3 * sizeof(float);
+ input_stride = rect_size.x * pixel_stride;
# endif
const bool recreate_denoiser = (denoiser == NULL) ||
@@ -886,29 +885,21 @@ class OptiXDevice : public CUDADevice {
scratch_size));
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
- {
- CUfunction convert_from_rgb_func;
- check_result_cuda_ret(cuModuleGetFunction(
- &convert_from_rgb_func, cuFilterModule, "kernel_cuda_filter_convert_from_rgb"));
- check_result_cuda_ret(
- cuFuncSetCacheConfig(convert_from_rgb_func, CU_FUNC_CACHE_PREFER_L1));
-
- void *args[] = {&input_ptr,
- &rtiles[9].buffer,
- &output_offset.x,
- &output_offset.y,
- &rect_size.x,
- &rect_size.y,
- &rtiles[9].x,
- &rtiles[9].y,
- &rtiles[9].w,
- &rtiles[9].h,
- &rtiles[9].offset,
- &rtiles[9].stride,
- &task.pass_stride};
- CUDA_GET_BLOCKSIZE(convert_from_rgb_func, rtiles[9].w, rtiles[9].h);
- CUDA_LAUNCH_KERNEL(convert_from_rgb_func, args);
- }
+ void *output_args[] = {&input_ptr,
+ &rtiles[9].buffer,
+ &output_offset.x,
+ &output_offset.y,
+ &rect_size.x,
+ &rect_size.y,
+ &rtiles[9].x,
+ &rtiles[9].y,
+ &rtiles[9].w,
+ &rtiles[9].h,
+ &rtiles[9].offset,
+ &rtiles[9].stride,
+ &task.pass_stride};
+ launch_filter_kernel(
+ "kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args);
# endif
check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
@@ -1448,11 +1439,6 @@ class OptiXDevice : public CUDADevice {
// Upload texture information to device if it has changed since last launch
load_texture_info();
- { // Synchronize all memory copies before executing task
- const CUDAContextScope scope(cuContext);
- check_result_cuda(cuCtxSynchronize());
- }
-
if (task.type == DeviceTask::FILM_CONVERT) {
// Execute in main thread because of OpenGL access
film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
@@ -1500,14 +1486,6 @@ bool device_optix_init()
if (!device_cuda_init())
return false;
-# ifdef WITH_CUDA_DYNLOAD
- // Load NVRTC function pointers for adaptive kernel compilation
- if (DebugFlags().cuda.adaptive_compile && cuewInit(CUEW_INIT_NVRTC) != CUEW_SUCCESS) {
- VLOG(1) << "CUEW initialization failed for NVRTC. Adaptive kernel compilation won't be "
- "available.";
- }
-# endif
-
const OptixResult result = optixInit();
if (result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) {