From fc8f428224a8e77ba66394ad8238a406fa24ee3a Mon Sep 17 00:00:00 2001 From: lazydodo Date: Sun, 30 Apr 2017 10:52:38 -0600 Subject: fix typo in WITH_SYSTEM_GFLOG in CMakeLists.txt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 662735d3fc8..1dac082459f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -365,7 +365,7 @@ mark_as_advanced(WITH_LIBMV_SCHUR_SPECIALIZATIONS) # Logging/unbit test libraries. option(WITH_SYSTEM_GFLAGS "Use system-wide Gflags instead of a bundled one" OFF) -option(WITH_SYSTEM_GFLOG "Use system-wide Glog instead of a bundled one" OFF) +option(WITH_SYSTEM_GLOG "Use system-wide Glog instead of a bundled one" OFF) mark_as_advanced(WITH_SYSTEM_GFLAGS) mark_as_advanced(WITH_SYSTEM_GLOG) -- cgit v1.2.3 From 4174e533c001367b0ef391b72baa5b07cb517ce8 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Tue, 2 May 2017 15:02:49 +0200 Subject: Cycles: Cache split kernels in CUDA device This way we don't re-load kernels for every sample in the viewport. Additionally, we don't risk the global size changing in between samples. 
--- intern/cycles/device/device_cuda.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index ef283c9d455..acfb3e1d8f4 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -119,6 +119,7 @@ public: int cuDevId; int cuDevArchitecture; bool first_error; + CUDASplitKernel *split_kernel; struct PixelMem { GLuint cuPBO; @@ -221,6 +222,8 @@ public: cuDevice = 0; cuContext = 0; + split_kernel = NULL; + need_bindless_mapping = false; /* intialize */ @@ -260,6 +263,8 @@ public: { task_pool.stop(); + delete split_kernel; + if(info.has_bindless_textures) { tex_free(bindless_mapping); } @@ -1336,12 +1341,14 @@ public: requested_features.max_closure = 64; } - CUDASplitKernel split_kernel(this); - split_kernel.load_kernels(requested_features); + if(split_kernel == NULL) { + split_kernel = new CUDASplitKernel(this); + split_kernel->load_kernels(requested_features); + } while(task->acquire_tile(this, tile)) { device_memory void_buffer; - split_kernel.path_trace(task, tile, void_buffer, void_buffer); + split_kernel->path_trace(task, tile, void_buffer, void_buffer); task->release_tile(tile); -- cgit v1.2.3 From 4384a7cf463eedea83179da80bbe12ff7d55578a Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Tue, 2 May 2017 15:03:51 +0200 Subject: Cycles: Fix CUDA split kernel Global size y needs to be a multiple of 16. 
--- intern/cycles/device/device_cuda.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index acfb3e1d8f4..a971170318e 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1634,7 +1634,8 @@ int2 CUDASplitKernel::split_kernel_global_size(device_memory& kg, device_memory& << string_human_readable_size(free) << ")."; size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2); - int2 global_size = make_int2(round_down((int)sqrt(num_elements), 32), (int)sqrt(num_elements)); + size_t side = round_down((int)sqrt(num_elements), 32); + int2 global_size = make_int2(side, round_down(num_elements / side, 16)); VLOG(1) << "Global size: " << global_size << "."; return global_size; } -- cgit v1.2.3