diff options
author | Campbell Barton <ideasman42@gmail.com> | 2017-04-12 07:23:47 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2017-04-12 07:23:47 +0300 |
commit | dc1499ba1c705506409466dfe981621d0b209eff (patch) | |
tree | 2caba544a4a0bebb05c6c64ab85475188a43154a /intern | |
parent | ed8c71da1c88a899b2ab6e38ef6ba35d8115f8fb (diff) | |
parent | 0ebe08af34243631c7826c465cd8429bb20bd98d (diff) |
Merge branch 'master' into 28
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 19 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.cpp | 27 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.h | 3 |
3 files changed, 33 insertions, 16 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 4c1a49878f5..ef283c9d455 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1613,10 +1613,23 @@ int2 CUDASplitKernel::split_kernel_local_size() return make_int2(32, 1); } -int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/) +int2 CUDASplitKernel::split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/) { - /* TODO(mai): implement something here to detect ideal work size */ - return make_int2(256, 256); + size_t free; + size_t total; + + device->cuda_push_context(); + cuda_assert(cuMemGetInfo(&free, &total)); + device->cuda_pop_context(); + + VLOG(1) << "Maximum device allocation size: " + << string_human_readable_number(free) << " bytes. (" + << string_human_readable_size(free) << ")."; + + size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2); + int2 global_size = make_int2(round_down((int)sqrt(num_elements), 32), (int)sqrt(num_elements)); + VLOG(1) << "Global size: " << global_size << "."; + return global_size; } bool device_cuda_init(void) diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index fa641161c05..981ec74fe56 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -128,26 +128,27 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, local_size[1] = lsize[1]; } - /* Set gloabl size */ - size_t global_size[2]; - { - int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task); - - /* Make sure that set work size is a multiple of local - * work size dimensions. - */ - global_size[0] = round_up(gsize[0], local_size[0]); - global_size[1] = round_up(gsize[1], local_size[1]); - } - /* Number of elements in the global state buffer */ int num_global_elements = global_size[0] * global_size[1]; - assert(num_global_elements % WORK_POOL_SIZE == 0); /* Allocate all required global memory once. */ if(first_tile) { first_tile = false; + /* Set gloabl size */ + { + int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task); + + /* Make sure that set work size is a multiple of local + * work size dimensions. + */ + global_size[0] = round_up(gsize[0], local_size[0]); + global_size[1] = round_up(gsize[1], local_size[1]); + } + + num_global_elements = global_size[0] * global_size[1]; + assert(num_global_elements % WORK_POOL_SIZE == 0); + /* Calculate max groups */ /* Denotes the maximum work groups possible w.r.t. current requested tile size. */ diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h index 15a94953a11..55548122c0c 100644 --- a/intern/cycles/device/device_split_kernel.h +++ b/intern/cycles/device/device_split_kernel.h @@ -95,6 +95,9 @@ private: /* Marked True in constructor and marked false at the end of path_trace(). */ bool first_tile; + /* Cached global size */ + size_t global_size[2]; + public: explicit DeviceSplitKernel(Device* device); virtual ~DeviceSplitKernel(); |