diff options
author | Mai Lavelle <mai.lavelle@gmail.com> | 2017-04-11 09:36:08 +0300 |
---|---|---|
committer | Mai Lavelle <mai.lavelle@gmail.com> | 2017-04-11 10:11:18 +0300 |
commit | 1e6038a426b992bf991040eac18ae7d83ae6a8bb (patch) | |
tree | ed2fcdb6f8f22dab0a725fe6fb5741ab30c70b15 /intern | |
parent | 3722da3b4e8233138f9df16ffc9cb5cb120950d9 (diff) |
Cycles: Implement automatic global size for CUDA split kernel
Not sure this is the best way to do things for CUDA, but it's much better than
being unimplemented.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 19 |
1 files changed, 16 insertions, 3 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 4c1a49878f5..ef283c9d455 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1613,10 +1613,23 @@ int2 CUDASplitKernel::split_kernel_local_size() return make_int2(32, 1); } -int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/) +int2 CUDASplitKernel::split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/) { - /* TODO(mai): implement something here to detect ideal work size */ - return make_int2(256, 256); + size_t free; + size_t total; + + device->cuda_push_context(); + cuda_assert(cuMemGetInfo(&free, &total)); + device->cuda_pop_context(); + + VLOG(1) << "Maximum device allocation size: " + << string_human_readable_number(free) << " bytes. (" + << string_human_readable_size(free) << ")."; + + size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2); + int2 global_size = make_int2(round_down((int)sqrt(num_elements), 32), (int)sqrt(num_elements)); + VLOG(1) << "Global size: " << global_size << "."; + return global_size; } bool device_cuda_init(void) |