diff options
author | Mai Lavelle <mai.lavelle@gmail.com> | 2017-03-11 13:23:11 +0300 |
---|---|---|
committer | Mai Lavelle <mai.lavelle@gmail.com> | 2017-03-11 13:39:28 +0300 |
commit | 96868a39419f1c9a8962c56e02480fabbf1e5156 (patch) | |
tree | 64128db60a690d0223dad4e6e4073230ffd174ac | |
parent | 5afe4c787f0ed3ac30f7609c7f07c5092a20eac9 (diff) |
Fix T50888: Numeric overflow in split kernel state buffer size calculation
Overflow led to the state buffer being too small and the split kernel to
get stuck doing nothing forever.
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 8 | ||||
-rw-r--r-- | intern/cycles/device/device_memory.h | 9 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.h | 4 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_split.cu | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_split_data.h | 4 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 16 |
11 files changed, 41 insertions, 28 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 06a1568b4d6..273c3b48936 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -72,7 +72,7 @@ public: virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&); virtual int2 split_kernel_local_size(); virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task); - virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads); + virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads); }; class CPUDevice : public Device @@ -860,7 +860,7 @@ int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memo return task->requested_tile_size; } -size_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) { +uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) { KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer; return split_data_buffer_size(kg, num_threads); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index a630a3d1183..58471ba67c2 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -89,7 +89,7 @@ class CUDASplitKernel : public DeviceSplitKernel { public: explicit CUDASplitKernel(CUDADevice *device); - virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads); + virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads); virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, RenderTile& rtile, @@ -1473,9 +1473,9 @@ CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device) { } -size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads) +uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads) { - device_vector<uint> size_buffer; + device_vector<uint64_t> size_buffer; size_buffer.resize(1); device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); @@ -1504,7 +1504,7 @@ size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& device->cuda_pop_context(); - device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint)); + device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t)); device->mem_free(size_buffer); return *size_buffer.get_data(); diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index b69c3dad604..60d166b43ba 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -48,7 +48,8 @@ enum DataType { TYPE_UINT, TYPE_INT, TYPE_FLOAT, - TYPE_HALF + TYPE_HALF, + TYPE_UINT64, }; static inline size_t datatype_size(DataType datatype) @@ -59,6 +60,7 @@ static inline size_t datatype_size(DataType datatype) case TYPE_UINT: return sizeof(uint); case TYPE_INT: return sizeof(int); case TYPE_HALF: return sizeof(half); + case TYPE_UINT64: return sizeof(uint64_t); default: return 0; } } @@ -160,6 +162,11 @@ template<> struct device_type_traits<half4> { static const int num_elements = 4; }; +template<> struct device_type_traits<uint64_t> { + static const DataType data_type = TYPE_UINT64; + static const int num_elements = 1; +}; + /* Device Memory */ class device_memory diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index 10a642ed4d0..5b892038ebb 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -105,9 +105,9 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe return true; } -size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size) +size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size) { - size_t size_per_element = state_buffer_size(kg, data, 1024) / 1024; + uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024; return max_buffer_size / size_per_element; } diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h index ae61f9e38c1..6739e754862 100644 --- a/intern/cycles/device/device_split_kernel.h +++ b/intern/cycles/device/device_split_kernel.h @@ -105,8 +105,8 @@ public: device_memory& kgbuffer, device_memory& kernel_data); - virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0; - size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size); + virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0; + size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size); virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, RenderTile& rtile, diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index c5f44f84e8c..51ff39f0ad3 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -334,11 +334,11 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem) size_t num_threads = global_size[0] * global_size[1]; cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer); - unsigned long long d_offset = 0; - unsigned long long d_size = 0; + cl_ulong d_offset = 0; + cl_ulong d_size = 0; while(d_offset < mem.memory_size()) { - d_size = std::min<unsigned long long>(num_threads*sizeof(float4), mem.memory_size() - d_offset); + d_size = std::min<cl_ulong>(num_threads*sizeof(float4), mem.memory_size() - d_offset); kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 89ab19ca93b..a09d93c625e 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -227,9 +227,9 @@ public: return kernel; } - virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) + virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) { - device_vector<uint> size_buffer; + device_vector<uint64_t> size_buffer; size_buffer.resize(1); device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); @@ -249,7 +249,7 @@ public: device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); - device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint)); + device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t)); device->mem_free(size_buffer); if(device->ciErr != CL_SUCCESS) { @@ -346,8 +346,8 @@ public: virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/) { - size_t max_buffer_size; - clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_buffer_size, NULL); + cl_ulong max_buffer_size; + clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL); VLOG(1) << "Maximum device allocation side: " << string_human_readable_number(max_buffer_size) << " bytes. (" << string_human_readable_size(max_buffer_size) << ")."; diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index 6c508c2cd79..fbdf79697d5 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -46,7 +46,7 @@ /* kernels */ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_state_buffer_size(uint num_threads, uint *size) +kernel_cuda_state_buffer_size(uint num_threads, uint64_t *size) { *size = split_data_buffer_size(NULL, num_threads); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl index 0a1843ff8bd..4c9bf63ef51 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl @@ -21,7 +21,7 @@ __kernel void kernel_ocl_path_trace_state_buffer_size( KernelGlobals *kg, ccl_constant KernelData *data, uint num_threads, - ccl_global uint *size) + ccl_global uint64_t *size) { kg->data = data; *size = split_data_buffer_size(kg, num_threads); diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h index 81dcdbaedde..d319514c190 100644 --- a/intern/cycles/kernel/split/kernel_split_data.h +++ b/intern/cycles/kernel/split/kernel_split_data.h @@ -22,11 +22,11 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements) +ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements) { (void)kg; /* Unused on CPU. */ - size_t size = 0; + uint64_t size = 0; #define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16) size = size SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 36d2f1053c7..dcd0b78e4a4 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -106,10 +106,16 @@ typedef unsigned int uint; #endif -#ifndef __KERNEL_GPU__ - /* Fixed Bits Types */ +#ifdef __KERNEL_OPENCL__ + +typedef ulong uint64_t; + +#endif + +#ifndef __KERNEL_GPU__ + #ifdef _WIN32 typedef signed char int8_t; @@ -474,17 +480,17 @@ ccl_device_inline int4 make_int4(const float3& f) #endif -ccl_device_inline int align_up(int offset, int alignment) +ccl_device_inline size_t align_up(size_t offset, size_t alignment) { return (offset + alignment - 1) & ~(alignment - 1); } -ccl_device_inline int round_up(int x, int multiple) +ccl_device_inline size_t round_up(size_t x, size_t multiple) { return ((x + multiple - 1) / multiple) * multiple; } -ccl_device_inline int round_down(int x, int multiple) +ccl_device_inline size_t round_down(size_t x, size_t multiple) { return (x / multiple) * multiple; } |