Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMai Lavelle <mai.lavelle@gmail.com>2017-03-11 13:23:11 +0300
committerMai Lavelle <mai.lavelle@gmail.com>2017-03-11 13:39:28 +0300
commit96868a39419f1c9a8962c56e02480fabbf1e5156 (patch)
tree64128db60a690d0223dad4e6e4073230ffd174ac
parent5afe4c787f0ed3ac30f7609c7f07c5092a20eac9 (diff)
Fix T50888: Numeric overflow in split kernel state buffer size calculation
Overflow led to the state buffer being too small and the split kernel to get stuck doing nothing forever.
-rw-r--r--intern/cycles/device/device_cpu.cpp4
-rw-r--r--intern/cycles/device/device_cuda.cpp8
-rw-r--r--intern/cycles/device/device_memory.h9
-rw-r--r--intern/cycles/device/device_split_kernel.cpp4
-rw-r--r--intern/cycles/device/device_split_kernel.h4
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp6
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp10
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu2
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl2
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h4
-rw-r--r--intern/cycles/util/util_types.h16
11 files changed, 41 insertions, 28 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 06a1568b4d6..273c3b48936 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -72,7 +72,7 @@ public:
virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
virtual int2 split_kernel_local_size();
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
- virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
+ virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
};
class CPUDevice : public Device
@@ -860,7 +860,7 @@ int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memo
return task->requested_tile_size;
}
-size_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
+uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
return split_data_buffer_size(kg, num_threads);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index a630a3d1183..58471ba67c2 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -89,7 +89,7 @@ class CUDASplitKernel : public DeviceSplitKernel {
public:
explicit CUDASplitKernel(CUDADevice *device);
- virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
+ virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
@@ -1473,9 +1473,9 @@ CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device)
{
}
-size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
+uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
{
- device_vector<uint> size_buffer;
+ device_vector<uint64_t> size_buffer;
size_buffer.resize(1);
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
@@ -1504,7 +1504,7 @@ size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory&
device->cuda_pop_context();
- device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint));
+ device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
device->mem_free(size_buffer);
return *size_buffer.get_data();
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index b69c3dad604..60d166b43ba 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -48,7 +48,8 @@ enum DataType {
TYPE_UINT,
TYPE_INT,
TYPE_FLOAT,
- TYPE_HALF
+ TYPE_HALF,
+ TYPE_UINT64,
};
static inline size_t datatype_size(DataType datatype)
@@ -59,6 +60,7 @@ static inline size_t datatype_size(DataType datatype)
case TYPE_UINT: return sizeof(uint);
case TYPE_INT: return sizeof(int);
case TYPE_HALF: return sizeof(half);
+ case TYPE_UINT64: return sizeof(uint64_t);
default: return 0;
}
}
@@ -160,6 +162,11 @@ template<> struct device_type_traits<half4> {
static const int num_elements = 4;
};
+template<> struct device_type_traits<uint64_t> {
+ static const DataType data_type = TYPE_UINT64;
+ static const int num_elements = 1;
+};
+
/* Device Memory */
class device_memory
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 10a642ed4d0..5b892038ebb 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -105,9 +105,9 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
return true;
}
-size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size)
+size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
{
- size_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
+ uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
return max_buffer_size / size_per_element;
}
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index ae61f9e38c1..6739e754862 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -105,8 +105,8 @@ public:
device_memory& kgbuffer,
device_memory& kernel_data);
- virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
- size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size);
+ virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
+ size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index c5f44f84e8c..51ff39f0ad3 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -334,11 +334,11 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem)
size_t num_threads = global_size[0] * global_size[1];
cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
- unsigned long long d_offset = 0;
- unsigned long long d_size = 0;
+ cl_ulong d_offset = 0;
+ cl_ulong d_size = 0;
while(d_offset < mem.memory_size()) {
- d_size = std::min<unsigned long long>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
+ d_size = std::min<cl_ulong>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 89ab19ca93b..a09d93c625e 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -227,9 +227,9 @@ public:
return kernel;
}
- virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
+ virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
{
- device_vector<uint> size_buffer;
+ device_vector<uint64_t> size_buffer;
size_buffer.resize(1);
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
@@ -249,7 +249,7 @@ public:
device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");
- device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint));
+ device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
device->mem_free(size_buffer);
if(device->ciErr != CL_SUCCESS) {
@@ -346,8 +346,8 @@ public:
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/)
{
- size_t max_buffer_size;
- clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_buffer_size, NULL);
+ cl_ulong max_buffer_size;
+ clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
VLOG(1) << "Maximum device allocation side: "
<< string_human_readable_number(max_buffer_size) << " bytes. ("
<< string_human_readable_size(max_buffer_size) << ").";
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 6c508c2cd79..fbdf79697d5 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -46,7 +46,7 @@
/* kernels */
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_state_buffer_size(uint num_threads, uint *size)
+kernel_cuda_state_buffer_size(uint num_threads, uint64_t *size)
{
*size = split_data_buffer_size(NULL, num_threads);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
index 0a1843ff8bd..4c9bf63ef51 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
@@ -21,7 +21,7 @@ __kernel void kernel_ocl_path_trace_state_buffer_size(
KernelGlobals *kg,
ccl_constant KernelData *data,
uint num_threads,
- ccl_global uint *size)
+ ccl_global uint64_t *size)
{
kg->data = data;
*size = split_data_buffer_size(kg, num_threads);
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 81dcdbaedde..d319514c190 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -22,11 +22,11 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
+ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
{
(void)kg; /* Unused on CPU. */
- size_t size = 0;
+ uint64_t size = 0;
#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 36d2f1053c7..dcd0b78e4a4 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -106,10 +106,16 @@ typedef unsigned int uint;
#endif
-#ifndef __KERNEL_GPU__
-
/* Fixed Bits Types */
+#ifdef __KERNEL_OPENCL__
+
+typedef ulong uint64_t;
+
+#endif
+
+#ifndef __KERNEL_GPU__
+
#ifdef _WIN32
typedef signed char int8_t;
@@ -474,17 +480,17 @@ ccl_device_inline int4 make_int4(const float3& f)
#endif
-ccl_device_inline int align_up(int offset, int alignment)
+ccl_device_inline size_t align_up(size_t offset, size_t alignment)
{
return (offset + alignment - 1) & ~(alignment - 1);
}
-ccl_device_inline int round_up(int x, int multiple)
+ccl_device_inline size_t round_up(size_t x, size_t multiple)
{
return ((x + multiple - 1) / multiple) * multiple;
}
-ccl_device_inline int round_down(int x, int multiple)
+ccl_device_inline size_t round_down(size_t x, size_t multiple)
{
return (x / multiple) * multiple;
}