Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@gmail.com>, 2017-09-27 01:39:53 +0300
committer: Brecht Van Lommel <brechtvanlommel@gmail.com>, 2017-10-04 22:11:14 +0300
commit: e3e16cecc4f080edbbd14e4bf1cfc580c5957d62 (patch)
tree: 5b9da903526442acb10b48d2ccee5686a00a1017 /intern/cycles/device
parent: 5b7d6ea54b2fc35b8b12c667f5bf9a1c9c46d5c2 (diff)
Code refactor: remove rng_state buffer and compute hash on the fly.
This is a little faster on some benchmark scenes and a little slower on others; on average it seems about performance neutral, and it saves a little memory.
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- intern/cycles/device/device_cpu.cpp 8
-rw-r--r-- intern/cycles/device/device_cuda.cpp 4
-rw-r--r-- intern/cycles/device/device_multi.cpp 1
-rw-r--r-- intern/cycles/device/device_network.cpp 2
-rw-r--r-- intern/cycles/device/device_network.h 4
-rw-r--r-- intern/cycles/device/opencl/opencl_mega.cpp 4
-rw-r--r-- intern/cycles/device/opencl/opencl_split.cpp 8
7 files changed, 8 insertions, 23 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 6a1106328fb..72330b02a28 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -171,7 +171,7 @@ public:
DeviceRequestedFeatures requested_features;
- KernelFunctions<void(*)(KernelGlobals *, float *, unsigned int *, int, int, int, int, int)> path_trace_kernel;
+ KernelFunctions<void(*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel;
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
@@ -192,7 +192,7 @@ public:
KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*,
- ccl_global uint*, int, int, int, int, int, int, int, int, ccl_global int*, int,
+ int, int, int, int, int, int, int, int, ccl_global int*, int,
ccl_global char*, ccl_global unsigned int*, unsigned int, ccl_global float*)> data_init_kernel;
unordered_map<string, KernelFunctions<void(*)(KernelGlobals*, KernelData*)> > split_kernels;
@@ -617,7 +617,6 @@ public:
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
float *render_buffer = (float*)tile.buffer;
- uint *rng_state = (uint*)tile.rng_state;
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
@@ -629,7 +628,7 @@ public:
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
- path_trace_kernel()(kg, render_buffer, rng_state,
+ path_trace_kernel()(kg, render_buffer,
sample, x, y, tile.offset, tile.stride);
}
}
@@ -913,7 +912,6 @@ bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,
(void*)split_data.device_pointer,
num_global_elements,
(char*)ray_state.device_pointer,
- (uint*)rtile.rng_state,
rtile.start_sample,
rtile.start_sample + rtile.num_samples,
rtile.x,
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 7ee74e9a512..e5464dcf34e 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1322,7 +1322,6 @@ public:
wtile->start_sample = sample;
wtile->num_samples = 1;
wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
- wtile->rng_state = (uint*)cuda_device_ptr(rtile.rng_state);
mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY);
mem_copy_to(work_tiles);
@@ -1945,7 +1944,6 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim
CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
- CUdeviceptr d_rng_state = device->cuda_device_ptr(rtile.rng_state);
CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
int end_sample = rtile.start_sample + rtile.num_samples;
@@ -1955,7 +1953,6 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim
CUdeviceptr* split_data_buffer;
int* num_elements;
CUdeviceptr* ray_state;
- CUdeviceptr* rng_state;
int* start_sample;
int* end_sample;
int* sx;
@@ -1976,7 +1973,6 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim
&d_split_data,
&num_global_elements,
&d_ray_state,
- &d_rng_state,
&rtile.start_sample,
&end_sample,
&rtile.x,
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index bc505b676fc..164ed50bdf6 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -281,7 +281,6 @@ public:
foreach(SubDevice& sub, devices) {
if(sub.device == sub_device) {
if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
- if(tile.rng_state) tile.rng_state = sub.ptr_map[tile.rng_state];
}
}
}
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 571ba9465ca..4ff8647f66b 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -737,7 +737,6 @@ protected:
tile = entry.tile;
if(tile.buffer) tile.buffer = ptr_map[tile.buffer];
- if(tile.rng_state) tile.rng_state = ptr_map[tile.rng_state];
result = true;
break;
@@ -769,7 +768,6 @@ protected:
thread_scoped_lock acquire_lock(acquire_mutex);
if(tile.buffer) tile.buffer = ptr_imap[tile.buffer];
- if(tile.rng_state) tile.rng_state = ptr_imap[tile.rng_state];
{
thread_scoped_lock lock(rpc_lock);
diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h
index a5d24c66018..7bfebaf5aec 100644
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -142,7 +142,7 @@ public:
archive & tile.x & tile.y & tile.w & tile.h;
archive & tile.start_sample & tile.num_samples & tile.sample;
archive & tile.resolution & tile.offset & tile.stride;
- archive & tile.buffer & tile.rng_state;
+ archive & tile.buffer;
}
void write()
@@ -303,7 +303,7 @@ public:
*archive & tile.x & tile.y & tile.w & tile.h;
*archive & tile.start_sample & tile.num_samples & tile.sample;
*archive & tile.resolution & tile.offset & tile.stride;
- *archive & tile.buffer & tile.rng_state;
+ *archive & tile.buffer;
tile.buffers = NULL;
}
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index ec47fdafa3d..f4555eaba4f 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -62,7 +62,6 @@ public:
/* Cast arguments to cl types. */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
- cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
cl_int d_x = rtile.x;
cl_int d_y = rtile.y;
cl_int d_w = rtile.w;
@@ -79,8 +78,7 @@ public:
kernel_set_args(ckPathTraceKernel,
0,
d_data,
- d_buffer,
- d_rng_state);
+ d_buffer);
set_kernel_arg_buffers(ckPathTraceKernel, &start_arg_index);
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 16a96213100..976cc9df46d 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -192,7 +192,6 @@ struct CachedSplitMemory {
int id;
device_memory *split_data;
device_memory *ray_state;
- device_ptr *rng_state;
device_memory *queue_index;
device_memory *use_queues_flag;
device_memory *work_pools;
@@ -225,8 +224,7 @@ public:
kg,
data,
*cached_memory.split_data,
- *cached_memory.ray_state,
- *cached_memory.rng_state);
+ *cached_memory.ray_state);
device->set_kernel_arg_buffers(program(), &start_arg_index);
@@ -356,8 +354,7 @@ public:
kernel_data,
split_data,
num_global_elements,
- ray_state,
- rtile.rng_state);
+ ray_state);
device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index);
@@ -401,7 +398,6 @@ public:
cached_memory.split_data = &split_data;
cached_memory.ray_state = &ray_state;
- cached_memory.rng_state = &rtile.rng_state;
cached_memory.queue_index = &queue_index;
cached_memory.use_queues_flag = &use_queues_flag;
cached_memory.work_pools = &work_pool_wgs;