Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@gmail.com>, 2017-09-27 01:39:53 +0300
committer: Brecht Van Lommel <brechtvanlommel@gmail.com>, 2017-10-04 22:11:14 +0300
commit: e3e16cecc4f080edbbd14e4bf1cfc580c5957d62 (patch)
tree: 5b9da903526442acb10b48d2ccee5686a00a1017 /intern/cycles/kernel
parent: 5b7d6ea54b2fc35b8b12c667f5bf9a1c9c46d5c2 (diff)
Code refactor: remove rng_state buffer and compute hash on the fly.
This is a little faster on some benchmark scenes and a little slower on others; on average it seems about performance-neutral, and it saves a little memory.
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- intern/cycles/kernel/kernel_path.h | 5
-rw-r--r-- intern/cycles/kernel/kernel_path_branched.h | 5
-rw-r--r-- intern/cycles/kernel/kernel_path_common.h | 7
-rw-r--r-- intern/cycles/kernel/kernel_random.h | 4
-rw-r--r-- intern/cycles/kernel/kernel_types.h | 1
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 2
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 4
-rw-r--r-- intern/cycles/kernel/kernels/cuda/kernel.cu | 4
-rw-r--r-- intern/cycles/kernel/kernels/cuda/kernel_split.cu | 2
-rw-r--r-- intern/cycles/kernel/kernels/opencl/kernel.cl | 3
-rw-r--r-- intern/cycles/kernel/kernels/opencl/kernel_data_init.cl | 2
-rw-r--r-- intern/cycles/kernel/kernels/opencl/kernel_split_function.h | 2
-rw-r--r-- intern/cycles/kernel/split/kernel_buffer_update.h | 6
-rw-r--r-- intern/cycles/kernel/split/kernel_data_init.h | 15
-rw-r--r-- intern/cycles/kernel/split/kernel_path_init.h | 5
15 files changed, 13 insertions, 54 deletions
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 793fede0deb..341ada63ec3 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -672,21 +672,20 @@ ccl_device_forceinline void kernel_path_integrate(
}
ccl_device void kernel_path_trace(KernelGlobals *kg,
- ccl_global float *buffer, ccl_global uint *rng_state,
+ ccl_global float *buffer,
int sample, int x, int y, int offset, int stride)
{
/* buffer offset */
int index = offset + x + y*stride;
int pass_stride = kernel_data.film.pass_stride;
- rng_state += index;
buffer += index*pass_stride;
/* Initialize random numbers and sample ray. */
uint rng_hash;
Ray ray;
- kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng_hash, &ray);
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
if(ray.t == 0.0f) {
kernel_write_result(kg, buffer, sample, NULL);
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 6e0ec22d581..70d73a20b97 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -538,21 +538,20 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
}
ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
- ccl_global float *buffer, ccl_global uint *rng_state,
+ ccl_global float *buffer,
int sample, int x, int y, int offset, int stride)
{
/* buffer offset */
int index = offset + x + y*stride;
int pass_stride = kernel_data.film.pass_stride;
- rng_state += index;
buffer += index*pass_stride;
/* initialize random numbers and ray */
uint rng_hash;
Ray ray;
- kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng_hash, &ray);
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
/* integrate */
PathRadiance L;
diff --git a/intern/cycles/kernel/kernel_path_common.h b/intern/cycles/kernel/kernel_path_common.h
index 54dd278a185..d83fd474cde 100644
--- a/intern/cycles/kernel/kernel_path_common.h
+++ b/intern/cycles/kernel/kernel_path_common.h
@@ -19,7 +19,6 @@
CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
- ccl_global uint *rng_state,
int sample,
int x, int y,
uint *rng_hash,
@@ -30,11 +29,7 @@ ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg,
int num_samples = kernel_data.integrator.aa_samples;
- if(sample == kernel_data.integrator.start_sample) {
- *rng_state = hash_int_2d(x, y);
- }
-
- path_rng_init(kg, rng_state, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v);
+ path_rng_init(kg, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v);
/* sample camera ray */
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 11798d87cb5..e7a6134b8eb 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -15,6 +15,7 @@
*/
#include "kernel/kernel_jitter.h"
+#include "util/util_hash.h"
CCL_NAMESPACE_BEGIN
@@ -115,14 +116,13 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
}
ccl_device_inline void path_rng_init(KernelGlobals *kg,
- ccl_global uint *rng_state,
int sample, int num_samples,
uint *rng_hash,
int x, int y,
float *fx, float *fy)
{
/* load state */
- *rng_hash = *rng_state;
+ *rng_hash = hash_int_2d(x, y);
*rng_hash ^= kernel_data.integrator.seed;
#ifdef __DEBUG_CORRELATION__
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index bf3a2881666..19c77c1ed4f 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1460,7 +1460,6 @@ typedef struct WorkTile {
uint stride;
ccl_global float *buffer;
- ccl_global uint *rng_state;
} WorkTile;
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index c8938534fe8..f5ebf4ad73f 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -18,7 +18,6 @@
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
float *buffer,
- unsigned int *rng_state,
int sample,
int x, int y,
int offset,
@@ -57,7 +56,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
ccl_global void *split_data_buffer,
int num_elements,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
int start_sample,
int end_sample,
int sx, int sy, int sw, int sh, int offset, int stride,
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index d4315ee5ec4..3fefc1b7e9c 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -75,7 +75,6 @@ CCL_NAMESPACE_BEGIN
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
float *buffer,
- unsigned int *rng_state,
int sample,
int x, int y,
int offset,
@@ -88,7 +87,6 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
if(kernel_data.integrator.branched) {
kernel_branched_path_trace(kg,
buffer,
- rng_state,
sample,
x, y,
offset,
@@ -97,7 +95,7 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
else
# endif
{
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
}
#endif /* KERNEL_STUB */
}
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index 4d100634421..799cd587fcf 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -42,7 +42,7 @@ kernel_cuda_path_trace(WorkTile *tile, uint total_work_size)
get_work_pixel(tile, work_index, &x, &y, &sample);
KernelGlobals kg;
- kernel_path_trace(&kg, tile->buffer, tile->rng_state, sample, x, y, tile->offset, tile->stride);
+ kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
}
@@ -58,7 +58,7 @@ kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
get_work_pixel(tile, work_index, &x, &y, &sample);
KernelGlobals kg;
- kernel_branched_path_trace(&kg, tile->buffer, tile->rng_state, sample, x, y, tile->offset, tile->stride);
+ kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
}
#endif
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index e97e87285a5..43b3d0aa0e6 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -60,7 +60,6 @@ kernel_cuda_path_trace_data_init(
ccl_global void *split_data_buffer,
int num_elements,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
int start_sample,
int end_sample,
int sx, int sy, int sw, int sh, int offset, int stride,
@@ -76,7 +75,6 @@ kernel_cuda_path_trace_data_init(
split_data_buffer,
num_elements,
ray_state,
- rng_state,
start_sample,
end_sample,
sx, sy, sw, sh, offset, stride,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index b7108f3d0f8..521b86121ff 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -50,7 +50,6 @@
__kernel void kernel_ocl_path_trace(
ccl_constant KernelData *data,
ccl_global float *buffer,
- ccl_global uint *rng_state,
KERNEL_BUFFER_PARAMS,
@@ -68,7 +67,7 @@ __kernel void kernel_ocl_path_trace(
int y = sy + ccl_global_id(1);
if(x < sx + sw && y < sy + sh)
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
}
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
index 95b35e40a45..7125348a49f 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
@@ -24,7 +24,6 @@ __kernel void kernel_ocl_path_trace_data_init(
ccl_global void *split_data_buffer,
int num_elements,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
KERNEL_BUFFER_PARAMS,
int start_sample,
int end_sample,
@@ -41,7 +40,6 @@ __kernel void kernel_ocl_path_trace_data_init(
split_data_buffer,
num_elements,
ray_state,
- rng_state,
KERNEL_BUFFER_ARGS,
start_sample,
end_sample,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
index 499138b5581..6aa7681cbed 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
@@ -23,7 +23,6 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
ccl_global void *split_data_buffer,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
KERNEL_BUFFER_PARAMS,
@@ -42,7 +41,6 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
if(ccl_local_id(0) + ccl_local_id(1) == 0) {
kg->data = data;
- kernel_split_params.tile.rng_state = rng_state;
kernel_split_params.queue_index = queue_index;
kernel_split_params.use_queues_flag = use_queues_flag;
kernel_split_params.work_pools = work_pools;
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index e8547767480..5e9db821f7b 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -108,10 +108,6 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
uint x, y, sample;
get_work_pixel(tile, work_index, &x, &y, &sample);
- /* Remap rng_state to current pixel. */
- ccl_global uint *rng_state = kernel_split_params.tile.rng_state;
- rng_state += tile->offset + x + y*tile->stride;
-
/* Store buffer offset for writing to passes. */
uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
ccl_global float *buffer = tile->buffer + buffer_offset;
@@ -119,7 +115,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
/* Initialize random numbers and ray. */
uint rng_hash;
- kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng_hash, ray);
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
if(ray->t != 0.0f) {
/* Initialize throughput, path radiance, Ray, PathState;
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 2da3ca47466..f4df949fdb1 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -49,7 +49,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
ccl_global void *split_data_buffer,
int num_elements,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
#ifdef __KERNEL_OPENCL__
KERNEL_BUFFER_PARAMS,
@@ -84,7 +83,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
kernel_split_params.tile.offset = offset;
kernel_split_params.tile.stride = stride;
- kernel_split_params.tile.rng_state = rng_state;
kernel_split_params.tile.buffer = buffer;
kernel_split_params.total_work_size = sw * sh * num_samples;
@@ -122,7 +120,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
*use_queues_flag = 0;
}
- /* zero the tiles pixels and initialize rng_state if this is the first sample */
+ /* zero the tiles pixels if this is the first sample */
if(start_sample == 0) {
int pass_stride = kernel_data.film.pass_stride;
@@ -130,9 +128,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
for(int y = sy; y < sy + sh; y++) {
int index = offset + y * stride;
memset(buffer + (sx + index) * pass_stride, 0, sizeof(float) * pass_stride * sw);
- for(int x = sx; x < sx + sw; x++) {
- rng_state[index + x] = hash_int_2d(x, y);
- }
}
#else
parallel_for(kg, i, sw * sh * pass_stride) {
@@ -146,14 +141,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
*(buffer + index) = 0.0f;
}
-
- parallel_for(kg, i, sw * sh) {
- int x = sx + i % sw;
- int y = sy + i / sw;
-
- int index = (offset + x + y*stride);
- *(rng_state + index) = hash_int_2d(x, y);
- }
#endif
}
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index 701d39403ad..1bd641b031d 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -45,10 +45,6 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
uint x, y, sample;
get_work_pixel(tile, work_index, &x, &y, &sample);
- /* Remap rng_state and buffer to current pixel. */
- ccl_global uint *rng_state = kernel_split_params.tile.rng_state;
- rng_state += tile->offset + x + y*tile->stride;
-
/* Store buffer offset for writing to passes. */
uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
ccl_global float *buffer = tile->buffer + buffer_offset;
@@ -57,7 +53,6 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
/* Initialize random numbers and ray. */
uint rng_hash;
kernel_path_trace_setup(kg,
- rng_state,
sample,
x, y,
&rng_hash,