Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Dinges <blender@dingto.org>2013-08-09 22:47:25 +0400
committerThomas Dinges <blender@dingto.org>2013-08-09 22:47:25 +0400
commita18112249d826046fd7d8692571a0eab5bb5ec04 (patch)
tree36827af77eea515d496e0664bffc7fd461919f64
parent2ab9cbd208766cc7acb8a281c19a27816293cb33 (diff)
Cycles / Non-Progressive integrator:
* Non-Progressive integrator is now available on the GPU (CUDA, sm_20 and above). Implementation details: * kernel_path_trace() has been split up into two functions: kernel_path_trace_non_progressive() and kernel_path_trace_progressive(). * We compile two CUDA kernel entry functions (in kernel.cu) for the two integrators, they are still inside one .cubin file but due to the kernel separation there should be no performance problem. I tested with the BMW file on my Geforce 540M and the render times were the same for 100 samples (1.57 min in my case). This is part of my GSoC project, SVN merge of r59032 + manual merge of UI changes for this from my branch.
-rw-r--r--intern/cycles/blender/addon/ui.py12
-rw-r--r--intern/cycles/blender/blender_sync.cpp2
-rw-r--r--intern/cycles/device/device_cuda.cpp11
-rw-r--r--intern/cycles/device/device_task.h1
-rw-r--r--intern/cycles/kernel/kernel.cl2
-rw-r--r--intern/cycles/kernel/kernel.cpp5
-rw-r--r--intern/cycles/kernel/kernel.cu13
-rw-r--r--intern/cycles/kernel/kernel_path.h84
-rw-r--r--intern/cycles/kernel/kernel_shader.h7
-rw-r--r--intern/cycles/kernel/kernel_sse2.cpp5
-rw-r--r--intern/cycles/kernel/kernel_sse3.cpp5
-rw-r--r--intern/cycles/kernel/kernel_types.h1
-rw-r--r--intern/cycles/render/session.cpp1
13 files changed, 103 insertions, 46 deletions
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 551a0f85623..1d54915dd35 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -67,9 +67,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
row.operator("render.cycles_sampling_preset_add", text="", icon="ZOOMOUT").remove_active = True
row = layout.row()
- sub = row.row()
- sub.active = (device_type == 'NONE' or cscene.device == 'CPU')
- sub.prop(cscene, "progressive")
+ row.prop(cscene, "progressive")
if not cscene.progressive:
row.prop(cscene, "squared_samples")
@@ -82,7 +80,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
sub.prop(cscene, "seed")
sub.prop(cscene, "sample_clamp")
- if cscene.progressive or (device_type != 'NONE' and cscene.device == 'GPU'):
+ if cscene.progressive:
col = split.column()
sub = col.column(align=True)
sub.label(text="Samples:")
@@ -656,7 +654,6 @@ class CyclesLamp_PT_lamp(CyclesButtonsPanel, Panel):
lamp = context.lamp
clamp = lamp.cycles
cscene = context.scene.cycles
- device_type = context.user_preferences.system.compute_device_type
layout.prop(lamp, "type", expand=True)
@@ -675,7 +672,7 @@ class CyclesLamp_PT_lamp(CyclesButtonsPanel, Panel):
sub.prop(lamp, "size", text="Size X")
sub.prop(lamp, "size_y", text="Size Y")
- if not cscene.progressive and (device_type == 'NONE' or cscene.device == 'CPU'):
+ if not cscene.progressive:
col.prop(clamp, "samples")
col = split.column()
@@ -864,7 +861,6 @@ class CyclesWorld_PT_settings(CyclesButtonsPanel, Panel):
world = context.world
cworld = world.cycles
cscene = context.scene.cycles
- device_type = context.user_preferences.system.compute_device_type
col = layout.column()
@@ -872,7 +868,7 @@ class CyclesWorld_PT_settings(CyclesButtonsPanel, Panel):
sub = col.row(align=True)
sub.active = cworld.sample_as_light
sub.prop(cworld, "sample_map_resolution")
- if not cscene.progressive and (device_type == 'NONE' or cscene.device == 'CPU'):
+ if not cscene.progressive:
sub.prop(cworld, "samples")
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index d1d235ec41a..1a416368510 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -420,7 +420,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use
preview_aa_samples = preview_aa_samples * preview_aa_samples;
}
- if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU) {
+ if(get_boolean(cscene, "progressive") == 0) {
if(background) {
params.samples = aa_samples;
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 4441ff1c69f..8f11782b168 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -558,7 +558,7 @@ public:
}
}
- void path_trace(RenderTile& rtile, int sample)
+ void path_trace(RenderTile& rtile, int sample, bool progressive)
{
if(have_error())
return;
@@ -570,7 +570,10 @@ public:
CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
/* get kernel function */
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"))
+ if(progressive)
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace_progressive"))
+ else
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace_non_progressive"))
/* pass in parameters */
int offset = 0;
@@ -914,6 +917,8 @@ public:
if(task->type == DeviceTask::PATH_TRACE) {
RenderTile tile;
+ bool progressive = task->integrator_progressive;
+
/* keep rendering tiles until done */
while(task->acquire_tile(this, tile)) {
int start_sample = tile.start_sample;
@@ -925,7 +930,7 @@ public:
break;
}
- path_trace(tile, sample);
+ path_trace(tile, sample, progressive);
tile.sample = sample + 1;
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index cfeb2860d41..46226b8856f 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -65,6 +65,7 @@ public:
boost::function<bool(void)> get_cancel;
bool need_finish_queue;
+ bool integrator_progressive;
protected:
double last_update_time;
};
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl
index a745f5843fc..67be879cdc7 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernel.cl
@@ -51,7 +51,7 @@ __kernel void kernel_ocl_path_trace(
int y = sy + get_global_id(1);
if(x < sx + sw && y < sy + sh)
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
}
__kernel void kernel_ocl_tonemap(
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index 69bc5d8ebb1..88a8b2aa569 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -90,7 +90,10 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
{
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ if(kernel_data.integrator.progressive)
+ kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
+ else
+ kernel_path_trace_non_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Tonemapping */
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index 52e3fe01c0e..e3fe23d3a37 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -26,13 +26,22 @@
#include "kernel_path.h"
#include "kernel_displace.h"
-extern "C" __global__ void kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+extern "C" __global__ void kernel_cuda_path_trace_progressive(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
if(x < sx + sw && y < sy + sh)
- kernel_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride);
+ kernel_path_trace_progressive(NULL, buffer, rng_state, sample, x, y, offset, stride);
+}
+
+extern "C" __global__ void kernel_cuda_path_trace_non_progressive(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+{
+ int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+ int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
+
+ if(x < sx + sw && y < sy + sh)
+ kernel_path_trace_non_progressive(NULL, buffer, rng_state, sample, x, y, offset, stride);
}
extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float *buffer, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 2806296ba93..48df60162b1 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -1134,20 +1134,8 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
#endif
-__device void kernel_path_trace(KernelGlobals *kg,
- __global float *buffer, __global uint *rng_state,
- int sample, int x, int y, int offset, int stride)
+__device_inline void kernel_path_trace_setup(KernelGlobals *kg, __global uint *rng_state, int sample, int x, int y, RNG *rng, Ray *ray)
{
- /* buffer offset */
- int index = offset + x + y*stride;
- int pass_stride = kernel_data.film.pass_stride;
-
- rng_state += index;
- buffer += index*pass_stride;
-
- /* initialize random numbers */
- RNG rng;
-
float filter_u;
float filter_v;
#ifdef __CMJ__
@@ -1156,38 +1144,82 @@ __device void kernel_path_trace(KernelGlobals *kg,
int num_samples = 0;
#endif
- path_rng_init(kg, rng_state, sample, num_samples, &rng, x, y, &filter_u, &filter_v);
+ path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v);
/* sample camera ray */
- Ray ray;
float lens_u = 0.0f, lens_v = 0.0f;
if(kernel_data.cam.aperturesize > 0.0f)
- path_rng_2D(kg, &rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
+ path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
float time = 0.0f;
#ifdef __CAMERA_MOTION__
if(kernel_data.cam.shuttertime != -1.0f)
- time = path_rng_1D(kg, &rng, sample, num_samples, PRNG_TIME);
+ time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
#endif
- camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, &ray);
+ camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
+}
+
+__device void kernel_path_trace_progressive(KernelGlobals *kg,
+ __global float *buffer, __global uint *rng_state,
+ int sample, int x, int y, int offset, int stride)
+{
+ /* buffer offset */
+ int index = offset + x + y*stride;
+ int pass_stride = kernel_data.film.pass_stride;
+
+ rng_state += index;
+ buffer += index*pass_stride;
+
+ /* initialize random numbers and ray */
+ RNG rng;
+ Ray ray;
+
+ kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
/* integrate */
float4 L;
- if (ray.t != 0.0f) {
-#ifdef __NON_PROGRESSIVE__
- if(kernel_data.integrator.progressive)
-#endif
- L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
+ if (ray.t != 0.0f)
+ L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
+ else
+ L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ /* accumulate result in output buffer */
+ kernel_write_pass_float4(buffer, sample, L);
+
+ path_rng_end(kg, rng_state, rng);
+}
+
+__device void kernel_path_trace_non_progressive(KernelGlobals *kg,
+ __global float *buffer, __global uint *rng_state,
+ int sample, int x, int y, int offset, int stride)
+{
+ /* buffer offset */
+ int index = offset + x + y*stride;
+ int pass_stride = kernel_data.film.pass_stride;
+
+ rng_state += index;
+ buffer += index*pass_stride;
+
+ /* initialize random numbers and ray */
+ RNG rng;
+ Ray ray;
+
+ kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
+
+ /* integrate */
+ float4 L;
+
+ if (ray.t != 0.0f)
#ifdef __NON_PROGRESSIVE__
- else
- L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
+ L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
+#else
+ L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
#endif
- }
else
L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 111f3cab8ab..5fb349ff1ee 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -955,8 +955,11 @@ __device void shader_merge_closures(KernelGlobals *kg, ShaderData *sd)
sci->sample_weight += scj->sample_weight;
int size = sd->num_closure - (j+1);
- if(size > 0)
- memmove(scj, scj+1, size*sizeof(ShaderClosure));
+ if(size > 0) {
+ for(int k = 0; k < size; k++) {
+ scj[k] = scj[k+1];
+ }
+ }
sd->num_closure--;
j--;
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index cfadb02b93b..8321c679f07 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -39,7 +39,10 @@ CCL_NAMESPACE_BEGIN
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
{
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ if(kernel_data.integrator.progressive)
+ kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
+ else
+ kernel_path_trace_non_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Tonemapping */
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index de4c979b82e..35a816f7da9 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -41,7 +41,10 @@ CCL_NAMESPACE_BEGIN
void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
{
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ if(kernel_data.integrator.progressive)
+ kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
+ else
+ kernel_path_trace_non_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Tonemapping */
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 99f3724afb5..0608dad5c05 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -68,6 +68,7 @@ CCL_NAMESPACE_BEGIN
#define __KERNEL_SHADING__
#if __CUDA_ARCH__ >= 200
#define __KERNEL_ADV_SHADING__
+#define __NON_PROGRESSIVE__
#endif
#endif
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 93d57c65363..13c199f879b 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -832,6 +832,7 @@ void Session::path_trace()
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
task.update_progress_sample = function_bind(&Session::update_progress_sample, this);
task.need_finish_queue = params.progressive_refine;
+ task.integrator_progressive = scene->integrator->progressive;
device->task_add(task);
}