From 72d2d05770a721986986c137a5cbc36cb796062f Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 20 Dec 2011 12:25:37 +0000 Subject: Cycles: border rendering support, includes some refactoring in how pixels are accessed on devices. --- intern/cycles/app/cycles_test.cpp | 19 ++++++-- intern/cycles/blender/addon/engine.py | 6 +-- intern/cycles/blender/blender_camera.cpp | 24 ++++++++++ intern/cycles/blender/blender_session.cpp | 23 +++++++--- intern/cycles/blender/blender_sync.h | 1 + intern/cycles/device/device.h | 1 + intern/cycles/device/device_cpu.cpp | 12 +++-- intern/cycles/device/device_cuda.cpp | 12 +++++ intern/cycles/device/device_opencl.cpp | 10 ++++- intern/cycles/kernel/kernel.cl | 8 ++-- intern/cycles/kernel/kernel.cpp | 8 ++-- intern/cycles/kernel/kernel.cu | 8 ++-- intern/cycles/kernel/kernel.h | 12 +++-- intern/cycles/kernel/kernel_film.h | 5 +-- intern/cycles/kernel/kernel_optimized.cpp | 8 ++-- intern/cycles/kernel/kernel_path.h | 14 +++--- intern/cycles/kernel/kernel_random.h | 14 +++--- intern/cycles/kernel/kernel_types.h | 5 +-- intern/cycles/render/buffers.cpp | 42 ++++++++--------- intern/cycles/render/buffers.h | 49 +++++++++++++++++--- intern/cycles/render/camera.cpp | 7 +-- intern/cycles/render/session.cpp | 75 ++++++++++++++++--------------- intern/cycles/render/session.h | 18 ++++---- intern/cycles/render/tile.cpp | 18 +++++--- intern/cycles/render/tile.h | 8 ++-- 25 files changed, 258 insertions(+), 149 deletions(-) (limited to 'intern') diff --git a/intern/cycles/app/cycles_test.cpp b/intern/cycles/app/cycles_test.cpp index 27e53ded6db..83816727404 100644 --- a/intern/cycles/app/cycles_test.cpp +++ b/intern/cycles/app/cycles_test.cpp @@ -82,10 +82,21 @@ static void session_print_status() session_print(status); } +static BufferParams session_buffer_params() +{ + BufferParams buffer_params; + buffer_params.width = options.width; + buffer_params.height = options.height; + buffer_params.full_width = options.width; + buffer_params.full_height = options.height; + + return buffer_params; +} + static void session_init() { options.session = new Session(options.session_params); - options.session->reset(options.width, options.height, options.session_params.samples); + options.session->reset(session_buffer_params(), options.session_params.samples); options.session->scene = options.scene; if(options.session_params.background && !options.quiet) @@ -151,7 +162,7 @@ static void display_info(Progress& progress) static void display() { - options.session->draw(options.width, options.height); + options.session->draw(session_buffer_params()); display_info(options.session->progress); } @@ -162,13 +173,13 @@ static void resize(int width, int height) options.height= height; if(options.session) - options.session->reset(options.width, options.height, options.session_params.samples); + options.session->reset(session_buffer_params(), options.session_params.samples); } void keyboard(unsigned char key) { if(key == 'r') - options.session->reset(options.width, options.height, options.session_params.samples); + options.session->reset(session_buffer_params(), options.session_params.samples); else if(key == 27) // escape options.session->progress.set_cancel("Cancelled"); } diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 2fedd2c0afa..60b77b23f25 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -62,11 +62,7 @@ def render(engine): def update(engine, data, scene): import bcycles - if scene.render.use_border: - engine.report({'ERROR'}, "Border rendering not supported yet") - free(engine) - else: - bcycles.sync(engine.session) + bcycles.sync(engine.session) def draw(engine, region, v3d, rv3d): diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp index 442a8f62bfd..9777de14b1e 100644 --- a/intern/cycles/blender/blender_camera.cpp +++ b/intern/cycles/blender/blender_camera.cpp @@ -287,5 +287,29 @@ void BlenderSync::sync_view(BL::SpaceView3D b_v3d, BL::RegionView3D b_rv3d, int blender_camera_sync(scene->camera, &bcam, width, height); } +BufferParams BlenderSync::get_buffer_params(BL::Scene b_scene, BL::RegionView3D b_rv3d, int width, int height) +{ + BufferParams params; + + params.full_width = width; + params.full_height = height; + + /* border render */ + BL::RenderSettings r = b_scene.render(); + + if(!b_rv3d && r.use_border()) { + params.full_x = r.border_min_x()*width; + params.full_y = r.border_min_y()*height; + params.width = (int)(r.border_max_x()*width) - params.full_x; + params.height = (int)(r.border_max_y()*height) - params.full_y; + } + else { + params.width = width; + params.height = height; + } + + return params; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 4433b1e24f9..1803dd36beb 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -100,7 +100,9 @@ void BlenderSession::create_session() session->set_pause(BlenderSync::get_session_pause(b_scene, background)); /* start rendering */ - session->reset(width, height, session_params.samples); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + + session->reset(buffer_params, session_params.samples); session->start(); } @@ -135,7 +137,10 @@ void BlenderSession::write_render_result() if(!pixels) return; - struct RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, width, height); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + int w = buffer_params.width, h = buffer_params.height; + + struct RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, w, h); PointerRNA rrptr; RNA_pointer_create(NULL, &RNA_RenderResult, rrp, &rrptr); BL::RenderResult rr(rrptr); @@ -188,8 +193,10 @@ void BlenderSession::synchronize() session->scene->mutex.unlock(); /* reset if needed */ - if(scene->need_reset()) - session->reset(width, height, session_params.samples); + if(scene->need_reset()) { + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + session->reset(buffer_params, session_params.samples); + } } bool BlenderSession::draw(int w, int h) @@ -225,7 +232,9 @@ bool BlenderSession::draw(int w, int h) /* reset if requested */ if(reset) { SessionParams session_params = BlenderSync::get_session_params(b_scene, background); - session->reset(width, height, session_params.samples); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + + session->reset(buffer_params, session_params.samples); } } @@ -233,7 +242,9 @@ bool BlenderSession::draw(int w, int h) update_status_progress(); /* draw */ - return !session->draw(width, height); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + + return !session->draw(buffer_params); } void BlenderSession::get_status(string& status, string& substatus) diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index 83c7f70fd59..824904cd81d 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -62,6 +62,7 @@ public: static SceneParams get_scene_params(BL::Scene b_scene, bool background); static SessionParams get_session_params(BL::Scene b_scene, bool background); static bool get_session_pause(BL::Scene b_scene, bool background); + static BufferParams get_buffer_params(BL::Scene b_scene, BL::RegionView3D b_rv3d, int width, int height); private: /* sync */ diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 5b87b11b6b8..be6a3f144ed 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -60,6 +60,7 @@ public: device_ptr buffer; int sample; int resolution; + int offset, stride; device_ptr displace_input; device_ptr displace_offset; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 990b7cb94b0..a45a4fb69f6 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -162,7 +162,8 @@ public: if(system_cpu_support_optimized()) { for(int y = task.y; y < task.y + task.h; y++) { for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y); + kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, + task.sample, x, y, task.offset, task.stride); if(tasks.worker_cancel()) break; @@ -173,7 +174,8 @@ public: { for(int y = task.y; y < task.y + task.h; y++) { for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y); + kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, + task.sample, x, y, task.offset, task.stride); if(tasks.worker_cancel()) break; @@ -192,14 +194,16 @@ public: if(system_cpu_support_optimized()) { for(int y = task.y; y < task.y + task.h; y++) for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y); + kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, + task.sample, task.resolution, x, y, task.offset, task.stride); } else #endif { for(int y = task.y; y < task.y + task.h; y++) for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y); + kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, + task.sample, task.resolution, x, y, task.offset, task.stride); } } diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 177c90ba2df..dfa2fcb2322 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -520,6 +520,12 @@ public: cuda_assert(cuParamSeti(cuPathTrace, offset, task.h)) offset += sizeof(task.h); + cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset)) + offset += sizeof(task.offset); + + cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride)) + offset += sizeof(task.stride); + cuda_assert(cuParamSetSize(cuPathTrace, offset)) /* launch kernel: todo find optimal size, cache config for fermi */ @@ -581,6 +587,12 @@ public: cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h)) offset += sizeof(task.h); + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset)) + offset += sizeof(task.offset); + + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride)) + offset += sizeof(task.stride); + cuda_assert(cuParamSetSize(cuFilmConvert, offset)) /* launch kernel: todo find optimal size, cache config for fermi */ diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 8eaaebc6629..3a1d3032d6e 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -191,7 +191,7 @@ public: { char version[256]; - int major, minor, req_major = 1, req_minor = 1; + int major, minor, req_major = 1, req_minor = 0; clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL); @@ -541,6 +541,8 @@ public: cl_int d_w = task.w; cl_int d_h = task.h; cl_int d_sample = task.sample; + cl_int d_offset = task.offset; + cl_int d_stride = task.stride; /* sample arguments */ int narg = 0; @@ -559,6 +561,8 @@ public: ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h); + ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset); + ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride); opencl_assert(ciErr); @@ -611,6 +615,8 @@ public: cl_int d_h = task.h; cl_int d_sample = task.sample; cl_int d_resolution = task.resolution; + cl_int d_offset = task.offset; + cl_int d_stride = task.stride; /* sample arguments */ int narg = 0; @@ -630,6 +636,8 @@ public: ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h); + ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset); + ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride); opencl_assert(ciErr); diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index c00bc3fe957..90eb7a2513f 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -36,7 +36,7 @@ __kernel void kernel_ocl_path_trace( #include "kernel_textures.h" int sample, - int sx, int sy, int sw, int sh) + int sx, int sy, int sw, int sh, int offset, int stride) { KernelGlobals kglobals, *kg = &kglobals; @@ -50,7 +50,7 @@ __kernel void kernel_ocl_path_trace( int y = sy + get_global_id(1); if(x < sx + sw && y < sy + sh) - kernel_path_trace(kg, buffer, rng_state, sample, x, y); + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } __kernel void kernel_ocl_tonemap( @@ -63,7 +63,7 @@ __kernel void kernel_ocl_tonemap( #include "kernel_textures.h" int sample, int resolution, - int sx, int sy, int sw, int sh) + int sx, int sy, int sw, int sh, int offset, int stride) { KernelGlobals kglobals, *kg = &kglobals; @@ -77,7 +77,7 @@ __kernel void kernel_ocl_tonemap( int y = sy + get_global_id(1); if(x < sx + sw && y < sy + sh) - kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } /*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx) diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 52a3852aa01..b4c3839dbd0 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -204,16 +204,16 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t /* Path Tracing */ -void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y) +void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) { - kernel_path_trace(kg, buffer, rng_state, sample, x, y); + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } /* Tonemapping */ -void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y) +void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride) { - kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } /* Displacement */ diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 75415a00b00..71fc7ac3197 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -26,22 +26,22 @@ #include "kernel_path.h" #include "kernel_displace.h" -extern "C" __global__ void kernel_cuda_path_trace(float4 *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh) +extern "C" __global__ void kernel_cuda_path_trace(float4 *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; if(x < sx + sw && y < sy + sh) - kernel_path_trace(NULL, buffer, rng_state, sample, x, y); + kernel_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride); } -extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int sample, int resolution, int sx, int sy, int sw, int sh) +extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int sample, int resolution, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; if(x < sx + sw && y < sy + sh) - kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y, offset, stride); } extern "C" __global__ void kernel_cuda_displace(uint4 *input, float3 *offset, int sx) diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 700ee49c5f2..78247504b39 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -36,13 +36,17 @@ bool kernel_osl_use(KernelGlobals *kg); void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size); void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height); -void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y); -void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y); +void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, + int sample, int x, int y, int offset, int stride); +void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, + int sample, int resolution, int x, int y, int offset, int stride); void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i); #ifdef WITH_OPTIMIZED_KERNEL -void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y); -void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y); +void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, + int sample, int x, int y, int offset, int stride); +void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, + int sample, int resolution, int x, int y, int offset, int stride); void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i); #endif diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index 4373701452e..cd8acc9647a 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -48,10 +48,9 @@ __device uchar4 film_float_to_byte(float4 color) return result; } -__device void kernel_film_tonemap(KernelGlobals *kg, __global uchar4 *rgba, __global float4 *buffer, int sample, int resolution, int x, int y) +__device void kernel_film_tonemap(KernelGlobals *kg, __global uchar4 *rgba, __global float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride) { - int w = kernel_data.cam.width; - int index = x + y*w; + int index = offset + x + y*stride; float4 irradiance = buffer[index]; float4 float_result = film_map(kg, irradiance, sample); diff --git a/intern/cycles/kernel/kernel_optimized.cpp b/intern/cycles/kernel/kernel_optimized.cpp index 85a2b798a62..ea43e01ab58 100644 --- a/intern/cycles/kernel/kernel_optimized.cpp +++ b/intern/cycles/kernel/kernel_optimized.cpp @@ -35,16 +35,16 @@ CCL_NAMESPACE_BEGIN /* Path Tracing */ -void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y) +void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) { - kernel_path_trace(kg, buffer, rng_state, sample, x, y); + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } /* Tonemapping */ -void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y) +void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride) { - kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } /* Displacement */ diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index c609f6f13fe..05707f31352 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN #ifdef __MODIFY_TP__ -__device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global float3 *buffer, int x, int y, int sample) +__device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global float3 *buffer, int x, int y, int offset, int stride, int sample) { /* modify throughput to influence path termination probability, to avoid darker regions receiving fewer samples than lighter regions. also RGB @@ -45,7 +45,7 @@ __device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global f const float minL = 0.1f; if(sample >= minsample) { - float3 L = buffer[x + y*kernel_data.cam.width]; + float3 L = buffer[offset + x + y*stride]; float3 Lmin = make_float3(minL, minL, minL); float correct = (float)(sample+1)/(float)sample; @@ -379,7 +379,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R return make_float4(L.x, L.y, L.z, 1.0f - Ltransparent); } -__device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __global uint *rng_state, int sample, int x, int y) +__device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __global uint *rng_state, int sample, int x, int y, int offset, int stride) { /* initialize random numbers */ RNG rng; @@ -387,7 +387,7 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl float filter_u; float filter_v; - path_rng_init(kg, rng_state, sample, &rng, x, y, &filter_u, &filter_v); + path_rng_init(kg, rng_state, sample, &rng, x, y, offset, stride, &filter_u, &filter_v); /* sample camera ray */ Ray ray; @@ -399,7 +399,7 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl /* integrate */ #ifdef __MODIFY_TP__ - float3 throughput = path_terminate_modified_throughput(kg, buffer, x, y, sample); + float3 throughput = path_terminate_modified_throughput(kg, buffer, x, y, offset, stride, sample); float4 L = kernel_path_integrate(kg, &rng, sample, ray, throughput)/throughput; #else float3 throughput = make_float3(1.0f, 1.0f, 1.0f); @@ -407,14 +407,14 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl #endif /* accumulate result in output buffer */ - int index = x + y*kernel_data.cam.width; + int index = offset + x + y*stride; if(sample == 0) buffer[index] = L; else buffer[index] += L; - path_rng_end(kg, rng_state, rng, x, y); + path_rng_end(kg, rng_state, rng, x, y, offset, stride); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index ba97ab3e3b6..41301ebd3dc 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -123,7 +123,7 @@ __device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dime #endif } -__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy) +__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, int offset, int stride, float *fx, float *fy) { #ifdef __SOBOL_FULL_SCREEN__ uint px, py; @@ -138,7 +138,7 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, *fx = size * (float)px * (1.0f/(float)0xFFFFFFFF) - x; *fy = size * (float)py * (1.0f/(float)0xFFFFFFFF) - y; #else - *rng = rng_state[x + y*kernel_data.cam.width]; + *rng = rng_state[offset + x + y*stride]; *rng ^= kernel_data.integrator.seed; @@ -147,7 +147,7 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, #endif } -__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y) +__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y, int offset, int stride) { /* nothing to do */ } @@ -163,10 +163,10 @@ __device float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension) return (float)*rng * (1.0f/(float)0xFFFFFFFF); } -__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy) +__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, int offset, int stride, float *fx, float *fy) { /* load state */ - *rng = rng_state[x + y*kernel_data.cam.width]; + *rng = rng_state[offset + x + y*stride]; *rng ^= kernel_data.integrator.seed; @@ -174,10 +174,10 @@ __device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sam *fy = path_rng(kg, rng, sample, PRNG_FILTER_V); } -__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y) +__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y, int offset, int stride) { /* store state for next sample */ - rng_state[x + y*kernel_data.cam.width] = rng; + rng_state[offset + x + y*stride] = rng; } #endif diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index d9bd645b16d..72ebfefbd90 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -300,10 +300,7 @@ typedef struct ShaderData { typedef struct KernelCamera { /* type */ int ortho; - int pad; - - /* size */ - int width, height; + int pad1, pad2, pad3; /* matrices */ Transform cameratoworld; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index acdddb475d0..29141b25b59 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -36,8 +36,6 @@ CCL_NAMESPACE_BEGIN RenderBuffers::RenderBuffers(Device *device_) { device = device_; - width = 0; - height = 0; } RenderBuffers::~RenderBuffers() @@ -58,24 +56,23 @@ void RenderBuffers::device_free() } } -void RenderBuffers::reset(Device *device, int width_, int height_) +void RenderBuffers::reset(Device *device, BufferParams& params_) { - width = width_; - height = height_; + params = params_; /* free existing buffers */ device_free(); /* allocate buffer */ - buffer.resize(width, height); + buffer.resize(params.width, params.height); device->mem_alloc(buffer, MEM_READ_WRITE); device->mem_zero(buffer); /* allocate rng state */ - rng_state.resize(width, height); + rng_state.resize(params.width, params.height); - uint *init_state = rng_state.resize(width, height); - int x, y; + uint *init_state = rng_state.resize(params.width, params.height); + int x, y, width = params.width, height = params.height; for(x=0; xmem_copy_from(buffer, 0, buffer.memory_size()); - float4 *out = new float4[width*height]; + float4 *out = new float4[params.width*params.height]; float4 *in = (float4*)buffer.data_pointer; float scale = 1.0f/(float)sample; - for(int i = width*height - 1; i >= 0; i--) { + for(int i = params.width*params.height - 1; i >= 0; i--) { float4 rgba = in[i]*scale; rgba.x = rgba.x*exposure; @@ -117,8 +114,6 @@ float4 *RenderBuffers::copy_from_device(float exposure, int sample) DisplayBuffer::DisplayBuffer(Device *device_) { device = device_; - width = 0; - height = 0; draw_width = 0; draw_height = 0; transparent = true; /* todo: determine from background */ @@ -137,28 +132,27 @@ void DisplayBuffer::device_free() } } -void DisplayBuffer::reset(Device *device, int width_, int height_) +void DisplayBuffer::reset(Device *device, BufferParams& params_) { draw_width = 0; draw_height = 0; - width = width_; - height = height_; + params = params_; /* free existing buffers */ device_free(); /* allocate display pixels */ - rgba.resize(width, height); + rgba.resize(params.width, params.height); device->pixels_alloc(rgba); } -void DisplayBuffer::draw_set(int width_, int height_) +void DisplayBuffer::draw_set(int width, int height) { - assert(width_ <= width && height_ <= height); + assert(width <= params.width && height <= params.height); - draw_width = width_; - draw_height = height_; + draw_width = width; + draw_height = height; } void DisplayBuffer::draw_transparency_grid() @@ -175,11 +169,11 @@ void DisplayBuffer::draw_transparency_grid() }; glColor4ub(50, 50, 50, 255); - glRectf(0, 0, width, height); + glRectf(0, 0, params.width, params.height); glEnable(GL_POLYGON_STIPPLE); glColor4ub(55, 55, 55, 255); glPolygonStipple(checker_stipple_sml); - glRectf(0, 0, width, height); + glRectf(0, 0, params.width, params.height); glDisable(GL_POLYGON_STIPPLE); } @@ -189,7 +183,7 @@ void DisplayBuffer::draw(Device *device) if(transparent) draw_transparency_grid(); - device->draw_pixels(rgba, 0, draw_width, draw_height, width, height, transparent); + device->draw_pixels(rgba, 0, draw_width, draw_height, params.width, params.height, transparent); } } diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index d5eb8d7fa2f..66bd03c8893 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -30,12 +30,49 @@ CCL_NAMESPACE_BEGIN class Device; struct float4; +/* Buffer Parameters + Size of render buffer and how it fits in the full image (border render). */ + +class BufferParams { +public: + /* width/height of the physical buffer */ + int width; + int height; + + /* offset into and width/height of the full buffer */ + int full_x; + int full_y; + int full_width; + int full_height; + + BufferParams() + { + width = 0; + height = 0; + + full_x = 0; + full_y = 0; + full_width = 0; + full_height = 0; + } + + bool modified(const BufferParams& params) + { + return !(full_x == params.full_x + && full_y == params.full_y + && width == params.width + && height == params.height + && full_width == params.full_width + && full_height == params.full_height); + } +}; + /* Render Buffers */ class RenderBuffers { public: - /* buffer dimensions */ - int width, height; + /* buffer parameters */ + BufferParams params; /* float buffer */ device_vector buffer; /* random number generator state */ @@ -46,7 +83,7 @@ public: RenderBuffers(Device *device); ~RenderBuffers(); - void reset(Device *device, int width, int height); + void reset(Device *device, BufferParams& params); float4 *copy_from_device(float exposure, int sample); protected: @@ -62,8 +99,8 @@ protected: class DisplayBuffer { public: - /* buffer dimensions */ - int width, height; + /* buffer parameters */ + BufferParams params; /* dimensions for how much of the buffer is actually ready for display. with progressive render we can be using only a subset of the buffer. if these are zero, it means nothing can be drawn yet */ @@ -78,7 +115,7 @@ public: DisplayBuffer(Device *device); ~DisplayBuffer(); - void reset(Device *device, int width, int height); + void reset(Device *device, BufferParams& params); void write(Device *device, const string& filename); void draw_set(int width, int height); diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index e88c0a388bc..d5fca87491d 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -72,8 +72,9 @@ void Camera::update() if(!need_update) return; + /* ndc to raster */ Transform screentocamera; - Transform ndctoraster = transform_scale((float)width, (float)height, 1.0f); + Transform ndctoraster = transform_scale(width, height, 1.0f); /* raster to screen */ Transform screentoraster = ndctoraster * @@ -148,10 +149,6 @@ void Camera::device_update(Device *device, DeviceScene *dscene) /* type */ kcam->ortho = ortho; - /* size */ - kcam->width = width; - kcam->height = height; - /* store differentials */ kcam->dx = dx; kcam->dy = dy; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 42b4a2bb7e4..26c4dbfbb7a 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -51,8 +51,6 @@ Session::Session(const SessionParams& params_) sample = 0; delayed_reset.do_reset = false; - delayed_reset.w = 0; - delayed_reset.h = 0; delayed_reset.samples = 0; display_outdated = false; @@ -108,7 +106,7 @@ bool Session::ready_to_reset() /* GPU Session */ -void Session::reset_gpu(int w, int h, int samples) +void Session::reset_gpu(BufferParams& buffer_params, int samples) { /* block for buffer acces and reset immediately. we can't do this in the thread, because we need to allocate an OpenGL buffer, and @@ -119,7 +117,7 @@ void Session::reset_gpu(int w, int h, int samples) display_outdated = true; reset_time = time_dt(); - reset_(w, h, samples); + reset_(buffer_params, samples); gpu_need_tonemap = false; gpu_need_tonemap_cond.notify_all(); @@ -127,7 +125,7 @@ void Session::reset_gpu(int w, int h, int samples) pause_cond.notify_all(); } -bool Session::draw_gpu(int w, int h) +bool Session::draw_gpu(BufferParams& buffer_params) { /* block for buffer access */ thread_scoped_lock display_lock(display->mutex); @@ -136,7 +134,7 @@ bool Session::draw_gpu(int w, int h) if(gpu_draw_ready) { /* then verify the buffers have the expected size, so we don't draw previous results in a resized window */ - if(w == display->width && h == display->height) { + if(!buffer_params.modified(display->params)) { /* for CUDA we need to do tonemapping still, since we can only access GL buffers from the main thread */ if(gpu_need_tonemap) { @@ -261,15 +259,14 @@ void Session::run_gpu() /* CPU Session */ -void Session::reset_cpu(int w, int h, int samples) +void Session::reset_cpu(BufferParams& buffer_params, int samples) { thread_scoped_lock reset_lock(delayed_reset.mutex); display_outdated = true; reset_time = time_dt(); - delayed_reset.w = w; - delayed_reset.h = h; + delayed_reset.params = buffer_params; delayed_reset.samples = samples; delayed_reset.do_reset = true; device->task_cancel(); @@ -277,7 +274,7 @@ void Session::reset_cpu(int w, int h, int samples) pause_cond.notify_all(); } -bool Session::draw_cpu(int w, int h) +bool Session::draw_cpu(BufferParams& buffer_params) { thread_scoped_lock display_lock(display->mutex); @@ -285,7 +282,7 @@ bool Session::draw_cpu(int w, int h) if(display->draw_ready()) { /* then verify the buffers have the expected size, so we don't draw previous results in a resized window */ - if(w == display->width && h == display->height) { + if(!buffer_params.modified(display->params)) { display->draw(device); if(display_outdated && (time_dt() - reset_time) > params.text_timeout) @@ -306,7 +303,7 @@ void Session::run_cpu() thread_scoped_lock buffers_lock(buffers->mutex); thread_scoped_lock display_lock(display->mutex); - reset_(delayed_reset.w, delayed_reset.h, delayed_reset.samples); + reset_(delayed_reset.params, delayed_reset.samples); delayed_reset.do_reset = false; } @@ -389,7 +386,7 @@ void Session::run_cpu() if(delayed_reset.do_reset) { /* reset rendering if request from main thread */ delayed_reset.do_reset = false; - reset_(delayed_reset.w, delayed_reset.h, delayed_reset.samples); + reset_(delayed_reset.params, delayed_reset.samples); } else if(need_tonemap) { /* tonemap only if we do not reset, we don't we don't @@ -438,23 +435,23 @@ void Session::run() progress.set_update(); } -bool Session::draw(int w, int h) +bool Session::draw(BufferParams& buffer_params) { if(device_use_gl) - return draw_gpu(w, h); + return draw_gpu(buffer_params); else - return draw_cpu(w, h); + return draw_cpu(buffer_params); } -void Session::reset_(int w, int h, int samples) +void Session::reset_(BufferParams& buffer_params, int samples) { - if(w != buffers->width || h != buffers->height) { + if(buffer_params.modified(buffers->params)) { gpu_draw_ready = false; - buffers->reset(device, w, h); - display->reset(device, w, h); + buffers->reset(device, buffer_params); + display->reset(device, buffer_params); } - tile_manager.reset(w, h, samples); + tile_manager.reset(buffer_params, samples); start_time = time_dt(); preview_time = 0.0; @@ -462,12 +459,12 @@ void Session::reset_(int w, int h, int samples) sample = 0; } -void Session::reset(int w, int h, int samples) +void Session::reset(BufferParams& buffer_params, int samples) { if(device_use_gl) - reset_gpu(w, h, samples); + reset_gpu(buffer_params, samples); else - reset_cpu(w, h, samples); + reset_cpu(buffer_params, samples); } void Session::set_samples(int samples) @@ -514,14 +511,18 @@ void Session::update_scene() progress.set_status("Updating Scene"); - /* update camera if dimensions changed for progressive render */ + /* update camera if dimensions changed for progressive render. the camera + knows nothing about progressive or cropped rendering, it just gets the + image dimensions passed in */ Camera *cam = scene->camera; - int w = tile_manager.state.width; - int h = tile_manager.state.height; - - if(cam->width != w || cam->height != h) { - cam->width = w; - cam->height = h; + float progressive_x = tile_manager.state.width/(float)tile_manager.params.width; + float progressive_y = tile_manager.state.height/(float)tile_manager.params.height; + int width = tile_manager.params.full_width*progressive_x; + int height = tile_manager.params.full_height*progressive_y; + + if(width != cam->width || height != cam->height) { + cam->width = width; + cam->height = height; cam->tag_update(); } @@ -573,14 +574,16 @@ void Session::path_trace(Tile& tile) /* add path trace task */ DeviceTask task(DeviceTask::PATH_TRACE); - task.x = tile.x; - task.y = tile.y; + task.x = tile_manager.state.full_x + tile.x; + task.y = tile_manager.state.full_y + tile.y; task.w = tile.w; task.h = tile.h; task.buffer = buffers->buffer.device_pointer; task.rng_state = buffers->rng_state.device_pointer; task.sample = tile_manager.state.sample; task.resolution = tile_manager.state.resolution; + task.offset = -(tile_manager.state.full_x + tile_manager.state.full_y*tile_manager.state.width); + task.stride = tile_manager.state.width; device->task_add(task); } @@ -590,14 +593,16 @@ void Session::tonemap() /* add tonemap task */ DeviceTask task(DeviceTask::TONEMAP); - task.x = 0; - task.y = 0; + task.x = tile_manager.state.full_x; + task.y = tile_manager.state.full_y; task.w = tile_manager.state.width; task.h = tile_manager.state.height; task.rgba = display->rgba.device_pointer; task.buffer = buffers->buffer.device_pointer; task.sample = tile_manager.state.sample; task.resolution = tile_manager.state.resolution; + task.offset = -(tile_manager.state.full_x + tile_manager.state.full_y*tile_manager.state.width); + task.stride = tile_manager.state.width; if(task.w > 0 && task.h > 0) { device->task_add(task); diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index ce7f420096a..89979b8c451 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -19,6 +19,7 @@ #ifndef __SESSION_H__ #define __SESSION_H__ +#include "buffers.h" #include "device.h" #include "tile.h" @@ -27,6 +28,7 @@ CCL_NAMESPACE_BEGIN +class BufferParams; class Device; class DeviceScene; class DisplayBuffer; @@ -106,11 +108,11 @@ public: ~Session(); void start(); - bool draw(int w, int h); + bool draw(BufferParams& params); void wait(); bool ready_to_reset(); - void reset(int w, int h, int samples); + void reset(BufferParams& params, int samples); void set_samples(int samples); void set_pause(bool pause); @@ -118,7 +120,7 @@ protected: struct DelayedReset { thread_mutex mutex; bool do_reset; - int w, h; + BufferParams params; int samples; } delayed_reset; @@ -129,15 +131,15 @@ protected: void tonemap(); void path_trace(Tile& tile); - void reset_(int w, int h, int samples); + void reset_(BufferParams& params, int samples); void run_cpu(); - bool draw_cpu(int w, int h); - void reset_cpu(int w, int h, int samples); + bool draw_cpu(BufferParams& params); + void reset_cpu(BufferParams& params, int samples); void run_gpu(); - bool draw_gpu(int w, int h); - void reset_gpu(int w, int h, int samples); + bool draw_gpu(BufferParams& params); + void reset_gpu(BufferParams& params, int samples); TileManager tile_manager; bool device_use_gl; diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index ba437e74874..b118a7ba478 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -28,21 +28,21 @@ TileManager::TileManager(bool progressive_, int samples_, int tile_size_, int mi tile_size = tile_size_; min_size = min_size_; - reset(0, 0, 0); + BufferParams buffer_params; + reset(buffer_params, 0); } TileManager::~TileManager() { } -void TileManager::reset(int width_, int height_, int samples_) +void TileManager::reset(BufferParams& params_, int samples_) { - full_width = width_; - full_height = height_; + params = params_; start_resolution = 1; - int w = width_, h = height_; + int w = params.width, h = params.height; if(min_size != INT_MAX) { while(w*h > min_size*min_size) { @@ -55,6 +55,8 @@ void TileManager::reset(int width_, int height_, int samples_) samples = samples_; + state.full_x = 0; + state.full_y = 0; state.width = 0; state.height = 0; state.sample = -1; @@ -70,8 +72,8 @@ void TileManager::set_samples(int samples_) void TileManager::set_tiles() { int resolution = state.resolution; - int image_w = max(1, full_width/resolution); - int image_h = max(1, full_height/resolution); + int image_w = max(1, params.width/resolution); + int image_h = max(1, params.height/resolution); int tile_w = (image_w + tile_size - 1)/tile_size; int tile_h = (image_h + tile_size - 1)/tile_size; int sub_w = image_w/tile_w; @@ -90,6 +92,8 @@ void TileManager::set_tiles() } } + state.full_x = params.full_x/resolution; + state.full_y = params.full_y/resolution; state.width = image_w; state.height = image_h; } diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 5cd16eb8afa..76863d23498 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -21,6 +21,7 @@ #include +#include "buffers.h" #include "util_list.h" CCL_NAMESPACE_BEGIN @@ -39,7 +40,10 @@ public: class TileManager { public: + BufferParams params; struct State { + int full_x; + int full_y; int width; int height; int sample; @@ -50,7 +54,7 @@ public: TileManager(bool progressive, int samples, int tile_size, int min_size); ~TileManager(); - void reset(int width, int height, int samples); + void reset(BufferParams& params, int samples); void set_samples(int samples); bool next(); bool done(); @@ -63,8 +67,6 @@ protected: int tile_size; int min_size; - int full_width; - int full_height; int start_resolution; }; -- cgit v1.2.3