diff options
author | Campbell Barton <ideasman42@gmail.com> | 2011-12-21 02:08:24 +0400 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2011-12-21 02:08:24 +0400 |
commit | 1fef05084e674e4e41388f63b95a2c71aec20a16 (patch) | |
tree | af2ac86bdefeea596b5474c05ef94788e5fd683d | |
parent | deb3b7e282c9fed045201ca39d682baf3a281ffe (diff) | |
parent | 2d1de2e78d619ffd999876768a9466d083984a9f (diff) |
svn merge ^/trunk/blender -r42761:42776
41 files changed, 390 insertions, 208 deletions
diff --git a/intern/cycles/app/cycles_test.cpp b/intern/cycles/app/cycles_test.cpp index 27e53ded6db..83816727404 100644 --- a/intern/cycles/app/cycles_test.cpp +++ b/intern/cycles/app/cycles_test.cpp @@ -82,10 +82,21 @@ static void session_print_status() session_print(status); } +static BufferParams session_buffer_params() +{ + BufferParams buffer_params; + buffer_params.width = options.width; + buffer_params.height = options.height; + buffer_params.full_width = options.width; + buffer_params.full_height = options.height; + + return buffer_params; +} + static void session_init() { options.session = new Session(options.session_params); - options.session->reset(options.width, options.height, options.session_params.samples); + options.session->reset(session_buffer_params(), options.session_params.samples); options.session->scene = options.scene; if(options.session_params.background && !options.quiet) @@ -151,7 +162,7 @@ static void display_info(Progress& progress) static void display() { - options.session->draw(options.width, options.height); + options.session->draw(session_buffer_params()); display_info(options.session->progress); } @@ -162,13 +173,13 @@ static void resize(int width, int height) options.height= height; if(options.session) - options.session->reset(options.width, options.height, options.session_params.samples); + options.session->reset(session_buffer_params(), options.session_params.samples); } void keyboard(unsigned char key) { if(key == 'r') - options.session->reset(options.width, options.height, options.session_params.samples); + options.session->reset(session_buffer_params(), options.session_params.samples); else if(key == 27) // escape options.session->progress.set_cancel("Cancelled"); } diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 2fedd2c0afa..60b77b23f25 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -62,11 +62,7 @@ def render(engine): def update(engine, data, scene): import bcycles - if scene.render.use_border: - engine.report({'ERROR'}, "Border rendering not supported yet") - free(engine) - else: - bcycles.sync(engine.session) + bcycles.sync(engine.session) def draw(engine, region, v3d, rv3d): diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp index 442a8f62bfd..9777de14b1e 100644 --- a/intern/cycles/blender/blender_camera.cpp +++ b/intern/cycles/blender/blender_camera.cpp @@ -287,5 +287,29 @@ void BlenderSync::sync_view(BL::SpaceView3D b_v3d, BL::RegionView3D b_rv3d, int blender_camera_sync(scene->camera, &bcam, width, height); } +BufferParams BlenderSync::get_buffer_params(BL::Scene b_scene, BL::RegionView3D b_rv3d, int width, int height) +{ + BufferParams params; + + params.full_width = width; + params.full_height = height; + + /* border render */ + BL::RenderSettings r = b_scene.render(); + + if(!b_rv3d && r.use_border()) { + params.full_x = r.border_min_x()*width; + params.full_y = r.border_min_y()*height; + params.width = (int)(r.border_max_x()*width) - params.full_x; + params.height = (int)(r.border_max_y()*height) - params.full_y; + } + else { + params.width = width; + params.height = height; + } + + return params; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 4433b1e24f9..1803dd36beb 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -100,7 +100,9 @@ void BlenderSession::create_session() session->set_pause(BlenderSync::get_session_pause(b_scene, background)); /* start rendering */ - session->reset(width, height, session_params.samples); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + + session->reset(buffer_params, session_params.samples); session->start(); } @@ -135,7 +137,10 @@ void BlenderSession::write_render_result() if(!pixels) return; - struct RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, width, height); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + int w = buffer_params.width, h = buffer_params.height; + + struct RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, w, h); PointerRNA rrptr; RNA_pointer_create(NULL, &RNA_RenderResult, rrp, &rrptr); BL::RenderResult rr(rrptr); @@ -188,8 +193,10 @@ void BlenderSession::synchronize() session->scene->mutex.unlock(); /* reset if needed */ - if(scene->need_reset()) - session->reset(width, height, session_params.samples); + if(scene->need_reset()) { + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + session->reset(buffer_params, session_params.samples); + } } bool BlenderSession::draw(int w, int h) @@ -225,7 +232,9 @@ bool BlenderSession::draw(int w, int h) /* reset if requested */ if(reset) { SessionParams session_params = BlenderSync::get_session_params(b_scene, background); - session->reset(width, height, session_params.samples); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + + session->reset(buffer_params, session_params.samples); } } @@ -233,7 +242,9 @@ bool BlenderSession::draw(int w, int h) update_status_progress(); /* draw */ - return !session->draw(width, height); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + + return !session->draw(buffer_params); } void BlenderSession::get_status(string& status, string& substatus) diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index 83c7f70fd59..824904cd81d 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -62,6 +62,7 @@ public: static SceneParams get_scene_params(BL::Scene b_scene, bool background); static SessionParams get_session_params(BL::Scene b_scene, bool background); static bool get_session_pause(BL::Scene b_scene, bool background); + static BufferParams get_buffer_params(BL::Scene b_scene, BL::RegionView3D b_rv3d, int width, int height); private: /* sync */ diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index f43ccffe461..6ebc359fdb3 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -24,6 +24,7 @@ #include "util_cuda.h" #include "util_debug.h" +#include "util_foreach.h" #include "util_math.h" #include "util_opencl.h" #include "util_opengl.h" @@ -41,7 +42,31 @@ DeviceTask::DeviceTask(Type type_) { } -void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num) +void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size) +{ + int num; + + if(type == DISPLACE) { + num = (displace_w + max_size - 1)/max_size; + } + else { + max_size = max(1, max_size/w); + num = (h + max_size - 1)/max_size; + } + + split(tasks, num); +} + +void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num) +{ + list<DeviceTask> tasks; + split(tasks, num); + + foreach(DeviceTask& task, tasks) + queue.push(task); +} + +void DeviceTask::split(list<DeviceTask>& tasks, int num) { if(type == DISPLACE) { num = min(displace_w, num); @@ -55,7 +80,7 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num) task.displace_x = tx; task.displace_w = tw; - tasks.push(task); + tasks.push_back(task); } } else { @@ -70,7 +95,7 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num) task.y = ty; task.h = th; - tasks.push(task); + tasks.push_back(task); } } } diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 5b87b11b6b8..a6a81e7b326 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -23,6 +23,7 @@ #include "device_memory.h" +#include "util_list.h" #include "util_string.h" #include "util_thread.h" #include "util_types.h" @@ -60,13 +61,17 @@ public: device_ptr buffer; int sample; int resolution; + int offset, stride; device_ptr displace_input; device_ptr displace_offset; int displace_x, displace_w; DeviceTask(Type type = PATH_TRACE); + + void split(list<DeviceTask>& tasks, int num); void split(ThreadQueue<DeviceTask>& tasks, int num); + void split_max_size(list<DeviceTask>& tasks, int max_size); }; /* Device */ diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 990b7cb94b0..a45a4fb69f6 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -162,7 +162,8 @@ public: if(system_cpu_support_optimized()) { for(int y = task.y; y < task.y + task.h; y++) { for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y); + kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, + task.sample, x, y, task.offset, task.stride); if(tasks.worker_cancel()) break; @@ -173,7 +174,8 @@ public: { for(int y = task.y; y < task.y + task.h; y++) { for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y); + kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, + task.sample, x, y, task.offset, task.stride); if(tasks.worker_cancel()) break; @@ -192,14 +194,16 @@ public: if(system_cpu_support_optimized()) { for(int y = task.y; y < task.y + task.h; y++) for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y); + kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, + task.sample, task.resolution, x, y, task.offset, task.stride); } else #endif { for(int y = task.y; y < task.y + task.h; y++) for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y); + kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, + task.sample, task.resolution, x, y, task.offset, task.stride); } } diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 177c90ba2df..2a49d4fae4c 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -221,7 +221,7 @@ public: cuDeviceComputeCapability(&major, &minor, cuDevId); if(major <= 1 && minor <= 2) { - cuda_error(string_printf("CUDA device supported only with shader model 1.3 or up, found %d.%d.", major, minor)); + cuda_error(string_printf("CUDA device supported only with compute capability 1.3 or up, found %d.%d.", major, minor)); return false; } } @@ -253,9 +253,9 @@ public: #if defined(WITH_CUDA_BINARIES) && defined(_WIN32) if(major <= 1 && minor <= 2) - cuda_error(string_printf("CUDA device supported only with shader model 1.3 or up, found %d.%d.", major, minor)); + cuda_error(string_printf("CUDA device supported only compute capability 1.3 or up, found %d.%d.", major, minor)); else - cuda_error(string_printf("CUDA binary kernel for this graphics card shader model (%d.%d) not found.", major, minor)); + cuda_error(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor)); return ""; #else /* if not, find CUDA compiler */ @@ -520,6 +520,12 @@ public: cuda_assert(cuParamSeti(cuPathTrace, offset, task.h)) offset += sizeof(task.h); + cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset)) + offset += sizeof(task.offset); + + cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride)) + offset += sizeof(task.stride); + cuda_assert(cuParamSetSize(cuPathTrace, offset)) /* launch kernel: todo find optimal size, cache config for fermi */ @@ -581,6 +587,12 @@ public: cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h)) offset += sizeof(task.h); + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset)) + offset += sizeof(task.offset); + + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride)) + offset += sizeof(task.stride); + cuda_assert(cuParamSetSize(cuFilmConvert, offset)) /* launch kernel: todo find optimal size, cache config for fermi */ diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 8eaaebc6629..6014dd0fdb7 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -25,6 +25,7 @@ #include "device.h" #include "device_intern.h" +#include "util_foreach.h" #include "util_map.h" #include "util_math.h" #include "util_md5.h" @@ -52,6 +53,7 @@ public: map<string, device_memory*> mem_map; device_ptr null_mem; bool device_initialized; + string platform_name; const char *opencl_error_string(cl_int err) { @@ -175,6 +177,10 @@ public: if(opencl_error(ciErr)) return; + char name[256]; + clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL); + platform_name = name; + cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr); if(opencl_error(ciErr)) return; @@ -277,14 +283,11 @@ public: { string build_options = " -cl-fast-relaxed-math "; - /* Full Shading only on NVIDIA cards at the moment */ - char vendor[256]; - - clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(vendor), &vendor, NULL); - string name = vendor; - - if(name == "NVIDIA CUDA") - build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ "; + /* full shading only on NVIDIA cards at the moment */ + if(platform_name == "NVIDIA CUDA") + build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose "; + if(platform_name == "Apple") + build_options += " -D__CL_NO_FLOAT3__ "; return build_options; } @@ -541,6 +544,8 @@ public: cl_int d_w = task.w; cl_int d_h = task.h; cl_int d_sample = task.sample; + cl_int d_offset = task.offset; + cl_int d_stride = task.stride; /* sample arguments */ int narg = 0; @@ -559,6 +564,8 @@ public: ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h); + ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset); + ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride); opencl_assert(ciErr); @@ -611,6 +618,8 @@ public: cl_int d_h = task.h; cl_int d_sample = task.sample; cl_int d_resolution = task.resolution; + cl_int d_offset = task.offset; + cl_int d_stride = task.stride; /* sample arguments */ int narg = 0; @@ -630,6 +639,8 @@ public: ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h); + ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset); + ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride); opencl_assert(ciErr); @@ -649,12 +660,24 @@ public: opencl_assert(clFinish(cqCommandQueue)); } - void task_add(DeviceTask& task) + void task_add(DeviceTask& maintask) { - if(task.type == DeviceTask::TONEMAP) - tonemap(task); - else if(task.type == DeviceTask::PATH_TRACE) - path_trace(task); + list<DeviceTask> tasks; + + /* arbitrary limit to work around apple ATI opencl issue */ + if(platform_name == "Apple") + maintask.split_max_size(tasks, 76800); + else + tasks.push_back(maintask); + + DeviceTask task; + + foreach(DeviceTask& task, tasks) { + if(task.type == DeviceTask::TONEMAP) + tonemap(task); + else if(task.type == DeviceTask::PATH_TRACE) + path_trace(task); + } } void task_wait() diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index e17544bf7af..939a74660a1 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -143,7 +143,7 @@ endif() #set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl) #add_custom_command( # OUTPUT ${KERNEL_PREPROCESSED} -# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DWITH_OPENCL -o ${KERNEL_PREPROCESSED} +# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED} # DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS}) #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED}) #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index c00bc3fe957..90eb7a2513f 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -36,7 +36,7 @@ __kernel void kernel_ocl_path_trace( #include "kernel_textures.h" int sample, - int sx, int sy, int sw, int sh) + int sx, int sy, int sw, int sh, int offset, int stride) { KernelGlobals kglobals, *kg = &kglobals; @@ -50,7 +50,7 @@ __kernel void kernel_ocl_path_trace( int y = sy + get_global_id(1); if(x < sx + sw && y < sy + sh) - kernel_path_trace(kg, buffer, rng_state, sample, x, y); + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } __kernel void kernel_ocl_tonemap( @@ -63,7 +63,7 @@ __kernel void kernel_ocl_tonemap( #include "kernel_textures.h" int sample, int resolution, - int sx, int sy, int sw, int sh) + int sx, int sy, int sw, int sh, int offset, int stride) { KernelGlobals kglobals, *kg = &kglobals; @@ -77,7 +77,7 @@ __kernel void kernel_ocl_tonemap( int y = sy + get_global_id(1); if(x < sx + sw && y < sy + sh) - kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } /*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx) diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 52a3852aa01..b4c3839dbd0 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -204,16 +204,16 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t /* Path Tracing */ -void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y) +void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) { - kernel_path_trace(kg, buffer, rng_state, sample, x, y); + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } /* Tonemapping */ -void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y) +void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride) { - kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } /* Displacement */ diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 75415a00b00..71fc7ac3197 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -26,22 +26,22 @@ #include "kernel_path.h" #include "kernel_displace.h" -extern "C" __global__ void kernel_cuda_path_trace(float4 *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh) +extern "C" __global__ void kernel_cuda_path_trace(float4 *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; if(x < sx + sw && y < sy + sh) - kernel_path_trace(NULL, buffer, rng_state, sample, x, y); + kernel_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride); } -extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int sample, int resolution, int sx, int sy, int sw, int sh) +extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int sample, int resolution, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; if(x < sx + sw && y < sy + sh) - kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y, offset, stride); } extern "C" __global__ void kernel_cuda_displace(uint4 *input, float3 *offset, int sx) diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 700ee49c5f2..78247504b39 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -36,13 +36,17 @@ bool kernel_osl_use(KernelGlobals *kg); void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size); void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height); -void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y); -void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y); +void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, + int sample, int x, int y, int offset, int stride); +void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, + int sample, int resolution, int x, int y, int offset, int stride); void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i); #ifdef WITH_OPTIMIZED_KERNEL -void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y); -void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y); +void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, + int sample, int x, int y, int offset, int stride); +void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, + int sample, int resolution, int x, int y, int offset, int stride); void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i); #endif diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index 9cdc2f1f865..2dbdd076891 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -74,8 +74,8 @@ __device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f); ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f); - ray->dD.dx = normalize(Ddiff + kernel_data.cam.dx) - normalize(Ddiff); - ray->dD.dy = normalize(Ddiff + kernel_data.cam.dy) - normalize(Ddiff); + ray->dD.dx = normalize(Ddiff + float4_to_float3(kernel_data.cam.dx)) - normalize(Ddiff); + ray->dD.dy = normalize(Ddiff + float4_to_float3(kernel_data.cam.dy)) - normalize(Ddiff); #endif #ifdef __CAMERA_CLIPPING__ @@ -107,8 +107,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa #ifdef __RAY_DIFFERENTIALS__ /* ray differential */ - ray->dP.dx = kernel_data.cam.dx; - ray->dP.dy = kernel_data.cam.dy; + ray->dP.dx = float4_to_float3(kernel_data.cam.dx); + ray->dP.dy = float4_to_float3(kernel_data.cam.dy); ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f); ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 5515966807b..9fbd8566ecd 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -25,12 +25,21 @@ /* no namespaces in opencl */ #define CCL_NAMESPACE_BEGIN #define CCL_NAMESPACE_END -#define WITH_OPENCL + +#ifdef __CL_NO_FLOAT3__ +#define float3 float4 +#endif + +#ifdef __CL_NOINLINE__ +#define __noinline __attribute__((noinline)) +#else +#define __noinline +#endif /* in opencl all functions are device functions, so leave this empty */ #define __device -#define __device_inline -#define __device_noinline +#define __device_inline __device +#define __device_noinline __device __noinline /* no assert in opencl */ #define kernel_assert(cond) @@ -68,7 +77,11 @@ __device float kernel_tex_interp_(__global float *data, int width, float x) #endif #define make_float2(x, y) ((float2)(x, y)) +#ifdef __CL_NO_FLOAT3__ +#define make_float3(x, y, z) ((float4)(x, y, z, 0.0)) +#else #define make_float3(x, y, z) ((float3)(x, y, z)) +#endif #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #define make_int2(x, y) ((int2)(x, y)) #define make_int3(x, y, z) ((int3)(x, y, z)) diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index 4373701452e..cd8acc9647a 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -48,10 +48,9 @@ __device uchar4 film_float_to_byte(float4 color) return result; } -__device void kernel_film_tonemap(KernelGlobals *kg, __global uchar4 *rgba, __global float4 *buffer, int sample, int resolution, int x, int y) +__device void kernel_film_tonemap(KernelGlobals *kg, __global uchar4 *rgba, __global float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride) { - int w = kernel_data.cam.width; - int index = x + y*w; + int index = offset + x + y*stride; float4 irradiance = buffer[index]; float4 float_result = film_map(kg, irradiance, sample); diff --git a/intern/cycles/kernel/kernel_optimized.cpp b/intern/cycles/kernel/kernel_optimized.cpp index 85a2b798a62..ea43e01ab58 100644 --- a/intern/cycles/kernel/kernel_optimized.cpp +++ b/intern/cycles/kernel/kernel_optimized.cpp @@ -35,16 +35,16 @@ CCL_NAMESPACE_BEGIN /* Path Tracing */ -void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y) +void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) { - kernel_path_trace(kg, buffer, rng_state, sample, x, y); + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } /* Tonemapping */ -void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y) +void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride) { - kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } /* Displacement */ diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index c609f6f13fe..05707f31352 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN #ifdef __MODIFY_TP__ -__device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global float3 *buffer, int x, int y, int sample) +__device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global float3 *buffer, int x, int y, int offset, int stride, int sample) { /* modify throughput to influence path termination probability, to avoid darker regions receiving fewer samples than lighter regions. also RGB @@ -45,7 +45,7 @@ __device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global f const float minL = 0.1f; if(sample >= minsample) { - float3 L = buffer[x + y*kernel_data.cam.width]; + float3 L = buffer[offset + x + y*stride]; float3 Lmin = make_float3(minL, minL, minL); float correct = (float)(sample+1)/(float)sample; @@ -379,7 +379,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R return make_float4(L.x, L.y, L.z, 1.0f - Ltransparent); } -__device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __global uint *rng_state, int sample, int x, int y) +__device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __global uint *rng_state, int sample, int x, int y, int offset, int stride) { /* initialize random numbers */ RNG rng; @@ -387,7 +387,7 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl float filter_u; float filter_v; - path_rng_init(kg, rng_state, sample, &rng, x, y, &filter_u, &filter_v); + path_rng_init(kg, rng_state, sample, &rng, x, y, offset, stride, &filter_u, &filter_v); /* sample camera ray */ Ray ray; @@ -399,7 +399,7 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl /* integrate */ #ifdef __MODIFY_TP__ - float3 throughput = path_terminate_modified_throughput(kg, buffer, x, y, sample); + float3 throughput = path_terminate_modified_throughput(kg, buffer, x, y, offset, stride, sample); float4 L = kernel_path_integrate(kg, &rng, sample, ray, throughput)/throughput; #else float3 throughput = make_float3(1.0f, 1.0f, 1.0f); @@ -407,14 +407,14 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl #endif /* accumulate result in output buffer */ - int index = x + y*kernel_data.cam.width; + int index = offset + x + y*stride; if(sample == 0) buffer[index] = L; else buffer[index] += L; - path_rng_end(kg, rng_state, rng, x, y); + path_rng_end(kg, rng_state, rng, x, y, offset, stride); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index ba97ab3e3b6..41301ebd3dc 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -123,7 +123,7 @@ __device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dime #endif } -__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy) +__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, int offset, int stride, float *fx, float *fy) { #ifdef __SOBOL_FULL_SCREEN__ uint px, py; @@ -138,7 +138,7 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, *fx = size * (float)px * (1.0f/(float)0xFFFFFFFF) - x; *fy = size * (float)py * (1.0f/(float)0xFFFFFFFF) - y; #else - *rng = rng_state[x + y*kernel_data.cam.width]; + *rng = rng_state[offset + x + y*stride]; *rng ^= kernel_data.integrator.seed; @@ -147,7 +147,7 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, #endif } -__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y) +__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y, int offset, int stride) { /* nothing to do */ } @@ -163,10 +163,10 @@ __device float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension) return (float)*rng * (1.0f/(float)0xFFFFFFFF); } -__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy) +__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, int offset, int stride, float *fx, float *fy) { /* load state */ - *rng = rng_state[x + y*kernel_data.cam.width]; + *rng = rng_state[offset + x + y*stride]; *rng ^= kernel_data.integrator.seed; @@ -174,10 +174,10 @@ __device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sam *fy = path_rng(kg, rng, sample, PRNG_FILTER_V); } -__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y) +__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y, int offset, int stride) { /* store state for next sample */ - rng_state[x + y*kernel_data.cam.width] = rng; + rng_state[offset + x + y*stride] = rng; } #endif diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index d9bd645b16d..ea73f87a8a5 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -295,29 +295,24 @@ typedef struct ShaderData { #endif } ShaderData; -/* Constrant Kernel Data */ +/* Constrant Kernel Data + * + * These structs are passed from CPU to various devices, and the struct layout + * must match exactly. Structs are padded to ensure 16 byte alignment, and we + * do not use float3 because its size may not be the same on all devices. */ typedef struct KernelCamera { /* type */ int ortho; - int pad; - - /* size */ - int width, height; + int pad1, pad2, pad3; /* matrices */ Transform cameratoworld; Transform rastertocamera; /* differentials */ - float3 dx; -#ifndef WITH_OPENCL - float pad1; -#endif - float3 dy; -#ifndef WITH_OPENCL - float pad2; -#endif + float4 dx; + float4 dy; /* depth of field */ float aperturesize; @@ -358,10 +353,6 @@ typedef struct KernelBackground { typedef struct KernelSunSky { /* sun direction in spherical and cartesian */ float theta, phi, pad3, pad4; - float3 dir; -#ifndef WITH_OPENCL - float pad; -#endif /* perez function parameters */ float zenith_Y, zenith_x, zenith_y, pad2; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index acdddb475d0..29141b25b59 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -36,8 +36,6 @@ CCL_NAMESPACE_BEGIN RenderBuffers::RenderBuffers(Device *device_) { device = device_; - width = 0; - height = 0; } RenderBuffers::~RenderBuffers() @@ -58,24 +56,23 @@ void RenderBuffers::device_free() } } -void RenderBuffers::reset(Device *device, int width_, int height_) +void RenderBuffers::reset(Device *device, BufferParams& params_) { - width = width_; - height = height_; + params = params_; /* free existing buffers */ device_free(); /* allocate buffer */ - buffer.resize(width, height); + buffer.resize(params.width, params.height); device->mem_alloc(buffer, MEM_READ_WRITE); device->mem_zero(buffer); /* allocate rng state */ - rng_state.resize(width, height); + rng_state.resize(params.width, params.height); - uint *init_state = rng_state.resize(width, height); - int x, y; + uint *init_state = rng_state.resize(params.width, params.height); + int x, y, width = params.width, height = params.height; for(x=0; x<width; x++) for(y=0; y<height; y++) @@ -92,11 +89,11 @@ float4 *RenderBuffers::copy_from_device(float exposure, int sample) device->mem_copy_from(buffer, 0, buffer.memory_size()); - float4 *out = new float4[width*height]; + float4 *out = new float4[params.width*params.height]; float4 *in = (float4*)buffer.data_pointer; float scale = 1.0f/(float)sample; - for(int i = width*height - 1; i >= 0; i--) { + for(int i = params.width*params.height - 1; i >= 0; i--) { float4 rgba = in[i]*scale; rgba.x = rgba.x*exposure; @@ -117,8 +114,6 @@ float4 *RenderBuffers::copy_from_device(float exposure, int sample) DisplayBuffer::DisplayBuffer(Device *device_) { device = device_; - width = 0; - height = 0; draw_width = 0; draw_height = 0; transparent = true; /* todo: determine from background */ @@ -137,28 +132,27 @@ void DisplayBuffer::device_free() } } -void DisplayBuffer::reset(Device *device, int width_, int height_) +void DisplayBuffer::reset(Device *device, BufferParams& params_) { draw_width = 0; draw_height = 0; - width = width_; - height = height_; + params = params_; /* free existing buffers */ device_free(); /* allocate display pixels */ - rgba.resize(width, height); + rgba.resize(params.width, params.height); device->pixels_alloc(rgba); } -void DisplayBuffer::draw_set(int width_, int height_) +void DisplayBuffer::draw_set(int width, int height) { - assert(width_ <= width && height_ <= height); + assert(width <= params.width && height <= params.height); - draw_width = width_; - draw_height = height_; + draw_width = width; + draw_height = height; } void DisplayBuffer::draw_transparency_grid() @@ -175,11 +169,11 @@ void DisplayBuffer::draw_transparency_grid() }; glColor4ub(50, 50, 50, 255); - glRectf(0, 0, width, height); + glRectf(0, 0, params.width, params.height); glEnable(GL_POLYGON_STIPPLE); glColor4ub(55, 55, 55, 255); glPolygonStipple(checker_stipple_sml); - glRectf(0, 0, width, height); + glRectf(0, 0, params.width, params.height); glDisable(GL_POLYGON_STIPPLE); } @@ -189,7 +183,7 @@ void DisplayBuffer::draw(Device *device) if(transparent) draw_transparency_grid(); - device->draw_pixels(rgba, 0, draw_width, draw_height, width, height, transparent); + device->draw_pixels(rgba, 0, draw_width, draw_height, params.width, params.height, transparent); } } diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index d5eb8d7fa2f..66bd03c8893 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -30,12 +30,49 @@ CCL_NAMESPACE_BEGIN class Device; struct float4; +/* Buffer Parameters + Size of render buffer and how it fits in the full image (border render). */ + +class BufferParams { +public: + /* width/height of the physical buffer */ + int width; + int height; + + /* offset into and width/height of the full buffer */ + int full_x; + int full_y; + int full_width; + int full_height; + + BufferParams() + { + width = 0; + height = 0; + + full_x = 0; + full_y = 0; + full_width = 0; + full_height = 0; + } + + bool modified(const BufferParams& params) + { + return !(full_x == params.full_x + && full_y == params.full_y + && width == params.width + && height == params.height + && full_width == params.full_width + && full_height == params.full_height); + } +}; + /* Render Buffers */ class RenderBuffers { public: - /* buffer dimensions */ - int width, height; + /* buffer parameters */ + BufferParams params; /* float buffer */ device_vector<float4> buffer; /* random number generator state */ @@ -46,7 +83,7 @@ public: RenderBuffers(Device *device); ~RenderBuffers(); - void reset(Device *device, int width, int height); + void reset(Device *device, BufferParams& params); float4 *copy_from_device(float exposure, int sample); protected: @@ -62,8 +99,8 @@ protected: class DisplayBuffer { public: - /* buffer dimensions */ - int width, height; + /* buffer parameters */ + BufferParams params; /* dimensions for how much of the buffer is actually ready for display. with progressive render we can be using only a subset of the buffer. if these are zero, it means nothing can be drawn yet */ @@ -78,7 +115,7 @@ public: DisplayBuffer(Device *device); ~DisplayBuffer(); - void reset(Device *device, int width, int height); + void reset(Device *device, BufferParams& params); void write(Device *device, const string& filename); void draw_set(int width, int height); diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index e88c0a388bc..a83ae81844c 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -72,8 +72,9 @@ void Camera::update() if(!need_update) return; + /* ndc to raster */ Transform screentocamera; - Transform ndctoraster = transform_scale((float)width, (float)height, 1.0f); + Transform ndctoraster = transform_scale(width, height, 1.0f); /* raster to screen */ Transform screentoraster = ndctoraster * @@ -148,13 +149,9 @@ void Camera::device_update(Device *device, DeviceScene *dscene) /* type */ kcam->ortho = ortho; - /* size */ - kcam->width = width; - kcam->height = height; - /* store differentials */ - kcam->dx = dx; - kcam->dy = dy; + kcam->dx = float3_to_float4(dx); + kcam->dy = float3_to_float4(dy); /* clipping */ kcam->nearclip = nearclip; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 7d873221cd6..81d156a079d 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -273,7 +273,6 @@ static void sky_texture_precompute(KernelSunSky *ksunsky, float3 dir, float turb ksunsky->theta = theta; ksunsky->phi = phi; - ksunsky->dir = dir; float theta2 = theta*theta; float theta3 = theta*theta*theta; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 42b4a2bb7e4..26c4dbfbb7a 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -51,8 +51,6 @@ Session::Session(const SessionParams& params_) sample = 0; delayed_reset.do_reset = false; - delayed_reset.w = 0; - delayed_reset.h = 0; delayed_reset.samples = 0; display_outdated = false; @@ -108,7 +106,7 @@ bool Session::ready_to_reset() /* GPU Session */ -void Session::reset_gpu(int w, int h, int samples) +void Session::reset_gpu(BufferParams& buffer_params, int samples) { /* block for buffer acces and reset immediately. we can't do this in the thread, because we need to allocate an OpenGL buffer, and @@ -119,7 +117,7 @@ void Session::reset_gpu(int w, int h, int samples) display_outdated = true; reset_time = time_dt(); - reset_(w, h, samples); + reset_(buffer_params, samples); gpu_need_tonemap = false; gpu_need_tonemap_cond.notify_all(); @@ -127,7 +125,7 @@ void Session::reset_gpu(int w, int h, int samples) pause_cond.notify_all(); } -bool Session::draw_gpu(int w, int h) +bool Session::draw_gpu(BufferParams& buffer_params) { /* block for buffer access */ thread_scoped_lock display_lock(display->mutex); @@ -136,7 +134,7 @@ bool Session::draw_gpu(int w, int h) if(gpu_draw_ready) { /* then verify the buffers have the expected size, so we don't draw previous results in a resized window */ - if(w == display->width && h == display->height) { + if(!buffer_params.modified(display->params)) { /* for CUDA we need to do tonemapping still, since we can only access GL buffers from the main thread */ if(gpu_need_tonemap) { @@ -261,15 +259,14 @@ void Session::run_gpu() /* CPU Session */ -void Session::reset_cpu(int w, int h, int samples) +void Session::reset_cpu(BufferParams& buffer_params, int samples) { thread_scoped_lock reset_lock(delayed_reset.mutex); display_outdated = true; reset_time = time_dt(); - delayed_reset.w = w; - delayed_reset.h = h; + delayed_reset.params = buffer_params; delayed_reset.samples = samples; delayed_reset.do_reset = true; device->task_cancel(); @@ -277,7 +274,7 @@ void Session::reset_cpu(int w, int h, int samples) pause_cond.notify_all(); } -bool Session::draw_cpu(int w, int h) +bool Session::draw_cpu(BufferParams& buffer_params) { thread_scoped_lock display_lock(display->mutex); @@ -285,7 +282,7 @@ bool Session::draw_cpu(int w, int h) if(display->draw_ready()) { /* then verify the buffers have the expected size, so we don't draw previous results in a resized window */ - if(w == display->width && h == display->height) { + if(!buffer_params.modified(display->params)) { display->draw(device); if(display_outdated && (time_dt() - reset_time) > params.text_timeout) @@ -306,7 +303,7 @@ void Session::run_cpu() thread_scoped_lock buffers_lock(buffers->mutex); thread_scoped_lock display_lock(display->mutex); - reset_(delayed_reset.w, delayed_reset.h, delayed_reset.samples); + reset_(delayed_reset.params, delayed_reset.samples); delayed_reset.do_reset = false; } @@ -389,7 +386,7 @@ void Session::run_cpu() if(delayed_reset.do_reset) { /* reset rendering if request from main thread */ delayed_reset.do_reset = false; - reset_(delayed_reset.w, delayed_reset.h, delayed_reset.samples); + reset_(delayed_reset.params, delayed_reset.samples); } else if(need_tonemap) { /* tonemap only if we do not reset, we don't we don't @@ -438,23 +435,23 @@ void Session::run() progress.set_update(); } -bool Session::draw(int w, int h) +bool Session::draw(BufferParams& buffer_params) { if(device_use_gl) - return draw_gpu(w, h); + return draw_gpu(buffer_params); else - return draw_cpu(w, h); + return draw_cpu(buffer_params); } -void Session::reset_(int w, int h, int samples) +void Session::reset_(BufferParams& buffer_params, int samples) { - if(w != buffers->width || h != buffers->height) { + if(buffer_params.modified(buffers->params)) { gpu_draw_ready = false; - buffers->reset(device, w, h); - display->reset(device, w, h); + buffers->reset(device, buffer_params); + display->reset(device, buffer_params); } - tile_manager.reset(w, h, samples); + tile_manager.reset(buffer_params, samples); start_time = time_dt(); preview_time = 0.0; @@ -462,12 +459,12 @@ void Session::reset_(int w, int h, int samples) sample = 0; } -void Session::reset(int w, int h, int samples) +void Session::reset(BufferParams& buffer_params, int samples) { if(device_use_gl) - reset_gpu(w, h, samples); + reset_gpu(buffer_params, samples); else - reset_cpu(w, h, samples); + reset_cpu(buffer_params, samples); } void Session::set_samples(int samples) @@ -514,14 +511,18 @@ void Session::update_scene() progress.set_status("Updating Scene"); - /* update camera if dimensions changed for progressive render */ + /* update camera if dimensions changed for progressive render. the camera + knows nothing about progressive or cropped rendering, it just gets the + image dimensions passed in */ Camera *cam = scene->camera; - int w = tile_manager.state.width; - int h = tile_manager.state.height; - - if(cam->width != w || cam->height != h) { - cam->width = w; - cam->height = h; + float progressive_x = tile_manager.state.width/(float)tile_manager.params.width; + float progressive_y = tile_manager.state.height/(float)tile_manager.params.height; + int width = tile_manager.params.full_width*progressive_x; + int height = tile_manager.params.full_height*progressive_y; + + if(width != cam->width || height != cam->height) { + cam->width = width; + cam->height = height; cam->tag_update(); } @@ -573,14 +574,16 @@ void Session::path_trace(Tile& tile) /* add path trace task */ DeviceTask task(DeviceTask::PATH_TRACE); - task.x = tile.x; - task.y = tile.y; + task.x = tile_manager.state.full_x + tile.x; + task.y = tile_manager.state.full_y + tile.y; task.w = tile.w; task.h = tile.h; task.buffer = buffers->buffer.device_pointer; task.rng_state = buffers->rng_state.device_pointer; task.sample = tile_manager.state.sample; task.resolution = tile_manager.state.resolution; + task.offset = -(tile_manager.state.full_x + tile_manager.state.full_y*tile_manager.state.width); + task.stride = tile_manager.state.width; device->task_add(task); } @@ -590,14 +593,16 @@ void Session::tonemap() /* add tonemap task */ DeviceTask task(DeviceTask::TONEMAP); - task.x = 0; - task.y = 0; + task.x = tile_manager.state.full_x; + task.y = tile_manager.state.full_y; task.w = tile_manager.state.width; task.h = tile_manager.state.height; task.rgba = display->rgba.device_pointer; task.buffer = buffers->buffer.device_pointer; task.sample = tile_manager.state.sample; task.resolution = tile_manager.state.resolution; + task.offset = -(tile_manager.state.full_x + tile_manager.state.full_y*tile_manager.state.width); + task.stride = tile_manager.state.width; if(task.w > 0 && task.h > 0) { device->task_add(task); diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index ce7f420096a..89979b8c451 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -19,6 +19,7 @@ #ifndef __SESSION_H__ #define __SESSION_H__ +#include "buffers.h" #include "device.h" #include "tile.h" @@ -27,6 +28,7 @@ CCL_NAMESPACE_BEGIN +class BufferParams; class Device; class DeviceScene; class DisplayBuffer; @@ -106,11 +108,11 @@ public: ~Session(); void start(); - bool draw(int w, int h); + bool draw(BufferParams& params); void wait(); bool ready_to_reset(); - void reset(int w, int h, int samples); + void reset(BufferParams& params, int samples); void set_samples(int samples); void set_pause(bool pause); @@ -118,7 +120,7 @@ protected: struct DelayedReset { thread_mutex mutex; bool do_reset; - int w, h; + BufferParams params; int samples; } delayed_reset; @@ -129,15 +131,15 @@ protected: void tonemap(); void path_trace(Tile& tile); - void reset_(int w, int h, int samples); + void reset_(BufferParams& params, int samples); void run_cpu(); - bool draw_cpu(int w, int h); - void reset_cpu(int w, int h, int samples); + bool draw_cpu(BufferParams& params); + void reset_cpu(BufferParams& params, int samples); void run_gpu(); - bool draw_gpu(int w, int h); - void reset_gpu(int w, int h, int samples); + bool draw_gpu(BufferParams& params); + void reset_gpu(BufferParams& params, int samples); TileManager tile_manager; bool device_use_gl; diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index ba437e74874..b118a7ba478 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -28,21 +28,21 @@ TileManager::TileManager(bool progressive_, int samples_, int tile_size_, int mi tile_size = tile_size_; min_size = min_size_; - reset(0, 0, 0); + BufferParams buffer_params; + reset(buffer_params, 0); } TileManager::~TileManager() { } -void TileManager::reset(int width_, int height_, int samples_) +void TileManager::reset(BufferParams& params_, int samples_) { - full_width = width_; - full_height = height_; + params = params_; start_resolution = 1; - int w = width_, h = height_; + int w = params.width, h = params.height; if(min_size != INT_MAX) { while(w*h > min_size*min_size) { @@ -55,6 +55,8 @@ void TileManager::reset(int width_, int height_, int samples_) samples = samples_; + state.full_x = 0; + state.full_y = 0; state.width = 0; state.height = 0; state.sample = -1; @@ -70,8 +72,8 @@ void TileManager::set_samples(int samples_) void TileManager::set_tiles() { int resolution = state.resolution; - int image_w = max(1, full_width/resolution); - int image_h = max(1, full_height/resolution); + int image_w = max(1, params.width/resolution); + int image_h = max(1, params.height/resolution); int tile_w = (image_w + tile_size - 1)/tile_size; int tile_h = (image_h + tile_size - 1)/tile_size; int sub_w = image_w/tile_w; @@ -90,6 +92,8 @@ void TileManager::set_tiles() } } + state.full_x = params.full_x/resolution; + state.full_y = params.full_y/resolution; state.width = image_w; state.height = image_h; } diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 5cd16eb8afa..76863d23498 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -21,6 +21,7 @@ #include <limits.h> +#include "buffers.h" #include "util_list.h" CCL_NAMESPACE_BEGIN @@ -39,7 +40,10 @@ public: class TileManager { public: + BufferParams params; struct State { + int full_x; + int full_y; int width; int height; int sample; @@ -50,7 +54,7 @@ public: TileManager(bool progressive, int samples, int tile_size, int min_size); ~TileManager(); - void reset(int width, int height, int samples); + void reset(BufferParams& params, int samples); void set_samples(int samples); bool next(); bool done(); @@ -63,8 +67,6 @@ protected: int tile_size; int min_size; - int full_width; - int full_height; int start_resolution; }; diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 7c56f0fbb12..0a1d8ff4555 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -536,6 +536,11 @@ __device_inline float3 float4_to_float3(const float4 a) return make_float3(a.x, a.y, a.z); } +__device_inline float4 float3_to_float4(const float3 a) +{ + return make_float4(a.x, a.y, a.z, 1.0f); +} + #ifndef __KERNEL_GPU__ __device_inline void print_float3(const char *label, const float3& a) diff --git a/source/blender/blenkernel/BKE_modifier.h b/source/blender/blenkernel/BKE_modifier.h index 3063d9bc40a..d6c48fcb458 100644 --- a/source/blender/blenkernel/BKE_modifier.h +++ b/source/blender/blenkernel/BKE_modifier.h @@ -66,6 +66,11 @@ typedef enum { * unless it's a mesh and can be exploded -> curve can also emit particles */ eModifierTypeType_DeformOrConstruct, + + /* Like eModifierTypeType_Nonconstructive, but does not affect the geometry + * of the object, rather some of its CustomData layers. + * E.g. UVProject and WeightVG modifiers. */ + eModifierTypeType_NonGeometrical, } ModifierTypeType; typedef enum { @@ -312,6 +317,7 @@ int modifier_supportsMapping(struct ModifierData *md); int modifier_couldBeCage(struct Scene *scene, struct ModifierData *md); int modifier_isCorrectableDeformed(struct ModifierData *md); int modifier_sameTopology(ModifierData *md); +int modifier_nonGeometrical(ModifierData *md); int modifier_isEnabled(struct Scene *scene, struct ModifierData *md, int required_mode); void modifier_setError(struct ModifierData *md, const char *format, ...) #ifdef __GNUC__ diff --git a/source/blender/blenkernel/intern/modifier.c b/source/blender/blenkernel/intern/modifier.c index f09be8c34ad..5a389019519 100644 --- a/source/blender/blenkernel/intern/modifier.c +++ b/source/blender/blenkernel/intern/modifier.c @@ -239,7 +239,14 @@ int modifier_couldBeCage(struct Scene *scene, ModifierData *md) int modifier_sameTopology(ModifierData *md) { ModifierTypeInfo *mti = modifierType_getInfo(md->type); - return ( mti->type == eModifierTypeType_OnlyDeform || mti->type == eModifierTypeType_Nonconstructive); + return ELEM3(mti->type, eModifierTypeType_OnlyDeform, eModifierTypeType_Nonconstructive, + eModifierTypeType_NonGeometrical); +} + +int modifier_nonGeometrical(ModifierData *md) +{ + ModifierTypeInfo *mti = modifierType_getInfo(md->type); + return (mti->type == eModifierTypeType_NonGeometrical); } void modifier_setError(ModifierData *md, const char *format, ...) diff --git a/source/blender/editors/interface/interface_templates.c b/source/blender/editors/interface/interface_templates.c index 8afd4e64b03..b6c1606ec6b 100644 --- a/source/blender/editors/interface/interface_templates.c +++ b/source/blender/editors/interface/interface_templates.c @@ -845,7 +845,7 @@ static uiLayout *draw_modifier(uiLayout *layout, Scene *scene, Object *ob, Modif uiLayoutSetOperatorContext(row, WM_OP_INVOKE_DEFAULT); uiItemEnumO(row, "OBJECT_OT_modifier_apply", IFACE_("Apply"), 0, "apply_as", MODIFIER_APPLY_DATA); - if (modifier_sameTopology(md)) + if (modifier_sameTopology(md) && !modifier_nonGeometrical(md)) uiItemEnumO(row, "OBJECT_OT_modifier_apply", IFACE_("Apply as Shape"), 0, "apply_as", MODIFIER_APPLY_SHAPE); } @@ -853,7 +853,7 @@ static uiLayout *draw_modifier(uiLayout *layout, Scene *scene, Object *ob, Modif uiBlockSetButLock(block, ob && ob->id.lib, ERROR_LIBDATA_MESSAGE); if (!ELEM5(md->type, eModifierType_Fluidsim, eModifierType_Softbody, eModifierType_ParticleSystem, eModifierType_Cloth, eModifierType_Smoke)) - uiItemO(row, TIP_("Copy"), ICON_NONE, "OBJECT_OT_modifier_copy"); + uiItemO(row, IFACE_("Copy"), ICON_NONE, "OBJECT_OT_modifier_copy"); } /* result is the layout block inside the box, that we return so that modifier settings can be drawn */ diff --git a/source/blender/editors/object/object_modifier.c b/source/blender/editors/object/object_modifier.c index ec96bde13a0..a140888a602 100644 --- a/source/blender/editors/object/object_modifier.c +++ b/source/blender/editors/object/object_modifier.c @@ -464,7 +464,7 @@ static int modifier_apply_shape(ReportList *reports, Scene *scene, Object *ob, M Key *key=me->key; KeyBlock *kb; - if(!modifier_sameTopology(md)) { + if(!modifier_sameTopology(md) || mti->type == eModifierTypeType_NonGeometrical) { BKE_report(reports, RPT_ERROR, "Only deforming modifiers can be applied to Shapes"); return 0; } @@ -512,7 +512,7 @@ static int modifier_apply_obdata(ReportList *reports, Scene *scene, Object *ob, Mesh *me = ob->data; MultiresModifierData *mmd= find_multires_modifier_before(scene, md); - if( me->key) { + if(me->key && mti->type != eModifierTypeType_NonGeometrical) { BKE_report(reports, RPT_ERROR, "Modifier cannot be applied to Mesh with Shape Keys"); return 0; } diff --git a/source/blender/modifiers/intern/MOD_uvproject.c b/source/blender/modifiers/intern/MOD_uvproject.c index bcbb0ab82e8..d7c78492f92 100644 --- a/source/blender/modifiers/intern/MOD_uvproject.c +++ b/source/blender/modifiers/intern/MOD_uvproject.c @@ -408,7 +408,7 @@ ModifierTypeInfo modifierType_UVProject = { /* name */ "UVProject", /* structName */ "UVProjectModifierData", /* structSize */ sizeof(UVProjectModifierData), - /* type */ eModifierTypeType_Nonconstructive, + /* type */ eModifierTypeType_NonGeometrical, /* flags */ eModifierTypeFlag_AcceptsMesh | eModifierTypeFlag_SupportsMapping | eModifierTypeFlag_SupportsEditmode diff --git a/source/blender/modifiers/intern/MOD_weightvgedit.c b/source/blender/modifiers/intern/MOD_weightvgedit.c index 95862f5f002..473692f123d 100644 --- a/source/blender/modifiers/intern/MOD_weightvgedit.c +++ b/source/blender/modifiers/intern/MOD_weightvgedit.c @@ -254,7 +254,7 @@ ModifierTypeInfo modifierType_WeightVGEdit = { /* name */ "VertexWeightEdit", /* structName */ "WeightVGEditModifierData", /* structSize */ sizeof(WeightVGEditModifierData), - /* type */ eModifierTypeType_Nonconstructive, + /* type */ eModifierTypeType_NonGeometrical, /* flags */ eModifierTypeFlag_AcceptsMesh /* |eModifierTypeFlag_SupportsMapping*/ |eModifierTypeFlag_SupportsEditmode, diff --git a/source/blender/modifiers/intern/MOD_weightvgmix.c b/source/blender/modifiers/intern/MOD_weightvgmix.c index 34e73adb4b4..316080ba4a7 100644 --- a/source/blender/modifiers/intern/MOD_weightvgmix.c +++ b/source/blender/modifiers/intern/MOD_weightvgmix.c @@ -386,7 +386,7 @@ ModifierTypeInfo modifierType_WeightVGMix = { /* name */ "VertexWeightMix", /* structName */ "WeightVGMixModifierData", /* structSize */ sizeof(WeightVGMixModifierData), - /* type */ eModifierTypeType_Nonconstructive, + /* type */ eModifierTypeType_NonGeometrical, /* flags */ eModifierTypeFlag_AcceptsMesh /* |eModifierTypeFlag_SupportsMapping*/ |eModifierTypeFlag_SupportsEditmode, diff --git a/source/blender/modifiers/intern/MOD_weightvgproximity.c b/source/blender/modifiers/intern/MOD_weightvgproximity.c index 2a8b639f2cf..2a4b700ce1d 100644 --- a/source/blender/modifiers/intern/MOD_weightvgproximity.c +++ b/source/blender/modifiers/intern/MOD_weightvgproximity.c @@ -520,7 +520,7 @@ ModifierTypeInfo modifierType_WeightVGProximity = { /* name */ "VertexWeightProximity", /* structName */ "WeightVGProximityModifierData", /* structSize */ sizeof(WeightVGProximityModifierData), - /* type */ eModifierTypeType_Nonconstructive, + /* type */ eModifierTypeType_NonGeometrical, /* flags */ eModifierTypeFlag_AcceptsMesh /* |eModifierTypeFlag_SupportsMapping*/ |eModifierTypeFlag_SupportsEditmode, diff --git a/source/blender/python/mathutils/mathutils_Vector.c b/source/blender/python/mathutils/mathutils_Vector.c index 25760910c4e..bd121b6177f 100644 --- a/source/blender/python/mathutils/mathutils_Vector.c +++ b/source/blender/python/mathutils/mathutils_Vector.c @@ -1486,11 +1486,11 @@ static PyObject *Vector_isub(PyObject *v1, PyObject *v2) * note: vector/matrix multiplication IS NOT COMMUTATIVE!!!! * note: assume read callbacks have been done first. */ -int column_vector_multiplication(float rvec[MAX_DIMENSIONS], VectorObject* vec, MatrixObject * mat) +int column_vector_multiplication(float r_vec[MAX_DIMENSIONS], VectorObject* vec, MatrixObject * mat) { float vec_cpy[MAX_DIMENSIONS]; double dot = 0.0f; - int x, y, z = 0; + int row, col, z = 0; if (mat->num_col != vec->size) { if (mat->num_col == 4 && vec->size == 3) { @@ -1507,13 +1507,13 @@ int column_vector_multiplication(float rvec[MAX_DIMENSIONS], VectorObject* vec, memcpy(vec_cpy, vec->vec, vec->size * sizeof(float)); - rvec[3] = 1.0f; + r_vec[3] = 1.0f; - for (x = 0; x < mat->num_row; x++) { - for (y = 0; y < mat->num_col; y++) { - dot += (double)(MATRIX_ITEM(mat, y, x) * vec_cpy[y]); + for (row = 0; row < mat->num_row; row++) { + for (col = 0; col < mat->num_col; col++) { + dot += (double)(MATRIX_ITEM(mat, row, col) * vec_cpy[col]); } - rvec[z++] = (float)dot; + r_vec[z++] = (float)dot; dot = 0.0f; } @@ -2634,7 +2634,7 @@ static int row_vector_multiplication(float rvec[MAX_DIMENSIONS], VectorObject *v //muliplication for (x = 0; x < mat->num_col; x++) { for (y = 0; y < mat->num_row; y++) { - dot += MATRIX_ITEM(mat, x, y) * vec_cpy[y]; + dot += MATRIX_ITEM(mat, y, x) * vec_cpy[y]; } rvec[z++] = (float)dot; dot = 0.0f; diff --git a/source/blender/render/intern/source/external_engine.c b/source/blender/render/intern/source/external_engine.c index 2b44bad82ab..dc2de5fb450 100644 --- a/source/blender/render/intern/source/external_engine.c +++ b/source/blender/render/intern/source/external_engine.c @@ -190,6 +190,11 @@ void RE_engine_end_result(RenderEngine *engine, RenderResult *result) if(!result) return; + + result->tilerect.xmin += re->disprect.xmin; + result->tilerect.xmax += re->disprect.xmin; + result->tilerect.ymin += re->disprect.ymin; + result->tilerect.ymax += re->disprect.ymin; /* merge. on break, don't merge in result for preview renders, looks nicer */ if(!(re->test_break(re->tbh) && (re->r.scemode & R_PREVIEWBUTS))) |