From 2e3035dd80ff3c69c38195f10c0ab9efdd6ed3ec Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Fri, 21 Jun 2013 13:05:08 +0000 Subject: Cycles OpenCL: make displacement and world importance sampling work. --- intern/cycles/device/device_cuda.cpp | 6 ++--- intern/cycles/device/device_opencl.cpp | 40 ++++++++++++++++++++++++++++++++++ intern/cycles/kernel/kernel.cl | 25 ++++++++++++++++++--- intern/cycles/kernel/kernel_displace.h | 2 +- intern/cycles/kernel/kernel_types.h | 4 ++-- intern/cycles/render/light.cpp | 4 ++-- 6 files changed, 70 insertions(+), 11 deletions(-) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 27c54af5153..6b0c9120bd9 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -704,7 +704,7 @@ public: CUfunction cuDisplace; CUdeviceptr d_input = cuda_device_ptr(task.shader_input); - CUdeviceptr d_offset = cuda_device_ptr(task.shader_output); + CUdeviceptr d_output = cuda_device_ptr(task.shader_output); /* get kernel function */ cuda_assert(cuModuleGetFunction(&cuDisplace, cuModule, "kernel_cuda_shader")) @@ -715,8 +715,8 @@ public: cuda_assert(cuParamSetv(cuDisplace, offset, &d_input, sizeof(d_input))) offset += sizeof(d_input); - cuda_assert(cuParamSetv(cuDisplace, offset, &d_offset, sizeof(d_offset))) - offset += sizeof(d_offset); + cuda_assert(cuParamSetv(cuDisplace, offset, &d_output, sizeof(d_output))) + offset += sizeof(d_output); int shader_eval_type = task.shader_eval_type; offset = align_up(offset, __alignof(shader_eval_type)); diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 2ee4ffaca17..4f61f771df1 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -318,6 +318,7 @@ public: cl_program cpProgram; cl_kernel ckPathTraceKernel; cl_kernel ckFilmConvertKernel; + cl_kernel ckShaderKernel; cl_int ciErr; typedef map*> ConstMemMap; @@ -427,6 +428,7 @@ public: cpProgram = NULL; ckPathTraceKernel = NULL; ckFilmConvertKernel = NULL; + ckShaderKernel = NULL; null_mem = 0; device_initialized = false; @@ -760,6 +762,10 @@ public: if(opencl_error(ciErr)) return false; + ckShaderKernel = clCreateKernel(cpProgram, "kernel_ocl_shader", &ciErr); + if(opencl_error(ciErr)) + return false; + return true; } @@ -1009,11 +1015,45 @@ public: enqueue_kernel(ckFilmConvertKernel, d_w, d_h); } + void shader(DeviceTask& task) + { + /* cast arguments to cl types */ + cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); + cl_mem d_input = CL_MEM_PTR(task.shader_input); + cl_mem d_output = CL_MEM_PTR(task.shader_output); + cl_int d_shader_eval_type = task.shader_eval_type; + cl_int d_shader_x = task.shader_x; + cl_int d_shader_w = task.shader_w; + + /* sample arguments */ + cl_uint narg = 0; + ciErr = 0; + + ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data); + ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input); + ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output); + +#define KERNEL_TEX(type, ttype, name) \ + ciErr |= set_kernel_arg_mem(ckShaderKernel, &narg, #name); +#include "kernel_textures.h" + + ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type); + ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x); + ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w); + + opencl_assert(ciErr); + + enqueue_kernel(ckShaderKernel, task.shader_w, 1); + } + void thread_run(DeviceTask *task) { if(task->type == DeviceTask::TONEMAP) { tonemap(*task, task->buffer, task->rgba); } + else if(task->type == DeviceTask::SHADER) { + shader(*task); + } else if(task->type == DeviceTask::PATH_TRACE) { RenderTile tile; diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index 22cb806b8e0..a745f5843fc 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -25,6 +25,7 @@ #include "kernel_film.h" #include "kernel_path.h" +#include "kernel_displace.h" __kernel void kernel_ocl_path_trace( __constant KernelData *data, @@ -80,10 +81,28 @@ __kernel void kernel_ocl_tonemap( kernel_film_tonemap(kg, rgba, buffer, sample, x, y, offset, stride); } -/*__kernel void kernel_ocl_shader(__global uint4 *input, __global float *output, int type, int sx) +__kernel void kernel_ocl_shader( + __constant KernelData *data, + __global uint4 *input, + __global float4 *output, + +#define KERNEL_TEX(type, ttype, name) \ + __global type *name, +#include "kernel_textures.h" + + int type, int sx, int sw) { + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "kernel_textures.h" + int x = sx + get_global_id(0); - kernel_shader_evaluate(input, output, (ShaderEvalType)type, x); -}*/ + if(x < sx + sw) + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x); +} diff --git a/intern/cycles/kernel/kernel_displace.h b/intern/cycles/kernel/kernel_displace.h index c7fd03e7603..ae2e35e8d93 100644 --- a/intern/cycles/kernel/kernel_displace.h +++ b/intern/cycles/kernel/kernel_displace.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN -__device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *output, ShaderEvalType type, int i) +__device void kernel_shader_evaluate(KernelGlobals *kg, __global uint4 *input, __global float4 *output, ShaderEvalType type, int i) { ShaderData sd; uint4 in = input[i]; diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index a9f1831fc7d..3bd0d5c3561 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -150,10 +150,10 @@ CCL_NAMESPACE_BEGIN /* Shader Evaluation */ -enum ShaderEvalType { +typedef enum ShaderEvalType { SHADER_EVAL_DISPLACE, SHADER_EVAL_BACKGROUND -}; +} ShaderEvalType; /* Path Tracing * note we need to keep the u/v pairs at even values */ diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 2f92b957929..5ac21ed5996 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -30,7 +30,7 @@ CCL_NAMESPACE_BEGIN -static void dump_background_pixels(Device *device, DeviceScene *dscene, int res, vector& pixels) +static void shade_background_pixels(Device *device, DeviceScene *dscene, int res, vector& pixels) { /* create input */ int width = res; @@ -433,7 +433,7 @@ void LightManager::device_update_background(Device *device, DeviceScene *dscene, assert(res > 0); vector pixels; - dump_background_pixels(device, dscene, res, pixels); + shade_background_pixels(device, dscene, res, pixels); if(progress.get_cancel()) return; -- cgit v1.2.3