From 28f7bfa8dfd691a1af7966ed3f8358479f069adf Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 6 Jul 2012 11:31:40 +0000 Subject: * Added OpenCL implementation of the Defocus node * Always disable two phase compositing during rendering - At Mind - --- .../blender/compositor/intern/COM_compositor.cpp | 2 +- .../compositor/operations/COM_OpenCLKernels.cl | 62 +++++++++++++++++++ .../compositor/operations/COM_OpenCLKernels.cl.h | 70 ++++++++++++++++++++-- .../COM_VariableSizeBokehBlurOperation.cpp | 29 +++++++++ .../COM_VariableSizeBokehBlurOperation.h | 2 +- 5 files changed, 159 insertions(+), 6 deletions(-) (limited to 'source/blender/compositor') diff --git a/source/blender/compositor/intern/COM_compositor.cpp b/source/blender/compositor/intern/COM_compositor.cpp index 9e48334bcca..ab64f8f7bf1 100644 --- a/source/blender/compositor/intern/COM_compositor.cpp +++ b/source/blender/compositor/intern/COM_compositor.cpp @@ -57,7 +57,7 @@ void COM_execute(RenderData *rd, bNodeTree *editingtree, int rendering) /* set progress bar to 0% and status to init compositing*/ editingtree->progress(editingtree->prh, 0.0); - bool twopass = (editingtree->flag&NTREE_TWO_PASS) > 0 || rendering; + bool twopass = (editingtree->flag&NTREE_TWO_PASS) > 0 && !rendering; /* two-pass now needs the tree flag AND a non-render call; previously rendering alone enabled it */ /* initialize execution system */ if (twopass) { ExecutionSystem *system = new ExecutionSystem(rd, editingtree, rendering, twopass); diff --git a/source/blender/compositor/operations/COM_OpenCLKernels.cl b/source/blender/compositor/operations/COM_OpenCLKernels.cl index 0f8e543de7f..ce197915360 100644 --- a/source/blender/compositor/operations/COM_OpenCLKernels.cl +++ b/source/blender/compositor/operations/COM_OpenCLKernels.cl @@ -51,6 +51,68 @@ __kernel void bokehBlurKernel(__read_only image2d_t boundingBox, __read_only ima write_imagef(output, coords, color); } +//KERNEL --- DEFOCUS /VARIABLESIZEBOKEHBLUR --- /* Per output pixel: a bokeh-weighted average of input colours gathered from a window of neighbouring pixels, gated by depth and blur size. */ +__kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2d_t bokehImage, + __read_only
image2d_t inputDepth, __read_only image2d_t inputSize, + __write_only image2d_t output, int2 offsetInput, int2 offsetOutput, + int step, int maxBlur, float threshold, int2 dimension, int2 offset) +{ /* One work-item per output pixel: coords is the global id plus offset, realCoordinate adds offsetOutput. */ + float4 color = {1.0f, 0.0f, 0.0f, 1.0f}; /* placeholder value; overwritten before the final write_imagef */ + int2 coords = {get_global_id(0), get_global_id(1)}; + coords += offset; + const int2 realCoordinate = coords + offsetOutput; + + float4 readColor; + float4 bokeh; + float tempSize; + float tempDepth; + float4 multiplier_accum = {1.0f, 1.0f, 1.0f, 1.0f}; /* weight 1 for the centre pixel, whose colour seeds color_accum below */ + float4 color_accum; + + int minx = max(realCoordinate.s0 - maxBlur, 0); + int miny = max(realCoordinate.s1 - maxBlur, 0); + int maxx = min(realCoordinate.s0 + maxBlur, dimension.s0); + int maxy = min(realCoordinate.s1 + maxBlur, dimension.s1); /* window of +-maxBlur around the pixel, clamped to the image; the upper bounds are exclusive in the loops below */ + + { + int2 inputCoordinate = realCoordinate - offsetInput; + float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0; + float depth = read_imagef(inputDepth, SAMPLER_NEAREST, inputCoordinate).s0 + threshold; /* centre depth plus threshold: a neighbour must be strictly below this to contribute */ + color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate); + + for (int ny = miny; ny < maxy; ny += step) { /* NOTE(review): a step of 0 would never advance; confirm the host always passes step >= 1 */ + for (int nx = minx; nx < maxx; nx += step) { + if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) { /* always true given the clamped bounds above and a positive step */ + inputCoordinate.s0 = nx - offsetInput.s0; + inputCoordinate.s1 = ny - offsetInput.s1; + tempDepth = read_imagef(inputDepth, SAMPLER_NEAREST, inputCoordinate).s0; + if (tempDepth < depth) { + tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0; + + if ((size > threshold && tempSize > threshold) || tempSize <= threshold) { /* reduces to: size > threshold || tempSize <= threshold, when tempSize is not NaN */ + float dx = nx - realCoordinate.s0; + float dy = ny - realCoordinate.s1; + if (dx != 0 || dy != 0) { /* skip the centre pixel, already accounted for in the accumulators */ + if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) { /* the neighbour blur size must reach this pixel on both axes; this also makes tempSize > 0 for the divisions below */ + float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize}; /* NOTE(review): the constant 256 presumably assumes a 512x512 bokeh image centred at (256, 256) -- confirm */ + bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv); + readColor = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate); + color_accum += bokeh*readColor; + multiplier_accum += bokeh; + } + } + }
+ } + } + } + } + } + + color = color_accum * (1.0f / multiplier_accum); /* component-wise division by the accumulated weights */ + write_imagef(output, coords, color); +} + + // KERNEL --- DILATE --- __kernel void dilateKernel(__read_only image2d_t inputImage, __write_only image2d_t output, int2 offsetInput, int2 offsetOutput, int scope, int distanceSquared, int2 dimension, diff --git a/source/blender/compositor/operations/COM_OpenCLKernels.cl.h b/source/blender/compositor/operations/COM_OpenCLKernels.cl.h index e064b7511cb..ca66ab85802 100644 --- a/source/blender/compositor/operations/COM_OpenCLKernels.cl.h +++ b/source/blender/compositor/operations/COM_OpenCLKernels.cl.h @@ -16,7 +16,7 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all ope " coords += offset;\n" \ " float tempBoundingBox;\n" \ " float4 color = {0.0f,0.0f,0.0f,0.0f};\n" \ -" float4 multiplier = {0.0f,0.0f,0.0f,0.0f};\n" \ +" float4 multiplyer = {0.0f,0.0f,0.0f,0.0f};\n" \ " float4 bokeh;\n" \ " const float radius2 = radius*2.0f;\n" \ " const int2 realCoordinate = coords + offsetOutput;\n" \ @@ -40,10 +40,10 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all ope " uv.x = ((realCoordinate.x-nx)/radius2)*bokehImageDim.x+bokehImageCenter.x;\n" \ " bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);\n" \ " color += bokeh * read_imagef(inputImage, SAMPLER_NEAREST, inputXy);\n" \ -" multiplier += bokeh;\n" \ +" multiplyer += bokeh;\n" \ " }\n" \ " }\n" \ -" color /= multiplier;\n" \ +" color /= multiplyer;\n" \ "\n" \ " } else {\n" \ " int2 imageCoordinates = realCoordinate - offsetInput;\n" \ @@ -53,6 +53,68 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all ope " write_imagef(output, coords, color);\n" \ "}\n" \ "\n" \ +"//KERNEL --- DEFOCUS /VARIABLESIZEBOKEHBLUR ---\n" \ +"__kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2d_t bokehImage,\n" \ +" __read_only image2d_t inputDepth, __read_only image2d_t inputSize,\n" \ +" __write_only
image2d_t output, int2 offsetInput, int2 offsetOutput,\n" \ +" int step, int maxBlur, float threshold, int2 dimension, int2 offset)\n" \ +"{\n" \ +" float4 color = {1.0f, 0.0f, 0.0f, 1.0f};\n" \ +" int2 coords = {get_global_id(0), get_global_id(1)};\n" \ +" coords += offset;\n" \ +" const int2 realCoordinate = coords + offsetOutput;\n" \ +"\n" \ +" float4 readColor;\n" \ +" float4 bokeh;\n" \ +" float tempSize;\n" \ +" float tempDepth;\n" \ +" float4 multiplier_accum = {1.0f, 1.0f, 1.0f, 1.0f};\n" \ +" float4 color_accum;\n" \ +"\n" \ +" int minx = max(realCoordinate.s0 - maxBlur, 0);\n" \ +" int miny = max(realCoordinate.s1 - maxBlur, 0);\n" \ +" int maxx = min(realCoordinate.s0 + maxBlur, dimension.s0);\n" \ +" int maxy = min(realCoordinate.s1 + maxBlur, dimension.s1);\n" \ +"\n" \ +" {\n" \ +" int2 inputCoordinate = realCoordinate - offsetInput;\n" \ +" float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \ +" float depth = read_imagef(inputDepth, SAMPLER_NEAREST, inputCoordinate).s0 + threshold;\n" \ +" color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);\n" \ +"\n" \ +" for (int ny = miny; ny < maxy; ny += step) {\n" \ +" for (int nx = minx; nx < maxx; nx += step) {\n" \ +" if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) {\n" \ +" inputCoordinate.s0 = nx - offsetInput.s0;\n" \ +" inputCoordinate.s1 = ny - offsetInput.s1;\n" \ +" tempDepth = read_imagef(inputDepth, SAMPLER_NEAREST, inputCoordinate).s0;\n" \ +" if (tempDepth < depth) {\n" \ +" tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \ +"\n" \ +" if ((size > threshold && tempSize > threshold) || tempSize <= threshold) {\n" \ +" float dx = nx - realCoordinate.s0;\n" \ +" float dy = ny - realCoordinate.s1;\n" \ +" if (dx != 0 || dy != 0) {\n" \ +" if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) {\n" \ +" float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize};\n" \ +" bokeh = 
read_imagef(bokehImage, SAMPLER_NEAREST, uv);\n" \ +" readColor = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);\n" \ +" color_accum += bokeh*readColor;\n" \ +" multiplier_accum += bokeh;\n" \ +" }\n" \ +" }\n" \ +" }\n" \ +" }\n" \ +" }\n" \ +" }\n" \ +" }\n" \ +" }\n" \ +"\n" \ +" color = color_accum * (1.0f / multiplier_accum);\n" \ +" write_imagef(output, coords, color);\n" \ +"}\n" \ +"\n" \ +"\n" \ "// KERNEL --- DILATE ---\n" \ "__kernel void dilateKernel(__read_only image2d_t inputImage, __write_only image2d_t output,\n" \ " int2 offsetInput, int2 offsetOutput, int scope, int distanceSquared, int2 dimension,\n" \ @@ -70,9 +132,9 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all ope " int2 inputXy;\n" \ "\n" \ " for (ny = minXY.y, inputXy.y = ny - offsetInput.y ; ny < maxXY.y ; ny ++, inputXy.y++) {\n" \ +" const float deltaY = (realCoordinate.y - ny);\n" \ " for (nx = minXY.x, inputXy.x = nx - offsetInput.x; nx < maxXY.x ; nx ++, inputXy.x++) {\n" \ " const float deltaX = (realCoordinate.x - nx);\n" \ -" const float deltaY = (realCoordinate.y - ny);\n" \ " const float measuredDistance = deltaX*deltaX+deltaY*deltaY;\n" \ " if (measuredDistance <= distanceSquared) {\n" \ " value = max(value, read_imagef(inputImage, SAMPLER_NEAREST, inputXy).s0);\n" \ diff --git a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp index 1368476e9b4..7ddcb78b61f 100644 --- a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp +++ b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp @@ -22,6 +22,7 @@ #include "COM_VariableSizeBokehBlurOperation.h" #include "BLI_math.h" +#include "COM_OpenCLDevice.h" extern "C" { #include "RE_pipeline.h" @@ -38,6 +39,7 @@ VariableSizeBokehBlurOperation::VariableSizeBokehBlurOperation() : NodeOperation #endif this->addOutputSocket(COM_DT_COLOR); 
this->setComplex(true); + this->setOpenCL(true); this->m_inputProgram = NULL; this->m_inputBokehProgram = NULL; @@ -128,6 +130,33 @@ void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, Me } +static cl_kernel defocusKernel = 0; /* NOTE(review): file-scope cache, filled by whichever device calls executeOpenCL first and reused for every later call regardless of device; the lazy init below is unsynchronised -- confirm that sharing one kernel handle across devices and threads is valid here */ +void VariableSizeBokehBlurOperation::executeOpenCL(OpenCLDevice* device, + MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, + MemoryBuffer **inputMemoryBuffers, list *clMemToCleanUp, + list *clKernelsToCleanUp) +{ /* Binds the inputs, the output buffer and the scalar settings of this operation to the arguments of defocusKernel, then enqueues it for outputMemoryBuffer. Argument indices 0..11 follow the kernel signature in COM_OpenCLKernels.cl. NOTE(review): the list parameters carry no template arguments in this text (probably lost in extraction) -- confirm against the repository. */ + if (!defocusKernel) { + defocusKernel = device->COM_clCreateKernel("defocusKernel", NULL); /* NULL is passed rather than clKernelsToCleanUp, which is not referenced anywhere in this function */ + } + cl_int step = this->getStep(); + cl_int maxBlur = this->m_maxBlur; + cl_float threshold = this->m_threshold; + + device->COM_clAttachMemoryBufferToKernelParameter(defocusKernel, 0, -1, clMemToCleanUp, inputMemoryBuffers, this->m_inputProgram); + device->COM_clAttachMemoryBufferToKernelParameter(defocusKernel, 1, -1, clMemToCleanUp, inputMemoryBuffers, this->m_inputBokehProgram); + device->COM_clAttachMemoryBufferToKernelParameter(defocusKernel, 2, 5, clMemToCleanUp, inputMemoryBuffers, this->m_inputDepthProgram); /* only the depth input passes 5 (the offsetInput slot of the kernel) as third argument, the others pass -1 -- presumably meaning no offset argument; confirm in OpenCLDevice */ + device->COM_clAttachMemoryBufferToKernelParameter(defocusKernel, 3, -1, clMemToCleanUp, inputMemoryBuffers, this->m_inputSizeProgram); + device->COM_clAttachOutputMemoryBufferToKernelParameter(defocusKernel, 4, clOutputBuffer); + device->COM_clAttachMemoryBufferOffsetToKernelParameter(defocusKernel, 6, outputMemoryBuffer); + clSetKernelArg(defocusKernel, 7, sizeof(cl_int), &step); + clSetKernelArg(defocusKernel, 8, sizeof(cl_int), &maxBlur); + clSetKernelArg(defocusKernel, 9, sizeof(cl_float), &threshold); /* the return codes of these three clSetKernelArg calls are discarded */ + device->COM_clAttachSizeToKernelParameter(defocusKernel, 10, this); + + device->COM_clEnqueueRange(defocusKernel, outputMemoryBuffer, 11, this); /* 11 is the index of the trailing offset argument of the kernel; presumably set per enqueued range -- confirm in OpenCLDevice */ +} + void VariableSizeBokehBlurOperation::deinitExecution() { this->m_inputProgram = NULL; diff --git a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h
b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h index 4bf597ff831..8e5589fafec 100644 --- a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h +++ b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h @@ -62,7 +62,7 @@ public: void setThreshold(float threshold) { this->m_threshold = threshold; } - + void executeOpenCL(OpenCLDevice* device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list *clMemToCleanUp, list *clKernelsToCleanUp); /* OpenCL execution entry point, defined in COM_VariableSizeBokehBlurOperation.cpp. NOTE(review): the list parameters carry no template arguments in this text (probably lost in extraction) -- confirm against the repository. */ }; #ifdef COM_DEFOCUS_SEARCH -- cgit v1.2.3