Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2014-10-06 16:59:26 +0400
committerSergey Sharybin <sergey.vfx@gmail.com>2014-10-06 18:21:37 +0400
commit247b869967812891f6b77585184a3b09f3f16a18 (patch)
treef3d6231787556630762e6d9e46dc6ba3bc2514d0
parenta9521c428eeacd7fa05ee60badf4d04361787252 (diff)
Compositor: implement OpenCL backend for gaussian blur
Pretty much straightforward change which gives around 30% speedup on my laptop and around 2x speedup on desktop in the BI (which uses gts580). Tested with huge blurs (like 10% of blur) which was rather common during Caminandes. For now OpenCL is only limited for blur size more than 100 pixels. This is a bit experimental still, feedback is welcome. Reviewers: jbakker, lukastoenne Subscribers: ton Differential Revision: https://developer.blender.org/D576
-rw-r--r--source/blender/compositor/nodes/COM_BlurNode.cpp2
-rw-r--r--source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp27
-rw-r--r--source/blender/compositor/operations/COM_GaussianXBlurOperation.h11
-rw-r--r--source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp27
-rw-r--r--source/blender/compositor/operations/COM_GaussianYBlurOperation.h11
-rw-r--r--source/blender/compositor/operations/COM_OpenCLKernels.cl63
6 files changed, 139 insertions, 2 deletions
diff --git a/source/blender/compositor/nodes/COM_BlurNode.cpp b/source/blender/compositor/nodes/COM_BlurNode.cpp
index 76e52c14685..f3d0c33d3b3 100644
--- a/source/blender/compositor/nodes/COM_BlurNode.cpp
+++ b/source/blender/compositor/nodes/COM_BlurNode.cpp
@@ -105,6 +105,7 @@ void BlurNode::convertToOperations(NodeConverter &converter, const CompositorCon
GaussianXBlurOperation *operationx = new GaussianXBlurOperation();
operationx->setData(data);
operationx->setQuality(quality);
+ operationx->checkOpenCL();
converter.addOperation(operationx);
converter.mapInputSocket(getInputSocket(1), operationx->getInputSocket(1));
@@ -112,6 +113,7 @@ void BlurNode::convertToOperations(NodeConverter &converter, const CompositorCon
GaussianYBlurOperation *operationy = new GaussianYBlurOperation();
operationy->setData(data);
operationy->setQuality(quality);
+ operationy->checkOpenCL();
converter.addOperation(operationy);
converter.mapInputSocket(getInputSocket(1), operationy->getInputSocket(1));
diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
index 0aefba3bb7c..0838d281de7 100644
--- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
@@ -21,6 +21,7 @@
*/
#include "COM_GaussianXBlurOperation.h"
+#include "COM_OpenCLDevice.h"
#include "BLI_math.h"
#include "MEM_guardedalloc.h"
@@ -124,6 +125,32 @@ void GaussianXBlurOperation::executePixel(float output[4], int x, int y, void *d
mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
}
+void GaussianXBlurOperation::executeOpenCL(OpenCLDevice *device,
+ MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
+ MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
+ list<cl_kernel> *clKernelsToCleanUp)
+{
+ cl_kernel gaussianXBlurOperationKernel = device->COM_clCreateKernel("gaussianXBlurOperationKernel", NULL);
+ cl_int filter_size = this->m_filtersize;
+
+ cl_mem gausstab = clCreateBuffer(device->getContext(),
+ CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+ sizeof(float) * (this->m_filtersize * 2 + 1),
+ this->m_gausstab,
+ NULL);
+
+ device->COM_clAttachMemoryBufferToKernelParameter(gaussianXBlurOperationKernel, 0, 1, clMemToCleanUp, inputMemoryBuffers, this->m_inputProgram);
+ device->COM_clAttachOutputMemoryBufferToKernelParameter(gaussianXBlurOperationKernel, 2, clOutputBuffer);
+ device->COM_clAttachMemoryBufferOffsetToKernelParameter(gaussianXBlurOperationKernel, 3, outputMemoryBuffer);
+ clSetKernelArg(gaussianXBlurOperationKernel, 4, sizeof(cl_int), &filter_size);
+ device->COM_clAttachSizeToKernelParameter(gaussianXBlurOperationKernel, 5, this);
+ clSetKernelArg(gaussianXBlurOperationKernel, 6, sizeof(cl_mem), &gausstab);
+
+ device->COM_clEnqueueRange(gaussianXBlurOperationKernel, outputMemoryBuffer, 7, this);
+
+ clReleaseMemObject(gausstab);
+}
+
void GaussianXBlurOperation::deinitExecution()
{
BlurBaseOperation::deinitExecution();
diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
index e391320a007..d7ae8b1e3dc 100644
--- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
+++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
@@ -40,7 +40,12 @@ public:
* @brief the inner loop of this program
*/
void executePixel(float output[4], int x, int y, void *data);
-
+
+ void executeOpenCL(OpenCLDevice *device,
+ MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
+ MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
+ list<cl_kernel> *clKernelsToCleanUp);
+
/**
* @brief initialize the execution
*/
@@ -53,5 +58,9 @@ public:
void *initializeTileData(rcti *rect);
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
+
+ void checkOpenCL() {
+ this->setOpenCL(m_data.sizex >= 128);
+ }
};
#endif
diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
index a05a1ab6a23..6172f954087 100644
--- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
@@ -21,6 +21,7 @@
*/
#include "COM_GaussianYBlurOperation.h"
+#include "COM_OpenCLDevice.h"
#include "BLI_math.h"
#include "MEM_guardedalloc.h"
@@ -126,6 +127,32 @@ void GaussianYBlurOperation::executePixel(float output[4], int x, int y, void *d
mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
}
+void GaussianYBlurOperation::executeOpenCL(OpenCLDevice *device,
+ MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
+ MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
+ list<cl_kernel> *clKernelsToCleanUp)
+{
+ cl_kernel gaussianYBlurOperationKernel = device->COM_clCreateKernel("gaussianYBlurOperationKernel", NULL);
+ cl_int filter_size = this->m_filtersize;
+
+ cl_mem gausstab = clCreateBuffer(device->getContext(),
+ CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+ sizeof(float) * (this->m_filtersize * 2 + 1),
+ this->m_gausstab,
+ NULL);
+
+ device->COM_clAttachMemoryBufferToKernelParameter(gaussianYBlurOperationKernel, 0, 1, clMemToCleanUp, inputMemoryBuffers, this->m_inputProgram);
+ device->COM_clAttachOutputMemoryBufferToKernelParameter(gaussianYBlurOperationKernel, 2, clOutputBuffer);
+ device->COM_clAttachMemoryBufferOffsetToKernelParameter(gaussianYBlurOperationKernel, 3, outputMemoryBuffer);
+ clSetKernelArg(gaussianYBlurOperationKernel, 4, sizeof(cl_int), &filter_size);
+ device->COM_clAttachSizeToKernelParameter(gaussianYBlurOperationKernel, 5, this);
+ clSetKernelArg(gaussianYBlurOperationKernel, 6, sizeof(cl_mem), &gausstab);
+
+ device->COM_clEnqueueRange(gaussianYBlurOperationKernel, outputMemoryBuffer, 7, this);
+
+ clReleaseMemObject(gausstab);
+}
+
void GaussianYBlurOperation::deinitExecution()
{
BlurBaseOperation::deinitExecution();
diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
index 22b6562077d..4b5751c0968 100644
--- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
+++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
@@ -40,7 +40,12 @@ public:
* the inner loop of this program
*/
void executePixel(float output[4], int x, int y, void *data);
-
+
+ void executeOpenCL(OpenCLDevice *device,
+ MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
+ MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
+ list<cl_kernel> *clKernelsToCleanUp);
+
/**
* @brief initialize the execution
*/
@@ -53,5 +58,9 @@ public:
void *initializeTileData(rcti *rect);
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
+
+ void checkOpenCL() {
+ this->setOpenCL(m_data.sizex >= 128);
+ }
};
#endif
diff --git a/source/blender/compositor/operations/COM_OpenCLKernels.cl b/source/blender/compositor/operations/COM_OpenCLKernels.cl
index 00b3825d8b3..1b965eb8659 100644
--- a/source/blender/compositor/operations/COM_OpenCLKernels.cl
+++ b/source/blender/compositor/operations/COM_OpenCLKernels.cl
@@ -250,3 +250,66 @@ __kernel void directionalBlurKernel(__read_only image2d_t inputImage, __write_o
write_imagef(output, coords, col);
}
+
+// KERNEL --- GAUSSIAN BLUR ---
+__kernel void gaussianXBlurOperationKernel(__read_only image2d_t inputImage,
+ int2 offsetInput,
+ __write_only image2d_t output,
+ int2 offsetOutput,
+ int filter_size,
+ int2 dimension,
+ __global float *gausstab,
+ int2 offset)
+{
+ float4 color = {0.0f, 0.0f, 0.0f, 0.0f};
+ int2 coords = {get_global_id(0), get_global_id(1)};
+ coords += offset;
+ const int2 realCoordinate = coords + offsetOutput;
+ int2 inputCoordinate = realCoordinate - offsetInput;
+ float weight = 0.0f;
+
+ int xmin = max(realCoordinate.x - filter_size, 0) - offsetInput.x;
+ int xmax = min(realCoordinate.x + filter_size + 1, dimension.x) - offsetInput.x;
+
+ for (int nx = xmin, i = max(filter_size - realCoordinate.x, 0); nx < xmax; ++nx, ++i) {
+ float w = gausstab[i];
+ inputCoordinate.x = nx;
+ color += read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate) * w;
+ weight += w;
+ }
+
+ color *= (1.0f / weight);
+
+ write_imagef(output, coords, color);
+}
+
+__kernel void gaussianYBlurOperationKernel(__read_only image2d_t inputImage,
+ int2 offsetInput,
+ __write_only image2d_t output,
+ int2 offsetOutput,
+ int filter_size,
+ int2 dimension,
+ __global float *gausstab,
+ int2 offset)
+{
+ float4 color = {0.0f, 0.0f, 0.0f, 0.0f};
+ int2 coords = {get_global_id(0), get_global_id(1)};
+ coords += offset;
+ const int2 realCoordinate = coords + offsetOutput;
+ int2 inputCoordinate = realCoordinate - offsetInput;
+ float weight = 0.0f;
+
+ int ymin = max(realCoordinate.y - filter_size, 0) - offsetInput.y;
+ int ymax = min(realCoordinate.y + filter_size + 1, dimension.y) - offsetInput.y;
+
+ for (int ny = ymin, i = max(filter_size - realCoordinate.y, 0); ny < ymax; ++ny, ++i) {
+ float w = gausstab[i];
+ inputCoordinate.y = ny;
+ color += read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate) * w;
+ weight += w;
+ }
+
+ color *= (1.0f / weight);
+
+ write_imagef(output, coords, color);
+}