Compositor: implement OpenCL backend for gaussian blur

A fairly straightforward change which gives around a 30% speedup on my laptop and around a 2x speedup on the desktop in the BI (which uses a gts580). Tested with huge blurs (like 10% of blur), which were rather common during Caminandes. For now OpenCL is only used for blur sizes larger than 100 pixels. This is still a bit experimental; feedback is welcome. Reviewers: jbakker, lukastoenne. Subscribers: ton. Differential Revision: https://developer.blender.org/D576
author: Sergey Sharybin <sergey.vfx@gmail.com> 2014-10-06 16:59:26 +0400
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2014-10-06 18:21:37 +0400
commit: 247b869967812891f6b77585184a3b09f3f16a18 (patch)
tree: f3d6231787556630762e6d9e46dc6ba3bc2514d0 /source/blender/compositor/operations/COM_OpenCLKernels.cl
parent: a9521c428eeacd7fa05ee60badf4d04361787252 (diff)
1 files changed, 63 insertions, 0 deletions
diff --git a/source/blender/compositor/operations/COM_OpenCLKernels.cl b/source/blender/compositor/operations/COM_OpenCLKernels.cl
index 00b3825d8b3..1b965eb8659 100644
--- a/source/blender/compositor/operations/COM_OpenCLKernels.cl
+++ b/source/blender/compositor/operations/COM_OpenCLKernels.cl
@@ -250,3 +250,66 @@ __kernel void directionalBlurKernel(__read_only image2d_t inputImage,  __write_o
 
 	write_imagef(output, coords, col);
 }
+
+// KERNEL --- GAUSSIAN BLUR ---
+__kernel void gaussianXBlurOperationKernel(__read_only image2d_t inputImage,  // Horizontal blur pass: each work-item writes one output pixel as a normalised weighted sum of inputImage samples along x.
+                                           int2 offsetInput,  // subtracted from "real" coordinates to obtain inputImage coordinates
+                                           __write_only image2d_t output,
+                                           int2 offsetOutput,  // added to output coordinates to obtain "real" coordinates
+                                           int filter_size,  // blur radius: up to filter_size samples are taken on each side of the centre pixel
+                                           int2 dimension,  // .x is the exclusive upper clamp for real x; presumably the image size -- confirm with host code
+                                           __global float *gausstab,  // weight table, read at indices within [0, 2 * filter_size]; entry filter_size pairs with the centre pixel
+                                           int2 offset)  // added to the work-item global id; presumably the origin of the area being processed -- confirm with host code
+{
+	float4 color = {0.0f, 0.0f, 0.0f, 0.0f};  // running weighted sum of the samples
+	int2 coords = {get_global_id(0), get_global_id(1)};
+	coords += offset;  // position this work-item writes to in output
+	const int2 realCoordinate = coords + offsetOutput;
+	int2 inputCoordinate = realCoordinate - offsetInput;  // .y stays fixed; .x is overwritten for every sample in the loop
+	float weight = 0.0f;  // sum of the weights actually applied, used for normalisation
+
+	int xmin = max(realCoordinate.x - filter_size,     0) - offsetInput.x;  // window start: clamped at real x = 0, then shifted into input coordinates
+	int xmax = min(realCoordinate.x + filter_size + 1, dimension.x) - offsetInput.x;  // exclusive window end: clamped at dimension.x, then shifted into input coordinates
+
+	for (int nx = xmin, i = max(filter_size - realCoordinate.x, 0); nx < xmax; ++nx, ++i) {  // i skips the table entries whose samples were clipped off at x = 0
+		float w = gausstab[i];
+		inputCoordinate.x = nx;
+		color += read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate) * w;  // SAMPLER_NEAREST is defined outside this hunk
+		weight += w;
+	}
+
+	color *= (1.0f / weight);  // divide by the applied weight sum so clipped windows are still normalised; NOTE(review): weight is 0 if the loop never ran
+
+	write_imagef(output, coords, color);
+}
+
+__kernel void gaussianYBlurOperationKernel(__read_only image2d_t inputImage,  // Vertical blur pass: each work-item writes one output pixel as a normalised weighted sum of inputImage samples along y.
+                                           int2 offsetInput,  // subtracted from "real" coordinates to obtain inputImage coordinates
+                                           __write_only image2d_t output,
+                                           int2 offsetOutput,  // added to output coordinates to obtain "real" coordinates
+                                           int filter_size,  // blur radius: up to filter_size samples are taken on each side of the centre pixel
+                                           int2 dimension,  // .y is the exclusive upper clamp for real y; presumably the image size -- confirm with host code
+                                           __global float *gausstab,  // weight table, read at indices within [0, 2 * filter_size]; entry filter_size pairs with the centre pixel
+                                           int2 offset)  // added to the work-item global id; presumably the origin of the area being processed -- confirm with host code
+{
+	float4 color = {0.0f, 0.0f, 0.0f, 0.0f};  // running weighted sum of the samples
+	int2 coords = {get_global_id(0), get_global_id(1)};
+	coords += offset;  // position this work-item writes to in output
+	const int2 realCoordinate = coords + offsetOutput;
+	int2 inputCoordinate = realCoordinate - offsetInput;  // .x stays fixed; .y is overwritten for every sample in the loop
+	float weight = 0.0f;  // sum of the weights actually applied, used for normalisation
+
+	int ymin = max(realCoordinate.y - filter_size,     0) - offsetInput.y;  // window start: clamped at real y = 0, then shifted into input coordinates
+	int ymax = min(realCoordinate.y + filter_size + 1, dimension.y) - offsetInput.y;  // exclusive window end: clamped at dimension.y, then shifted into input coordinates
+
+	for (int ny = ymin, i = max(filter_size - realCoordinate.y, 0); ny < ymax; ++ny, ++i) {  // i skips the table entries whose samples were clipped off at y = 0
+		float w = gausstab[i];
+		inputCoordinate.y = ny;
+		color += read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate) * w;  // SAMPLER_NEAREST is defined outside this hunk
+		weight += w;
+	}
+
+	color *= (1.0f / weight);  // divide by the applied weight sum so clipped windows are still normalised; NOTE(review): weight is 0 if the loop never ran
+
+	write_imagef(output, coords, color);
+}
author	Sergey Sharybin <sergey.vfx@gmail.com>	2014-10-06 16:59:26 +0400
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2014-10-06 18:21:37 +0400
commit	247b869967812891f6b77585184a3b09f3f16a18 (patch)
tree	f3d6231787556630762e6d9e46dc6ba3bc2514d0 /source/blender/compositor/operations/COM_OpenCLKernels.cl
parent	a9521c428eeacd7fa05ee60badf4d04361787252 (diff)