diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2014-10-06 16:59:26 +0400 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2014-10-06 18:21:37 +0400 |
commit | 247b869967812891f6b77585184a3b09f3f16a18 (patch) | |
tree | f3d6231787556630762e6d9e46dc6ba3bc2514d0 /source/blender/compositor/operations/COM_OpenCLKernels.cl | |
parent | a9521c428eeacd7fa05ee60badf4d04361787252 (diff) |
Compositor: implement OpenCL backend for gaussian blur
Pretty much straightforward change which gives around 30%
speedup on my laptop and around 2x speedup on desktop in
the BI (which uses gts580). Tested with huge blurs (like
10% of blur) which was rather common during Caminandes.
For now OpenCL is only limited for blur size more than
100 pixels.
This is a bit experimental still, feedback is welcome.
Reviewers: jbakker, lukastoenne
Subscribers: ton
Differential Revision: https://developer.blender.org/D576
Diffstat (limited to 'source/blender/compositor/operations/COM_OpenCLKernels.cl')
-rw-r--r-- | source/blender/compositor/operations/COM_OpenCLKernels.cl | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/source/blender/compositor/operations/COM_OpenCLKernels.cl b/source/blender/compositor/operations/COM_OpenCLKernels.cl index 00b3825d8b3..1b965eb8659 100644 --- a/source/blender/compositor/operations/COM_OpenCLKernels.cl +++ b/source/blender/compositor/operations/COM_OpenCLKernels.cl @@ -250,3 +250,66 @@ __kernel void directionalBlurKernel(__read_only image2d_t inputImage, __write_o write_imagef(output, coords, col); } + +// KERNEL --- GAUSSIAN BLUR --- +__kernel void gaussianXBlurOperationKernel(__read_only image2d_t inputImage, + int2 offsetInput, + __write_only image2d_t output, + int2 offsetOutput, + int filter_size, + int2 dimension, + __global float *gausstab, + int2 offset) +{ + float4 color = {0.0f, 0.0f, 0.0f, 0.0f}; + int2 coords = {get_global_id(0), get_global_id(1)}; + coords += offset; + const int2 realCoordinate = coords + offsetOutput; + int2 inputCoordinate = realCoordinate - offsetInput; + float weight = 0.0f; + + int xmin = max(realCoordinate.x - filter_size, 0) - offsetInput.x; + int xmax = min(realCoordinate.x + filter_size + 1, dimension.x) - offsetInput.x; + + for (int nx = xmin, i = max(filter_size - realCoordinate.x, 0); nx < xmax; ++nx, ++i) { + float w = gausstab[i]; + inputCoordinate.x = nx; + color += read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate) * w; + weight += w; + } + + color *= (1.0f / weight); + + write_imagef(output, coords, color); +} + +__kernel void gaussianYBlurOperationKernel(__read_only image2d_t inputImage, + int2 offsetInput, + __write_only image2d_t output, + int2 offsetOutput, + int filter_size, + int2 dimension, + __global float *gausstab, + int2 offset) +{ + float4 color = {0.0f, 0.0f, 0.0f, 0.0f}; + int2 coords = {get_global_id(0), get_global_id(1)}; + coords += offset; + const int2 realCoordinate = coords + offsetOutput; + int2 inputCoordinate = realCoordinate - offsetInput; + float weight = 0.0f; + + int ymin = max(realCoordinate.y - filter_size, 0) - offsetInput.y; + int ymax = min(realCoordinate.y + filter_size + 1, dimension.y) - offsetInput.y; + + for (int ny = ymin, i = max(filter_size - realCoordinate.y, 0); ny < ymax; ++ny, ++i) { + float w = gausstab[i]; + inputCoordinate.y = ny; + color += read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate) * w; + weight += w; + } + + color *= (1.0f / weight); + + write_imagef(output, coords, color); +} |