From 4fb850c72ee15cf0a305079e8fb6d23f180fb419 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Wed, 11 Jul 2012 20:51:00 +0000 Subject: Compositor: re-optimized the Defocus node. * localized MemoryBuffers * removed read(x,y) calls * shuffled some lines in the execute pixel * added a readNoCheck function to the memorybuffer (only use this when you are certain you are reading a pixel inside the memorybuffer). --- .../blender/compositor/intern/COM_MemoryBuffer.h | 7 +++ .../compositor/operations/COM_OpenCLKernels.cl | 24 ++++---- .../compositor/operations/COM_OpenCLKernels.cl.h | 20 +++---- .../COM_VariableSizeBokehBlurOperation.cpp | 66 +++++++++++++++------- .../COM_VariableSizeBokehBlurOperation.h | 4 ++ 5 files changed, 79 insertions(+), 42 deletions(-) diff --git a/source/blender/compositor/intern/COM_MemoryBuffer.h b/source/blender/compositor/intern/COM_MemoryBuffer.h index 51a45efc051..eed0c796cd8 100644 --- a/source/blender/compositor/intern/COM_MemoryBuffer.h +++ b/source/blender/compositor/intern/COM_MemoryBuffer.h @@ -140,6 +140,13 @@ public: } } + inline void readNoCheck(float result[4], int x, int y) { + const int dx = x - this->m_rect.xmin; + const int dy = y - this->m_rect.ymin; + const int offset = (this->m_chunkWidth * dy + dx) * COM_NUMBER_OF_CHANNELS; + copy_v4_v4(result, &this->m_buffer[offset]); + } + void writePixel(int x, int y, const float color[4]); void addPixel(int x, int y, const float color[4]); inline void readCubic(float result[4], float x, float y) diff --git a/source/blender/compositor/operations/COM_OpenCLKernels.cl b/source/blender/compositor/operations/COM_OpenCLKernels.cl index 41838e41fba..cbbb4d0b3f2 100644 --- a/source/blender/compositor/operations/COM_OpenCLKernels.cl +++ b/source/blender/compositor/operations/COM_OpenCLKernels.cl @@ -101,16 +101,16 @@ __kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2 float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0; color_accum = 
read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate); - for (int ny = miny; ny < maxy; ny += step) { - for (int nx = minx; nx < maxx; nx += step) { - if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) { - inputCoordinate.s0 = nx - offsetInput.s0; - inputCoordinate.s1 = ny - offsetInput.s1; - tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0; - if (size > threshold && tempSize > threshold) { - float dx = nx - realCoordinate.s0; - float dy = ny - realCoordinate.s1; - if (dx != 0 || dy != 0) { + if (size > threshold) { + for (int ny = miny; ny < maxy; ny += step) { + inputCoordinate.s1 = ny - offsetInput.s1; + float dy = ny - realCoordinate.s1; + for (int nx = minx; nx < maxx; nx += step) { + float dx = nx - realCoordinate.s0; + if (dx != 0 || dy != 0) { + inputCoordinate.s0 = nx - offsetInput.s0; + tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0; + if (tempSize > threshold) { if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) { float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize}; bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv); @@ -121,8 +121,8 @@ __kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2 } } } - } - } + } + } } color = color_accum * (1.0f / multiplier_accum); diff --git a/source/blender/compositor/operations/COM_OpenCLKernels.cl.h b/source/blender/compositor/operations/COM_OpenCLKernels.cl.h index d57aa1366de..cc18039c5b1 100644 --- a/source/blender/compositor/operations/COM_OpenCLKernels.cl.h +++ b/source/blender/compositor/operations/COM_OpenCLKernels.cl.h @@ -103,16 +103,16 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/*\n" \ " float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \ " color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);\n" \ "\n" \ -" for (int ny = miny; ny < maxy; ny += step) {\n" \ -" for (int nx = minx; nx < maxx; nx += step) {\n" \ -" if (nx >= 0 && nx < 
dimension.s0 && ny >= 0 && ny < dimension.s1) {\n" \ -" inputCoordinate.s0 = nx - offsetInput.s0;\n" \ -" inputCoordinate.s1 = ny - offsetInput.s1;\n" \ -" tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \ -" if (size > threshold && tempSize > threshold) {\n" \ -" float dx = nx - realCoordinate.s0;\n" \ -" float dy = ny - realCoordinate.s1;\n" \ -" if (dx != 0 || dy != 0) {\n" \ +" if (size > threshold) {\n" \ +" for (int ny = miny; ny < maxy; ny += step) {\n" \ +" inputCoordinate.s1 = ny - offsetInput.s1;\n" \ +" float dy = ny - realCoordinate.s1;\n" \ +" for (int nx = minx; nx < maxx; nx += step) {\n" \ +" float dx = nx - realCoordinate.s0;\n" \ +" if (dx != 0 || dy != 0) {\n" \ +" inputCoordinate.s0 = nx - offsetInput.s0;\n" \ +" tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \ +" if (tempSize > threshold) {\n" \ " if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) {\n" \ " float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize};\n" \ " bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);\n" \ diff --git a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp index 61538fde258..5d17526185b 100644 --- a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp +++ b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.cpp @@ -62,8 +62,29 @@ void VariableSizeBokehBlurOperation::initExecution() QualityStepHelper::initExecution(COM_QH_INCREASE); } +void *VariableSizeBokehBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers) +{ + MemoryBuffer** result = new MemoryBuffer*[3]; + result[0] = (MemoryBuffer*)this->m_inputProgram->initializeTileData(rect, memoryBuffers); + result[1] = (MemoryBuffer*)this->m_inputBokehProgram->initializeTileData(rect, memoryBuffers); + result[2] = 
(MemoryBuffer*)this->m_inputSizeProgram->initializeTileData(rect, memoryBuffers); + return result; +} + +void VariableSizeBokehBlurOperation::deinitializeTileData(rcti *rect, MemoryBuffer **memoryBuffers, void *data) +{ + MemoryBuffer** result = (MemoryBuffer**)data; + delete[] result; +} + void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, MemoryBuffer *inputBuffers[], void *data) { + MemoryBuffer** buffers = (MemoryBuffer**)data; + MemoryBuffer* inputProgramBuffer = buffers[0]; + MemoryBuffer* inputBokehBuffer = buffers[1]; + MemoryBuffer* inputSizeBuffer = buffers[2]; + float* inputSizeFloatBuffer = inputSizeBuffer->getBuffer(); + float* inputProgramFloatBuffer = inputProgramBuffer->getBuffer(); float readColor[4]; float bokeh[4]; float tempSize[4]; @@ -84,32 +105,37 @@ void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, Me int maxy = MIN2(y + this->m_maxBlur, m_height); #endif { - this->m_inputSizeProgram->read(tempSize, x, y, COM_PS_NEAREST, inputBuffers); - this->m_inputProgram->read(readColor, x, y, COM_PS_NEAREST, inputBuffers); + inputSizeBuffer->readNoCheck(tempSize, x, y); + inputProgramBuffer->readNoCheck(readColor, x, y); + add_v4_v4(color_accum, readColor); add_v4_fl(multiplier_accum, 1.0f); float sizeCenter = tempSize[0]; - for (int ny = miny; ny < maxy; ny += QualityStepHelper::getStep()) { - for (int nx = minx; nx < maxx; nx += QualityStepHelper::getStep()) { - if (nx >= 0 && nx < this->getWidth() && ny >= 0 && ny < getHeight()) { - this->m_inputSizeProgram->read(tempSize, nx, ny, COM_PS_NEAREST, inputBuffers); - float size = tempSize[0]; - float fsize = fabsf(size); - if (sizeCenter > this->m_threshold && size > this->m_threshold) { - float dx = nx - x; - float dy = ny - y; - if (nx == x && ny == y) { - } - else if (fsize > fabsf(dx) && fsize > fabsf(dy)) { - float u = (256 + (dx/size) * 256); - float v = (256 + (dy/size) * 256); - this->m_inputBokehProgram->read(bokeh, u, v, COM_PS_NEAREST, 
inputBuffers); - this->m_inputProgram->read(readColor, nx, ny, COM_PS_NEAREST, inputBuffers); - madd_v4_v4v4(color_accum, bokeh, readColor); - add_v4_v4(multiplier_accum, bokeh); + const int addXStep = QualityStepHelper::getStep()*COM_NUMBER_OF_CHANNELS; + + if (sizeCenter > this->m_threshold) { + for (int ny = miny; ny < maxy; ny += QualityStepHelper::getStep()) { + float dy = ny - y; + int offsetNy = ny * inputSizeBuffer->getWidth() * COM_NUMBER_OF_CHANNELS; + int offsetNxNy = offsetNy + (minx*COM_NUMBER_OF_CHANNELS); + for (int nx = minx; nx < maxx; nx += QualityStepHelper::getStep()) { + if (nx != x || ny != y) + { + float size = inputSizeFloatBuffer[offsetNxNy]; + if (size > this->m_threshold) { + float fsize = fabsf(size); + float dx = nx - x; + if (fsize > fabsf(dx) && fsize > fabsf(dy)) { + float u = (256.0f + (dx/size) * 256.0f); + float v = (256.0f + (dy/size) * 256.0f); + inputBokehBuffer->readNoCheck(bokeh, u, v); + madd_v4_v4v4(color_accum, bokeh, &inputProgramFloatBuffer[offsetNxNy]); + add_v4_v4(multiplier_accum, bokeh); + } } } + offsetNxNy += addXStep; } } } diff --git a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h index 6c9196c3eab..0ecfb5a542c 100644 --- a/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h +++ b/source/blender/compositor/operations/COM_VariableSizeBokehBlurOperation.h @@ -50,6 +50,10 @@ public: */ void initExecution(); + void *initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers); + + void deinitializeTileData(rcti *rect, MemoryBuffer **memoryBuffers, void *data); + /** * Deinitialize the execution */ -- cgit v1.2.3