From 4c971d572785c63a4c715210308664941c5c8f26 Mon Sep 17 00:00:00 2001 From: Monique Dewanchand Date: Wed, 30 Jan 2013 15:43:13 +0000 Subject: Patch by erwin94 [#34015] dilate/erode multithreading another patch for the dilate/erode step method, still without any functional changes. This time it keeps the general algorithm but uses the tile system to make it multithreaded. I could not measure a speedup on my 2-core laptop, but hope that it will be faster for more cores. The immediate speedup that is very visible though is that tiles come in as soon as they are calculated and a dilate/erode node does not block the whole image to be calculated. till then, David. --- .../operations/COM_DilateErodeOperation.cpp | 335 +++++++++++---------- .../operations/COM_DilateErodeOperation.h | 2 +- 2 files changed, 184 insertions(+), 153 deletions(-) (limited to 'source/blender/compositor/operations') diff --git a/source/blender/compositor/operations/COM_DilateErodeOperation.cpp b/source/blender/compositor/operations/COM_DilateErodeOperation.cpp index ecc618a5346..ecb4ef93e9b 100644 --- a/source/blender/compositor/operations/COM_DilateErodeOperation.cpp +++ b/source/blender/compositor/operations/COM_DilateErodeOperation.cpp @@ -323,124 +323,147 @@ DilateStepOperation::DilateStepOperation() : NodeOperation() void DilateStepOperation::initExecution() { this->m_inputProgram = this->getInputSocketReader(0); - this->m_cached_buffer = NULL; - this->initMutex(); +} + + +// small helper to pass data from initializeTileData to executePixel +typedef struct tile_info { + rcti rect; + int width; + float *buffer; +} tile_info; + +static tile_info *create_cache(int xmin, int xmax, int ymin, int ymax) +{ + tile_info *result = (tile_info *)MEM_mallocN(sizeof(tile_info), "dilate erode tile"); + result->rect.xmin = xmin; + result->rect.xmax = xmax; + result->rect.ymin = ymin; + result->rect.ymax = ymax; + result->width = xmax - xmin; + result->buffer = (float *)MEM_callocN(sizeof(float) * (ymax - ymin) * result->width, "dilate erode cache"); + return result; } void *DilateStepOperation::initializeTileData(rcti *rect) { - if (this->m_cached_buffer != NULL) { - return this->m_cached_buffer; - } - lockMutex(); - if (this->m_cached_buffer == NULL) { - MemoryBuffer *buffer = (MemoryBuffer *)this->m_inputProgram->initializeTileData(NULL); - float *rectf = buffer->convertToValueBuffer(); - int x, y, i; - int bwidth = buffer->getWidth(); - int bheight = buffer->getHeight(); - - /* - The following is based on the van Herk/Gil-Werman algorithm for morphology operations. - */ - int half_window = this->m_iterations; - int window = half_window * 2 + 1; - float *temp = (float *)MEM_mallocN((2 * window - 1) * sizeof(float), "dilate erode temp"); - float *buf = (float *)MEM_mallocN((max(bwidth, bheight) + 5 * half_window) * sizeof(float), "dilate erode buf"); - - for (y = 0; y < bheight; y++) { - for (x = 0; x < window - 1; x++) { - buf[x] = -MAXFLOAT; - } - for (x = 0; x < bwidth; x++) { - buf[x + window - 1] = rectf[bwidth * y + x]; - } - for (x = bwidth + window - 1; x < bwidth + 5 * half_window; x++) { - buf[x] = -MAXFLOAT; - } + MemoryBuffer *tile = (MemoryBuffer *)this->m_inputProgram->initializeTileData(NULL); + int x, y, i; + int width = tile->getWidth(); + int height = tile->getHeight(); + float *buffer = tile->getBuffer(); + + int half_window = this->m_iterations; + int window = half_window * 2 + 1; + + int xmin = max(0, rect->xmin - half_window); + int ymin = max(0, rect->ymin - half_window); + int xmax = min(width, rect->xmax + half_window); + int ymax = min(height, rect->ymax + half_window); + + int bwidth = rect->xmax - rect->xmin; + int bheight = rect->ymax - rect->ymin; + + // Note: Cache buffer has original tilesize width, but new height. + // We have to calculate the additional rows in the first pass, + // to have valid data available for the second pass. + tile_info *result = create_cache(rect->xmin, rect->xmax, ymin, ymax); + float *rectf = result->buffer; + + // temp holds maxima for every step in the algorithm, buf holds a + // single row or column of input values, padded with MAXFLOATs to + // simplify the logic. + float *temp = (float *)MEM_mallocN(sizeof(float) * (2 * window - 1), "dilate erode temp"); + float *buf = (float *)MEM_mallocN(sizeof(float) * (max(bwidth, bheight) + 5 * half_window), "dilate erode buf"); + + // The following is based on the van Herk/Gil-Werman algorithm for morphology operations. + // first pass, horizontal dilate/erode + for (y = ymin; y < ymax; y++) { + for (x = 0; x < bwidth + 5 * half_window; x++) { + buf[x] = -MAXFLOAT; + } + for (x = xmin; x < xmax; ++x) { + buf[x - rect->xmin + window - 1] = buffer[4*(y * width + x)]; + } - for (i = 0; i < (bwidth + 3 * half_window) / window; i++) { - int start = (i + 1) * window - 1; + for (i = 0; i < (bwidth + 3 * half_window) / window; i++) { + int start = (i + 1) * window - 1; - temp[window - 1] = buf[start]; - for (x = 1; x < window; x++) { - temp[window - 1 - x] = max(temp[window - x], buf[start - x]); - temp[window - 1 + x] = max(temp[window + x - 2], buf[start + x]); - } + temp[window - 1] = buf[start]; + for (x = 1; x < window; x++) { + temp[window - 1 - x] = max(temp[window - x], buf[start - x]); + temp[window - 1 + x] = max(temp[window + x - 2], buf[start + x]); + } - start = half_window + (i - 1) * window + 1; - for (x = -min(0, start); x < window - max(0, start + window - bwidth); x++) { - rectf[bwidth * y + (start + x)] = max(temp[x], temp[x + window - 1]); - } + start = half_window + (i - 1) * window + 1; + for (x = -min(0, start); x < window - max(0, start + window - bwidth); x++) { + rectf[bwidth * (y-ymin) + (start + x)] = max(temp[x], temp[x + window - 1]); } } + } - for (x = 0; x < bwidth; x++) { - for (y = 0; y < window - 1; y++) { - buf[y] = -MAXFLOAT; - } - for (y = 0; y < bheight; y++) { - buf[y + window - 1] = rectf[bwidth * y + x]; - } - for (y = bheight + window - 1; y < bheight + 5 * half_window; y++) { - buf[y] = -MAXFLOAT; - } + // second pass, vertical dilate/erode + for (x = 0; x < bwidth; x++) { + for (y = 0; y < bheight + 5 * half_window; y++) { + buf[y] = -MAXFLOAT; + } + for (y = ymin; y < ymax; y++) { + buf[y - rect->ymin + window - 1] = rectf[(y-ymin) * bwidth + x]; + } - for (i = 0; i < (bheight + 3 * half_window) / window; i++) { - int start = (i + 1) * window - 1; + for (i = 0; i < (bheight + 3 * half_window) / window; i++) { + int start = (i + 1) * window - 1; - temp[window - 1] = buf[start]; - for (y = 1; y < window; y++) { - temp[window - 1 - y] = max(temp[window - y], buf[start - y]); - temp[window - 1 + y] = max(temp[window + y - 2], buf[start + y]); - } + temp[window - 1] = buf[start]; + for (y = 1; y < window; y++) { + temp[window - 1 - y] = max(temp[window - y], buf[start - y]); + temp[window - 1 + y] = max(temp[window + y - 2], buf[start + y]); + } - start = half_window + (i - 1) * window + 1; - for (y = -min(0, start); y < window - max(0, start + window - bheight); y++) { - rectf[bwidth * (y + start) + x] = max(temp[y], temp[y + window - 1]); - } + start = half_window + (i - 1) * window + 1; + for (y = -min(0, start); y < window - max(0, start + window - bheight); y++) { + rectf[bwidth * (y + start + (rect->ymin-ymin)) + x] = max(temp[y], temp[y + window - 1]); } } - - MEM_freeN(temp); - MEM_freeN(buf); - this->m_cached_buffer = rectf; } - unlockMutex(); - return this->m_cached_buffer; + + MEM_freeN(temp); + MEM_freeN(buf); + + return result; } void DilateStepOperation::executePixel(float output[4], int x, int y, void *data) { - output[0] = this->m_cached_buffer[y * this->getWidth() + x]; + tile_info *tile = (tile_info *)data; + int nx = x - tile->rect.xmin; + int ny = y - tile->rect.ymin; + output[0] = tile->buffer[tile->width * ny + nx]; } void DilateStepOperation::deinitExecution() { this->m_inputProgram = NULL; - this->deinitMutex(); - if (this->m_cached_buffer) { - MEM_freeN(this->m_cached_buffer); - this->m_cached_buffer = NULL; - } +} + +void DilateStepOperation::deinitializeTileData(rcti *rect, void *data) +{ + tile_info *tile = (tile_info *)data; + MEM_freeN(tile->buffer); + MEM_freeN(tile); } bool DilateStepOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) { - if (this->m_cached_buffer) { - return false; - } - else { - rcti newInput; - - newInput.xmax = getWidth(); - newInput.xmin = 0; - newInput.ymax = getHeight(); - newInput.ymin = 0; - - return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); - } + rcti newInput; + int it = this->m_iterations; + newInput.xmax = input->xmax + it; + newInput.xmin = input->xmin - it; + newInput.ymax = input->ymax + it; + newInput.ymin = input->ymin - it; + + return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output); } // Erode step @@ -451,80 +474,88 @@ ErodeStepOperation::ErodeStepOperation() : DilateStepOperation() void *ErodeStepOperation::initializeTileData(rcti *rect) { - if (this->m_cached_buffer != NULL) { - return this->m_cached_buffer; - } - lockMutex(); - if (this->m_cached_buffer == NULL) { - MemoryBuffer *buffer = (MemoryBuffer *)this->m_inputProgram->initializeTileData(NULL); - float *rectf = buffer->convertToValueBuffer(); - int x, y, i; - int bwidth = buffer->getWidth(); - int bheight = buffer->getHeight(); - - int half_window = this->m_iterations; - int window = half_window * 2 + 1; - float *temp = (float *)MEM_mallocN((2 * window - 1) * sizeof(float), "dilate erode temp"); - float *buf = (float *)MEM_mallocN((max(bwidth, bheight) + 5 * half_window) * sizeof(float), "dilate erode buf"); - - for (y = 0; y < bheight; y++) { - for (x = 0; x < window - 1; x++) { - buf[x] = MAXFLOAT; - } - for (x = 0; x < bwidth; x++) { - buf[x + window - 1] = rectf[bwidth * y + x]; - } - for (x = bwidth + window - 1; x < bwidth + 5 * half_window; x++) { - buf[x] = MAXFLOAT; - } + MemoryBuffer *tile = (MemoryBuffer *)this->m_inputProgram->initializeTileData(NULL); + int x, y, i; + int width = tile->getWidth(); + int height = tile->getHeight(); + float *buffer = tile->getBuffer(); + + int half_window = this->m_iterations; + int window = half_window * 2 + 1; + + int xmin = max(0, rect->xmin - half_window); + int ymin = max(0, rect->ymin - half_window); + int xmax = min(width, rect->xmax + half_window); + int ymax = min(height, rect->ymax + half_window); + + int bwidth = rect->xmax - rect->xmin; + int bheight = rect->ymax - rect->ymin; + + // Note: Cache buffer has original tilesize width, but new height. + // We have to calculate the additional rows in the first pass, + // to have valid data available for the second pass. + tile_info *result = create_cache(rect->xmin, rect->xmax, ymin, ymax); + float *rectf = result->buffer; + + // temp holds maxima for every step in the algorithm, buf holds a + // single row or column of input values, padded with MAXFLOATs to + // simplify the logic. + float *temp = (float *)MEM_mallocN(sizeof(float) * (2 * window - 1), "dilate erode temp"); + float *buf = (float *)MEM_mallocN(sizeof(float) * (max(bwidth, bheight) + 5 * half_window), "dilate erode buf"); + + // The following is based on the van Herk/Gil-Werman algorithm for morphology operations. + // first pass, horizontal dilate/erode + for (y = ymin; y < ymax; y++) { + for (x = 0; x < bwidth + 5 * half_window; x++) { + buf[x] = MAXFLOAT; + } + for (x = xmin; x < xmax; ++x) { + buf[x - rect->xmin + window - 1] = buffer[4*(y * width + x)]; + } - for (i = 0; i < (bwidth + 3 * half_window) / window; i++) { - int start = (i + 1) * window - 1; + for (i = 0; i < (bwidth + 3 * half_window) / window; i++) { + int start = (i + 1) * window - 1; - temp[window - 1] = buf[start]; - for (x = 1; x < window; x++) { - temp[window - 1 - x] = min(temp[window - x], buf[start - x]); - temp[window - 1 + x] = min(temp[window + x - 2], buf[start + x]); - } + temp[window - 1] = buf[start]; + for (x = 1; x < window; x++) { + temp[window - 1 - x] = min(temp[window - x], buf[start - x]); + temp[window - 1 + x] = min(temp[window + x - 2], buf[start + x]); + } - start = half_window + (i - 1) * window + 1; - for (x = -min(0, start); x < window - max(0, start + window - bwidth); x++) { - rectf[bwidth * y + (start + x)] = min(temp[x], temp[x + window - 1]); - } + start = half_window + (i - 1) * window + 1; + for (x = -min(0, start); x < window - max(0, start + window - bwidth); x++) { + rectf[bwidth * (y-ymin) + (start + x)] = min(temp[x], temp[x + window - 1]); } } + } - for (x = 0; x < bwidth; x++) { - for (y = 0; y < window - 1; y++) { - buf[y] = MAXFLOAT; - } - for (y = 0; y < bheight; y++) { - buf[y + window - 1] = rectf[bwidth * y + x]; - } - for (y = bheight + window - 1; y < bheight + 5 * half_window; y++) { - buf[y] = MAXFLOAT; - } + // second pass, vertical dilate/erode + for (x = 0; x < bwidth; x++) { + for (y = 0; y < bheight + 5 * half_window; y++) { + buf[y] = MAXFLOAT; + } + for (y = ymin; y < ymax; y++) { + buf[y - rect->ymin + window - 1] = rectf[(y-ymin) * bwidth + x]; + } - for (i = 0; i < (bheight + 3 * half_window) / window; i++) { - int start = (i + 1) * window - 1; + for (i = 0; i < (bheight + 3 * half_window) / window; i++) { + int start = (i + 1) * window - 1; - temp[window - 1] = buf[start]; - for (y = 1; y < window; y++) { - temp[window - 1 - y] = min(temp[window - y], buf[start - y]); - temp[window - 1 + y] = min(temp[window + y - 2], buf[start + y]); - } + temp[window - 1] = buf[start]; + for (y = 1; y < window; y++) { + temp[window - 1 - y] = min(temp[window - y], buf[start - y]); + temp[window - 1 + y] = min(temp[window + y - 2], buf[start + y]); + } - start = half_window + (i - 1) * window + 1; - for (y = -min(0, start); y < window - max(0, start + window - bheight); y++) { - rectf[bwidth * (y + start) + x] = min(temp[y], temp[y + window - 1]); - } + start = half_window + (i - 1) * window + 1; + for (y = -min(0, start); y < window - max(0, start + window - bheight); y++) { + rectf[bwidth * (y + start + (rect->ymin-ymin)) + x] = min(temp[y], temp[y + window - 1]); } } - - MEM_freeN(temp); - MEM_freeN(buf); - this->m_cached_buffer = rectf; } - unlockMutex(); - return this->m_cached_buffer; + + MEM_freeN(temp); + MEM_freeN(buf); + + return result; } diff --git a/source/blender/compositor/operations/COM_DilateErodeOperation.h b/source/blender/compositor/operations/COM_DilateErodeOperation.h index 47480d47c3b..51bad81d0ca 100644 --- a/source/blender/compositor/operations/COM_DilateErodeOperation.h +++ b/source/blender/compositor/operations/COM_DilateErodeOperation.h @@ -128,7 +128,6 @@ protected: int m_iterations; - float *m_cached_buffer; public: DilateStepOperation(); @@ -147,6 +146,7 @@ public: * Deinitialize the execution */ void deinitExecution(); + void deinitializeTileData(rcti *rect, void *data); void setIterations(int iterations) { this->m_iterations = iterations; } -- cgit v1.2.3