diff options
author | Jeroen Bakker <j.bakker@atmind.nl> | 2012-06-08 13:17:07 +0400 |
---|---|---|
committer | Jeroen Bakker <j.bakker@atmind.nl> | 2012-06-08 13:17:07 +0400 |
commit | de7fe937ff24121ce8c66af902639cd96244a55f (patch) | |
tree | 625821742e2a136e10fe1cd76a81303766b8633f /source/blender/compositor/intern | |
parent | 95641388471d178552fea26bb477c13536bd58ef (diff) |
* Added OpenCL kernel for bokeh blur
* Uncomment COM_OPENCL_ENABLED from COM_defines.h to test
Diffstat (limited to 'source/blender/compositor/intern')
10 files changed, 184 insertions, 94 deletions
diff --git a/source/blender/compositor/intern/COM_ExecutionGroup.cpp b/source/blender/compositor/intern/COM_ExecutionGroup.cpp index 0ebf9af207b..44b3c8dafbb 100644 --- a/source/blender/compositor/intern/COM_ExecutionGroup.cpp +++ b/source/blender/compositor/intern/COM_ExecutionGroup.cpp @@ -57,7 +57,7 @@ ExecutionGroup::ExecutionGroup() this->chunksFinished = 0; } -int ExecutionGroup::getRenderPriotrity() +CompositorPriority ExecutionGroup::getRenderPriotrity() { return this->getOutputNodeOperation()->getRenderPriority(); } @@ -401,47 +401,11 @@ MemoryBuffer** ExecutionGroup::getInputBuffersOpenCL(int chunkNumber) return memoryBuffers; } -// @todo: for opencl the memory buffers size needs to be same as the needed size -// currently this method is not called, but will be when opencl nodes are created MemoryBuffer *ExecutionGroup::constructConsolidatedMemoryBuffer(MemoryProxy *memoryProxy, rcti *rect) { - // find all chunks inside the rect - // determine minxchunk, minychunk, maxxchunk, maxychunk where x and y are chunknumbers - float chunkSizef = this->chunkSize; - - int indexx, indexy; - - const int minxchunk = floor(rect->xmin/chunkSizef); - const int maxxchunk = ceil((rect->xmax-1)/chunkSizef); - const int minychunk = floor(rect->ymin/chunkSizef); - const int maxychunk = ceil((rect->ymax-1)/chunkSizef); - - if (maxxchunk== minxchunk+1 && maxychunk == minychunk+1) { - MemoryBuffer *result =memoryProxy->getBuffer(); - return result; - } - - rcti chunkRect; - chunkRect.xmin = minxchunk*this->chunkSize; - chunkRect.xmax = maxxchunk*this->chunkSize; - chunkRect.ymin = minychunk*this->chunkSize; - chunkRect.ymax = maxychunk*this->chunkSize; - - CLAMP(chunkRect.xmin, 0, (int)this->width); - CLAMP(chunkRect.xmax, 0, (int)this->width); - CLAMP(chunkRect.ymin, 0, (int)this->height); - CLAMP(chunkRect.ymax, 0, (int)this->height); - - MemoryBuffer *result = new MemoryBuffer(memoryProxy, &chunkRect); - - for (indexx = max(minxchunk, 0); indexx<min((int)this->numberOfXChunks, maxxchunk) ; indexx++) { - for (indexy = max(minychunk, 0); indexy<min((int)this->numberOfYChunks, maxychunk) ; indexy++) { - /* int chunkNumber = indexx+indexy*this->numberOfXChunks; */ /* UNUSED */ - MemoryBuffer *chunkBuffer = memoryProxy->getBuffer(); - result->copyContentFrom(chunkBuffer); - } - } - + MemoryBuffer* imageBuffer = memoryProxy->getBuffer(); + MemoryBuffer* result = new MemoryBuffer(memoryProxy, rect); + result->copyContentFrom(imageBuffer); return result; } @@ -487,14 +451,14 @@ void ExecutionGroup::determineChunkRect(rcti *rect, const unsigned int chunkNumb MemoryBuffer *ExecutionGroup::allocateOutputBuffer(int chunkNumber, rcti *rect) { - MemoryBuffer *outputBuffer = NULL; - // output allocation is only valid when our outputoperation is a memorywriter + // we asume that this method is only called from complex execution groups. NodeOperation * operation = this->getOutputNodeOperation(); if (operation->isWriteBufferOperation()) { -/* WriteBufferOperation *writeOperation = (WriteBufferOperation*)operation; */ /* UNUSED */ -// @todo outputBuffer = MemoryManager::allocateMemoryBuffer(writeOperation->getMemoryProxy(), chunkNumber, rect); + WriteBufferOperation *writeOperation = (WriteBufferOperation*)operation; + MemoryBuffer *buffer = new MemoryBuffer(writeOperation->getMemoryProxy(), rect); + return buffer; } - return outputBuffer; + return NULL; } @@ -600,11 +564,6 @@ void ExecutionGroup::determineDependingMemoryProxies(vector<MemoryProxy*> *memor } } -bool ExecutionGroup::operator ==(const ExecutionGroup & executionGroup) const -{ - return this->getOutputNodeOperation() == executionGroup.getOutputNodeOperation(); -} - bool ExecutionGroup::isOpenCL() { return this->openCL; diff --git a/source/blender/compositor/intern/COM_ExecutionGroup.h b/source/blender/compositor/intern/COM_ExecutionGroup.h index cbdc9bb1787..416a78eb8b8 100644 --- a/source/blender/compositor/intern/COM_ExecutionGroup.h +++ b/source/blender/compositor/intern/COM_ExecutionGroup.h @@ -167,13 +167,7 @@ private: * @param operation the operation to be added */ bool canContainOperation(NodeOperation *operation); - - /** - * @brief get the Render priority of this ExecutionGroup - * @see ExecutionSystem.execute - */ - int getRenderPriotrity(); - + /** * @brief calculate the actual chunk size of this execution group. * @note A chunk size is an unsigned int that is both the height and width of a chunk. @@ -396,17 +390,21 @@ public: * @see determineChunkSize() */ void determineChunkRect(rcti *rect, const unsigned int chunkNumber) const; - - - bool operator ==(const ExecutionGroup &executionGroup) const; - - /** + + /** * @brief can this ExecutionGroup be scheduled on an OpenCLDevice * @see WorkScheduler.schedule */ bool isOpenCL(); - + void setChunksize(int chunksize) {this->chunkSize = chunksize;} + + /** + * @brief get the Render priority of this ExecutionGroup + * @see ExecutionSystem.execute + */ + CompositorPriority getRenderPriotrity(); + }; #endif diff --git a/source/blender/compositor/intern/COM_ExecutionSystem.cpp b/source/blender/compositor/intern/COM_ExecutionSystem.cpp index 96d2a6f4434..8c0b37a0685 100644 --- a/source/blender/compositor/intern/COM_ExecutionSystem.cpp +++ b/source/blender/compositor/intern/COM_ExecutionSystem.cpp @@ -127,20 +127,9 @@ void ExecutionSystem::execute() WorkScheduler::start(this->context); - - vector<ExecutionGroup*> executionGroups; - this->findOutputExecutionGroup(&executionGroups); - - /* start execution of the ExecutionGroups based on priority of their output node */ - for (int priority = 9 ; priority>=0 ; priority--) { - for (index = 0 ; index < executionGroups.size(); index ++) { - ExecutionGroup *group = executionGroups[index]; - NodeOperation *output = group->getOutputNodeOperation(); - if (output->getRenderPriority() == priority) { - group->execute(this); - } - } - } + executeGroups(COM_PRIORITY_HIGH); + executeGroups(COM_PRIORITY_MEDIUM); + executeGroups(COM_PRIORITY_LOW); WorkScheduler::finish(); WorkScheduler::stop(); @@ -155,6 +144,18 @@ void ExecutionSystem::execute() } } +void ExecutionSystem::executeGroups(CompositorPriority priority) +{ + int index; + vector<ExecutionGroup*> executionGroups; + this->findOutputExecutionGroup(&executionGroups, priority); + + for (index = 0 ; index < executionGroups.size(); index ++) { + ExecutionGroup *group = executionGroups[index]; + group->execute(this); + } +} + void ExecutionSystem::addOperation(NodeOperation *operation) { ExecutionSystemHelper::addOperation(this->operations, operation); @@ -304,6 +305,17 @@ void ExecutionSystem::determineActualSocketDataTypes(vector<NodeBase*> &nodes) } } +void ExecutionSystem::findOutputExecutionGroup(vector<ExecutionGroup*> *result, CompositorPriority priority) const +{ + unsigned int index; + for (index = 0 ; index < this->groups.size() ; index ++) { + ExecutionGroup *group = this->groups[index]; + if (group->isOutputExecutionGroup() && group->getRenderPriotrity() == priority) { + result->push_back(group); + } + } +} + void ExecutionSystem::findOutputExecutionGroup(vector<ExecutionGroup*> *result) const { unsigned int index; diff --git a/source/blender/compositor/intern/COM_ExecutionSystem.h b/source/blender/compositor/intern/COM_ExecutionSystem.h index 85fec8b6145..510e58ba1bb 100644 --- a/source/blender/compositor/intern/COM_ExecutionSystem.h +++ b/source/blender/compositor/intern/COM_ExecutionSystem.h @@ -141,6 +141,11 @@ private: //methods /** * find all execution group with output nodes */ + void findOutputExecutionGroup(vector<ExecutionGroup*> *result, CompositorPriority priority) const; + + /** + * find all execution group with output nodes + */ void findOutputExecutionGroup(vector<ExecutionGroup*> *result) const; public: @@ -224,6 +229,8 @@ private: * @param nodes list of nodes or operations to do the data type determination */ void determineActualSocketDataTypes(vector<NodeBase*> &nodes); + + void executeGroups(CompositorPriority priority); }; #endif diff --git a/source/blender/compositor/intern/COM_Node.cpp b/source/blender/compositor/intern/COM_Node.cpp index ba5e21d53ae..264725b4b2c 100644 --- a/source/blender/compositor/intern/COM_Node.cpp +++ b/source/blender/compositor/intern/COM_Node.cpp @@ -83,23 +83,20 @@ void Node::addSetValueOperation(ExecutionSystem *graph, InputSocket *inputsocket graph->addOperation(operation); } -void Node::addPreviewOperation(ExecutionSystem *system, OutputSocket *outputSocket, int priority) +void Node::addPreviewOperation(ExecutionSystem *system, OutputSocket *outputSocket) { -#ifdef COM_PREVIEW_ENABLED PreviewOperation *operation = new PreviewOperation(); system->addOperation(operation); operation->setbNode(this->getbNode()); operation->setbNodeTree(system->getContext().getbNodeTree()); - operation->setPriority(priority); this->addLink(system, outputSocket, operation->getInputSocket(0)); -#endif } -void Node::addPreviewOperation(ExecutionSystem *system, InputSocket *inputSocket, int priority) +void Node::addPreviewOperation(ExecutionSystem *system, InputSocket *inputSocket) { if (inputSocket->isConnected()) { OutputSocket *outputsocket = inputSocket->getConnection()->getFromSocket(); - this->addPreviewOperation(system, outputsocket, priority); + this->addPreviewOperation(system, outputsocket); } } diff --git a/source/blender/compositor/intern/COM_Node.h b/source/blender/compositor/intern/COM_Node.h index 2666d0a6980..23744adf642 100644 --- a/source/blender/compositor/intern/COM_Node.h +++ b/source/blender/compositor/intern/COM_Node.h @@ -120,8 +120,8 @@ protected: Node(); - void addPreviewOperation(ExecutionSystem *system, InputSocket *inputSocket, int priority); - void addPreviewOperation(ExecutionSystem *system, OutputSocket *inputSocket, int priority); + void addPreviewOperation(ExecutionSystem *system, InputSocket *inputSocket); + void addPreviewOperation(ExecutionSystem *system, OutputSocket *outputSocket); bNodeSocket *getEditorInputSocket(int editorNodeInputSocketIndex); bNodeSocket *getEditorOutputSocket(int editorNodeOutputSocketIndex); diff --git a/source/blender/compositor/intern/COM_NodeOperation.cpp b/source/blender/compositor/intern/COM_NodeOperation.cpp index fae652e39d7..650e4af5ae0 100644 --- a/source/blender/compositor/intern/COM_NodeOperation.cpp +++ b/source/blender/compositor/intern/COM_NodeOperation.cpp @@ -124,3 +124,111 @@ bool NodeOperation::determineDependingAreaOfInterest(rcti * input, ReadBufferOpe return false; } } + +cl_mem NodeOperation::COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader* reader) +{ + cl_int error; + MemoryBuffer* result = (MemoryBuffer*)reader->initializeTileData(NULL, inputMemoryBuffers); + + const cl_image_format imageFormat = { + CL_RGBA, + CL_FLOAT + }; + + cl_mem clBuffer = clCreateImage2D(context, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, &imageFormat, result->getWidth(), + result->getHeight(), 0, result->getBuffer(), &error); + + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + if (error == CL_SUCCESS) cleanup->push_back(clBuffer); + + error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + + COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result); + return clBuffer; +} + +void NodeOperation::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer) +{ + if (offsetIndex != -1) { + cl_int error; + rcti* rect = memoryBuffer->getRect(); + cl_int2 offset = {rect->xmin, rect->ymin}; + + error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + } +} + +void NodeOperation::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex) +{ + if (offsetIndex != -1) { + cl_int error; + cl_int2 offset = {this->getWidth(), this->getHeight()}; + + error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + } +} + +void NodeOperation::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer) +{ + cl_int error; + error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } +} + +void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer) { + cl_int error; + const size_t size[] = {outputMemoryBuffer->getWidth(),outputMemoryBuffer->getHeight()}; + + error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, size, 0, 0, 0, NULL); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } +} + +void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex) { + cl_int error; + const int width = outputMemoryBuffer->getWidth(); + const int height = outputMemoryBuffer->getHeight(); + int offsetx; + int offsety; + const int localSize = 32; + size_t size[2]; + cl_int2 offset; + + for (offsety = 0 ; offsety < height; offsety+=localSize) { + offset[1] = offsety; + if (offsety+localSize < height) { + size[1] = localSize; + } else { + size[1] = height - offsety; + } + for (offsetx = 0 ; offsetx < width ; offsetx+=localSize) { + if (offsetx+localSize < width) { + size[0] = localSize; + } else { + size[0] = width - offsetx; + } + offset[0] = offsetx; + + error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, size, 0, 0, 0, NULL); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + clFlush(queue); + } + } +} + +cl_kernel NodeOperation::COM_clCreateKernel(cl_program program, const char *kernelname, list<cl_kernel> *clKernelsToCleanUp) +{ + cl_int error; + cl_kernel kernel = clCreateKernel(program, kernelname, &error) ; + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); + } + else { + if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel); + } + return kernel; + +} diff --git a/source/blender/compositor/intern/COM_NodeOperation.h b/source/blender/compositor/intern/COM_NodeOperation.h index 73ba5b472d7..2219907b0c8 100644 --- a/source/blender/compositor/intern/COM_NodeOperation.h +++ b/source/blender/compositor/intern/COM_NodeOperation.h @@ -139,8 +139,10 @@ public: * @param rect the rectangle of the chunk (location and size) * @param chunkNumber the chunkNumber to be calculated * @param memoryBuffers all input MemoryBuffer's needed + * @param outputBuffer the outputbuffer to write to */ - virtual void executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, unsigned int chunkNumber, MemoryBuffer** memoryBuffers) {} + virtual void executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, + unsigned int chunkNumber, MemoryBuffer** memoryBuffers, MemoryBuffer* outputBuffer) {} /** * @brief custom handle to add new tasks to the OpenCL command queue in order to execute a chunk on an GPUDevice @@ -207,9 +209,9 @@ public: /** * @brief get the render priority of this node. * @note only applicable for output operations like ViewerOperation - * @return [0:9] 9 is highest priority + * @return CompositorPriority */ - virtual const int getRenderPriority() const {return 0;} + virtual const CompositorPriority getRenderPriority() const {return COM_PRIORITY_LOW;} /** * @brief can this NodeOperation be scheduled on an OpenCLDevice @@ -242,6 +244,13 @@ protected: */ void setOpenCL(bool openCL) {this->openCL = openCL;} + static cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader* reader); + static void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers); + static void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer); + void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex); + static void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer* outputMemoryBuffer); + static void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex); + cl_kernel COM_clCreateKernel(cl_program program, const char* kernelname, list<cl_kernel> *clKernelsToCleanUp); }; diff --git a/source/blender/compositor/intern/COM_OpenCLDevice.cpp b/source/blender/compositor/intern/COM_OpenCLDevice.cpp index 692b96f40b3..e6d3789b06d 100644 --- a/source/blender/compositor/intern/COM_OpenCLDevice.cpp +++ b/source/blender/compositor/intern/COM_OpenCLDevice.cpp @@ -56,10 +56,10 @@ void OpenCLDevice::execute(WorkPackage *work) MemoryBuffer ** inputBuffers = executionGroup->getInputBuffersOpenCL(chunkNumber); MemoryBuffer * outputBuffer = executionGroup->allocateOutputBuffer(chunkNumber, &rect); - executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this->context, this->program, this->queue, &rect, chunkNumber, inputBuffers); + executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this->context, this->program, this->queue, &rect, + chunkNumber, inputBuffers, outputBuffer); + + delete outputBuffer; executionGroup->finalizeChunkExecution(chunkNumber, inputBuffers); - if (outputBuffer != NULL) { - outputBuffer->setCreatedState(); - } } diff --git a/source/blender/compositor/intern/COM_WorkScheduler.cpp b/source/blender/compositor/intern/COM_WorkScheduler.cpp index 80b91b2364c..172107f720b 100644 --- a/source/blender/compositor/intern/COM_WorkScheduler.cpp +++ b/source/blender/compositor/intern/COM_WorkScheduler.cpp @@ -28,7 +28,7 @@ #include "COM_OpenCLDevice.h" #include "OCL_opencl.h" #include "stdio.h" -#include "COM_OpenCLKernels.cl.cpp" +#include "COM_OpenCLKernels.cl.h" #include "BKE_global.h" #if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD @@ -260,7 +260,7 @@ void WorkScheduler::initialize() if (totalNumberOfDevices > 0) { context = clCreateContext(NULL, totalNumberOfDevices, cldevices, clContextError, NULL, &error); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } - program = clCreateProgramWithSource(context, 1, &sourcecode, 0, &error); + program = clCreateProgramWithSource(context, 1, &clkernelstoh_COM_OpenCLKernels_cl, 0, &error); error = clBuildProgram(program, totalNumberOfDevices, cldevices, 0, 0, 0); if (error != CL_SUCCESS) { cl_int error2; |