diff options
author | Monique Dewanchand <m.dewanchand@atmind.nl> | 2012-06-21 00:05:21 +0400 |
---|---|---|
committer | Monique Dewanchand <m.dewanchand@atmind.nl> | 2012-06-21 00:05:21 +0400 |
commit | 82bad4bd6cb9f285ad541ccc0b91f16784f73c6b (patch) | |
tree | d5a6244f4e15a0f42b49685471bdbb4236fcd698 /source/blender/compositor/intern | |
parent | cf129d8cb5e5f03e613af8ad9174a004d7d34b01 (diff) |
Refactoring of tiles opencl implementation:
- Moved methods from NodeOperation to OpenCLDevice
- Added check on Nvidia for local size
Diffstat (limited to 'source/blender/compositor/intern')
-rw-r--r-- | source/blender/compositor/intern/COM_Device.h | 4 | ||||
-rw-r--r-- | source/blender/compositor/intern/COM_Node.h | 1 | ||||
-rw-r--r-- | source/blender/compositor/intern/COM_NodeOperation.cpp | 115 | ||||
-rw-r--r-- | source/blender/compositor/intern/COM_NodeOperation.h | 17 | ||||
-rw-r--r-- | source/blender/compositor/intern/COM_OpenCLDevice.cpp | 122 | ||||
-rw-r--r-- | source/blender/compositor/intern/COM_OpenCLDevice.h | 23 | ||||
-rw-r--r-- | source/blender/compositor/intern/COM_WorkPackage.h | 2 | ||||
-rw-r--r-- | source/blender/compositor/intern/COM_WorkScheduler.cpp | 5 |
8 files changed, 150 insertions, 139 deletions
diff --git a/source/blender/compositor/intern/COM_Device.h b/source/blender/compositor/intern/COM_Device.h index 08fdb5bb578..2a86382a191 100644 --- a/source/blender/compositor/intern/COM_Device.h +++ b/source/blender/compositor/intern/COM_Device.h @@ -23,11 +23,7 @@ #ifndef _COM_Device_h #define _COM_Device_h -#include "COM_ExecutionSystem.h" #include "COM_WorkPackage.h" -#include "COM_NodeOperation.h" -#include "BLI_rect.h" -#include "COM_MemoryBuffer.h" /** * @brief Abstract class for device implementations to be used by the Compositor. diff --git a/source/blender/compositor/intern/COM_Node.h b/source/blender/compositor/intern/COM_Node.h index 12baa26cd6e..090b1455440 100644 --- a/source/blender/compositor/intern/COM_Node.h +++ b/source/blender/compositor/intern/COM_Node.h @@ -29,6 +29,7 @@ #include "COM_CompositorContext.h" #include "DNA_node_types.h" #include "BKE_text.h" +#include "COM_ExecutionSystem.h" #include <vector> #include <string> diff --git a/source/blender/compositor/intern/COM_NodeOperation.cpp b/source/blender/compositor/intern/COM_NodeOperation.cpp index b39b1758051..33989fa5787 100644 --- a/source/blender/compositor/intern/COM_NodeOperation.cpp +++ b/source/blender/compositor/intern/COM_NodeOperation.cpp @@ -140,118 +140,3 @@ bool NodeOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOper return false; } } - -cl_mem NodeOperation::COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader) -{ - cl_int error; - MemoryBuffer *result = (MemoryBuffer *)reader->initializeTileData(NULL, inputMemoryBuffers); - - const cl_image_format imageFormat = { - CL_RGBA, - CL_FLOAT - }; - - cl_mem clBuffer = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, result->getWidth(), - result->getHeight(), 0, result->getBuffer(), &error); - - if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } - if (error == CL_SUCCESS) cleanup->push_back(clBuffer); - - error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer); - if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } - - COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result); - return clBuffer; -} - -void NodeOperation::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer) -{ - if (offsetIndex != -1) { - cl_int error; - rcti *rect = memoryBuffer->getRect(); - cl_int2 offset = {rect->xmin, rect->ymin}; - - error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); - if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } - } -} - -void NodeOperation::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex) -{ - if (offsetIndex != -1) { - cl_int error; - cl_int2 offset = {this->getWidth(), this->getHeight()}; - - error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); - if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } - } -} - -void NodeOperation::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer) -{ - cl_int error; - error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer); - if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } -} - -void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer) -{ - cl_int error; - const size_t size[] = {outputMemoryBuffer->getWidth(), outputMemoryBuffer->getHeight()}; - - error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, size, 0, 0, 0, NULL); - if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } -} - -void NodeOperation::COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex) -{ - cl_int error; - const int width = outputMemoryBuffer->getWidth(); - const int height = outputMemoryBuffer->getHeight(); - int offsetx; - int offsety; - const int localSize = 32; - size_t size[2]; - cl_int2 offset; - - bool breaked = false; - for (offsety = 0; offsety < height && (!breaked); offsety += localSize) { - offset[1] = offsety; - if (offsety + localSize < height) { - size[1] = localSize; - } - else { - size[1] = height - offsety; - } - for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) { - if (offsetx + localSize < width) { - size[0] = localSize; - } - else { - size[0] = width - offsetx; - } - offset[0] = offsetx; - - error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); - if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } - error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, size, 0, 0, 0, NULL); - if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } - clFlush(queue); - if (isBreaked()) { - breaked = false; - } - } - } -} - -cl_kernel NodeOperation::COM_clCreateKernel(cl_program program, const char *kernelname, list<cl_kernel> *clKernelsToCleanUp) -{ - cl_int error; - cl_kernel kernel = clCreateKernel(program, kernelname, &error); - if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } - else { - if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel); - } - return kernel; - -} diff --git a/source/blender/compositor/intern/COM_NodeOperation.h b/source/blender/compositor/intern/COM_NodeOperation.h index 30731572712..f96b994685a 100644 --- a/source/blender/compositor/intern/COM_NodeOperation.h +++ b/source/blender/compositor/intern/COM_NodeOperation.h @@ -22,9 +22,7 @@ #ifndef _COM_NodeOperation_h #define _COM_NodeOperation_h - -class NodeOperation; - +class OpenCLDevice; #include "COM_Node.h" #include <string> #include <sstream> @@ -150,7 +148,7 @@ public: * @param memoryBuffers all input MemoryBuffer's needed * @param outputBuffer the outputbuffer to write to */ - virtual void executeOpenCLRegion(cl_context context, cl_program program, cl_command_queue queue, rcti *rect, + virtual void executeOpenCLRegion(OpenCLDevice* device, rcti *rect, unsigned int chunkNumber, MemoryBuffer **memoryBuffers, MemoryBuffer *outputBuffer) {} /** @@ -165,7 +163,7 @@ public: * @param clMemToCleanUp all created cl_mem references must be added to this list. Framework will clean this after execution * @param clKernelsToCleanUp all created cl_kernel references must be added to this list. Framework will clean this after execution */ - virtual void executeOpenCL(cl_context context, cl_program program, cl_command_queue queue, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp) {} + virtual void executeOpenCL(OpenCLDevice* device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp, list<cl_kernel> *clKernelsToCleanUp) {} virtual void deinitExecution(); bool isResolutionSet() { @@ -272,15 +270,6 @@ protected: * @brief set if this NodeOperation can be scheduled on a OpenCLDevice */ void setOpenCL(bool openCL) { this->openCL = openCL; } - - static cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_context context, cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader); - static void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers); - static void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer); - void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex); - static void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer); - void COM_clEnqueueRange(cl_command_queue queue, cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex); - cl_kernel COM_clCreateKernel(cl_program program, const char *kernelname, list<cl_kernel> *clKernelsToCleanUp); - }; #endif diff --git a/source/blender/compositor/intern/COM_OpenCLDevice.cpp b/source/blender/compositor/intern/COM_OpenCLDevice.cpp index 9d005804098..c9d27b8543c 100644 --- a/source/blender/compositor/intern/COM_OpenCLDevice.cpp +++ b/source/blender/compositor/intern/COM_OpenCLDevice.cpp @@ -23,13 +23,15 @@ #include "COM_OpenCLDevice.h" #include "COM_WorkScheduler.h" +typedef enum COM_VendorID {NVIDIA=0x10DE, AMD=0x1002} COM_VendorID; -OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program) +OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId) { this->device = device; this->context = context; this->program = program; this->queue = NULL; + this->vendorID = vendorId; } bool OpenCLDevice::initialize() @@ -56,10 +58,126 @@ void OpenCLDevice::execute(WorkPackage *work) MemoryBuffer **inputBuffers = executionGroup->getInputBuffersOpenCL(chunkNumber); MemoryBuffer *outputBuffer = executionGroup->allocateOutputBuffer(chunkNumber, &rect); - executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this->context, this->program, this->queue, &rect, + executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this, &rect, chunkNumber, inputBuffers, outputBuffer); delete outputBuffer; executionGroup->finalizeChunkExecution(chunkNumber, inputBuffers); } + +cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader) +{ + cl_int error; + MemoryBuffer *result = (MemoryBuffer *)reader->initializeTileData(NULL, inputMemoryBuffers); + + const cl_image_format imageFormat = { + CL_RGBA, + CL_FLOAT + }; + + cl_mem clBuffer = clCreateImage2D(this->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, result->getWidth(), + result->getHeight(), 0, result->getBuffer(), &error); + + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + if (error == CL_SUCCESS) cleanup->push_back(clBuffer); + + error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + + COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result); + return clBuffer; +} + +void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer) +{ + if (offsetIndex != -1) { + cl_int error; + rcti *rect = memoryBuffer->getRect(); + cl_int2 offset = {rect->xmin, rect->ymin}; + + error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + } +} + +void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation* operation) +{ + if (offsetIndex != -1) { + cl_int error; + cl_int2 offset = {operation->getWidth(), operation->getHeight()}; + + error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + } +} + +void OpenCLDevice::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer) +{ + cl_int error; + error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } +} + +void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer) +{ + cl_int error; + const size_t size[] = {outputMemoryBuffer->getWidth(), outputMemoryBuffer->getHeight()}; + + error = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, size, 0, 0, 0, NULL); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } +} + +void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation* operation) +{ + cl_int error; + const int width = outputMemoryBuffer->getWidth(); + const int height = outputMemoryBuffer->getHeight(); + int offsetx; + int offsety; + int localSize = 1024; + size_t size[2]; + cl_int2 offset; + + if (this->vendorID == NVIDIA){localSize = 32;} + bool breaked = false; + for (offsety = 0; offsety < height && (!breaked); offsety += localSize) { + offset[1] = offsety; + if (offsety + localSize < height) { + size[1] = localSize; + } + else { + size[1] = height - offsety; + } + for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) { + if (offsetx + localSize < width) { + size[0] = localSize; + } + else { + size[0] = width - offsetx; + } + offset[0] = offsetx; + + error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + error = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, size, 0, 0, 0, NULL); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + clFlush(this->queue); + if (operation->isBreaked()) { + breaked = false; + } + } + } +} + +cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp) +{ + cl_int error; + cl_kernel kernel = clCreateKernel(this->program, kernelname, &error); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + else { + if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel); + } + return kernel; + +} diff --git a/source/blender/compositor/intern/COM_OpenCLDevice.h b/source/blender/compositor/intern/COM_OpenCLDevice.h index 83ce8cec811..d132f330651 100644 --- a/source/blender/compositor/intern/COM_OpenCLDevice.h +++ b/source/blender/compositor/intern/COM_OpenCLDevice.h @@ -29,7 +29,6 @@ class OpenCLDevice; #include "OCL_opencl.h" #include "COM_WorkScheduler.h" - /** * @brief device representing an GPU OpenCL device. * an instance of this class represents a single cl_device @@ -55,13 +54,21 @@ private: * @brief opencl command queue */ cl_command_queue queue; + + /** + * @brief opencl vendor ID + */ + cl_int vendorID; + public: /** * @brief constructor with opencl device * @param context * @param device + * @param program + * @param vendorID */ - OpenCLDevice(cl_context context, cl_device_id device, cl_program program); + OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId); /** @@ -83,6 +90,18 @@ public: * @param work the WorkPackage to execute */ void execute(WorkPackage *work); + + cl_context getContext(){return this->context;} + + cl_command_queue getQueue(){return this->queue;} + + cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader); + void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers); + void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer); + void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation* operation); + void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer); + void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation* operation); + cl_kernel COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp); }; #endif diff --git a/source/blender/compositor/intern/COM_WorkPackage.h b/source/blender/compositor/intern/COM_WorkPackage.h index 18d83cc151c..fed87186d20 100644 --- a/source/blender/compositor/intern/COM_WorkPackage.h +++ b/source/blender/compositor/intern/COM_WorkPackage.h @@ -24,7 +24,7 @@ class WorkPackage; #ifndef _COM_WorkPackage_h_ #define _COM_WorkPackage_h_ - +class ExecutionGroup; #include "COM_ExecutionGroup.h" /** diff --git a/source/blender/compositor/intern/COM_WorkScheduler.cpp b/source/blender/compositor/intern/COM_WorkScheduler.cpp index a410c28f47d..12c0f28ec9b 100644 --- a/source/blender/compositor/intern/COM_WorkScheduler.cpp +++ b/source/blender/compositor/intern/COM_WorkScheduler.cpp @@ -257,7 +257,10 @@ void WorkScheduler::initialize() unsigned int indexDevices; for (indexDevices = 0; indexDevices < totalNumberOfDevices; indexDevices++) { cl_device_id device = cldevices[indexDevices]; - OpenCLDevice *clDevice = new OpenCLDevice(context, device, program); + cl_int vendorID = 0; + cl_int error = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, NULL); + if (error!= CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + OpenCLDevice *clDevice = new OpenCLDevice(context, device, program, vendorID); clDevice->initialize(), gpudevices.push_back(clDevice); if (G.f & G_DEBUG) { |