diff options
author | Monique Dewanchand <m.dewanchand@atmind.nl> | 2012-06-21 00:05:21 +0400 |
---|---|---|
committer | Monique Dewanchand <m.dewanchand@atmind.nl> | 2012-06-21 00:05:21 +0400 |
commit | 82bad4bd6cb9f285ad541ccc0b91f16784f73c6b (patch) | |
tree | d5a6244f4e15a0f42b49685471bdbb4236fcd698 /source/blender/compositor/intern/COM_OpenCLDevice.cpp | |
parent | cf129d8cb5e5f03e613af8ad9174a004d7d34b01 (diff) |
Refactoring of tiles opencl implementation:
- Moved methods from NodeOperation to OpenCLDevice
- Added check on Nvidia for local size
Diffstat (limited to 'source/blender/compositor/intern/COM_OpenCLDevice.cpp')
-rw-r--r-- | source/blender/compositor/intern/COM_OpenCLDevice.cpp | 122 |
1 files changed, 120 insertions, 2 deletions
diff --git a/source/blender/compositor/intern/COM_OpenCLDevice.cpp b/source/blender/compositor/intern/COM_OpenCLDevice.cpp index 9d005804098..c9d27b8543c 100644 --- a/source/blender/compositor/intern/COM_OpenCLDevice.cpp +++ b/source/blender/compositor/intern/COM_OpenCLDevice.cpp @@ -23,13 +23,15 @@ #include "COM_OpenCLDevice.h" #include "COM_WorkScheduler.h" +typedef enum COM_VendorID {NVIDIA=0x10DE, AMD=0x1002} COM_VendorID; -OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program) +OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId) { this->device = device; this->context = context; this->program = program; this->queue = NULL; + this->vendorID = vendorId; } bool OpenCLDevice::initialize() @@ -56,10 +58,126 @@ void OpenCLDevice::execute(WorkPackage *work) MemoryBuffer **inputBuffers = executionGroup->getInputBuffersOpenCL(chunkNumber); MemoryBuffer *outputBuffer = executionGroup->allocateOutputBuffer(chunkNumber, &rect); - executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this->context, this->program, this->queue, &rect, + executionGroup->getOutputNodeOperation()->executeOpenCLRegion(this, &rect, chunkNumber, inputBuffers, outputBuffer); delete outputBuffer; executionGroup->finalizeChunkExecution(chunkNumber, inputBuffers); } + +cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, list<cl_mem> *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader) +{ + cl_int error; + MemoryBuffer *result = (MemoryBuffer *)reader->initializeTileData(NULL, inputMemoryBuffers); + + const cl_image_format imageFormat = { + CL_RGBA, + CL_FLOAT + }; + + cl_mem clBuffer = clCreateImage2D(this->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, result->getWidth(), + result->getHeight(), 0, result->getBuffer(), &error); + + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + if (error == CL_SUCCESS) cleanup->push_back(clBuffer); + + error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + + COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result); + return clBuffer; +} + +void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer) +{ + if (offsetIndex != -1) { + cl_int error; + rcti *rect = memoryBuffer->getRect(); + cl_int2 offset = {rect->xmin, rect->ymin}; + + error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + } +} + +void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation* operation) +{ + if (offsetIndex != -1) { + cl_int error; + cl_int2 offset = {operation->getWidth(), operation->getHeight()}; + + error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + } +} + +void OpenCLDevice::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer) +{ + cl_int error; + error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } +} + +void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer) +{ + cl_int error; + const size_t size[] = {outputMemoryBuffer->getWidth(), outputMemoryBuffer->getHeight()}; + + error = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, size, 0, 0, 0, NULL); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } +} + +void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation* operation) +{ + cl_int error; + const int width = outputMemoryBuffer->getWidth(); + const int height = outputMemoryBuffer->getHeight(); + int offsetx; + int offsety; + int localSize = 1024; + size_t size[2]; + cl_int2 offset; + + if (this->vendorID == NVIDIA){localSize = 32;} + bool breaked = false; + for (offsety = 0; offsety < height && (!breaked); offsety += localSize) { + offset[1] = offsety; + if (offsety + localSize < height) { + size[1] = localSize; + } + else { + size[1] = height - offsety; + } + for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) { + if (offsetx + localSize < width) { + size[0] = localSize; + } + else { + size[0] = width - offsetx; + } + offset[0] = offsetx; + + error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + error = clEnqueueNDRangeKernel(this->queue, kernel, 2, NULL, size, 0, 0, 0, NULL); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + clFlush(this->queue); + if (operation->isBreaked()) { + breaked = false; + } + } + } +} + +cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname, list<cl_kernel> *clKernelsToCleanUp) +{ + cl_int error; + cl_kernel kernel = clCreateKernel(this->program, kernelname, &error); + if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } + else { + if (clKernelsToCleanUp) clKernelsToCleanUp->push_back(kernel); + } + return kernel; + +} |