/* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright 2011, Blender Foundation. */ #include "COM_OpenCLDevice.h" #include "COM_WorkScheduler.h" namespace blender::compositor { enum COM_VendorID { NVIDIA = 0x10DE, AMD = 0x1002 }; const cl_image_format IMAGE_FORMAT_COLOR = { CL_RGBA, CL_FLOAT, }; const cl_image_format IMAGE_FORMAT_VECTOR = { CL_RGB, CL_FLOAT, }; const cl_image_format IMAGE_FORMAT_VALUE = { CL_R, CL_FLOAT, }; OpenCLDevice::OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId) { this->m_device = device; this->m_context = context; this->m_program = program; this->m_queue = nullptr; this->m_vendorID = vendorId; cl_int error; this->m_queue = clCreateCommandQueue(this->m_context, this->m_device, 0, &error); } OpenCLDevice::OpenCLDevice(OpenCLDevice &&other) noexcept : m_context(other.m_context), m_device(other.m_device), m_program(other.m_program), m_queue(other.m_queue), m_vendorID(other.m_vendorID) { other.m_queue = nullptr; } OpenCLDevice::~OpenCLDevice() { if (this->m_queue) { clReleaseCommandQueue(this->m_queue); } } void OpenCLDevice::execute(WorkPackage *work_package) { const unsigned int chunkNumber = work_package->chunk_number; ExecutionGroup *executionGroup = work_package->execution_group; MemoryBuffer **inputBuffers = executionGroup->getInputBuffersOpenCL(chunkNumber); MemoryBuffer *outputBuffer = executionGroup->allocateOutputBuffer(work_package->rect); executionGroup->getOutputOperation()->executeOpenCLRegion( this, &work_package->rect, chunkNumber, inputBuffers, outputBuffer); delete outputBuffer; executionGroup->finalizeChunkExecution(chunkNumber, inputBuffers); } cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, std::list *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader) { return COM_clAttachMemoryBufferToKernelParameter(kernel, parameterIndex, offsetIndex, cleanup, inputMemoryBuffers, (ReadBufferOperation *)reader); } const cl_image_format *OpenCLDevice::determineImageFormat(MemoryBuffer *memoryBuffer) { switch (memoryBuffer->get_num_channels()) { case 1: return &IMAGE_FORMAT_VALUE; break; case 3: return &IMAGE_FORMAT_VECTOR; break; case 4: return &IMAGE_FORMAT_COLOR; break; default: BLI_assert(!"Unsupported num_channels."); } return &IMAGE_FORMAT_COLOR; } cl_mem OpenCLDevice::COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, std::list *cleanup, MemoryBuffer **inputMemoryBuffers, ReadBufferOperation *reader) { cl_int error; MemoryBuffer *result = reader->getInputMemoryBuffer(inputMemoryBuffers); const cl_image_format *imageFormat = determineImageFormat(result); cl_mem clBuffer = clCreateImage2D(this->m_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageFormat, result->getWidth(), result->getHeight(), 0, result->getBuffer(), &error); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } if (error == CL_SUCCESS) { cleanup->push_back(clBuffer); } error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, offsetIndex, result); return clBuffer; } void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffer) { if (offsetIndex != -1) { cl_int error; const rcti &rect = memoryBuffer->get_rect(); cl_int2 offset = {{rect.xmin, rect.ymin}}; error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } } } void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation *operation) { if (offsetIndex != -1) { cl_int error; cl_int2 offset = {{(cl_int)operation->getWidth(), (cl_int)operation->getHeight()}}; error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } } } void OpenCLDevice::COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer) { cl_int error; error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } } void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer) { cl_int error; const size_t size[] = { (size_t)outputMemoryBuffer->getWidth(), (size_t)outputMemoryBuffer->getHeight(), }; error = clEnqueueNDRangeKernel( this->m_queue, kernel, 2, nullptr, size, nullptr, 0, nullptr, nullptr); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } } void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer, int offsetIndex, NodeOperation *operation) { cl_int error; const int width = outputMemoryBuffer->getWidth(); const int height = outputMemoryBuffer->getHeight(); int offsetx; int offsety; int localSize = 1024; size_t size[2]; cl_int2 offset; if (this->m_vendorID == NVIDIA) { localSize = 32; } bool breaked = false; for (offsety = 0; offsety < height && (!breaked); offsety += localSize) { offset.s[1] = offsety; if (offsety + localSize < height) { size[1] = localSize; } else { size[1] = height - offsety; } for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) { if (offsetx + localSize < width) { size[0] = localSize; } else { size[0] = width - offsetx; } offset.s[0] = offsetx; error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } error = clEnqueueNDRangeKernel( this->m_queue, kernel, 2, nullptr, size, nullptr, 0, nullptr, nullptr); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } clFlush(this->m_queue); if (operation->isBraked()) { breaked = false; } } } } cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname, std::list *clKernelsToCleanUp) { cl_int error; cl_kernel kernel = clCreateKernel(this->m_program, kernelname, &error); if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); } else { if (clKernelsToCleanUp) { clKernelsToCleanUp->push_back(kernel); } } return kernel; } } // namespace blender::compositor