1 files changed, 394 insertions, 0 deletions
diff --git a/source/blender/compositor/intern/COM_WorkScheduler.cc b/source/blender/compositor/intern/COM_WorkScheduler.cc
new file mode 100644
index 00000000000..a70b6ba4abe
--- /dev/null
+++ b/source/blender/compositor/intern/COM_WorkScheduler.cc
@@ -0,0 +1,394 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2011, Blender Foundation.
+ */
+
+#include <cstdio>
+#include <list>
+
+#include "COM_CPUDevice.h"
+#include "COM_OpenCLDevice.h"
+#include "COM_OpenCLKernels.cl.h"
+#include "COM_WorkScheduler.h"
+#include "COM_WriteBufferOperation.h"
+#include "COM_compositor.h"
+#include "clew.h"
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_threads.h"
+#include "PIL_time.h"
+
+#include "BKE_global.h"
+
+#if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD
+#  ifndef DEBUG /* test this so we dont get warnings in debug builds */
+#    warning COM_CURRENT_THREADING_MODEL COM_TM_NOTHREAD is activated. Use only for debugging.
+#  endif
+#elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+/* do nothing - default */
+#else
+#  error COM_CURRENT_THREADING_MODEL No threading model selected
+#endif
+
+static ThreadLocal(CPUDevice *) g_thread_device;
+static struct {
+  /** \brief list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
+   */
+  std::vector<CPUDevice *> cpu_devices;
+
+#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+  /** \brief list of all thread for every CPUDevice in cpudevices a thread exists. */
+  ListBase cpu_threads;
+  bool cpu_initialized = false;
+  /** \brief all scheduled work for the cpu */
+  ThreadQueue *cpu_queue;
+  ThreadQueue *gpu_queue;
+#  ifdef COM_OPENCL_ENABLED
+  cl_context opencl_context;
+  cl_program opencl_program;
+  /** \brief list of all OpenCLDevices. for every OpenCL GPU device an instance of OpenCLDevice is
+   * created. */
+  std::vector<OpenCLDevice *> gpu_devices;
+  /** \brief list of all thread for every GPUDevice in cpudevices a thread exists. */
+  ListBase gpu_threads;
+  /** \brief all scheduled work for the GPU. */
+  bool opencl_active = false;
+  bool opencl_initialized = false;
+#  endif
+#endif
+
+} g_work_scheduler;
+
+#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+void *WorkScheduler::thread_execute_cpu(void *data)
+{
+  CPUDevice *device = (CPUDevice *)data;
+  WorkPackage *work;
+  BLI_thread_local_set(g_thread_device, device);
+  while ((work = (WorkPackage *)BLI_thread_queue_pop(g_work_scheduler.cpu_queue))) {
+    device->execute(work);
+    delete work;
+  }
+
+  return nullptr;
+}
+
+void *WorkScheduler::thread_execute_gpu(void *data)
+{
+  Device *device = (Device *)data;
+  WorkPackage *work;
+
+  while ((work = (WorkPackage *)BLI_thread_queue_pop(g_work_scheduler.gpu_queue))) {
+    device->execute(work);
+    delete work;
+  }
+
+  return nullptr;
+}
+#endif
+
+void WorkScheduler::schedule(ExecutionGroup *group, int chunkNumber)
+{
+  WorkPackage *package = new WorkPackage(group, chunkNumber);
+#if COM_CURRENT_THREADING_MODEL == COM_TM_NOTHREAD
+  CPUDevice device(0);
+  device.execute(package);
+  delete package;
+#elif COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+#  ifdef COM_OPENCL_ENABLED
+  if (group->isOpenCL() && g_work_scheduler.opencl_active) {
+    BLI_thread_queue_push(g_work_scheduler.gpu_queue, package);
+  }
+  else {
+    BLI_thread_queue_push(g_work_scheduler.cpu_queue, package);
+  }
+#  else
+  BLI_thread_queue_push(g_work_scheduler.cpu_queue, package);
+#  endif
+#endif
+}
+
+void WorkScheduler::start(CompositorContext &context)
+{
+#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+  unsigned int index;
+  g_work_scheduler.cpu_queue = BLI_thread_queue_init();
+  BLI_threadpool_init(
+      &g_work_scheduler.cpu_threads, thread_execute_cpu, g_work_scheduler.cpu_devices.size());
+  for (index = 0; index < g_work_scheduler.cpu_devices.size(); index++) {
+    Device *device = g_work_scheduler.cpu_devices[index];
+    BLI_threadpool_insert(&g_work_scheduler.cpu_threads, device);
+  }
+#  ifdef COM_OPENCL_ENABLED
+  if (context.getHasActiveOpenCLDevices()) {
+    g_work_scheduler.gpu_queue = BLI_thread_queue_init();
+    BLI_threadpool_init(
+        &g_work_scheduler.gpu_threads, thread_execute_gpu, g_work_scheduler.gpu_devices.size());
+    for (index = 0; index < g_work_scheduler.gpu_devices.size(); index++) {
+      Device *device = g_work_scheduler.gpu_devices[index];
+      BLI_threadpool_insert(&g_work_scheduler.gpu_threads, device);
+    }
+    g_work_scheduler.opencl_active = true;
+  }
+  else {
+    g_work_scheduler.opencl_active = false;
+  }
+#  endif
+#endif
+}
+void WorkScheduler::finish()
+{
+#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+#  ifdef COM_OPENCL_ENABLED
+  if (g_work_scheduler.opencl_active) {
+    BLI_thread_queue_wait_finish(g_work_scheduler.gpu_queue);
+    BLI_thread_queue_wait_finish(g_work_scheduler.cpu_queue);
+  }
+  else {
+    BLI_thread_queue_wait_finish(g_work_scheduler.cpu_queue);
+  }
+#  else
+  BLI_thread_queue_wait_finish(cpuqueue);
+#  endif
+#endif
+}
+void WorkScheduler::stop()
+{
+#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+  BLI_thread_queue_nowait(g_work_scheduler.cpu_queue);
+  BLI_threadpool_end(&g_work_scheduler.cpu_threads);
+  BLI_thread_queue_free(g_work_scheduler.cpu_queue);
+  g_work_scheduler.cpu_queue = nullptr;
+#  ifdef COM_OPENCL_ENABLED
+  if (g_work_scheduler.opencl_active) {
+    BLI_thread_queue_nowait(g_work_scheduler.gpu_queue);
+    BLI_threadpool_end(&g_work_scheduler.gpu_threads);
+    BLI_thread_queue_free(g_work_scheduler.gpu_queue);
+    g_work_scheduler.gpu_queue = nullptr;
+  }
+#  endif
+#endif
+}
+
+bool WorkScheduler::has_gpu_devices()
+{
+#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+#  ifdef COM_OPENCL_ENABLED
+  return !g_work_scheduler.gpu_devices.empty();
+#  else
+  return 0;
+#  endif
+#else
+  return 0;
+#endif
+}
+
+#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+static void CL_CALLBACK clContextError(const char *errinfo,
+                                       const void * /*private_info*/,
+                                       size_t /*cb*/,
+                                       void * /*user_data*/)
+{
+  printf("OPENCL error: %s\n", errinfo);
+}
+#endif
+
+void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads)
+{
+#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+  /* deinitialize if number of threads doesn't match */
+  if (g_work_scheduler.cpu_devices.size() != num_cpu_threads) {
+    Device *device;
+
+    while (!g_work_scheduler.cpu_devices.empty()) {
+      device = g_work_scheduler.cpu_devices.back();
+      g_work_scheduler.cpu_devices.pop_back();
+      device->deinitialize();
+      delete device;
+    }
+    if (g_work_scheduler.cpu_initialized) {
+      BLI_thread_local_delete(g_thread_device);
+    }
+    g_work_scheduler.cpu_initialized = false;
+  }
+
+  /* initialize CPU threads */
+  if (!g_work_scheduler.cpu_initialized) {
+    for (int index = 0; index < num_cpu_threads; index++) {
+      CPUDevice *device = new CPUDevice(index);
+      device->initialize();
+      g_work_scheduler.cpu_devices.push_back(device);
+    }
+    BLI_thread_local_create(g_thread_device);
+    g_work_scheduler.cpu_initialized = true;
+  }
+
+#  ifdef COM_OPENCL_ENABLED
+  /* deinitialize OpenCL GPU's */
+  if (use_opencl && !g_work_scheduler.opencl_initialized) {
+    g_work_scheduler.opencl_context = nullptr;
+    g_work_scheduler.opencl_program = nullptr;
+
+    /* This will check for errors and skip if already initialized. */
+    if (clewInit() != CLEW_SUCCESS) {
+      return;
+    }
+
+    if (clCreateContextFromType) {
+      cl_uint numberOfPlatforms = 0;
+      cl_int error;
+      error = clGetPlatformIDs(0, nullptr, &numberOfPlatforms);
+      if (error == -1001) {
+      } /* GPU not supported */
+      else if (error != CL_SUCCESS) {
+        printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
+      }
+      if (G.f & G_DEBUG) {
+        printf("%u number of platforms\n", numberOfPlatforms);
+      }
+      cl_platform_id *platforms = (cl_platform_id *)MEM_mallocN(
+          sizeof(cl_platform_id) * numberOfPlatforms, __func__);
+      error = clGetPlatformIDs(numberOfPlatforms, platforms, nullptr);
+      unsigned int indexPlatform;
+      for (indexPlatform = 0; indexPlatform < numberOfPlatforms; indexPlatform++) {
+        cl_platform_id platform = platforms[indexPlatform];
+        cl_uint numberOfDevices = 0;
+        clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, nullptr, &numberOfDevices);
+        if (numberOfDevices <= 0) {
+          continue;
+        }
+
+        cl_device_id *cldevices = (cl_device_id *)MEM_mallocN(
+            sizeof(cl_device_id) * numberOfDevices, __func__);
+        clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numberOfDevices, cldevices, nullptr);
+
+        g_work_scheduler.opencl_context = clCreateContext(
+            nullptr, numberOfDevices, cldevices, clContextError, nullptr, &error);
+        if (error != CL_SUCCESS) {
+          printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
+        }
+        const char *cl_str[2] = {datatoc_COM_OpenCLKernels_cl, nullptr};
+        g_work_scheduler.opencl_program = clCreateProgramWithSource(
+            g_work_scheduler.opencl_context, 1, cl_str, nullptr, &error);
+        error = clBuildProgram(g_work_scheduler.opencl_program,
+                               numberOfDevices,
+                               cldevices,
+                               nullptr,
+                               nullptr,
+                               nullptr);
+        if (error != CL_SUCCESS) {
+          cl_int error2;
+          size_t ret_val_size = 0;
+          printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
+          error2 = clGetProgramBuildInfo(g_work_scheduler.opencl_program,
+                                         cldevices[0],
+                                         CL_PROGRAM_BUILD_LOG,
+                                         0,
+                                         nullptr,
+                                         &ret_val_size);
+          if (error2 != CL_SUCCESS) {
+            printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
+          }
+          char *build_log = (char *)MEM_mallocN(sizeof(char) * ret_val_size + 1, __func__);
+          error2 = clGetProgramBuildInfo(g_work_scheduler.opencl_program,
+                                         cldevices[0],
+                                         CL_PROGRAM_BUILD_LOG,
+                                         ret_val_size,
+                                         build_log,
+                                         nullptr);
+          if (error2 != CL_SUCCESS) {
+            printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
+          }
+          build_log[ret_val_size] = '\0';
+          printf("%s", build_log);
+          MEM_freeN(build_log);
+        }
+        else {
+          unsigned int indexDevices;
+          for (indexDevices = 0; indexDevices < numberOfDevices; indexDevices++) {
+            cl_device_id device = cldevices[indexDevices];
+            cl_int vendorID = 0;
+            cl_int error2 = clGetDeviceInfo(
+                device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, nullptr);
+            if (error2 != CL_SUCCESS) {
+              printf("CLERROR[%d]: %s\n", error2, clewErrorString(error2));
+            }
+            OpenCLDevice *clDevice = new OpenCLDevice(g_work_scheduler.opencl_context,
+                                                      device,
+                                                      g_work_scheduler.opencl_program,
+                                                      vendorID);
+            clDevice->initialize();
+            g_work_scheduler.gpu_devices.push_back(clDevice);
+          }
+        }
+        MEM_freeN(cldevices);
+      }
+      MEM_freeN(platforms);
+    }
+
+    g_work_scheduler.opencl_initialized = true;
+  }
+#  endif
+#endif
+}
+
+void WorkScheduler::deinitialize()
+{
+#if COM_CURRENT_THREADING_MODEL == COM_TM_QUEUE
+  /* deinitialize CPU threads */
+  if (g_work_scheduler.cpu_initialized) {
+    Device *device;
+    while (!g_work_scheduler.cpu_devices.empty()) {
+      device = g_work_scheduler.cpu_devices.back();
+      g_work_scheduler.cpu_devices.pop_back();
+      device->deinitialize();
+      delete device;
+    }
+    BLI_thread_local_delete(g_thread_device);
+    g_work_scheduler.cpu_initialized = false;
+  }
+
+#  ifdef COM_OPENCL_ENABLED
+  /* deinitialize OpenCL GPU's */
+  if (g_work_scheduler.opencl_initialized) {
+    Device *device;
+    while (!g_work_scheduler.gpu_devices.empty()) {
+      device = g_work_scheduler.gpu_devices.back();
+      g_work_scheduler.gpu_devices.pop_back();
+      device->deinitialize();
+      delete device;
+    }
+    if (g_work_scheduler.opencl_program) {
+      clReleaseProgram(g_work_scheduler.opencl_program);
+      g_work_scheduler.opencl_program = nullptr;
+    }
+    if (g_work_scheduler.opencl_context) {
+      clReleaseContext(g_work_scheduler.opencl_context);
+      g_work_scheduler.opencl_context = nullptr;
+    }
+
+    g_work_scheduler.opencl_initialized = false;
+  }
+#  endif
+#endif
+}
+
+int WorkScheduler::current_thread_id()
+{
+  CPUDevice *device = (CPUDevice *)BLI_thread_local_get(g_thread_device);
+  return device->thread_id();
+}