Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device.cpp31
-rw-r--r--intern/cycles/device/device.h4
-rw-r--r--intern/cycles/device/device_opencl.cpp43
-rw-r--r--intern/cycles/kernel/CMakeLists.txt2
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h19
5 files changed, 78 insertions, 21 deletions
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index f43ccffe461..6ebc359fdb3 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -24,6 +24,7 @@
#include "util_cuda.h"
#include "util_debug.h"
+#include "util_foreach.h"
#include "util_math.h"
#include "util_opencl.h"
#include "util_opengl.h"
@@ -41,7 +42,31 @@ DeviceTask::DeviceTask(Type type_)
{
}
-void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
+void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
+{
+ int num;
+
+ if(type == DISPLACE) {
+ num = (displace_w + max_size - 1)/max_size;
+ }
+ else {
+ max_size = max(1, max_size/w);
+ num = (h + max_size - 1)/max_size;
+ }
+
+ split(tasks, num);
+}
+
+void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num)
+{
+ list<DeviceTask> tasks;
+ split(tasks, num);
+
+ foreach(DeviceTask& task, tasks)
+ queue.push(task);
+}
+
+void DeviceTask::split(list<DeviceTask>& tasks, int num)
{
if(type == DISPLACE) {
num = min(displace_w, num);
@@ -55,7 +80,7 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
task.displace_x = tx;
task.displace_w = tw;
- tasks.push(task);
+ tasks.push_back(task);
}
}
else {
@@ -70,7 +95,7 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
task.y = ty;
task.h = th;
- tasks.push(task);
+ tasks.push_back(task);
}
}
}
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index be6a3f144ed..a6a81e7b326 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -23,6 +23,7 @@
#include "device_memory.h"
+#include "util_list.h"
#include "util_string.h"
#include "util_thread.h"
#include "util_types.h"
@@ -67,7 +68,10 @@ public:
int displace_x, displace_w;
DeviceTask(Type type = PATH_TRACE);
+
+ void split(list<DeviceTask>& tasks, int num);
void split(ThreadQueue<DeviceTask>& tasks, int num);
+ void split_max_size(list<DeviceTask>& tasks, int max_size);
};
/* Device */
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 3a1d3032d6e..6014dd0fdb7 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -25,6 +25,7 @@
#include "device.h"
#include "device_intern.h"
+#include "util_foreach.h"
#include "util_map.h"
#include "util_math.h"
#include "util_md5.h"
@@ -52,6 +53,7 @@ public:
map<string, device_memory*> mem_map;
device_ptr null_mem;
bool device_initialized;
+ string platform_name;
const char *opencl_error_string(cl_int err)
{
@@ -175,6 +177,10 @@ public:
if(opencl_error(ciErr))
return;
+ char name[256];
+ clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
+ platform_name = name;
+
cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
if(opencl_error(ciErr))
return;
@@ -191,7 +197,7 @@ public:
{
char version[256];
- int major, minor, req_major = 1, req_minor = 0;
+ int major, minor, req_major = 1, req_minor = 1;
clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
@@ -277,14 +283,11 @@ public:
{
string build_options = " -cl-fast-relaxed-math ";
- /* Full Shading only on NVIDIA cards at the moment */
- char vendor[256];
-
- clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(vendor), &vendor, NULL);
- string name = vendor;
-
- if(name == "NVIDIA CUDA")
- build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ ";
+ /* full shading only on NVIDIA cards at the moment */
+ if(platform_name == "NVIDIA CUDA")
+ build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
+ if(platform_name == "Apple")
+ build_options += " -D__CL_NO_FLOAT3__ ";
return build_options;
}
@@ -657,12 +660,24 @@ public:
opencl_assert(clFinish(cqCommandQueue));
}
- void task_add(DeviceTask& task)
+ void task_add(DeviceTask& maintask)
{
- if(task.type == DeviceTask::TONEMAP)
- tonemap(task);
- else if(task.type == DeviceTask::PATH_TRACE)
- path_trace(task);
+ list<DeviceTask> tasks;
+
+ /* arbitrary limit to work around apple ATI opencl issue */
+ if(platform_name == "Apple")
+ maintask.split_max_size(tasks, 76800);
+ else
+ tasks.push_back(maintask);
+
+ DeviceTask task;
+
+ foreach(DeviceTask& task, tasks) {
+ if(task.type == DeviceTask::TONEMAP)
+ tonemap(task);
+ else if(task.type == DeviceTask::PATH_TRACE)
+ path_trace(task);
+ }
}
void task_wait()
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index e17544bf7af..939a74660a1 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -143,7 +143,7 @@ endif()
#set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
#add_custom_command(
# OUTPUT ${KERNEL_PREPROCESSED}
-# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DWITH_OPENCL -o ${KERNEL_PREPROCESSED}
+# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 5515966807b..9fbd8566ecd 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -25,12 +25,21 @@
/* no namespaces in opencl */
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
-#define WITH_OPENCL
+
+#ifdef __CL_NO_FLOAT3__
+#define float3 float4
+#endif
+
+#ifdef __CL_NOINLINE__
+#define __noinline __attribute__((noinline))
+#else
+#define __noinline
+#endif
/* in opencl all functions are device functions, so leave this empty */
#define __device
-#define __device_inline
-#define __device_noinline
+#define __device_inline __device
+#define __device_noinline __device __noinline
/* no assert in opencl */
#define kernel_assert(cond)
@@ -68,7 +77,11 @@ __device float kernel_tex_interp_(__global float *data, int width, float x)
#endif
#define make_float2(x, y) ((float2)(x, y))
+#ifdef __CL_NO_FLOAT3__
+#define make_float3(x, y, z) ((float4)(x, y, z, 0.0))
+#else
#define make_float3(x, y, z) ((float3)(x, y, z))
+#endif
#define make_float4(x, y, z, w) ((float4)(x, y, z, w))
#define make_int2(x, y) ((int2)(x, y))
#define make_int3(x, y, z) ((int3)(x, y, z))