Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@pandora.be>2011-12-20 21:36:56 +0400
committerBrecht Van Lommel <brechtvanlommel@pandora.be>2011-12-20 21:36:56 +0400
commit690de795803c345dc4916148f016b661c2e634e7 (patch)
treebd044d59405a438deece51480f6a29ab229c487d /intern
parent738fdc7b6f43c3e1e838bd4239b36340fa4c2e0f (diff)
Cycles: some tweaks for Apple OpenCL with ATI cards, to get it working up to
the level of ambient occlusion render; shaders still fail. Fixes found with much help from Jens and Dalai.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/device/device.cpp31
-rw-r--r--intern/cycles/device/device.h4
-rw-r--r--intern/cycles/device/device_opencl.cpp43
-rw-r--r--intern/cycles/kernel/CMakeLists.txt2
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h19
5 files changed, 78 insertions, 21 deletions
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index f43ccffe461..6ebc359fdb3 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -24,6 +24,7 @@
#include "util_cuda.h"
#include "util_debug.h"
+#include "util_foreach.h"
#include "util_math.h"
#include "util_opencl.h"
#include "util_opengl.h"
@@ -41,7 +42,31 @@ DeviceTask::DeviceTask(Type type_)
{
}
-void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
+void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size) /* derive a piece count from max_size and append the resulting pieces to `tasks` via split() */
+{
+ int num; /* number of pieces handed to split() below */
+
+ if(type == DISPLACE) {
+ num = (displace_w + max_size - 1)/max_size; /* displace_w/max_size rounded up; NOTE(review): assumes max_size > 0 - confirm with callers */
+ }
+ else {
+ max_size = max(1, max_size/w); /* max_size now counts units along h (presumably rows per piece), never below 1; NOTE(review): divides by w, so w == 0 would fault - confirm callers guarantee w > 0 */
+ num = (h + max_size - 1)/max_size; /* h/max_size rounded up */
+ }
+
+ split(tasks, num);
+}
+
+void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num) /* queue variant: builds the pieces with the list overload of split(), then pushes each one onto `queue` */
+{
+ list<DeviceTask> tasks; /* temporary holder for the pieces */
+ split(tasks, num);
+
+ foreach(DeviceTask& task, tasks)
+ queue.push(task); /* hand every piece over to the queue */
+}
+
+void DeviceTask::split(list<DeviceTask>& tasks, int num)
{
if(type == DISPLACE) {
num = min(displace_w, num);
@@ -55,7 +80,7 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
task.displace_x = tx;
task.displace_w = tw;
- tasks.push(task);
+ tasks.push_back(task);
}
}
else {
@@ -70,7 +95,7 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
task.y = ty;
task.h = th;
- tasks.push(task);
+ tasks.push_back(task);
}
}
}
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index be6a3f144ed..a6a81e7b326 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -23,6 +23,7 @@
#include "device_memory.h"
+#include "util_list.h"
#include "util_string.h"
#include "util_thread.h"
#include "util_types.h"
@@ -67,7 +68,10 @@ public:
int displace_x, displace_w;
DeviceTask(Type type = PATH_TRACE);
+
+ void split(list<DeviceTask>& tasks, int num);
void split(ThreadQueue<DeviceTask>& tasks, int num);
+ void split_max_size(list<DeviceTask>& tasks, int max_size);
};
/* Device */
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 3a1d3032d6e..6014dd0fdb7 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -25,6 +25,7 @@
#include "device.h"
#include "device_intern.h"
+#include "util_foreach.h"
#include "util_map.h"
#include "util_math.h"
#include "util_md5.h"
@@ -52,6 +53,7 @@ public:
map<string, device_memory*> mem_map;
device_ptr null_mem;
bool device_initialized;
+ string platform_name;
const char *opencl_error_string(cl_int err)
{
@@ -175,6 +177,10 @@ public:
if(opencl_error(ciErr))
return;
+ char name[256];
+ clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
+ platform_name = name;
+
cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
if(opencl_error(ciErr))
return;
@@ -191,7 +197,7 @@ public:
{
char version[256];
- int major, minor, req_major = 1, req_minor = 0;
+ int major, minor, req_major = 1, req_minor = 1;
clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
@@ -277,14 +283,11 @@ public:
{
string build_options = " -cl-fast-relaxed-math ";
- /* Full Shading only on NVIDIA cards at the moment */
- char vendor[256];
-
- clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(vendor), &vendor, NULL);
- string name = vendor;
-
- if(name == "NVIDIA CUDA")
- build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ ";
+ /* full shading only on NVIDIA cards at the moment */
+ if(platform_name == "NVIDIA CUDA")
+ build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
+ if(platform_name == "Apple")
+ build_options += " -D__CL_NO_FLOAT3__ ";
return build_options;
}
@@ -657,12 +660,24 @@ public:
opencl_assert(clFinish(cqCommandQueue));
}
- void task_add(DeviceTask& task)
+ void task_add(DeviceTask& maintask) /* run maintask directly, or as several smaller pieces when platform_name is "Apple" */
{
- if(task.type == DeviceTask::TONEMAP)
- tonemap(task);
- else if(task.type == DeviceTask::PATH_TRACE)
- path_trace(task);
+ list<DeviceTask> tasks; /* the work items that are actually executed below */
+
+ /* arbitrary limit to work around apple ATI opencl issue */
+ if(platform_name == "Apple")
+ maintask.split_max_size(tasks, 76800); /* NOTE(review): 76800 == 320*240, presumably a pixel budget per piece - confirm */
+ else
+ tasks.push_back(maintask); /* other platforms: a single unsplit task */
+
+ DeviceTask task; /* NOTE(review): appears unused - the foreach variable below shadows it; candidate for removal */
+
+ foreach(DeviceTask& task, tasks) {
+ if(task.type == DeviceTask::TONEMAP)
+ tonemap(task);
+ else if(task.type == DeviceTask::PATH_TRACE)
+ path_trace(task); /* any other task type falls through without being handled here */
+ }
}
void task_wait()
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index e17544bf7af..939a74660a1 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -143,7 +143,7 @@ endif()
#set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
#add_custom_command(
# OUTPUT ${KERNEL_PREPROCESSED}
-# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DWITH_OPENCL -o ${KERNEL_PREPROCESSED}
+# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 5515966807b..9fbd8566ecd 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -25,12 +25,21 @@
/* no namespaces in opencl */
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
-#define WITH_OPENCL
+
+#ifdef __CL_NO_FLOAT3__ /* the host adds -D__CL_NO_FLOAT3__ to the build options for the "Apple" platform */
+#define float3 float4 /* every use of float3 is textually replaced by float4 */
+#endif
+
+#ifdef __CL_NOINLINE__ /* opt-in switch; nothing in the visible changes defines it */
+#define __noinline __attribute__((noinline))
+#else
+#define __noinline /* expands to nothing */
+#endif
/* in opencl all functions are device functions, so leave this empty */
#define __device
-#define __device_inline
-#define __device_noinline
+#define __device_inline __device
+#define __device_noinline __device __noinline
/* no assert in opencl */
#define kernel_assert(cond)
@@ -68,7 +77,11 @@ __device float kernel_tex_interp_(__global float *data, int width, float x)
#endif
#define make_float2(x, y) ((float2)(x, y))
+#ifdef __CL_NO_FLOAT3__ /* float3 is being substituted by float4, so build a float4 with a zero fourth component */
+#define make_float3(x, y, z) ((float4)(x, y, z, 0.0)) /* NOTE(review): 0.0 is a double-precision literal in OpenCL C unless -cl-single-precision-constant is passed; 0.0f may be intended - confirm */
+#else
#define make_float3(x, y, z) ((float3)(x, y, z))
+#endif
#define make_float4(x, y, z, w) ((float4)(x, y, z, w))
#define make_int2(x, y) ((int2)(x, y))
#define make_int3(x, y, z) ((int3)(x, y, z))