Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeroen Bakker <jeroen@blender.org>2020-04-30 15:15:10 +0300
committerJeroen Bakker <jeroen@blender.org>2020-04-30 16:04:40 +0300
commit6121c28501eff722717fb8b777f6004fb6d4e152 (patch)
tree5f7ab035da266265b2224ce37477ef06752fdb0d
parent36bf067ddcb4e7a7612476e9badcf8d44afeee2a (diff)
Fix T75895: Unable to Compile Cycles on NAVI/Linux
This patch will add some compiler hints to break unrolling in the nestled for loops of the voronoi node. Reviewed by: Brecht van Lommel Differential Revision: https://developer.blender.org/D7574
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h1
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h1
-rw-r--r--intern/cycles/kernel/kernel_compat_optix.h1
-rw-r--r--intern/cycles/kernel/svm/svm_voronoi.h21
-rw-r--r--intern/cycles/util/util_defines.h1
5 files changed, 18 insertions, 7 deletions
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 3c5a10540d5..4094e173da9 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -71,6 +71,7 @@ __device__ half __float2half(const float f)
#define ccl_may_alias
#define ccl_addr_space
#define ccl_restrict __restrict__
+#define ccl_loop_no_unroll
/* TODO(sergey): In theory we might use references with CUDA, however
* performance impact yet to be investigated.
*/
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 4963f1cd196..35dc95ca10d 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -43,6 +43,7 @@
#define ccl_local __local
#define ccl_local_param __local
#define ccl_private __private
+#define ccl_loop_no_unroll __attribute__((opencl_unroll_hint(1)))
#define ccl_restrict restrict
#define ccl_ref
#define ccl_align(n) __attribute__((aligned(n)))
diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h
index 7068acc3a32..970f5cf864c 100644
--- a/intern/cycles/kernel/kernel_compat_optix.h
+++ b/intern/cycles/kernel/kernel_compat_optix.h
@@ -70,6 +70,7 @@ __device__ half __float2half(const float f)
#define ccl_private
#define ccl_may_alias
#define ccl_addr_space
+#define ccl_loop_no_unroll
#define ccl_restrict __restrict__
#define ccl_ref
#define ccl_align(n) __align__(n)
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index 2ad22592eef..f0fc0068fa2 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -684,7 +684,8 @@ ccl_device void voronoi_f1_4d(float4 coord,
float4 targetPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
- for (int j = -1; j <= 1; j++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 pointPosition = cellOffset +
@@ -722,7 +723,8 @@ ccl_device void voronoi_smooth_f1_4d(float4 coord,
float4 smoothPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int u = -2; u <= 2; u++) {
for (int k = -2; k <= 2; k++) {
- for (int j = -2; j <= 2; j++) {
+ ccl_loop_no_unroll for (int j = -2; j <= 2; j++)
+ {
for (int i = -2; i <= 2; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 pointPosition = cellOffset +
@@ -765,7 +767,8 @@ ccl_device void voronoi_f2_4d(float4 coord,
float4 positionF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
- for (int j = -1; j <= 1; j++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 pointPosition = cellOffset +
@@ -803,7 +806,8 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
float minDistance = 8.0f;
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
- for (int j = -1; j <= 1; j++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 vectorToPoint = cellOffset +
@@ -822,7 +826,8 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
minDistance = 8.0f;
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
- for (int j = -1; j <= 1; j++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 vectorToPoint = cellOffset +
@@ -851,7 +856,8 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
float minDistance = 8.0f;
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
- for (int j = -1; j <= 1; j++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 pointPosition = cellOffset +
@@ -871,7 +877,8 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
float4 closestPointToClosestPoint = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
- for (int j = -1; j <= 1; j++) {
+ ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+ {
for (int i = -1; i <= 1; i++) {
if (i == 0 && j == 0 && k == 0 && u == 0) {
continue;
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index 24a20a969ab..e8e414587fb 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -45,6 +45,7 @@
# define ccl_restrict __restrict
# define ccl_ref &
# define ccl_optional_struct_init
+# define ccl_loop_no_unroll
# define __KERNEL_WITH_SSE_ALIGN__
# if defined(_WIN32) && !defined(FREE_WINDOWS)