From ceedd5bd35273c27225f84cde2948e5b774e92fe Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Fri, 30 Nov 2012 07:27:17 +0000 Subject: Fix cycles CUDA sm 1.3 build with 32 bit compiler, tweaked voronoi and brick code so that it can be uninlined. --- intern/cycles/kernel/kernel_types.h | 1 + intern/cycles/kernel/svm/svm.h | 4 ++++ intern/cycles/kernel/svm/svm_brick.h | 21 ++++++++++--------- intern/cycles/kernel/svm/svm_texture.h | 37 +++++++++++++--------------------- intern/cycles/kernel/svm/svm_voronoi.h | 9 +++------ 5 files changed, 33 insertions(+), 39 deletions(-) (limited to 'intern') diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index a7bf6b28e7e..f519fd989fa 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -102,6 +102,7 @@ CCL_NAMESPACE_BEGIN #define __IMAGE_TEXTURES__ #define __EXTRA_NODES__ #define __HOLDOUT__ +#define __NORMAL_MAP__ #endif #ifdef __KERNEL_ADV_SHADING__ diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 886fce63fd4..9c79886fdca 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -401,9 +401,13 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_LIGHT_FALLOFF: svm_node_light_falloff(sd, stack, node); break; +#endif +#ifdef __ANISOTROPIC__ case NODE_TANGENT: svm_node_tangent(kg, sd, stack, node); break; +#endif +#ifdef __NORMAL_MAP__ case NODE_NORMAL_MAP: svm_node_normal_map(kg, sd, stack, node); break; diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h index 7e38ac84bf1..49466c07a97 100644 --- a/intern/cycles/kernel/svm/svm_brick.h +++ b/intern/cycles/kernel/svm/svm_brick.h @@ -28,9 +28,9 @@ __device_noinline float brick_noise(int n) /* fast integer noise */ return 0.5f * ((float)nn / 1073741824.0f); } -__device_noinline float svm_brick(float3 p, float scale, float mortar_size, float bias, +__device_noinline float2 svm_brick(float3 p, float scale, float mortar_size, float bias, float brick_width, float row_height, float offset_amount, int offset_frequency, - float squash_amount, int squash_frequency, float *tint) + float squash_amount, int squash_frequency) { p *= scale; @@ -50,11 +50,12 @@ __device_noinline float svm_brick(float3 p, float scale, float mortar_size, floa x = (p.x+offset) - brick_width*bricknum; y = p.y - row_height*rownum; - *tint = clamp((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias), 0.0f, 1.0f); + return make_float2( + clamp((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias), 0.0f, 1.0f), - return (x < mortar_size || y < mortar_size || + (x < mortar_size || y < mortar_size || x > (brick_width - mortar_size) || - y > (row_height - mortar_size)) ? 1.0f : 0.0f; + y > (row_height - mortar_size)) ? 1.0f : 0.0f); } __device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) @@ -70,8 +71,6 @@ __device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack /* RNA properties */ uint offset_frequency, squash_frequency; - float tint = 0.0f; - decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset); decode_node_uchar4(node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset); decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, NULL); @@ -92,9 +91,11 @@ __device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack float offset_amount = __int_as_float(node3.z); float squash_amount = __int_as_float(node3.w); - float f = svm_brick(co, scale, mortar_size, bias, brick_width, row_height, - offset_amount, offset_frequency, squash_amount, squash_frequency, - &tint); + float2 f2 = svm_brick(co, scale, mortar_size, bias, brick_width, row_height, + offset_amount, offset_frequency, squash_amount, squash_frequency); + + float tint = f2.x; + float f = f2.y; if(f != 1.0f) { float facm = 1.0f - tint; diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h index 6c22d98e0df..a4f6691435c 100644 --- a/intern/cycles/kernel/svm/svm_texture.h +++ b/intern/cycles/kernel/svm/svm_texture.h @@ -42,8 +42,12 @@ __device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d, fl /* Voronoi / Worley like */ -__device_noinline void voronoi(float3 p, NodeDistanceMetric distance_metric, float e, float da[4], float3 pa[4]) +__device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2) { + float da[4]; + float3 pa[4]; + NodeDistanceMetric distance_metric = NODE_VORONOI_DISTANCE_SQUARED; + /* returns distances in da and point coords in pa */ int xx, yy, zz, xi, yi, zi; @@ -105,33 +109,20 @@ __device_noinline void voronoi(float3 p, NodeDistanceMetric distance_metric, flo } } } -} - -__device float voronoi_Fn(float3 p, int n) -{ - float da[4]; - float3 pa[4]; - - voronoi(p, NODE_VORONOI_DISTANCE_SQUARED, 0, da, pa); - - return da[n]; -} -__device float voronoi_FnFn(float3 p, int n1, int n2) -{ - float da[4]; - float3 pa[4]; + float4 result = make_float4(pa[n1].x, pa[n1].y, pa[n1].z, da[n1]); - voronoi(p, NODE_VORONOI_DISTANCE_SQUARED, 0, da, pa); + if(n2 != -1) + result = make_float4(pa[n2].x, pa[n2].y, pa[n2].z, da[n2]) - result; - return da[n2] - da[n1]; + return result; } -__device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0); } -__device float voronoi_F2(float3 p) { return voronoi_Fn(p, 1); } -__device float voronoi_F3(float3 p) { return voronoi_Fn(p, 2); } -__device float voronoi_F4(float3 p) { return voronoi_Fn(p, 3); } -__device float voronoi_F1F2(float3 p) { return voronoi_FnFn(p, 0, 1); } +__device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0.0f, 0, -1).w; } +__device float voronoi_F2(float3 p) { return voronoi_Fn(p, 0.0f, 1, -1).w; } +__device float voronoi_F3(float3 p) { return voronoi_Fn(p, 0.0f, 2, -1).w; } +__device float voronoi_F4(float3 p) { return voronoi_Fn(p, 0.0f, 3, -1).w; } +__device float voronoi_F1F2(float3 p) { return voronoi_Fn(p, 0.0f, 0, 1).w; } __device float voronoi_Cr(float3 p) { diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index 7e7bd970320..55110d06f22 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -23,21 +23,18 @@ CCL_NAMESPACE_BEGIN __device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float scale, float3 p) { /* compute distance and point coordinate of 4 nearest neighbours */ - float da[4]; - float3 pa[4]; - - voronoi(p*scale, NODE_VORONOI_DISTANCE_SQUARED, 1.0f, da, pa); + float4 dpa0 = voronoi_Fn(p*scale, 1.0f, 0, -1); /* output */ float fac; float3 color; if(coloring == NODE_VORONOI_INTENSITY) { - fac = fabsf(da[0]); + fac = fabsf(dpa0.w); color = make_float3(fac, fac, fac); } else { - color = cellnoise_color(pa[0]); + color = cellnoise_color(float4_to_float3(dpa0)); fac = average(color); } -- cgit v1.2.3