Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/blender/blender_sync.cpp | 2
-rw-r--r-- intern/cycles/kernel/closure/bssrdf.h | 8
-rw-r--r-- intern/cycles/kernel/kernel_bvh.h | 4
-rw-r--r-- intern/cycles/kernel/kernel_compat_cpu.h | 2
-rw-r--r-- intern/cycles/kernel/kernel_displace.h | 8
-rw-r--r-- intern/cycles/kernel/kernel_globals.h | 4
-rw-r--r-- intern/cycles/kernel/kernel_light.h | 4
-rw-r--r-- intern/cycles/kernel/kernel_object.h | 2
-rw-r--r-- intern/cycles/kernel/kernel_path.h | 2
-rw-r--r-- intern/cycles/kernel/svm/svm.h | 6
-rw-r--r-- intern/cycles/kernel/svm/svm_brick.h | 4
-rw-r--r-- intern/cycles/kernel/svm/svm_checker.h | 6
-rw-r--r-- intern/cycles/kernel/svm/svm_closure.h | 12
-rw-r--r-- intern/cycles/kernel/svm/svm_convert.h | 2
-rw-r--r-- intern/cycles/kernel/svm/svm_fresnel.h | 4
-rw-r--r-- intern/cycles/kernel/svm/svm_hsv.h | 4
-rw-r--r-- intern/cycles/kernel/svm/svm_musgrave.h | 20
-rw-r--r-- intern/cycles/kernel/svm/svm_noise.h | 2
-rw-r--r-- intern/cycles/kernel/svm/svm_ramp.h | 2
-rw-r--r-- intern/cycles/kernel/svm/svm_texture.h | 10
-rw-r--r-- intern/cycles/kernel/svm/svm_value.h | 4
-rw-r--r-- intern/cycles/util/util_math.h | 31
-rw-r--r-- intern/cycles/util/util_types.h | 15
23 files changed, 96 insertions, 62 deletions
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index b4dbd46bd71..b324385134b 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -379,7 +379,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use
params.background = background;
/* samples */
- if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU){
+ if(get_boolean(cscene, "progressive") == 0 && params.device.type == DEVICE_CPU) {
if(background) {
params.samples = get_int(cscene, "aa_samples");
}
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index 844e0433bae..486de4ca65f 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -68,13 +68,13 @@ __device float bssrdf_reduced_albedo_Rd(float alpha_, float A, float ro)
{
float sq;
- sq = sqrt(3.0f*(1.0f - alpha_));
+ sq = sqrtf(3.0f*(1.0f - alpha_));
return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro;
}
__device float bssrdf_compute_reduced_albedo(float A, float ro)
{
- const float tolerance = 1e-8;
+ const float tolerance = 1e-8f;
const int max_iteration_count = 20;
float d, fsub, xn_1 = 0.0f, xn = 1.0f, fxn, fxn_1;
int i;
@@ -138,8 +138,8 @@ __device float bssrdf_original(const BSSRDFParams *ss, float r)
float rr = r*r;
float sr, sv, Rdr, Rdv;
- sr = sqrt(rr + ss->zr*ss->zr);
- sv = sqrt(rr + ss->zv*ss->zv);
+ sr = sqrtf(rr + ss->zr*ss->zr);
+ sv = sqrtf(rr + ss->zv*ss->zv);
Rdr = ss->zr*(1.0f + ss->sigma_tr*sr)*expf(-ss->sigma_tr*sr)/(sr*sr*sr);
Rdv = ss->zv*(1.0f + ss->sigma_tr*sv)*expf(-ss->sigma_tr*sv)/(sv*sv*sv);
diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h
index aff9d586e7a..ae9677ed5cb 100644
--- a/intern/cycles/kernel/kernel_bvh.h
+++ b/intern/cycles/kernel/kernel_bvh.h
@@ -171,8 +171,8 @@ __device_inline void bvh_node_intersect(KernelGlobals *kg,
/* decide which nodes to traverse next */
#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
- *traverseChild0 = (c0max >= c0min) && (__float_as_int(cnodes.z) & visibility);
- *traverseChild1 = (c1max >= c1min) && (__float_as_int(cnodes.w) & visibility);
+ *traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
+ *traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
#else
*traverseChild0 = (c0max >= c0min);
*traverseChild1 = (c1max >= c1min);
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 9972a63bfbb..a32b33a727a 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -88,7 +88,7 @@ template<typename T> struct texture_image {
float frac(float x, int *ix)
{
- int i = (int)x - ((x < 0.0f)? 1: 0);
+ int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
*ix = i;
return x - (float)i;
}
diff --git a/intern/cycles/kernel/kernel_displace.h b/intern/cycles/kernel/kernel_displace.h
index 8b95e413b3f..c7fd03e7603 100644
--- a/intern/cycles/kernel/kernel_displace.h
+++ b/intern/cycles/kernel/kernel_displace.h
@@ -28,8 +28,8 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou
/* setup shader data */
int object = in.x;
int prim = in.y;
- float u = __int_as_float(in.z);
- float v = __int_as_float(in.w);
+ float u = __uint_as_float(in.z);
+ float v = __uint_as_float(in.w);
shader_setup_from_displace(kg, &sd, object, prim, u, v);
@@ -41,8 +41,8 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou
else { // SHADER_EVAL_BACKGROUND
/* setup ray */
Ray ray;
- float u = __int_as_float(in.x);
- float v = __int_as_float(in.y);
+ float u = __uint_as_float(in.x);
+ float v = __uint_as_float(in.y);
ray.P = make_float3(0.0f, 0.0f, 0.0f);
ray.D = equirectangular_to_direction(u, v);
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index abf1f5b4cb0..fbb5060c409 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -94,7 +94,7 @@ __device float lookup_table_read(KernelGlobals *kg, float x, int offset, int siz
{
x = clamp(x, 0.0f, 1.0f)*(size-1);
- int index = min((int)x, size-1);
+ int index = min(float_to_int(x), size-1);
int nindex = min(index+1, size-1);
float t = x - index;
@@ -110,7 +110,7 @@ __device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int off
{
y = clamp(y, 0.0f, 1.0f)*(ysize-1);
- int index = min((int)y, ysize-1);
+ int index = min(float_to_int(y), ysize-1);
int nindex = min(index+1, ysize-1);
float t = y - index;
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index d4d78e413d2..9f198c6c595 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -124,8 +124,8 @@ __device float background_light_pdf(KernelGlobals *kg, float3 direction)
if(sin_theta == 0.0f)
return 0.0f;
- int index_u = clamp((int)(uv.x * res), 0, res - 1);
- int index_v = clamp((int)(uv.y * res), 0, res - 1);
+ int index_u = clamp(float_to_int(uv.x * res), 0, res - 1);
+ int index_v = clamp(float_to_int(uv.y * res), 0, res - 1);
/* pdfs in V direction */
float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * (res + 1) + res);
diff --git a/intern/cycles/kernel/kernel_object.h b/intern/cycles/kernel/kernel_object.h
index 40aa4753daa..bb5ed50c995 100644
--- a/intern/cycles/kernel/kernel_object.h
+++ b/intern/cycles/kernel/kernel_object.h
@@ -201,7 +201,7 @@ __device_inline uint object_particle_id(KernelGlobals *kg, int object)
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
float4 f = kernel_tex_fetch(__objects, offset);
- return __float_as_int(f.w);
+ return __float_as_uint(f.w);
}
__device_inline float3 object_dupli_generated(KernelGlobals *kg, int object)
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index d2de9ba2b44..5915dfed08b 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -44,7 +44,7 @@
CCL_NAMESPACE_BEGIN
typedef struct PathState {
- uint flag;
+ int flag;
int bounce;
int diffuse_bounce;
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 8f1d2aa0e16..16b684e79a0 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -72,7 +72,7 @@ __device_inline float stack_load_float(float *stack, uint a)
__device_inline float stack_load_float_default(float *stack, uint a, uint value)
{
- return (a == (uint)SVM_STACK_INVALID)? __int_as_float(value): stack_load_float(stack, a);
+ return (a == (uint)SVM_STACK_INVALID)? __uint_as_float(value): stack_load_float(stack, a);
}
__device_inline void stack_store_float(float *stack, uint a, float f)
@@ -118,7 +118,7 @@ __device_inline uint4 read_node(KernelGlobals *kg, int *offset)
__device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
{
uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
- float4 f = make_float4(__int_as_float(node.x), __int_as_float(node.y), __int_as_float(node.z), __int_as_float(node.w));
+ float4 f = make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
(*offset)++;
return f;
}
@@ -126,7 +126,7 @@ __device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
__device_inline float4 fetch_node_float(KernelGlobals *kg, int offset)
{
uint4 node = kernel_tex_fetch(__svm_nodes, offset);
- return make_float4(__int_as_float(node.x), __int_as_float(node.y), __int_as_float(node.z), __int_as_float(node.w));
+ return make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w));
}
__device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index 49466c07a97..43dc1a2f295 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -38,14 +38,14 @@ __device_noinline float2 svm_brick(float3 p, float scale, float mortar_size, flo
float offset = 0.0f;
float x, y;
- rownum = (int)floor(p.y / row_height);
+ rownum = floor_to_int(p.y / row_height);
if(offset_frequency && squash_frequency) {
brick_width *= ((int)(rownum) % squash_frequency ) ? 1.0f : squash_amount; /* squash */
offset = ((int)(rownum) % offset_frequency ) ? 0 : (brick_width*offset_amount); /* offset */
}
- bricknum = (int)floor((p.x+offset) / brick_width);
+ bricknum = floor_to_int((p.x+offset) / brick_width);
x = (p.x+offset) - brick_width*bricknum;
y = p.y - row_height*rownum;
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index c5db0383bc5..ffac07e9cd5 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -29,9 +29,9 @@ __device_noinline float svm_checker(float3 p, float scale)
p.y = (p.y + 0.00001f)*0.9999f;
p.z = (p.z + 0.00001f)*0.9999f;
- int xi = (int)fabsf(floorf(p.x));
- int yi = (int)fabsf(floorf(p.y));
- int zi = (int)fabsf(floorf(p.z));
+ int xi = float_to_int(fabsf(floorf(p.x)));
+ int yi = float_to_int(fabsf(floorf(p.y)));
+ int zi = float_to_int(fabsf(floorf(p.z)));
return ((xi % 2 == yi % 2) == (zi % 2))? 1.0f: 0.0f;
}
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index bf9823aa53e..847195134e8 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -119,8 +119,8 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
float3 N = stack_valid(data_node.y)? stack_load_float3(stack, data_node.y): sd->N;
#endif
- float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __int_as_float(node.z);
- float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __int_as_float(node.w);
+ float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
+ float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
switch(type) {
case CLOSURE_BSDF_DIFFUSE_ID: {
@@ -422,8 +422,8 @@ __device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *
float mix_weight = 1.0f;
#endif
- float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __int_as_float(node.z);
- //float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __int_as_float(node.w);
+ float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
+ //float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
switch(type) {
case CLOSURE_VOLUME_TRANSPARENT_ID: {
@@ -553,13 +553,13 @@ __device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight
__device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b)
{
- float3 weight = make_float3(__int_as_float(r), __int_as_float(g), __int_as_float(b));
+ float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
svm_node_closure_store_weight(sd, weight);
}
__device void svm_node_emission_set_weight_total(KernelGlobals *kg, ShaderData *sd, uint r, uint g, uint b)
{
- float3 weight = make_float3(__int_as_float(r), __int_as_float(g), __int_as_float(b));
+ float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
if(sd->object != ~0)
weight /= object_surface_area(kg, sd->object);
diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h
index f74915a4bc9..0050813e2c0 100644
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -53,7 +53,7 @@ __device void svm_node_convert(ShaderData *sd, float *stack, uint type, uint fro
}
case NODE_CONVERT_VI: {
float3 f = stack_load_float3(stack, from);
- int i = (f.x + f.y + f.z)*(1.0f/3.0f);
+ int i = (int)((f.x + f.y + f.z)*(1.0f/3.0f));
stack_store_int(stack, to, i);
break;
}
diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h
index d5b415a87ce..492e6070dfd 100644
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN
__device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint out_offset)
{
- float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __int_as_float(ior_value);
+ float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value);
eta = fmaxf(eta, 1.0f + 1e-5f);
eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta;
@@ -37,7 +37,7 @@ __device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
{
uint blend_offset = node.y;
uint blend_value = node.z;
- float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __int_as_float(blend_value);
+ float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __uint_as_float(blend_value);
uint type, out_offset;
decode_node_uchar4(node.w, &type, &out_offset, NULL, NULL);
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index 26b6141ee3f..348f13f59f2 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -77,7 +77,7 @@ __device float3 hsv_to_rgb(float3 hsv)
h = 0.0f;
h *= 6.0f;
- i = floor(h);
+ i = floorf(h);
f = h - i;
rgb = make_float3(f, f, f);
p = v*(1.0f-s);
@@ -112,7 +112,7 @@ __device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint
/* remember: fmod doesn't work for negative numbers here */
color.x += hue + 0.5f;
- color.x = fmod(color.x, 1.0f);
+ color.x = fmodf(color.x, 1.0f);
color.y *= sat;
color.z *= val;
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 425909e59f1..f0ad19a8061 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -32,10 +32,10 @@ __device_noinline float noise_musgrave_fBm(float3 p, NodeNoiseBasis basis, float
float rmd;
float value = 0.0f;
float pwr = 1.0f;
- float pwHL = pow(lacunarity, -H);
+ float pwHL = powf(lacunarity, -H);
int i;
- for(i = 0; i < (int)octaves; i++) {
+ for(i = 0; i < float_to_int(octaves); i++) {
value += snoise(p) * pwr;
pwr *= pwHL;
p *= lacunarity;
@@ -60,10 +60,10 @@ __device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis ba
float rmd;
float value = 1.0f;
float pwr = 1.0f;
- float pwHL = pow(lacunarity, -H);
+ float pwHL = powf(lacunarity, -H);
int i;
- for(i = 0; i < (int)octaves; i++) {
+ for(i = 0; i < float_to_int(octaves); i++) {
value *= (pwr * snoise(p) + 1.0f);
pwr *= pwHL;
p *= lacunarity;
@@ -87,7 +87,7 @@ __device_noinline float noise_musgrave_multi_fractal(float3 p, NodeNoiseBasis ba
__device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset)
{
float value, increment, rmd;
- float pwHL = pow(lacunarity, -H);
+ float pwHL = powf(lacunarity, -H);
float pwr = pwHL;
int i;
@@ -95,7 +95,7 @@ __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis b
value = offset + snoise(p);
p *= lacunarity;
- for(i = 1; i < (int)octaves; i++) {
+ for(i = 1; i < float_to_int(octaves); i++) {
increment = (snoise(p) + offset) * pwr * value;
value += increment;
pwr *= pwHL;
@@ -122,7 +122,7 @@ __device_noinline float noise_musgrave_hetero_terrain(float3 p, NodeNoiseBasis b
__device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
{
float result, signal, weight, rmd;
- float pwHL = pow(lacunarity, -H);
+ float pwHL = powf(lacunarity, -H);
float pwr = pwHL;
int i;
@@ -130,7 +130,7 @@ __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseB
weight = gain * result;
p *= lacunarity;
- for(i = 1; (weight > 0.001f) && (i < (int)octaves); i++) {
+ for(i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
if(weight > 1.0f)
weight = 1.0f;
@@ -159,7 +159,7 @@ __device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, NodeNoiseB
__device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseBasis basis, float H, float lacunarity, float octaves, float offset, float gain)
{
float result, signal, weight;
- float pwHL = pow(lacunarity, -H);
+ float pwHL = powf(lacunarity, -H);
float pwr = pwHL;
int i;
@@ -168,7 +168,7 @@ __device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, NodeNoiseB
result = signal;
weight = 1.0f;
- for(i = 1; i < (int)octaves; i++) {
+ for(i = 1; i < float_to_int(octaves); i++) {
p *= lacunarity;
weight = clamp(signal * gain, 0.0f, 1.0f);
signal = offset - fabsf(snoise(p));
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 5ead6486dd6..a55c635b679 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN
__device int quick_floor(float x)
{
- return (int)x - ((x < 0) ? 1 : 0);
+ return float_to_int(x) - ((x < 0) ? 1 : 0);
}
__device float bits_to_01(uint bits)
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index d33a2dfdc74..24d6dc3c282 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -26,7 +26,7 @@ __device float4 rgb_ramp_lookup(KernelGlobals *kg, int offset, float f, bool int
f = clamp(f, 0.0f, 1.0f)*(RAMP_TABLE_SIZE-1);
/* clamp int as well in case of NaN */
- int i = clamp((int)f, 0, RAMP_TABLE_SIZE-1);
+ int i = clamp(float_to_int(f), 0, RAMP_TABLE_SIZE-1);
float t = f - (float)i;
float4 a = fetch_node_float(kg, offset+i);
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
index a4f6691435c..ba5b772b3a1 100644
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ b/intern/cycles/kernel/svm/svm_texture.h
@@ -51,9 +51,9 @@ __device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
/* returns distances in da and point coords in pa */
int xx, yy, zz, xi, yi, zi;
- xi = (int)floorf(p.x);
- yi = (int)floorf(p.y);
- zi = (int)floorf(p.z);
+ xi = floor_to_int(p.x);
+ yi = floor_to_int(p.y);
+ zi = floor_to_int(p.z);
da[0] = 1e10f;
da[1] = 1e10f;
@@ -186,7 +186,7 @@ __device float noise_wave(NodeWaveBasis wave, float a)
}
else if(wave == NODE_WAVE_SAW) {
float b = 2.0f*M_PI_F;
- int n = (int)(a / b);
+ int n = float_to_int(a / b);
a -= n*b;
if(a < 0.0f) a += b;
@@ -212,7 +212,7 @@ __device_noinline float noise_turbulence(float3 p, NodeNoiseBasis basis, float o
int i, n;
octaves = clamp(octaves, 0.0f, 16.0f);
- n = (int)octaves;
+ n = float_to_int(octaves);
for(i = 0; i <= n; i++) {
float t = noise_basis(fscale*p, basis);
diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h
index 80cb285f80c..86d98ee67d6 100644
--- a/intern/cycles/kernel/svm/svm_value.h
+++ b/intern/cycles/kernel/svm/svm_value.h
@@ -22,14 +22,14 @@ CCL_NAMESPACE_BEGIN
__device void svm_node_value_f(KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
{
- stack_store_float(stack, out_offset, __int_as_float(ivalue));
+ stack_store_float(stack, out_offset, __uint_as_float(ivalue));
}
__device void svm_node_value_v(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset)
{
/* read extra data */
uint4 node1 = read_node(kg, offset);
- float3 p = make_float3(__int_as_float(node1.y), __int_as_float(node1.z), __int_as_float(node1.w));
+ float3 p = make_float3(__uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w));
stack_store_float3(stack, out_offset, p);
}
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 6fe1b2bcf54..68668d88d44 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -163,6 +163,25 @@ __device_inline float clamp(float a, float mn, float mx)
#endif
+__device_inline int float_to_int(float f)
+{
+#ifdef __KERNEL_SSE2__
+ return _mm_cvtt_ss2si(_mm_load_ss(&f));
+#else
+ return (int)f;
+#endif
+}
+
+__device_inline int floor_to_int(float f)
+{
+ return float_to_int(floorf(f));
+}
+
+__device_inline int ceil_to_int(float f)
+{
+ return float_to_int(ceilf(f));
+}
+
__device_inline float signf(float f)
{
return (f < 0.0f)? -1.0f: 1.0f;
@@ -990,23 +1009,23 @@ __device_inline void print_int4(const char *label, const int4& a)
#ifndef __KERNEL_OPENCL__
-__device_inline unsigned int as_int(uint i)
+__device_inline int as_int(uint i)
{
- union { unsigned int ui; int i; } u;
+ union { uint ui; int i; } u;
u.ui = i;
return u.i;
}
-__device_inline unsigned int as_uint(int i)
+__device_inline uint as_uint(int i)
{
- union { unsigned int ui; int i; } u;
+ union { uint ui; int i; } u;
u.i = i;
return u.ui;
}
-__device_inline unsigned int as_uint(float f)
+__device_inline uint as_uint(float f)
{
- union { unsigned int i; float f; } u;
+ union { uint i; float f; } u;
u.f = f;
return u.i;
}
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index bb6de1197e7..fe1cb61ffa9 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -70,6 +70,21 @@
#include <tmmintrin.h> /* SSE 3 */
#include <smmintrin.h> /* SSE 4 */
+#define __KERNEL_SSE2__
+#define __KERNEL_SSE3__
+#define __KERNEL_SSE4__
+
+#else
+
+#ifdef __x86_64__
+
+#include <xmmintrin.h> /* SSE 1 */
+#include <emmintrin.h> /* SSE 2 */
+
+#define __KERNEL_SSE2__
+
+#endif
+
#endif
#ifndef _WIN32