diff options
author | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2011-12-20 16:25:45 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2011-12-20 16:25:45 +0400 |
commit | 40259cfe7bf8ab3fa844d87b61096562c9ea2e42 (patch) | |
tree | a241f884d68b517706dac392ca861bd1a9c3f282 /intern | |
parent | 72d2d05770a721986986c137a5cbc36cb796062f (diff) |
Cycles: avoid using float3 in kernel constant memory, to be sure that struct
alignment is compatible between CPU and GPU.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/kernel_camera.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 20 | ||||
-rw-r--r-- | intern/cycles/render/camera.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/render/nodes.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/util/util_math.h | 5 |
5 files changed, 18 insertions, 20 deletions
```diff
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 9cdc2f1f865..2dbdd076891 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -74,8 +74,8 @@ __device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float
 ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f);
 ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f);
- ray->dD.dx = normalize(Ddiff + kernel_data.cam.dx) - normalize(Ddiff);
- ray->dD.dy = normalize(Ddiff + kernel_data.cam.dy) - normalize(Ddiff);
+ ray->dD.dx = normalize(Ddiff + float4_to_float3(kernel_data.cam.dx)) - normalize(Ddiff);
+ ray->dD.dy = normalize(Ddiff + float4_to_float3(kernel_data.cam.dy)) - normalize(Ddiff);
 #endif
 #ifdef __CAMERA_CLIPPING__
@@ -107,8 +107,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa
 #ifdef __RAY_DIFFERENTIALS__
 /* ray differential */
- ray->dP.dx = kernel_data.cam.dx;
- ray->dP.dy = kernel_data.cam.dy;
+ ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
+ ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
 ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f);
 ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 72ebfefbd90..ea73f87a8a5 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -295,7 +295,11 @@ typedef struct ShaderData {
 #endif
 } ShaderData;
-/* Constrant Kernel Data */
+/* Constrant Kernel Data
+ *
+ * These structs are passed from CPU to various devices, and the struct layout
+ * must match exactly. Structs are padded to ensure 16 byte alignment, and we
+ * do not use float3 because its size may not be the same on all devices. */
 typedef struct KernelCamera {
 /* type */
@@ -307,14 +311,8 @@ typedef struct KernelCamera {
 Transform rastertocamera;
 /* differentials */
- float3 dx;
-#ifndef WITH_OPENCL
- float pad1;
-#endif
- float3 dy;
-#ifndef WITH_OPENCL
- float pad2;
-#endif
+ float4 dx;
+ float4 dy;
 /* depth of field */
 float aperturesize;
@@ -355,10 +353,6 @@ typedef struct KernelBackground {
 typedef struct KernelSunSky {
 /* sun direction in spherical and cartesian */
 float theta, phi, pad3, pad4;
- float3 dir;
-#ifndef WITH_OPENCL
- float pad;
-#endif
 /* perez function parameters */
 float zenith_Y, zenith_x, zenith_y, pad2;
diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp
index d5fca87491d..a83ae81844c 100644
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -150,8 +150,8 @@ void Camera::device_update(Device *device, DeviceScene *dscene)
 kcam->ortho = ortho;
 /* store differentials */
- kcam->dx = dx;
- kcam->dy = dy;
+ kcam->dx = float3_to_float4(dx);
+ kcam->dy = float3_to_float4(dy);
 /* clipping */
 kcam->nearclip = nearclip;
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 7d873221cd6..81d156a079d 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -273,7 +273,6 @@ static void sky_texture_precompute(KernelSunSky *ksunsky, float3 dir, float turb
 ksunsky->theta = theta;
 ksunsky->phi = phi;
- ksunsky->dir = dir;
 float theta2 = theta*theta;
 float theta3 = theta*theta*theta;
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 7c56f0fbb12..0a1d8ff4555 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -536,6 +536,11 @@ __device_inline float3 float4_to_float3(const float4 a)
 return make_float3(a.x, a.y, a.z);
 }
+__device_inline float4 float3_to_float4(const float3 a)
+{
+ return make_float4(a.x, a.y, a.z, 1.0f);
+}
+
 #ifndef __KERNEL_GPU__
 __device_inline void print_float3(const char *label, const float3& a)
```