Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@pandora.be> 2011-12-20 16:25:45 +0400
committer: Brecht Van Lommel <brechtvanlommel@pandora.be> 2011-12-20 16:25:45 +0400
commit: 40259cfe7bf8ab3fa844d87b61096562c9ea2e42 (patch)
tree: a241f884d68b517706dac392ca861bd1a9c3f282 /intern
parent: 72d2d05770a721986986c137a5cbc36cb796062f (diff)
Cycles: avoid using float3 in kernel constant memory, just so we're sure alignment
is working compatible between cpu and gpu.
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/kernel/kernel_camera.h | 8
-rw-r--r-- intern/cycles/kernel/kernel_types.h | 20
-rw-r--r-- intern/cycles/render/camera.cpp | 4
-rw-r--r-- intern/cycles/render/nodes.cpp | 1
-rw-r--r-- intern/cycles/util/util_math.h | 5
5 files changed, 18 insertions, 20 deletions
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 9cdc2f1f865..2dbdd076891 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -74,8 +74,8 @@ __device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float
ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f);
ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f);
- ray->dD.dx = normalize(Ddiff + kernel_data.cam.dx) - normalize(Ddiff);
- ray->dD.dy = normalize(Ddiff + kernel_data.cam.dy) - normalize(Ddiff);
+ ray->dD.dx = normalize(Ddiff + float4_to_float3(kernel_data.cam.dx)) - normalize(Ddiff);
+ ray->dD.dy = normalize(Ddiff + float4_to_float3(kernel_data.cam.dy)) - normalize(Ddiff);
#endif
#ifdef __CAMERA_CLIPPING__
@@ -107,8 +107,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa
#ifdef __RAY_DIFFERENTIALS__
/* ray differential */
- ray->dP.dx = kernel_data.cam.dx;
- ray->dP.dy = kernel_data.cam.dy;
+ ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
+ ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f);
ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 72ebfefbd90..ea73f87a8a5 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -295,7 +295,11 @@ typedef struct ShaderData {
#endif
} ShaderData;
-/* Constrant Kernel Data */
+/* Constrant Kernel Data
+ *
+ * These structs are passed from CPU to various devices, and the struct layout
+ * must match exactly. Structs are padded to ensure 16 byte alignment, and we
+ * do not use float3 because its size may not be the same on all devices. */
typedef struct KernelCamera {
/* type */
@@ -307,14 +311,8 @@ typedef struct KernelCamera {
Transform rastertocamera;
/* differentials */
- float3 dx;
-#ifndef WITH_OPENCL
- float pad1;
-#endif
- float3 dy;
-#ifndef WITH_OPENCL
- float pad2;
-#endif
+ float4 dx;
+ float4 dy;
/* depth of field */
float aperturesize;
@@ -355,10 +353,6 @@ typedef struct KernelBackground {
typedef struct KernelSunSky {
/* sun direction in spherical and cartesian */
float theta, phi, pad3, pad4;
- float3 dir;
-#ifndef WITH_OPENCL
- float pad;
-#endif
/* perez function parameters */
float zenith_Y, zenith_x, zenith_y, pad2;
diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp
index d5fca87491d..a83ae81844c 100644
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -150,8 +150,8 @@ void Camera::device_update(Device *device, DeviceScene *dscene)
kcam->ortho = ortho;
/* store differentials */
- kcam->dx = dx;
- kcam->dy = dy;
+ kcam->dx = float3_to_float4(dx);
+ kcam->dy = float3_to_float4(dy);
/* clipping */
kcam->nearclip = nearclip;
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 7d873221cd6..81d156a079d 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -273,7 +273,6 @@ static void sky_texture_precompute(KernelSunSky *ksunsky, float3 dir, float turb
ksunsky->theta = theta;
ksunsky->phi = phi;
- ksunsky->dir = dir;
float theta2 = theta*theta;
float theta3 = theta*theta*theta;
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 7c56f0fbb12..0a1d8ff4555 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -536,6 +536,11 @@ __device_inline float3 float4_to_float3(const float4 a)
return make_float3(a.x, a.y, a.z);
}
+__device_inline float4 float3_to_float4(const float3 a)
+{
+ return make_float4(a.x, a.y, a.z, 1.0f);
+}
+
#ifndef __KERNEL_GPU__
__device_inline void print_float3(const char *label, const float3& a)