diff options
author | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2012-01-27 17:58:32 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2012-01-27 17:58:32 +0400 |
commit | b023665551d2059d34ffa45d4234abb3d1c92736 (patch) | |
tree | 1f2013fa4cf048a52b0282a3edec8db2d14bbb1c /intern | |
parent | 3062798de3a34d461578da4bd9d8d4f700f70ddb (diff) |
Cycles: another fix for CUDA render passes, needed to align float4 passes.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 11 | ||||
-rw-r--r-- | intern/cycles/render/buffers.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/film.cpp | 14 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 5 |
4 files changed, 23 insertions, 9 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 14bcaa94130..0c08baae3ff 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -106,11 +106,6 @@ public: } } - static int cuda_align_up(int& offset, int alignment) - { - return (offset + alignment - 1) & ~(alignment - 1); - } - #ifdef NDEBUG #define cuda_abort() #else @@ -485,7 +480,7 @@ public: offset += sizeof(d_rng_state); int sample = task.sample; - offset = cuda_align_up(offset, __alignof(sample)); + offset = align_up(offset, __alignof(sample)); cuda_assert(cuParamSeti(cuPathTrace, offset, task.sample)) offset += sizeof(task.sample); @@ -549,7 +544,7 @@ public: offset += sizeof(d_buffer); int sample = task.sample; - offset = cuda_align_up(offset, __alignof(sample)); + offset = align_up(offset, __alignof(sample)); cuda_assert(cuParamSeti(cuFilmConvert, offset, task.sample)) offset += sizeof(task.sample); @@ -618,7 +613,7 @@ public: offset += sizeof(d_offset); int shader_eval_type = task.shader_eval_type; - offset = cuda_align_up(offset, __alignof(shader_eval_type)); + offset = align_up(offset, __alignof(shader_eval_type)); cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_eval_type)) offset += sizeof(task.shader_eval_type); diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index 361ead3cd24..dd0ebf7195c 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -71,7 +71,7 @@ int BufferParams::get_passes_size() foreach(Pass& pass, passes) size += pass.components; - return size; + return align_up(size, 4); } /* Render Buffers */ diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index bc51384b873..376e9d6d0ca 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -21,12 +21,20 @@ #include "film.h" #include "scene.h" +#include "util_algorithm.h" #include "util_foreach.h" CCL_NAMESPACE_BEGIN /* Pass */ +static bool 
compare_pass_order(const Pass& a, const Pass& b) +{ + if(a.components == b.components) + return (a.type < b.type); + return (a.components > b.components); +} + void Pass::add(PassType type, vector<Pass>& passes) { Pass pass; @@ -106,6 +114,10 @@ void Pass::add(PassType type, vector<Pass>& passes) } passes.push_back(pass); + + /* order by components, to ensure alignment so passes with size 4 + come first and then passes with size 1 */ + sort(passes.begin(), passes.end(), compare_pass_order); } bool Pass::equals(const vector<Pass>& A, const vector<Pass>& B) @@ -219,6 +231,8 @@ void Film::device_update(Device *device, DeviceScene *dscene) kfilm->pass_stride += pass.components; } + kfilm->pass_stride = align_up(kfilm->pass_stride, 4); + need_update = false; } diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 2c0ae13ad2a..efdda98571a 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -277,6 +277,11 @@ __device float4 make_float4(float x, float y, float z, float w) return a; } +__device int align_up(int offset, int alignment) +{ + return (offset + alignment - 1) & ~(alignment - 1); +} + #endif CCL_NAMESPACE_END |