diff options
author | Dalai Felinto <dfelinto@gmail.com> | 2016-11-16 19:01:19 +0300 |
---|---|---|
committer | Dalai Felinto <dfelinto@gmail.com> | 2016-11-16 19:04:21 +0300 |
commit | 2bcb1b208a4193fb28f1e0c4408b733f5ee2c028 (patch) | |
tree | 8b9260ffd9fb7b371dc66955903c7b0c7f4e7bf9 /intern | |
parent | 930f999f6ea683d02ac490026a52817f1d965377 (diff) | |
parent | 2a2eb0c463bd96d42f7306eb17f88cad87f73aea (diff) |
Merge remote-tracking branch 'origin/master' into blender2.8
Diffstat (limited to 'intern')
-rw-r--r-- | intern/atomic/atomic_ops.h | 24 | ||||
-rw-r--r-- | intern/atomic/intern/atomic_ops_ext.h | 70 | ||||
-rw-r--r-- | intern/atomic/intern/atomic_ops_msvc.h | 18 | ||||
-rw-r--r-- | intern/atomic/intern/atomic_ops_unix.h | 36 | ||||
-rw-r--r-- | intern/cycles/app/cycles_standalone.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/app/cycles_xml.cpp | 25 | ||||
-rw-r--r-- | intern/cycles/blender/blender_sync.cpp | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_passes.h | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 56 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path_branched.h | 42 | ||||
-rw-r--r-- | intern/cycles/render/image.cpp | 270 | ||||
-rw-r--r-- | intern/cycles/render/image.h | 14 | ||||
-rw-r--r-- | intern/cycles/render/light.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/util/util_atomic.h | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_path.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/util/util_stats.h | 4 | ||||
-rw-r--r-- | intern/guardedalloc/intern/mallocn_guarded_impl.c | 12 | ||||
-rw-r--r-- | intern/guardedalloc/intern/mallocn_lockfree_impl.c | 24 | ||||
-rw-r--r-- | intern/iksolver/intern/IK_QSegment.h | 1 | ||||
-rw-r--r-- | intern/iksolver/intern/IK_Solver.cpp | 1 |
20 files changed, 265 insertions, 366 deletions
diff --git a/intern/atomic/atomic_ops.h b/intern/atomic/atomic_ops.h index f78eab7951f..1107deddf94 100644 --- a/intern/atomic/atomic_ops.h +++ b/intern/atomic/atomic_ops.h @@ -77,13 +77,15 @@ /* Function prototypes. */ #if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) -ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); -ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); +ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x); +ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x); +ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x); +ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x); ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new); #endif -ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); -ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x); +ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x); ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new); ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x); @@ -93,18 +95,22 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x); ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b); ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b); -ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x); -ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x); +ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x); +ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x); +ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x); +ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x); ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new); -ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x); -ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x); +ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x); +ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x); +ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x); +ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x); ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new); /* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation, * which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads * working on the same pointer at the same time is very low). */ -ATOMIC_INLINE float atomic_add_fl(float *p, const float x); +ATOMIC_INLINE float atomic_add_and_fetch_fl(float *p, const float x); /******************************************************************************/ /* Include system-dependent implementations. */ diff --git a/intern/atomic/intern/atomic_ops_ext.h b/intern/atomic/intern/atomic_ops_ext.h index 4065299d2ea..8421aa72192 100644 --- a/intern/atomic/intern/atomic_ops_ext.h +++ b/intern/atomic/intern/atomic_ops_ext.h @@ -56,25 +56,47 @@ /******************************************************************************/ /* size_t operations. */ -ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x) +ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x) { assert(sizeof(size_t) == LG_SIZEOF_PTR); #if (LG_SIZEOF_PTR == 8) - return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x); + return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_PTR == 4) - return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x); + return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x); #endif } -ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x) +ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x) { assert(sizeof(size_t) == LG_SIZEOF_PTR); #if (LG_SIZEOF_PTR == 8) - return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); + return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); #elif (LG_SIZEOF_PTR == 4) - return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); + return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); +#endif +} + +ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x) +{ + assert(sizeof(size_t) == LG_SIZEOF_PTR); + +#if (LG_SIZEOF_PTR == 8) + return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 4) + return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x) +{ + assert(sizeof(size_t) == LG_SIZEOF_PTR); + +#if (LG_SIZEOF_PTR == 8) + return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); +#elif (LG_SIZEOF_PTR == 4) + return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); #endif } @@ -91,25 +113,47 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new) /******************************************************************************/ /* unsigned operations. */ -ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x) +ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x) +{ + assert(sizeof(unsigned) == LG_SIZEOF_INT); + +#if (LG_SIZEOF_INT == 8) + return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_INT == 4) + return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x) +{ + assert(sizeof(unsigned) == LG_SIZEOF_INT); + +#if (LG_SIZEOF_INT == 8) + return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); +#elif (LG_SIZEOF_INT == 4) + return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); +#endif +} + +ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x) { assert(sizeof(unsigned) == LG_SIZEOF_INT); #if (LG_SIZEOF_INT == 8) - return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x); + return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_INT == 4) - return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x); + return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x); #endif } -ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x) +ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x) { assert(sizeof(unsigned) == LG_SIZEOF_INT); #if (LG_SIZEOF_INT == 8) - return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); + return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); #elif (LG_SIZEOF_INT == 4) - return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); + return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); #endif } @@ -127,7 +171,7 @@ ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new) /******************************************************************************/ /* float operations. */ -ATOMIC_INLINE float atomic_add_fl(float *p, const float x) +ATOMIC_INLINE float atomic_add_and_fetch_fl(float *p, const float x) { assert(sizeof(float) == sizeof(uint32_t)); diff --git a/intern/atomic/intern/atomic_ops_msvc.h b/intern/atomic/intern/atomic_ops_msvc.h index 3461719a4e7..034ac1e3e53 100644 --- a/intern/atomic/intern/atomic_ops_msvc.h +++ b/intern/atomic/intern/atomic_ops_msvc.h @@ -43,12 +43,12 @@ /******************************************************************************/ /* 64-bit operations. */ #if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) -ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) +ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x) { return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x; } -ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) +ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x) { return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x; } @@ -57,16 +57,26 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne { return InterlockedCompareExchange64((int64_t *)v, _new, old); } + +ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x) +{ + return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x); +} + +ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x) +{ + return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)); +} #endif /******************************************************************************/ /* 32-bit operations. */ -ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) +ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) { return InterlockedExchangeAdd(p, x) + x; } -ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) +ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x) { return InterlockedExchangeAdd(p, -((int32_t)x)) - x; } diff --git a/intern/atomic/intern/atomic_ops_unix.h b/intern/atomic/intern/atomic_ops_unix.h index e63f09c76c5..0a3322ad2b1 100644 --- a/intern/atomic/intern/atomic_ops_unix.h +++ b/intern/atomic/intern/atomic_ops_unix.h @@ -58,22 +58,32 @@ /* 64-bit operations. */ #if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) # if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) -ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) +ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x) { return __sync_add_and_fetch(p, x); } -ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) +ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x) { return __sync_sub_and_fetch(p, x); } +ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x) +{ + return __sync_fetch_and_add(p, x); +} + +ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x) +{ + return __sync_fetch_and_sub(p, x); +} + ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) { return __sync_val_compare_and_swap(v, old, _new); } # elif (defined(__amd64__) || defined(__x86_64__)) -ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) +ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x) { asm volatile ( "lock; xaddq %0, %1;" @@ -83,7 +93,7 @@ ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) return x; } -ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) +ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x) { x = (uint64_t)(-(int64_t)x); asm volatile ( @@ -94,6 +104,16 @@ ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) return x; } +ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x) +{ + return atomic_fetch_and_add_uint64(p, x) + x; +} + +ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x) +{ + return atomic_fetch_and_sub_uint64(p, x) - x; +} + ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) { uint64_t ret; @@ -112,12 +132,12 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne /******************************************************************************/ /* 32-bit operations. */ #if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) -ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) +ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) { return __sync_add_and_fetch(p, x); } -ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) +ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x) { return __sync_sub_and_fetch(p, x); } @@ -127,7 +147,7 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne return __sync_val_compare_and_swap(v, old, _new); } #elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) +ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) { uint32_t ret = x; asm volatile ( @@ -138,7 +158,7 @@ ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) return ret+x; } -ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) +ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x) { ret = (uint32_t)(-(int32_t)x); asm volatile ( diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index e8168bc15ff..b21e8630cdb 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -337,7 +337,7 @@ static void options_parse(int argc, const char **argv) /* device names */ string device_names = ""; - string devicename = "cpu"; + string devicename = "CPU"; bool list = false; vector<DeviceType>& types = Device::available_types(); diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 8a3eb98a5a0..29a68bf272e 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -210,17 +210,6 @@ static void xml_read_camera(XMLReadState& state, pugi::xml_node node) /* Shader */ -static string xml_socket_name(const char *name) -{ - string sname = name; - size_t i; - - while((i = sname.find(" ")) != string::npos) - sname.replace(i, 1, ""); - - return sname; -} - static void xml_read_shader_graph(XMLReadState& state, Shader *shader, pugi::xml_node graph_node) { xml_read_node(state, shader, graph_node); @@ -255,7 +244,7 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, pugi::xml ShaderNode *fromnode = (ShaderNode*)graph_reader.node_map[from_node_name]; foreach(ShaderOutput *out, fromnode->outputs) - if(string_iequals(xml_socket_name(out->name().c_str()), from_socket_name.c_str())) + if(string_iequals(out->socket_type.name.string(), from_socket_name.string())) output = out; if(!output) @@ -268,7 +257,7 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, pugi::xml ShaderNode *tonode = (ShaderNode*)graph_reader.node_map[to_node_name]; foreach(ShaderInput *in, tonode->inputs) - if(string_iequals(xml_socket_name(in->name().c_str()), to_socket_name.c_str())) + if(string_iequals(in->socket_type.name.string(), to_socket_name.string())) input = in; if(!input) @@ -406,7 +395,7 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node) int shader = 0; bool smooth = state.smooth; - /* read vertices and polygons, RIB style */ + /* read vertices and polygons */ vector<float3> P; vector<float> UV; vector<int> verts, nverts; @@ -532,8 +521,12 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node) sdparams.objecttoworld = state.tfm; } - /* temporary for test compatibility */ - mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); + /* we don't yet support arbitrary attributes, for now add vertex + * coordinates as generated coordinates if requested */ + if (mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) { + Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED); + memcpy(attr->data_float3(), mesh->verts.data(), sizeof(float3)*mesh->verts.size()); + } } /* Light */ diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 62b9fa3c92b..49ddc8af9a8 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -249,9 +249,14 @@ void BlenderSync::sync_integrator() integrator->seed = get_int(cscene, "seed"); if(get_boolean(cscene, "use_animated_seed")) { integrator->seed = hash_int_2d(b_scene.frame_current(), - get_int(cscene, "seed")) + - hash_int_2d((int)(b_scene.frame_subframe() * (float)INT_MAX), get_int(cscene, "seed")); + if(b_scene.frame_subframe() != 0.0f) { + /* TODO(sergey): Ideally should be some sort of hash_merge, + * but this is good enough for now. + */ + integrator->seed += hash_int_2d((int)(b_scene.frame_subframe() * (float)INT_MAX), + get_int(cscene, "seed")); + } } integrator->sampling_pattern = (SamplingPattern)get_enum( diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 20cf3fa931b..7aec47e4957 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -20,7 +20,7 @@ ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, int sam { ccl_global float *buf = buffer; #if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__) - atomic_add_float(buf, value); + atomic_add_and_fetch_float(buf, value); #else *buf = (sample == 0)? value: *buf + value; #endif // __SPLIT_KERNEL__ && __WORK_STEALING__ @@ -33,9 +33,9 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sa ccl_global float *buf_y = buffer + 1; ccl_global float *buf_z = buffer + 2; - atomic_add_float(buf_x, value.x); - atomic_add_float(buf_y, value.y); - atomic_add_float(buf_z, value.z); + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); #else ccl_global float3 *buf = (ccl_global float3*)buffer; *buf = (sample == 0)? value: *buf + value; @@ -50,10 +50,10 @@ ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sa ccl_global float *buf_z = buffer + 2; ccl_global float *buf_w = buffer + 3; - atomic_add_float(buf_x, value.x); - atomic_add_float(buf_y, value.y); - atomic_add_float(buf_z, value.z); - atomic_add_float(buf_w, value.w); + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); + atomic_add_and_fetch_float(buf_w, value.w); #else ccl_global float4 *buf = (ccl_global float4*)buffer; *buf = (sample == 0)? value: *buf + value; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 4237fdb32ff..6d89a89ed5b 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -84,7 +84,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, light_ray.t = kernel_data.background.ao_distance; #ifdef __OBJECT_MOTION__ light_ray.time = ccl_fetch(sd, time); -#endif +#endif /* __OBJECT_MOTION__ */ light_ray.dP = ccl_fetch(sd, dP); light_ray.dD = differential3_zero(); @@ -138,7 +138,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, state->bounce); } } -#endif +#endif /* __LAMP_MIS__ */ #ifdef __VOLUME__ /* volume attenuation, emission, scatter */ @@ -239,7 +239,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, } } else -# endif +# endif /* __VOLUME_DECOUPLED__ */ { /* integrate along volume segment with distance sampling */ VolumeIntegrateResult result = kernel_volume_integrate( @@ -271,10 +271,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, break; } } -# endif +# endif /* __VOLUME_SCATTER__ */ } } -#endif +#endif /* __VOLUME__ */ if(!hit) { #ifdef __BACKGROUND__ @@ -284,7 +284,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, throughput, L_background, state->bounce); -#endif +#endif /* __BACKGROUND__ */ break; } @@ -298,7 +298,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT); #ifdef __BRANCHED_PATH__ shader_merge_closures(sd); -#endif +#endif /* __BRANCHED_PATH__ */ /* blurring of bsdf after bounces, for rays that have a small likelihood * of following this particular path (diffuse, rough glossy) */ @@ -321,7 +321,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, state->ray_pdf); path_radiance_accum_emission(L, throughput, emission, state->bounce); } -#endif +#endif /* __EMISSION__ */ /* path termination. this is a strange place to put the termination, it's * mainly due to the mixed in MIS that we use. gives too many unneeded @@ -348,7 +348,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) { kernel_path_ao(kg, sd, emission_sd, L, state, rng, throughput, make_float3(0.0f, 0.0f, 0.0f)); } -#endif +#endif /* __AO__ */ #ifdef __SUBSURFACE__ /* bssrdf scatter to a different location on the same object, replacing @@ -380,7 +380,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, false); } } -#endif +#endif /* __SUBSURFACE__ */ #if defined(__EMISSION__) && defined(__BRANCHED_PATH__) if(kernel_data.integrator.use_direct_light) { @@ -395,7 +395,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, L, all); } -#endif +#endif /* defined(__EMISSION__) && defined(__BRANCHED_PATH__) */ if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray)) break; @@ -449,7 +449,7 @@ bool kernel_path_subsurface_scatter( ss_indirect->need_update_volume_stack = kernel_data.integrator.use_volumes && ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME; -# endif +# endif /* __VOLUME__ */ /* compute lighting with the BSDF closure */ for(int hit = 0; hit < num_hits; hit++) { @@ -492,7 +492,7 @@ bool kernel_path_subsurface_scatter( { # ifdef __LAMP_MIS__ hit_state->ray_t = 0.0f; -# endif +# endif /* __LAMP_MIS__ */ # ifdef __VOLUME__ if(ss_indirect->need_update_volume_stack) { @@ -507,7 +507,7 @@ bool kernel_path_subsurface_scatter( &volume_ray, hit_state->volume_stack); } -# endif +# endif /* __VOLUME__ */ path_radiance_reset_indirect(L); ss_indirect->num_rays++; } @@ -593,14 +593,14 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, #ifdef __KERNEL_DEBUG__ DebugData debug_data; debug_data_init(&debug_data); -#endif +#endif /* __KERNEL_DEBUG__ */ #ifdef __SUBSURFACE__ SubsurfaceIndirectRays ss_indirect; kernel_path_subsurface_init_indirect(&ss_indirect); for(;;) { -#endif +#endif /* __SUBSURFACE__ */ /* path iteration */ for(;;) { @@ -626,7 +626,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax); #else bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f); -#endif +#endif /* __HAIR__ */ #ifdef __KERNEL_DEBUG__ if(state.flag & PATH_RAY_CAMERA) { @@ -634,7 +634,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; } debug_data.num_ray_bounces++; -#endif +#endif /* __KERNEL_DEBUG__ */ #ifdef __LAMP_MIS__ if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) { @@ -655,7 +655,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission)) path_radiance_accum_emission(&L, throughput, emission, state.bounce); } -#endif +#endif /* __LAMP_MIS__ */ #ifdef __VOLUME__ /* volume attenuation, emission, scatter */ @@ -719,7 +719,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, } } else -# endif +# endif /* __VOLUME_DECOUPLED__ */ { /* integrate along volume segment with distance sampling */ VolumeIntegrateResult result = kernel_volume_integrate( @@ -736,10 +736,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, else break; } -# endif +# endif /* __VOLUME_SCATTER__ */ } } -#endif +#endif /* __VOLUME__ */ if(!hit) { /* eval background shader if nothing hit */ @@ -748,7 +748,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, #ifdef __PASSES__ if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) -#endif +#endif /* __PASSES__ */ break; } @@ -756,7 +756,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, /* sample background shader */ float3 L_background = indirect_background(kg, &emission_sd, &state, &ray); path_radiance_accum_background(&L, throughput, L_background, state.bounce); -#endif +#endif /* __BACKGROUND__ */ break; } @@ -784,7 +784,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, if(sd.flag & SD_HOLDOUT_MASK) break; } -#endif +#endif /* __HOLDOUT__ */ /* holdout mask objects do not write data passes */ kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput); @@ -807,7 +807,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf); path_radiance_accum_emission(&L, throughput, emission, state.bounce); } -#endif +#endif /* __EMISSION__ */ /* path termination. this is a strange place to put the termination, it's * mainly due to the mixed in MIS that we use. gives too many unneeded @@ -830,7 +830,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput, shader_bsdf_alpha(kg, &sd)); } -#endif +#endif /* __AO__ */ #ifdef __SUBSURFACE__ /* bssrdf scatter to a different location on the same object, replacing @@ -885,7 +885,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, #ifdef __KERNEL_DEBUG__ kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample); -#endif +#endif /* __KERNEL_DEBUG__ */ return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent); } diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index cdb07db587a..c84727ace99 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -51,7 +51,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, light_ray.t = kernel_data.background.ao_distance; #ifdef __OBJECT_MOTION__ light_ray.time = ccl_fetch(sd, time); -#endif +#endif /* __OBJECT_MOTION__ */ light_ray.dP = ccl_fetch(sd, dP); light_ray.dD = differential3_zero(); @@ -169,7 +169,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, Ray volume_ray = *ray; bool need_update_volume_stack = kernel_data.integrator.use_volumes && ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME; -#endif +#endif /* __VOLUME__ */ /* compute lighting with the BSDF closure */ for(int hit = 0; hit < num_hits; hit++) { @@ -200,7 +200,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, &volume_ray, hit_state.volume_stack); } -#endif +#endif /* __VOLUME__ */ #ifdef __EMISSION__ /* direct light */ @@ -217,7 +217,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, L, all); } -#endif +#endif /* __EMISSION__ */ /* indirect light */ kernel_branched_path_surface_indirect_light( @@ -234,7 +234,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, } } } -#endif +#endif /* __SUBSURFACE__ */ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer) { @@ -256,7 +256,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in #ifdef __KERNEL_DEBUG__ DebugData debug_data; debug_data_init(&debug_data); -#endif +#endif /* __KERNEL_DEBUG__ */ /* Main Loop * Here we only handle transparency intersections from the camera ray. @@ -285,13 +285,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax); #else bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f); -#endif +#endif /* __HAIR__ */ #ifdef __KERNEL_DEBUG__ debug_data.num_bvh_traversal_steps += isect.num_traversal_steps; debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; debug_data.num_ray_bounces++; -#endif +#endif /* __KERNEL_DEBUG__ */ #ifdef __VOLUME__ /* volume attenuation, emission, scatter */ @@ -432,14 +432,14 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in path_radiance_reset_indirect(&L); } } -#endif +#endif /* __VOLUME_SCATTER__ */ } /* todo: avoid this calculation using decoupled ray marching */ kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput); -#endif +#endif /* __VOLUME_DECOUPLED__ */ } -#endif +#endif /* __VOLUME__ */ if(!hit) { /* eval background shader if nothing hit */ @@ -448,7 +448,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in #ifdef __PASSES__ if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) -#endif +#endif /* __PASSES__ */ break; } @@ -456,7 +456,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in /* sample background shader */ float3 L_background = indirect_background(kg, &emission_sd, &state, &ray); path_radiance_accum_background(&L, throughput, L_background, state.bounce); -#endif +#endif /* __BACKGROUND__ */ break; } @@ -484,7 +484,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in if(sd.flag & SD_HOLDOUT_MASK) break; } -#endif +#endif /* __HOLDOUT__ */ /* holdout mask objects do not write data passes */ kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput); @@ -495,7 +495,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf); path_radiance_accum_emission(&L, throughput, emission, state.bounce); } -#endif +#endif /* __EMISSION__ */ /* transparency termination */ if(state.flag & PATH_RAY_TRANSPARENT) { @@ -522,7 +522,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput); } -#endif +#endif /* __AO__ */ #ifdef __SUBSURFACE__ /* bssrdf scatter to a different location on the same object */ @@ -530,7 +530,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd, &L, &state, rng, &ray, throughput); } -#endif +#endif /* __SUBSURFACE__ */ if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { PathState hit_state = state; @@ -542,7 +542,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in kernel_branched_path_surface_connect_light(kg, rng, &sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all); } -#endif +#endif /* __EMISSION__ */ /* indirect light */ kernel_branched_path_surface_indirect_light(kg, rng, @@ -567,12 +567,12 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in ray.dP = sd.dP; ray.dD.dx = -sd.dI.dx; ray.dD.dy = -sd.dI.dy; -#endif +#endif /* __RAY_DIFFERENTIALS__ */ #ifdef __VOLUME__ /* enter/exit volume */ kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack); -#endif +#endif /* __VOLUME__ */ } float3 L_sum = path_radiance_clamp_and_sum(kg, &L); @@ -581,7 +581,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in #ifdef __KERNEL_DEBUG__ kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample); -#endif +#endif /* __KERNEL_DEBUG__ */ return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent); } diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 073a0aa2ac9..7465fbd43a7 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -471,133 +471,43 @@ bool ImageManager::file_load_image_generic(Image *img, ImageInput **in, int &wid return true; } -template<typename T> -bool ImageManager::file_load_byte_image(Image *img, ImageDataType type, device_vector<T>& tex_img) +template<TypeDesc::BASETYPE FileFormat, + typename StorageType, + typename DeviceType> +bool ImageManager::file_load_image(Image *img, + ImageDataType type, + device_vector<DeviceType>& tex_img) { + const StorageType alpha_one = (FileFormat == TypeDesc::UINT8)? 255 : 1; ImageInput *in = NULL; int width, height, depth, components; - - if(!file_load_image_generic(img, &in, width, height, depth, components)) - return false; - - /* read RGBA pixels */ - uchar *pixels = (uchar*)tex_img.resize(width, height, depth); - if(pixels == NULL) { + if(!file_load_image_generic(img, &in, width, height, depth, components)) { return false; } - bool cmyk = false; - - if(in) { - if(depth <= 1) { - int scanlinesize = width*components*sizeof(uchar); - - in->read_image(TypeDesc::UINT8, - (uchar*)pixels + (((size_t)height)-1)*scanlinesize, - AutoStride, - -scanlinesize, - AutoStride); - } - else { - in->read_image(TypeDesc::UINT8, (uchar*)pixels); - } - - cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4; - - in->close(); - delete in; - } - else { - builtin_image_pixels_cb(img->filename, img->builtin_data, pixels); - } - - /* Check if we actually have a byte4 slot, in case components == 1, but device - * doesn't support single channel textures. */ - if(type == IMAGE_DATA_TYPE_BYTE4) { - size_t num_pixels = ((size_t)width) * height * depth; - if(cmyk) { - /* CMYK */ - for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+2] = (pixels[i*4+2]*pixels[i*4+3])/255; - pixels[i*4+1] = (pixels[i*4+1]*pixels[i*4+3])/255; - pixels[i*4+0] = (pixels[i*4+0]*pixels[i*4+3])/255; - pixels[i*4+3] = 255; - } - } - else if(components == 2) { - /* grayscale + alpha */ - for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = pixels[i*2+1]; - pixels[i*4+2] = pixels[i*2+0]; - pixels[i*4+1] = pixels[i*2+0]; - pixels[i*4+0] = pixels[i*2+0]; - } - } - else if(components == 3) { - /* RGB */ - for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 255; - pixels[i*4+2] = pixels[i*3+2]; - pixels[i*4+1] = pixels[i*3+1]; - pixels[i*4+0] = pixels[i*3+0]; - } - } - else if(components == 1) { - /* grayscale */ - for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 255; - pixels[i*4+2] = pixels[i]; - pixels[i*4+1] = pixels[i]; - pixels[i*4+0] = pixels[i]; - } - } - - if(img->use_alpha == false) { - for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 255; - } - } - } - - return true; -} - -template<typename T> -bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_vector<T>& tex_img) -{ - ImageInput *in = NULL; - int width, height, depth, components; - - if(!file_load_image_generic(img, &in, width, height, depth, components)) - return false; - - /* read RGBA pixels */ - float *pixels = (float*)tex_img.resize(width, height, depth); + /* Read RGBA pixels. */ + StorageType *pixels = (StorageType*)tex_img.resize(width, height, depth); if(pixels == NULL) { return false; } bool cmyk = false; - if(in) { - float *readpixels = pixels; - vector<float> tmppixels; - + StorageType *readpixels = pixels; + vector<StorageType> tmppixels; if(components > 4) { tmppixels.resize(((size_t)width)*height*components); readpixels = &tmppixels[0]; } - if(depth <= 1) { - size_t scanlinesize = ((size_t)width)*components*sizeof(float); - in->read_image(TypeDesc::FLOAT, + size_t scanlinesize = ((size_t)width)*components*sizeof(StorageType); + in->read_image(FileFormat, (uchar*)readpixels + (height-1)*scanlinesize, AutoStride, -scanlinesize, AutoStride); } else { - in->read_image(TypeDesc::FLOAT, (uchar*)readpixels); + in->read_image(FileFormat, (uchar*)readpixels); } - if(components > 4) { size_t dimensions = ((size_t)width)*height; for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) { @@ -606,30 +516,42 @@ bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_ pixels[i*4+1] = tmppixels[i*components+1]; pixels[i*4+0] = tmppixels[i*components+0]; } - tmppixels.clear(); } - cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4; - in->close(); delete in; } else { - builtin_image_float_pixels_cb(img->filename, img->builtin_data, pixels); + if(FileFormat == TypeDesc::FLOAT) { + builtin_image_float_pixels_cb(img->filename, + img->builtin_data, + (float*)pixels); + } + else if(FileFormat == TypeDesc::UINT8) { + builtin_image_pixels_cb(img->filename, + img->builtin_data, + (uchar*)pixels); + } + else { + /* TODO(dingto): Support half for ImBuf. */ + } } - - /* Check if we actually have a float4 slot, in case components == 1, but device - * doesn't support single channel textures. */ - if(type == IMAGE_DATA_TYPE_FLOAT4) { + /* Check if we actually have a float4 slot, in case components == 1, + * but device doesn't support single channel textures. + */ + if(type == IMAGE_DATA_TYPE_FLOAT4 || + type == IMAGE_DATA_TYPE_HALF4 || + type == IMAGE_DATA_TYPE_BYTE4) + { size_t num_pixels = ((size_t)width) * height * depth; if(cmyk) { /* CMYK */ for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 255; pixels[i*4+2] = (pixels[i*4+2]*pixels[i*4+3])/255; pixels[i*4+1] = (pixels[i*4+1]*pixels[i*4+3])/255; pixels[i*4+0] = (pixels[i*4+0]*pixels[i*4+3])/255; + pixels[i*4+3] = alpha_one; } } else if(components == 2) { @@ -644,7 +566,7 @@ bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_ else if(components == 3) { /* RGB */ for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 1.0f; + pixels[i*4+3] = alpha_one; pixels[i*4+2] = pixels[i*3+2]; pixels[i*4+1] = pixels[i*3+1]; pixels[i*4+0] = pixels[i*3+0]; @@ -653,120 +575,18 @@ bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_ else if(components == 1) { /* grayscale */ for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 1.0f; + pixels[i*4+3] = alpha_one; pixels[i*4+2] = pixels[i]; pixels[i*4+1] = pixels[i]; pixels[i*4+0] = pixels[i]; } } - if(img->use_alpha == false) { for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 1.0f; + pixels[i*4+3] = alpha_one; } } } - - return true; -} - -template<typename T> -bool ImageManager::file_load_half_image(Image *img, ImageDataType type, device_vector<T>& tex_img) -{ - ImageInput *in = NULL; - int width, height, depth, components; - - if(!file_load_image_generic(img, &in, width, height, depth, components)) - return false; - - /* read RGBA pixels */ - half *pixels = (half*)tex_img.resize(width, height, depth); - if(pixels == NULL) { - return false; - } - - if(in) { - half *readpixels = pixels; - vector<half> tmppixels; - - if(components > 4) { - tmppixels.resize(((size_t)width)*height*components); - readpixels = &tmppixels[0]; - } - - if(depth <= 1) { - size_t scanlinesize = ((size_t)width)*components*sizeof(half); - in->read_image(TypeDesc::HALF, - (uchar*)readpixels + (height-1)*scanlinesize, - AutoStride, - -scanlinesize, - AutoStride); - } - else { - in->read_image(TypeDesc::HALF, (uchar*)readpixels); - } - - if(components > 4) { - size_t dimensions = ((size_t)width)*height; - for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) { - pixels[i*4+3] = tmppixels[i*components+3]; - pixels[i*4+2] = tmppixels[i*components+2]; - pixels[i*4+1] = tmppixels[i*components+1]; - pixels[i*4+0] = tmppixels[i*components+0]; - } - - tmppixels.clear(); - } - - in->close(); - delete in; - } -#if 0 - /* TODO(dingto): Support half for ImBuf. */ - else { - builtin_image_float_pixels_cb(img->filename, img->builtin_data, pixels); - } -#endif - - /* Check if we actually have a half4 slot, in case components == 1, but device - * doesn't support single channel textures. */ - if(type == IMAGE_DATA_TYPE_HALF4) { - size_t num_pixels = ((size_t)width) * height * depth; - if(components == 2) { - /* grayscale + alpha */ - for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = pixels[i*2+1]; - pixels[i*4+2] = pixels[i*2+0]; - pixels[i*4+1] = pixels[i*2+0]; - pixels[i*4+0] = pixels[i*2+0]; - } - } - else if(components == 3) { - /* RGB */ - for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 1.0f; - pixels[i*4+2] = pixels[i*3+2]; - pixels[i*4+1] = pixels[i*3+1]; - pixels[i*4+0] = pixels[i*3+0]; - } - } - else if(components == 1) { - /* grayscale */ - for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 1.0f; - pixels[i*4+2] = pixels[i]; - pixels[i*4+1] = pixels[i]; - pixels[i*4+0] = pixels[i]; - } - } - - if(img->use_alpha == false) { - for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) { - pixels[i*4+3] = 1.0f; - } - } - } - return true; } @@ -802,7 +622,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD device->tex_free(tex_img); } - if(!file_load_float_image(img, type, tex_img)) { + if(!file_load_image<TypeDesc::FLOAT, float>(img, type, tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ float *pixels = (float*)tex_img.resize(1, 1); @@ -828,7 +648,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD device->tex_free(tex_img); } - if(!file_load_float_image(img, type, tex_img)) { + if(!file_load_image<TypeDesc::FLOAT, float>(img, type, tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ float *pixels = (float*)tex_img.resize(1, 1); @@ -851,7 +671,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD device->tex_free(tex_img); } - if(!file_load_byte_image(img, type, tex_img)) { + if(!file_load_image<TypeDesc::UINT8, uchar>(img, type, tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ uchar *pixels = (uchar*)tex_img.resize(1, 1); @@ -877,7 +697,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD device->tex_free(tex_img); } - if(!file_load_byte_image(img, type, tex_img)) { + if(!file_load_image<TypeDesc::UINT8, uchar>(img, type, tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ uchar *pixels = (uchar*)tex_img.resize(1, 1); @@ -900,7 +720,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD device->tex_free(tex_img); } - if(!file_load_half_image(img, type, tex_img)) { + if(!file_load_image<TypeDesc::HALF, half>(img, type, tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ half *pixels = (half*)tex_img.resize(1, 1); @@ -926,7 +746,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD device->tex_free(tex_img); } - if(!file_load_half_image(img, type, tex_img)) { + if(!file_load_image<TypeDesc::HALF, half>(img, type, tex_img)) { /* on failure to load, we set a 1x1 pixels pink image */ half *pixels = (half*)tex_img.resize(1, 1); diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index cca71a6bb93..1dc4bf180f8 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -109,14 +109,12 @@ private: bool file_load_image_generic(Image *img, ImageInput **in, int &width, int &height, int &depth, int &components); - template<typename T> - bool file_load_byte_image(Image *img, ImageDataType type, device_vector<T>& tex_img); - - template<typename T> - bool file_load_float_image(Image *img, ImageDataType type, device_vector<T>& tex_img); - - template<typename T> - bool file_load_half_image(Image *img, ImageDataType type, device_vector<T>& tex_img); + template<TypeDesc::BASETYPE FileFormat, + typename StorageType, + typename DeviceType> + bool file_load_image(Image *img, + ImageDataType type, + device_vector<DeviceType>& tex_img); int type_index_to_flattened_slot(int slot, ImageDataType type); int flattened_slot_to_type_index(int flat_slot, ImageDataType *type); diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index c43d646f515..2245c861d5a 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -106,6 +106,7 @@ NODE_DEFINE(Light) static NodeEnum type_enum; type_enum.insert("point", LIGHT_POINT); + type_enum.insert("distant", LIGHT_DISTANT); type_enum.insert("background", LIGHT_BACKGROUND); type_enum.insert("area", LIGHT_AREA); type_enum.insert("spot", LIGHT_SPOT); diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h index 1d1e2963348..433e41fbbb6 100644 --- a/intern/cycles/util/util_atomic.h +++ b/intern/cycles/util/util_atomic.h @@ -39,7 +39,7 @@ ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value) /* Float atomics implementation credits: * http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html */ -ccl_device_inline void atomic_add_float(volatile ccl_global float *source, +ccl_device_inline void atomic_add_and_fetch_float(volatile ccl_global float *source, const float operand) { union { diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp index 62ef8fc0b48..5df262fcbbb 100644 --- a/intern/cycles/util/util_path.cpp +++ b/intern/cycles/util/util_path.cpp @@ -757,9 +757,9 @@ uint64_t path_modified_time(const string& path) { path_stat_t st; if(path_stat(path, &st) != 0) { - return st.st_mtime; + return 0; } - return 0; + return st.st_mtime; } bool path_remove(const string& path) diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h index b970b017270..c21a8488c81 100644 --- a/intern/cycles/util/util_stats.h +++ b/intern/cycles/util/util_stats.h @@ -29,13 +29,13 @@ public: explicit Stats(static_init_t) {} void mem_alloc(size_t size) { - atomic_add_z(&mem_used, size); + atomic_add_and_fetch_z(&mem_used, size); atomic_update_max_z(&mem_peak, mem_used); } void mem_free(size_t size) { assert(mem_used >= size); - atomic_sub_z(&mem_used, size); + atomic_sub_and_fetch_z(&mem_used, size); } size_t mem_used; diff --git a/intern/guardedalloc/intern/mallocn_guarded_impl.c b/intern/guardedalloc/intern/mallocn_guarded_impl.c index 1933e9d3ee3..76b7e072321 100644 --- a/intern/guardedalloc/intern/mallocn_guarded_impl.c +++ b/intern/guardedalloc/intern/mallocn_guarded_impl.c @@ -505,8 +505,8 @@ static void make_memhead_header(MemHead *memh, size_t len, const char *str) memt = (MemTail *)(((char *) memh) + sizeof(MemHead) + len); memt->tag3 = MEMTAG3; - atomic_add_u(&totblock, 1); - atomic_add_z(&mem_in_use, len); + atomic_add_and_fetch_u(&totblock, 1); + atomic_add_and_fetch_z(&mem_in_use, len); mem_lock_thread(); addtail(membase, &memh->next); @@ -638,7 +638,7 @@ void *MEM_guarded_mapallocN(size_t len, const char *str) if (memh != (MemHead *)-1) { make_memhead_header(memh, len, str); memh->mmap = 1; - atomic_add_z(&mmap_in_use, len); + atomic_add_and_fetch_z(&mmap_in_use, len); mem_lock_thread(); peak_mem = mmap_in_use > peak_mem ? mmap_in_use : peak_mem; mem_unlock_thread(); @@ -1007,8 +1007,8 @@ static void rem_memblock(MemHead *memh) } mem_unlock_thread(); - atomic_sub_u(&totblock, 1); - atomic_sub_z(&mem_in_use, memh->len); + atomic_sub_and_fetch_u(&totblock, 1); + atomic_sub_and_fetch_z(&mem_in_use, memh->len); #ifdef DEBUG_MEMDUPLINAME if (memh->need_free_name) @@ -1016,7 +1016,7 @@ static void rem_memblock(MemHead *memh) #endif if (memh->mmap) { - atomic_sub_z(&mmap_in_use, memh->len); + atomic_sub_and_fetch_z(&mmap_in_use, memh->len); #if defined(WIN32) /* our windows mmap implementation is not thread safe */ mem_lock_thread(); diff --git a/intern/guardedalloc/intern/mallocn_lockfree_impl.c b/intern/guardedalloc/intern/mallocn_lockfree_impl.c index a80d67c3e80..ce8a5b29ece 100644 --- a/intern/guardedalloc/intern/mallocn_lockfree_impl.c +++ b/intern/guardedalloc/intern/mallocn_lockfree_impl.c @@ -142,11 +142,11 @@ void MEM_lockfree_freeN(void *vmemh) return; } - atomic_sub_u(&totblock, 1); - atomic_sub_z(&mem_in_use, len); + atomic_sub_and_fetch_u(&totblock, 1); + atomic_sub_and_fetch_z(&mem_in_use, len); if (MEMHEAD_IS_MMAP(memh)) { - atomic_sub_z(&mmap_in_use, len); + atomic_sub_and_fetch_z(&mmap_in_use, len); #if defined(WIN32) /* our windows mmap implementation is not thread safe */ mem_lock_thread(); @@ -287,8 +287,8 @@ void *MEM_lockfree_callocN(size_t len, const char *str) if (LIKELY(memh)) { memh->len = len; - atomic_add_u(&totblock, 1); - atomic_add_z(&mem_in_use, len); + atomic_add_and_fetch_u(&totblock, 1); + atomic_add_and_fetch_z(&mem_in_use, len); update_maximum(&peak_mem, mem_in_use); return PTR_FROM_MEMHEAD(memh); @@ -312,8 +312,8 @@ void *MEM_lockfree_mallocN(size_t len, const char *str) } memh->len = len; - atomic_add_u(&totblock, 1); - atomic_add_z(&mem_in_use, len); + atomic_add_and_fetch_u(&totblock, 1); + atomic_add_and_fetch_z(&mem_in_use, len); update_maximum(&peak_mem, mem_in_use); return PTR_FROM_MEMHEAD(memh); @@ -361,8 +361,8 @@ void *MEM_lockfree_mallocN_aligned(size_t len, size_t alignment, const char *str memh->len = len | (size_t) MEMHEAD_ALIGN_FLAG; memh->alignment = (short) alignment; - atomic_add_u(&totblock, 1); - atomic_add_z(&mem_in_use, len); + atomic_add_and_fetch_u(&totblock, 1); + atomic_add_and_fetch_z(&mem_in_use, len); update_maximum(&peak_mem, mem_in_use); return PTR_FROM_MEMHEAD(memh); @@ -396,9 +396,9 @@ void *MEM_lockfree_mapallocN(size_t len, const char *str) if (memh != (MemHead *)-1) { memh->len = len | (size_t) MEMHEAD_MMAP_FLAG; - atomic_add_u(&totblock, 1); - atomic_add_z(&mem_in_use, len); - atomic_add_z(&mmap_in_use, len); + atomic_add_and_fetch_u(&totblock, 1); + atomic_add_and_fetch_z(&mem_in_use, len); + atomic_add_and_fetch_z(&mmap_in_use, len); update_maximum(&peak_mem, mem_in_use); update_maximum(&peak_mem, mmap_in_use); diff --git a/intern/iksolver/intern/IK_QSegment.h b/intern/iksolver/intern/IK_QSegment.h index 74f157aa763..247807dc5e0 100644 --- a/intern/iksolver/intern/IK_QSegment.h +++ b/intern/iksolver/intern/IK_QSegment.h @@ -60,6 +60,7 @@ class IK_QSegment { public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW virtual ~IK_QSegment(); // start: a user defined translation diff --git a/intern/iksolver/intern/IK_Solver.cpp b/intern/iksolver/intern/IK_Solver.cpp index cefb8c7ed7b..a00db4fa2f5 100644 --- a/intern/iksolver/intern/IK_Solver.cpp +++ b/intern/iksolver/intern/IK_Solver.cpp @@ -42,6 +42,7 @@ using namespace std; class IK_QSolver { public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW IK_QSolver() : root(NULL) { } |