diff options
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/CMakeLists.txt | 4 | ||||
-rw-r--r-- | intern/cycles/SConscript | 11 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 29 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/SConscript | 11 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/osl_closures.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/shader.h | 9 | ||||
-rw-r--r-- | intern/cycles/util/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/util/util_atomic.h | 33 | ||||
-rw-r--r-- | intern/cycles/util/util_stats.h | 9 |
10 files changed, 85 insertions, 40 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 7de1182282d..c8c71fe6856 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -163,6 +163,10 @@ include_directories( ${OPENEXR_INCLUDE_DIRS} ) +# TODO(sergey): Adjust so standalone repository is also happy. +include_directories( + ../atomic +) # Warnings if(CMAKE_COMPILER_IS_GNUCXX) diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript index b399844534d..15a02881ec2 100644 --- a/intern/cycles/SConscript +++ b/intern/cycles/SConscript @@ -62,12 +62,23 @@ if env['WITH_BF_CYCLES_OSL']: if env['WITH_BF_CYCLES_DEBUG']: defs.append('WITH_CYCLES_DEBUG') +if env['WITH_BF_CYCLES_LOGGING']: + defs.append('WITH_CYCLES_LOGGING') + defs.append('GOOGLE_GLOG_DLL_DECL=') + if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', 'win64-mingw'): + incs.append('#extern/libmv/third_party/glog/src/windows') + incs.append('#extern/libmv/third_party/gflags') + else: + incs.append('#extern/libmv/third_party/glog/src') + incs.append('#extern/libmv/third_party/gflags') + incs.extend('. 
bvh render device kernel kernel/osl kernel/svm util subd'.split()) incs.extend('#intern/guardedalloc #source/blender/makesrna #source/blender/makesdna #source/blender/blenlib'.split()) incs.extend('#source/blender/blenloader ../../source/blender/makesrna/intern'.split()) incs.append(env['BF_GLEW_INC']) incs.append('#/intern/glew-mx') +incs.append('#/intern/atomic') incs.append('#intern/mikktspace') incs.extend('#extern/glew/include #extern/clew/include #extern/cuew/include #intern/mikktspace'.split()) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index b3d4215e51b..05f45ab92af 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -202,24 +202,8 @@ public: /* compute cubin name */ int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); - string cubin; - /* ToDo: We don't bundle sm_52 kernel yet */ - if(major == 5 && minor == 2) { - if(experimental) - cubin = path_get(string_printf("lib/kernel_experimental_sm_%d%d.cubin", major, minor)); - else - cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); - - if(path_exists(cubin)) - /* self build sm_52 kernel? Use it. */ - return cubin; - else - /* use 5.0 kernel as workaround */ - minor = 0; - } - /* attempt to use kernel provided with blender */ if(experimental) cubin = path_get(string_printf("lib/kernel_experimental_sm_%d%d.cubin", major, minor)); diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index ca75c261636..1595f7a715a 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -539,34 +539,25 @@ typedef enum AttributeStandard { #define MAX_CLOSURE 1 #endif -/* TODO(sergey): This is rather nasty bug happening in here, which - * could be simply a compilers bug for which we can't find a generic - * platform independent workaround. 
Also even if it's a compiler - * issue, it's not so simple to upgrade the compiler in the release - * environment for Linux and doing it so closer to the release is - * rather a risky business. - * - * For this release it's probably safer to stick with such a rather - * dirty solution, and look for a cleaner fix during the next release - * cycle. +/* This struct is to be 16 bytes aligned, we also keep some extra precautions: + * - All the float3 members are in the beginning of the struct, so compiler + * does not put its own padding trying to align these members. + * - We make sure OSL pointer is also 16 bytes aligned. */ typedef struct ShaderClosure { - ClosureType type; float3 weight; -#ifndef __APPLE__ + float3 N; + float3 T; + + ClosureType type; float sample_weight; -#endif float data0; float data1; float data2; + int pad1, pad2, pad3; - float3 N; - float3 T; -#ifdef __APPLE__ - float sample_weight; -#endif #ifdef __OSL__ - void *prim; + void *prim, *pad4; #endif } ShaderClosure; diff --git a/intern/cycles/kernel/osl/SConscript b/intern/cycles/kernel/osl/SConscript index d721edbaf6e..0a21d3e6819 100644 --- a/intern/cycles/kernel/osl/SConscript +++ b/intern/cycles/kernel/osl/SConscript @@ -38,6 +38,7 @@ incs.append(env['BF_OIIO_INC']) incs.append(env['BF_BOOST_INC']) incs.append(env['BF_OSL_INC']) incs.append(env['BF_OPENEXR_INC'].split()) +incs.append('#/intern/atomic') defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {') defs.append('CCL_NAMESPACE_END=}') @@ -46,6 +47,16 @@ defs.append('WITH_OSL') if env['WITH_BF_CYCLES_DEBUG']: defs.append('WITH_CYCLES_DEBUG') +if env['WITH_BF_CYCLES_LOGGING']: + defs.append('WITH_CYCLES_LOGGING') + defs.append('GOOGLE_GLOG_DLL_DECL=') + if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', 'win64-mingw'): + incs.append('#extern/libmv/third_party/glog/src/windows') + incs.append('#extern/libmv/third_party/gflags') + else: + incs.append('#extern/libmv/third_party/glog/src') + 
incs.append('#extern/libmv/third_party/gflags') + if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'): cxxflags.append('-DBOOST_NO_RTTI -DBOOST_NO_TYPEID /fp:fast'.split()) incs.append(env['BF_PTHREADS_INC']) diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index bae9cb2f377..1d99f1d2682 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -189,7 +189,7 @@ static void register_closure(OSL::ShadingSystem *ss, const char *name, int id, O /* optimization: it's possible to not use a prepare function at all and * only initialize the actual class when accessing the closure component * data, but then we need to map the id to the class somehow */ - ss->register_closure(name, id, params, prepare, NULL); + ss->register_closure(name, id, params, prepare, NULL, 16); } void OSLShader::register_closures(OSLShadingSystem *ss_) diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index b267731abe5..29c10ffa4f3 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -18,6 +18,15 @@ #define __SHADER_H__ #ifdef WITH_OSL +# if defined(_MSC_VER) +/* Prevent OSL from polluting the context with weird macros from windows.h. + * TODO(sergey): Ideally it's only enough to have class/struct declarations in + * the header and skip header include here. 
+ */ +# define NOGDI +# define NOMINMAX +# define WIN32_LEAN_AND_MEAN +# endif # include <OSL/oslexec.h> #endif diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 295ebd24a5c..a07deb68b15 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -30,6 +30,7 @@ endif() set(SRC_HEADERS util_algorithm.h util_args.h + util_atomic.h util_boundbox.h util_cache.h util_debug.h diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h new file mode 100644 index 00000000000..1bbb0a86e23 --- /dev/null +++ b/intern/cycles/util/util_atomic.h @@ -0,0 +1,33 @@ +/* + * Copyright 2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +#ifndef __UTIL_ATOMIC_H__ +#define __UTIL_ATOMIC_H__ + +/* Using atomic ops header from Blender. 
*/ +#include "atomic_ops.h" + +ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value) +{ + size_t prev_value = *maximum_value; + while (prev_value < value) { + if (atomic_cas_z(maximum_value, prev_value, value) != prev_value) { + break; + } + } +} + +#endif /* __UTIL_ATOMIC_H__ */ diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h index 8758b823084..fe6c162366e 100644 --- a/intern/cycles/util/util_stats.h +++ b/intern/cycles/util/util_stats.h @@ -17,6 +17,8 @@ #ifndef __UTIL_STATS_H__ #define __UTIL_STATS_H__ +#include "util_atomic.h" + CCL_NAMESPACE_BEGIN class Stats { @@ -24,14 +26,13 @@ public: Stats() : mem_used(0), mem_peak(0) {} void mem_alloc(size_t size) { - mem_used += size; - if(mem_used > mem_peak) - mem_peak = mem_used; + atomic_add_z(&mem_used, size); + atomic_update_max_z(&mem_peak, mem_used); } void mem_free(size_t size) { assert(mem_used >= size); - mem_used -= size; + atomic_sub_z(&mem_used, size); } size_t mem_used; |