diff options
author | Campbell Barton <ideasman42@gmail.com> | 2017-03-31 15:52:22 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2017-03-31 15:52:22 +0300 |
commit | 70fdf0fe37b37e798a5e116415c8699e3d652481 (patch) | |
tree | e3cc19476701686b9da48982df8b079101c00a5f /intern | |
parent | 0be064067a432da070add207a7f4a3201ce7f071 (diff) | |
parent | ff693959d89aeb7f763e0286f6fe63bdde20057c (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/CMakeLists.txt | 5 | ||||
-rw-r--r-- | intern/cycles/bvh/bvh.cpp | 56 | ||||
-rw-r--r-- | intern/cycles/bvh/bvh.h | 6 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 17 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_util.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 3 | ||||
-rw-r--r-- | intern/cycles/util/util_path.cpp | 9 |
7 files changed, 56 insertions, 46 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 31fa15a2042..806a8660e8c 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -22,6 +22,7 @@ if(WITH_CYCLES_NATIVE_ONLY) -DWITH_KERNEL_NATIVE ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") + set(CYCLES_KERNEL_FLAGS "-march=native") elseif(NOT WITH_CPU_SSE) set(CXX_HAS_SSE FALSE) set(CXX_HAS_AVX FALSE) @@ -59,10 +60,13 @@ elseif(WIN32 AND MSVC) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Ox") + + set(CYCLES_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") elseif(CMAKE_COMPILER_IS_GNUCC) check_cxx_compiler_flag(-msse CXX_HAS_SSE) check_cxx_compiler_flag(-mavx CXX_HAS_AVX) check_cxx_compiler_flag(-mavx2 CXX_HAS_AVX2) + set(CYCLES_KERNEL_FLAGS "-ffast-math") if(CXX_HAS_SSE) set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2 -mfpmath=sse") set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse") @@ -79,6 +83,7 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") check_cxx_compiler_flag(-msse CXX_HAS_SSE) check_cxx_compiler_flag(-mavx CXX_HAS_AVX) check_cxx_compiler_flag(-mavx2 CXX_HAS_AVX2) + set(CYCLES_KERNEL_FLAGS "-ffast-math") if(CXX_HAS_SSE) set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2") set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3") diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 18e8c2bfdf2..58348d16746 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -67,7 +67,7 @@ BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects) if(params.use_qbvh) return new QBVH(params, objects); else - return new RegularBVH(params, objects); + return new BinaryBVH(params, objects); } /* Building */ @@ -427,13 +427,13 @@ static bool node_bvh_is_unaligned(const BVHNode *node) return node0->is_unaligned || node1->is_unaligned; } -RegularBVH::RegularBVH(const BVHParams& params_, const vector<Object*>& objects_) +BinaryBVH::BinaryBVH(const BVHParams& params_, const vector<Object*>& objects_) : BVH(params_, objects_) { } -void RegularBVH::pack_leaf(const BVHStackEntry& e, - const LeafNode *leaf) +void BinaryBVH::pack_leaf(const BVHStackEntry& e, + const LeafNode *leaf) { assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size()); float4 data[BVH_NODE_LEAF_SIZE]; @@ -456,9 +456,9 @@ void RegularBVH::pack_leaf(const BVHStackEntry& e, memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_NODE_LEAF_SIZE); } -void RegularBVH::pack_inner(const BVHStackEntry& e, - const BVHStackEntry& e0, - const BVHStackEntry& e1) +void BinaryBVH::pack_inner(const BVHStackEntry& e, + const BVHStackEntry& e0, + const BVHStackEntry& e1) { if(e0.node->is_unaligned || e1.node->is_unaligned) { pack_unaligned_inner(e, e0, e1); @@ -467,9 +467,9 @@ void RegularBVH::pack_inner(const BVHStackEntry& e, } } -void RegularBVH::pack_aligned_inner(const BVHStackEntry& e, - const BVHStackEntry& e0, - const BVHStackEntry& e1) +void BinaryBVH::pack_aligned_inner(const BVHStackEntry& e, + const BVHStackEntry& e0, + const BVHStackEntry& e1) { pack_aligned_node(e.idx, e0.node->bounds, e1.node->bounds, @@ -477,11 +477,11 @@ void RegularBVH::pack_aligned_inner(const BVHStackEntry& e, e0.node->visibility, e1.node->visibility); } -void RegularBVH::pack_aligned_node(int idx, - const BoundBox& b0, - const BoundBox& b1, - int c0, int c1, - uint visibility0, uint visibility1) +void BinaryBVH::pack_aligned_node(int idx, + const BoundBox& b0, + const BoundBox& b1, + int c0, int c1, + uint visibility0, uint visibility1) { assert(idx + BVH_NODE_SIZE <= pack.nodes.size()); assert(c0 < 0 || c0 < pack.nodes.size()); @@ -508,9 +508,9 @@ void RegularBVH::pack_aligned_node(int idx, memcpy(&pack.nodes[idx], data, sizeof(int4)*BVH_NODE_SIZE); } -void RegularBVH::pack_unaligned_inner(const BVHStackEntry& e, - const BVHStackEntry& e0, - const BVHStackEntry& e1) +void BinaryBVH::pack_unaligned_inner(const BVHStackEntry& e, + const BVHStackEntry& e0, + const BVHStackEntry& e1) { pack_unaligned_node(e.idx, e0.node->get_aligned_space(), @@ -521,13 +521,13 @@ void RegularBVH::pack_unaligned_inner(const BVHStackEntry& e, e0.node->visibility, e1.node->visibility); } -void RegularBVH::pack_unaligned_node(int idx, - const Transform& aligned_space0, - const Transform& aligned_space1, - const BoundBox& bounds0, - const BoundBox& bounds1, - int c0, int c1, - uint visibility0, uint visibility1) +void BinaryBVH::pack_unaligned_node(int idx, + const Transform& aligned_space0, + const Transform& aligned_space1, + const BoundBox& bounds0, + const BoundBox& bounds1, + int c0, int c1, + uint visibility0, uint visibility1) { assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size()); assert(c0 < 0 || c0 < pack.nodes.size()); @@ -553,7 +553,7 @@ void RegularBVH::pack_unaligned_node(int idx, memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_NODE_SIZE); } -void RegularBVH::pack_nodes(const BVHNode *root) +void BinaryBVH::pack_nodes(const BVHNode *root) { const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT); const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT); @@ -630,7 +630,7 @@ void RegularBVH::pack_nodes(const BVHNode *root) pack.root_index = (root->is_leaf())? -1: 0; } -void RegularBVH::refit_nodes() +void BinaryBVH::refit_nodes() { assert(!params.top_level); @@ -639,7 +639,7 @@ void RegularBVH::refit_nodes() refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility); } -void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility) +void BinaryBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility) { if(leaf) { assert(idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size()); diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h index 8fd849020fd..60bc62ee6e4 100644 --- a/intern/cycles/bvh/bvh.h +++ b/intern/cycles/bvh/bvh.h @@ -110,15 +110,15 @@ protected: virtual void refit_nodes() = 0; }; -/* Regular BVH +/* Binary BVH * * Typical BVH with each node having two children. */ -class RegularBVH : public BVH { +class BinaryBVH : public BVH { protected: /* constructor */ friend class BVH; - RegularBVH(const BVHParams& params, const vector<Object*>& objects); + BinaryBVH(const BVHParams& params, const vector<Object*>& objects); /* pack */ void pack_nodes(const BVHNode *root); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index ba3ca3c3e1e..606494f08ed 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -300,8 +300,8 @@ public: { const int cuda_version = cuewCompilerVersion(); const int machine = system_cpu_bits(); - const string kernel_path = path_get("source/kernel"); - const string include = path_dirname(kernel_path); + const string source_path = path_get("source"); + const string include_path = source_path; string cflags = string_printf("-m%d " "--ptxas-options=\"-v\" " "--use_fast_math " @@ -310,7 +310,7 @@ public: "-I\"%s\"", machine, cuda_version, - include.c_str()); + include_path.c_str()); if(use_adaptive_compilation()) { cflags += " " + requested_features.get_build_options(); } @@ -382,8 +382,8 @@ public: compile_kernel_get_common_cflags(requested_features, split); /* Try to use locally compiled kernel. */ - const string kernel_path = path_get("source/kernel"); - const string kernel_md5 = path_files_md5_hash(kernel_path); + const string source_path = path_get("source"); + const string kernel_md5 = path_files_md5_hash(source_path); /* We include cflags into md5 so changing cuda toolkit or changing other * compiler command line arguments makes sure cubin gets re-built. @@ -424,9 +424,10 @@ public: return ""; } const char *nvcc = cuewCompilerPath(); - const string kernel = path_join(kernel_path, - path_join("kernels", - path_join("cuda", split ? "kernel_split.cu" : "kernel.cu"))); + const string kernel = path_join( + path_join(source_path, "kernel"), + path_join("kernels", + path_join("cuda", split ? "kernel_split.cu" : "kernel.cu"))); double starttime = time_dt(); printf("Compiling CUDA kernel ...\n"); diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index 44845c944bf..8128fcee09b 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -235,7 +235,7 @@ string OpenCLCache::get_kernel_md5() thread_scoped_lock lock(self.kernel_md5_lock); if(self.kernel_md5.empty()) { - self.kernel_md5 = path_files_md5_hash(path_get("source/kernel")); + self.kernel_md5 = path_files_md5_hash(path_get("source")); } return self.kernel_md5; } @@ -443,8 +443,8 @@ void OpenCLDeviceBase::OpenCLProgram::load() add_log(string("OpenCL program ") + program_name + " not found in cache.", true); /* need to create source to get md5 */ - string source = "#include \"kernels/opencl/" + kernel_file + "\"\n"; - source = path_source_replace_includes(source, path_get("source/kernel")); + string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n"; + source = path_source_replace_includes(source, path_get("source")); string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source); basename = path_cache_get(path_join("kernels", basename)); diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 54151f06d31..dbc2ba2503a 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -347,6 +347,9 @@ endif() include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) +set_source_files_properties(kernels/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") +set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") + if(CXX_HAS_SSE) list(APPEND SRC kernels/cpu/kernel_sse2.cpp diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp index 5c00b0551d8..cd3067f7650 100644 --- a/intern/cycles/util/util_path.cpp +++ b/intern/cycles/util/util_path.cpp @@ -320,17 +320,18 @@ static char *path_specials(const string& sub) { static bool env_init = false; static char *env_shader_path; - static char *env_kernel_path; + static char *env_source_path; if(!env_init) { env_shader_path = getenv("CYCLES_SHADER_PATH"); - env_kernel_path = getenv("CYCLES_KERNEL_PATH"); + /* NOTE: It is KERNEL in env variable for compatibility reasons. */ + env_source_path = getenv("CYCLES_KERNEL_PATH"); env_init = true; } if(env_shader_path != NULL && sub == "shader") { return env_shader_path; } - else if(env_shader_path != NULL && sub == "kernel") { - return env_kernel_path; + else if(env_shader_path != NULL && sub == "source") { + return env_source_path; } return NULL; } |