diff options
author | Campbell Barton <ideasman42@gmail.com> | 2018-01-03 15:44:47 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2018-01-03 15:44:47 +0300 |
commit | be403891652a375e5a0ac61b493342ca6d39afb7 (patch) | |
tree | ef9637103db6d66c4b311cba5b705d575562a1f8 | |
parent | 060fdb49d64857ff1cbf9937420ed70b10b17086 (diff) | |
parent | cbc7aa80d49e3b36c9ecc0e27ec528b34c491fc1 (diff) |
Merge branch 'master' into blender2.8
29 files changed, 476 insertions, 120 deletions
diff --git a/build_files/build_environment/cmake/opensubdiv.cmake b/build_files/build_environment/cmake/opensubdiv.cmake index ca3a6983b22..930ef4e6ed3 100644 --- a/build_files/build_environment/cmake/opensubdiv.cmake +++ b/build_files/build_environment/cmake/opensubdiv.cmake @@ -43,7 +43,7 @@ if(WIN32) set(OPENSUBDIV_EXTRA_ARGS ${OPENSUBDIV_EXTRA_ARGS} -DNO_CUDA=${OPENSUBDIV_CUDA} - -DCLEW_INCLUDE_DIR=${LIBDIR}/clew/include/cl + -DCLEW_INCLUDE_DIR=${LIBDIR}/clew/include/CL -DCLEW_LIBRARY=${LIBDIR}/clew/lib/clew${LIBEXT} -DCUEW_INCLUDE_DIR=${LIBDIR}/cuew/include -DCUEW_LIBRARY=${LIBDIR}/cuew/lib/cuew${LIBEXT} @@ -54,6 +54,7 @@ else() ${OPENSUBDIV_EXTRA_ARGS} -DNO_CUDA=ON -DCUEW_INCLUDE_DIR=${LIBDIR}/cuew/include + -DCLEW_INCLUDE_DIR=${LIBDIR}/clew/include/CL -DCLEW_LIBRARY=${LIBDIR}/clew/lib/static/${LIBPREFIX}clew${LIBEXT} ) endif() diff --git a/build_files/build_environment/cmake/tbb.cmake b/build_files/build_environment/cmake/tbb.cmake index c4055d55648..fa5c9029ed0 100644 --- a/build_files/build_environment/cmake/tbb.cmake +++ b/build_files/build_environment/cmake/tbb.cmake @@ -23,6 +23,14 @@ set(TBB_EXTRA_ARGS -DTBB_BUILD_STATIC=On ) +if(TBB_VERSION MATCHES 2018) + set(TBB_VS_VERSION vs2013) +elseif(TBB_VERSION MATCHES 2017) + set(TBB_VS_VERSION vs2012) +else() + set(TBB_VS_VERSION vs2010) +endif() + # CMake script for TBB from https://github.com/wjakob/tbb/blob/master/CMakeLists.txt ExternalProject_Add(external_tbb URL ${TBB_URI} @@ -30,7 +38,7 @@ ExternalProject_Add(external_tbb URL_HASH MD5=${TBB_HASH} PREFIX ${BUILD_DIR}/tbb PATCH_COMMAND COMMAND ${CMAKE_COMMAND} -E copy ${PATCH_DIR}/cmakelists_tbb.txt ${BUILD_DIR}/tbb/src/external_tbb/CMakeLists.txt && - ${CMAKE_COMMAND} -E copy ${BUILD_DIR}/tbb/src/external_tbb/build/vs2010/version_string.ver ${BUILD_DIR}/tbb/src/external_tbb/src/tbb/version_string.ver + ${CMAKE_COMMAND} -E copy ${BUILD_DIR}/tbb/src/external_tbb/build/${TBB_VS_VERSION}/version_string.ver ${BUILD_DIR}/tbb/src/external_tbb/src/tbb/version_string.ver CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/tbb ${DEFAULT_CMAKE_FLAGS} ${TBB_EXTRA_ARGS} INSTALL_DIR ${LIBDIR}/tbb ) diff --git a/build_files/build_environment/cmake/versions.cmake b/build_files/build_environment/cmake/versions.cmake index e53beeddf4a..a4aed76ee5e 100644 --- a/build_files/build_environment/cmake/versions.cmake +++ b/build_files/build_environment/cmake/versions.cmake @@ -137,9 +137,16 @@ set(PYTHON_SHORT_VERSION_NO_DOTS 36) set(PYTHON_URI https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz) set(PYTHON_HASH 2c68846471994897278364fc18730dd9) -set(TBB_VERSION 44_20160128) -set(TBB_URI https://www.threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb${TBB_VERSION}oss_src_0.tgz) -set(TBB_HASH 9d8a4cdf43496f1b3f7c473a5248e5cc) +if(UNIX AND NOT APPLE) + # Needed to be compatible with GCC 7, other platforms can upgrade later + set(TBB_VERSION 2017_U7) + set(TBB_URI https://github.com/01org/tbb/archive/${TBB_VERSION}.tar.gz) + set(TBB_HASH 364f2a4b80e978f38a69cbf7c466b898) +else() + set(TBB_VERSION 44_20160128) + set(TBB_URI https://www.threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb${TBB_VERSION}oss_src_0.tgz) + set(TBB_HASH 9d8a4cdf43496f1b3f7c473a5248e5cc) +endif() set(OPENVDB_VERSION 3.1.0) set(OPENVDB_URI https://github.com/dreamworksanimation/openvdb/archive/v${OPENVDB_VERSION}.tar.gz) diff --git a/build_files/build_environment/install_deps.sh b/build_files/build_environment/install_deps.sh index 07ccf0cf3a6..a3760910d25 100755 --- a/build_files/build_environment/install_deps.sh +++ b/build_files/build_environment/install_deps.sh @@ -753,8 +753,8 @@ OIIO_SOURCE=( "https://github.com/OpenImageIO/oiio/archive/Release-$OIIO_VERSION OIIO_SOURCE_REPO=( "https://github.com/OpenImageIO/oiio.git" ) OIIO_SOURCE_REPO_UID="c9e67275a0b248ead96152f6d2221cc0c0f278a4" -LLVM_SOURCE=( "http://llvm.org/releases/$LLVM_VERSION/llvm-$LLVM_VERSION.src.tar.gz" ) -LLVM_CLANG_SOURCE=( "http://llvm.org/releases/$LLVM_VERSION/clang-$LLVM_VERSION.src.tar.gz" "http://llvm.org/releases/$LLVM_VERSION/cfe-$LLVM_VERSION.src.tar.gz" ) +LLVM_SOURCE=( "http://releases.llvm.org/$LLVM_VERSION/llvm-$LLVM_VERSION.src.tar.gz" ) +LLVM_CLANG_SOURCE=( "http://releases.llvm.org/$LLVM_VERSION/clang-$LLVM_VERSION.src.tar.gz" "http://llvm.org/releases/$LLVM_VERSION/cfe-$LLVM_VERSION.src.tar.gz" ) OSL_USE_REPO=false OSL_SOURCE=( "https://github.com/imageworks/OpenShadingLanguage/archive/Release-$OSL_VERSION.tar.gz" ) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 47c09dfebf9..82460af3b17 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -37,6 +37,7 @@ # include <cudaGL.h> #endif #include "util/util_debug.h" +#include "util/util_foreach.h" #include "util/util_logging.h" #include "util/util_map.h" #include "util/util_md5.h" @@ -128,6 +129,12 @@ public: CUdevice cuDevice; CUcontext cuContext; CUmodule cuModule, cuFilterModule; + size_t device_texture_headroom; + size_t device_working_headroom; + bool move_texture_to_host; + size_t map_host_used; + size_t map_host_limit; + int can_map_host; int cuDevId; int cuDevArchitecture; bool first_error; @@ -135,12 +142,15 @@ public: struct CUDAMem { CUDAMem() - : texobject(0), array(0) {} + : texobject(0), array(0), map_host_pointer(0), free_map_host(false) {} CUtexObject texobject; CUarray array; + void *map_host_pointer; + bool free_map_host; }; - map<device_memory*, CUDAMem> cuda_mem_map; + typedef map<device_memory*, CUDAMem> CUDAMemMap; + CUDAMemMap cuda_mem_map; struct PixelMem { GLuint cuPBO; @@ -240,6 +250,13 @@ public: need_texture_info = false; + device_texture_headroom = 0; + device_working_headroom = 0; + move_texture_to_host = false; + map_host_limit = 0; + map_host_used = 0; + can_map_host = 0; + /* Intialize CUDA. */ if(cuda_error(cuInit(0))) return; @@ -248,9 +265,16 @@ public: if(cuda_error(cuDeviceGet(&cuDevice, cuDevId))) return; - /* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render, + /* CU_CTX_MAP_HOST for mapping host memory when out of device memory. + * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render, * so we can predict which memory to map to host. */ + cuda_assert(cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice)); + unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX; + if(can_map_host) { + ctx_flags |= CU_CTX_MAP_HOST; + init_host_memory(); + } /* Create context. */ CUresult result; @@ -611,6 +635,50 @@ public: VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after) << " bytes. (" << string_human_readable_size(free_before - free_after) << ")"; + +#if 0 + /* For testing mapped host memory, fill up device memory. */ + const size_t keep_mb = 1024; + + while(free_after > keep_mb * 1024 * 1024LL) { + CUdeviceptr tmp; + cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL)); + cuMemGetInfo(&free_after, &total); + } +#endif + } + + void init_host_memory() + { + /* Limit amount of host mapped memory, because allocating too much can + * cause system instability. Leave at least half or 4 GB of system + * memory free, whichever is smaller. */ + size_t default_limit = 4 * 1024 * 1024 * 1024LL; + size_t system_ram = system_physical_ram(); + + if(system_ram > 0) { + if(system_ram / 2 > default_limit) { + map_host_limit = system_ram - default_limit; + } + else { + map_host_limit = system_ram / 2; + } + } + else { + VLOG(1) << "Mapped host memory disabled, failed to get system RAM"; + map_host_limit = 0; + } + + /* Amount of device memory to keep is free after texture memory + * and working memory allocations respectively. We set the working + * memory limit headroom lower so that some space is left after all + * texture memory allocations. */ + device_working_headroom = 32 * 1024 * 1024LL; // 32MB + device_texture_headroom = 128 * 1024 * 1024LL; // 128MB + + VLOG(1) << "Mapped host memory limit set to " + << string_human_readable_number(map_host_limit) << " bytes. (" + << string_human_readable_size(map_host_limit) << ")"; } void load_texture_info() @@ -621,20 +689,167 @@ public: } } - CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0) + void move_textures_to_host(size_t size, bool for_texture) + { + /* Signal to reallocate textures in host memory only. */ + move_texture_to_host = true; + + while(size > 0) { + /* Find suitable memory allocation to move. */ + device_memory *max_mem = NULL; + size_t max_size = 0; + bool max_is_image = false; + + foreach(CUDAMemMap::value_type& pair, cuda_mem_map) { + device_memory& mem = *pair.first; + CUDAMem *cmem = &pair.second; + + bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); + bool is_image = is_texture && (mem.data_height > 1); + + /* Can't move this type of memory. */ + if(!is_texture || cmem->array) { + continue; + } + + /* Already in host memory. */ + if(cmem->map_host_pointer) { + continue; + } + + /* For other textures, only move image textures. */ + if(for_texture && !is_image) { + continue; + } + + /* Try to move largest allocation, prefer moving images. */ + if(is_image > max_is_image || + (is_image == max_is_image && mem.device_size > max_size)) { + max_is_image = is_image; + max_size = mem.device_size; + max_mem = &mem; + } + } + + /* Move to host memory. This part is mutex protected since + * multiple CUDA devices could be moving the memory. The + * first one will do it, and the rest will adopt the pointer. */ + if(max_mem) { + VLOG(1) << "Move memory from device to host: " << max_mem->name; + + static thread_mutex move_mutex; + thread_scoped_lock lock(move_mutex); + + /* Preserve the original device pointer, in case of multi device + * we can't change it because the pointer mapping would break. */ + device_ptr prev_pointer = max_mem->device_pointer; + size_t prev_size = max_mem->device_size; + + tex_free(*max_mem); + tex_alloc(*max_mem); + size = (max_size >= size)? 0: size - max_size; + + max_mem->device_pointer = prev_pointer; + max_mem->device_size = prev_size; + } + else { + break; + } + } + + /* Update texture info array with new pointers. */ + load_texture_info(); + + move_texture_to_host = false; + } + + CUDAMem *generic_alloc(device_memory& mem, size_t pitch_padding = 0) { CUDAContextScope scope(this); + CUdeviceptr device_pointer = 0; + size_t size = mem.memory_size() + pitch_padding; + + CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY; + const char *status = ""; + + /* First try allocating in device memory, respecting headroom. We make + * an exception for texture info. It is small and frequently accessed, + * so treat it as working memory. + * + * If there is not enough room for working memory, we will try to move + * textures to host memory, assuming the performance impact would have + * been worse for working memory. */ + bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); + bool is_image = is_texture && (mem.data_height > 1); + + size_t headroom = (is_texture)? device_texture_headroom: + device_working_headroom; + + size_t total = 0, free = 0; + cuMemGetInfo(&free, &total); + + /* Move textures to host memory if needed. */ + if(!move_texture_to_host && !is_image && (size + headroom) >= free) { + move_textures_to_host(size + headroom - free, is_texture); + cuMemGetInfo(&free, &total); + } + + /* Allocate in device memory. */ + if(!move_texture_to_host && (size + headroom) < free) { + mem_alloc_result = cuMemAlloc(&device_pointer, size); + if(mem_alloc_result == CUDA_SUCCESS) { + status = " in device memory"; + } + } + + /* Fall back to mapped host memory if needed and possible. */ + void *map_host_pointer = 0; + bool free_map_host = false; + + if(mem_alloc_result != CUDA_SUCCESS && can_map_host && + map_host_used + size < map_host_limit) { + if(mem.shared_pointer) { + /* Another device already allocated host memory. */ + mem_alloc_result = CUDA_SUCCESS; + map_host_pointer = mem.shared_pointer; + } + else { + /* Allocate host memory ourselves. */ + mem_alloc_result = cuMemHostAlloc(&map_host_pointer, size, + CU_MEMHOSTALLOC_DEVICEMAP | + CU_MEMHOSTALLOC_WRITECOMBINED); + mem.shared_pointer = map_host_pointer; + free_map_host = true; + } + + if(mem_alloc_result == CUDA_SUCCESS) { + cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, mem.shared_pointer, 0)); + map_host_used += size; + status = " in host memory"; + + /* Replace host pointer with our host allocation. Only works if + * CUDA memory layout is the same and has no pitch padding. */ + if(pitch_padding == 0 && mem.host_pointer && mem.host_pointer != mem.shared_pointer) { + memcpy(mem.shared_pointer, mem.host_pointer, size); + mem.host_free(); + mem.host_pointer = mem.shared_pointer; + } + } + } + + if(mem_alloc_result != CUDA_SUCCESS) { + cuda_assert(mem_alloc_result); + status = " failed, out of memory"; + } + if(mem.name) { VLOG(1) << "Buffer allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; + << string_human_readable_size(mem.memory_size()) << ")" + << status; } - /* Allocate memory on device. */ - CUdeviceptr device_pointer = 0; - size_t size = mem.memory_size(); - cuda_assert(cuMemAlloc(&device_pointer, size + padding)); mem.device_pointer = (device_ptr)device_pointer; mem.device_size = size; stats.mem_alloc(size); @@ -645,14 +860,21 @@ public: /* Insert into map of allocations. */ CUDAMem *cmem = &cuda_mem_map[&mem]; + cmem->map_host_pointer = map_host_pointer; + cmem->free_map_host = free_map_host; return cmem; } void generic_copy_to(device_memory& mem) { - if(mem.device_pointer) { + if(mem.host_pointer && mem.device_pointer) { CUDAContextScope scope(this); - cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size())); + + if(mem.host_pointer != mem.shared_pointer) { + cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), + mem.host_pointer, + mem.memory_size())); + } } } @@ -660,8 +882,24 @@ public: { if(mem.device_pointer) { CUDAContextScope scope(this); + const CUDAMem& cmem = cuda_mem_map[&mem]; + + if(cmem.map_host_pointer) { + /* Free host memory. */ + if(cmem.free_map_host) { + cuMemFreeHost(cmem.map_host_pointer); + if(mem.host_pointer == mem.shared_pointer) { + mem.host_pointer = 0; + } + mem.shared_pointer = 0; + } - cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); + map_host_used -= mem.device_size; + } + else { + /* Free device memory. */ + cuMemFree(mem.device_pointer); + } stats.mem_free(mem.device_size); mem.device_pointer = 0; @@ -715,11 +953,11 @@ public: size_t offset = elem*y*w; size_t size = elem*w*h; - if(mem.device_pointer) { + if(mem.host_pointer && mem.device_pointer) { cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size)); } - else { + else if(mem.host_pointer) { memset((char*)mem.host_pointer + offset, 0, size); } } @@ -735,7 +973,8 @@ public: memset(mem.host_pointer, 0, mem.memory_size()); } - if(mem.device_pointer) { + if(mem.device_pointer && + (!mem.host_pointer || mem.host_pointer != mem.shared_pointer)) { CUDAContextScope scope(this); cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())); } @@ -774,10 +1013,6 @@ public: { CUDAContextScope scope(this); - VLOG(1) << "Texture allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")"; - /* Check if we are on sm_30 or above, for bindless textures. */ bool has_fermi_limits = info.has_fermi_limits; @@ -881,6 +1116,10 @@ public: desc.NumChannels = mem.data_elements; desc.Flags = 0; + VLOG(1) << "Array 3D allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + cuda_assert(cuArray3DCreate(&array_3d, &desc)); if(!array_3d) { @@ -1118,13 +1357,17 @@ public: int shift_stride = stride*h; int num_shifts = (2*r+1)*(2*r+1); - int mem_size = sizeof(float)*shift_stride*2*num_shifts; + int mem_size = sizeof(float)*shift_stride*num_shifts; int channel_offset = 0; - CUdeviceptr temporary_mem; - cuda_assert(cuMemAlloc(&temporary_mem, mem_size)); - CUdeviceptr difference = temporary_mem; - CUdeviceptr blurDifference = temporary_mem + sizeof(float)*shift_stride * num_shifts; + device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem"); + temporary_mem.alloc_to_device(2*mem_size); + + if(have_error()) + return false; + + CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer); + CUdeviceptr blurDifference = difference + mem_size; CUdeviceptr weightAccum = task->nlm_state.temporary_3_ptr; cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*shift_stride)); @@ -1156,7 +1399,7 @@ public: CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args); } - cuMemFree(temporary_mem); + temporary_mem.free(); { CUfunction cuNLMNormalize; @@ -1225,10 +1468,14 @@ public: int num_shifts = (2*r+1)*(2*r+1); int mem_size = sizeof(float)*shift_stride*num_shifts; - CUdeviceptr temporary_mem; - cuda_assert(cuMemAlloc(&temporary_mem, 2*mem_size)); - CUdeviceptr difference = temporary_mem; - CUdeviceptr blurDifference = temporary_mem + mem_size; + device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem"); + temporary_mem.alloc_to_device(2*mem_size); + + if(have_error()) + return false; + + CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer); + CUdeviceptr blurDifference = difference + mem_size; { CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian; @@ -1268,7 +1515,7 @@ public: CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args); } - cuMemFree(temporary_mem); + temporary_mem.free(); { CUfunction cuFinalize; diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp index 3ad0946330b..82598007a59 100644 --- a/intern/cycles/device/device_memory.cpp +++ b/intern/cycles/device/device_memory.cpp @@ -35,7 +35,8 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type) extension(EXTENSION_REPEAT), device(device), device_pointer(0), - host_pointer(0) + host_pointer(0), + shared_pointer(0) { } @@ -86,7 +87,7 @@ void device_memory::device_free() void device_memory::device_copy_to() { - if(data_size) { + if(host_pointer) { device->mem_copy_to(*this); } } diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 453dab9bfb3..2a027917066 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -197,10 +197,13 @@ public: Device *device; device_ptr device_pointer; void *host_pointer; + void *shared_pointer; virtual ~device_memory(); protected: + friend class CUDADevice; + /* Only create through subclasses. */ device_memory(Device *device, const char *name, MemoryType type); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 16238c14aa0..91507e6be0c 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -48,11 +48,17 @@ public: MultiDevice(DeviceInfo& info, Stats &stats, bool background_) : Device(info, stats, background_), unique_key(1) { - Device *device; - foreach(DeviceInfo& subinfo, info.multi_devices) { - device = Device::create(subinfo, sub_stats_, background); - devices.push_back(SubDevice(device)); + Device *device = Device::create(subinfo, sub_stats_, background); + + /* Always add CPU devices at the back since GPU devices can change + * host memory pointers, which CPU uses as device pointer. */ + if(subinfo.type == DEVICE_CPU) { + devices.push_back(SubDevice(device)); + } + else { + devices.push_front(SubDevice(device)); + } } #ifdef WITH_NETWORK @@ -63,7 +69,7 @@ public: vector<string> servers = discovery.get_server_list(); foreach(string& server, servers) { - device = device_network_create(info, stats, server.c_str()); + Device *device = device_network_create(info, stats, server.c_str()); if(device) devices.push_back(SubDevice(device)); } diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 9ff02c1586b..d3dac5706d0 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -211,9 +211,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a break; } case NODE_CLOSURE_BSDF: - if(type == SHADER_TYPE_SURFACE) { - svm_node_closure_bsdf(kg, sd, stack, node, path_flag, &offset); - } + svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset); break; case NODE_CLOSURE_EMISSION: svm_node_closure_emission(sd, stack, node); @@ -331,9 +329,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a break; # if NODES_FEATURE(NODE_FEATURE_VOLUME) case NODE_CLOSURE_VOLUME: - if(type == SHADER_TYPE_VOLUME) { - svm_node_closure_volume(kg, sd, stack, node, path_flag); - } + svm_node_closure_volume(kg, sd, stack, node, type, path_flag); break; # endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ # ifdef __EXTRA_NODES__ diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index f04c46ef7f9..47ebe4288e3 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -56,7 +56,7 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int t } } -ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int *offset) +ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset) { uint type, param1_offset, param2_offset; @@ -67,8 +67,18 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * /* note we read this extra node before weight check, so offset is added */ uint4 data_node = read_node(kg, offset); - if(mix_weight == 0.0f) + /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */ + if(mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) { + if(type == CLOSURE_BSDF_PRINCIPLED_ID) { + /* Read all principled BSDF extra data to get the right offset. */ + read_node(kg, offset); + read_node(kg, offset); + read_node(kg, offset); + read_node(kg, offset); + } + return; + } float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N; @@ -835,9 +845,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * } } -ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag) +ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag) { #ifdef __VOLUME__ + /* Only sum extinction for volumes, variable is shared with surface transparency. */ + if(shader_type != SHADER_TYPE_VOLUME) { + return; + } + uint type, param1_offset, param2_offset; uint mix_weight_offset; diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index aeb5d1c1316..1fef7a0188f 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -151,6 +151,10 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre progress.reset_sample(); progress.set_total_pixel_samples(total_pixel_samples); + /* needs to be up to date for baking specific AA samples */ + dscene->data.integrator.aa_samples = num_samples; + device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); + for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); @@ -175,9 +179,6 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre d_output.zero_to_device(); d_input.copy_to_device(); - /* needs to be up to data for attribute access */ - device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - DeviceTask task(DeviceTask::SHADER); task.shader_input = d_input.device_pointer; task.shader_output = d_output.device_pointer; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index c6502df7252..89a44c7ce3c 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -151,6 +151,10 @@ bool RenderBuffers::copy_from_device() bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels) { + if(buffer.data() == NULL) { + return false; + } + float invsample = 1.0f/sample; float scale = invsample; bool variance = (offset == DENOISING_PASS_NORMAL_VAR) || @@ -218,6 +222,10 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels) { + if(buffer.data() == NULL) { + return false; + } + int pass_offset = 0; for(size_t j = 0; j < params.passes.size(); j++) { diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 482442cce29..feaa17148ee 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -703,7 +703,7 @@ void ImageManager::device_load_image(Device *device, /* Slot assignment */ int flat_slot = type_index_to_flattened_slot(slot, type); - string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot); + img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot); /* Free previous texture in slot. */ if(img->mem) { @@ -715,7 +715,7 @@ void ImageManager::device_load_image(Device *device, /* Create new texture. */ if(type == IMAGE_DATA_TYPE_FLOAT4) { device_vector<float4> *tex_img - = new device_vector<float4>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<float4>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::FLOAT, float>(img, type, @@ -741,7 +741,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_FLOAT) { device_vector<float> *tex_img - = new device_vector<float>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<float>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::FLOAT, float>(img, type, @@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_BYTE4) { device_vector<uchar4> *tex_img - = new device_vector<uchar4>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<uchar4>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::UINT8, uchar>(img, type, @@ -790,7 +790,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_BYTE) { device_vector<uchar> *tex_img - = new device_vector<uchar>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<uchar>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::UINT8, uchar>(img, type, @@ -812,7 +812,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_HALF4) { device_vector<half4> *tex_img - = new device_vector<half4>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<half4>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::HALF, half>(img, type, @@ -837,7 +837,7 @@ void ImageManager::device_load_image(Device *device, } else if(type == IMAGE_DATA_TYPE_HALF) { device_vector<half> *tex_img - = new device_vector<half>(device, name.c_str(), MEM_TEXTURE); + = new device_vector<half>(device, img->mem_name.c_str(), MEM_TEXTURE); if(!file_load_image<TypeDesc::HALF, half>(img, type, diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index cc7c8544bed..3519a67bc05 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -111,6 +111,7 @@ public: InterpolationType interpolation; ExtensionType extension; + string mem_name; device_memory *mem; int users; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index aef7fc29573..d7143f24850 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -644,7 +644,7 @@ void ObjectManager::device_update_flags(Device *, void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene) { - if(scene->objects.size() == 0) { + if(dscene->objects.size() == 0) { return; } diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index a942d738b8a..9b1b9a60c30 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -292,5 +292,26 @@ bool system_cpu_support_avx2() #endif +size_t system_physical_ram() +{ +#ifdef _WIN32 + MEMORYSTATUSEX ram; + ram.dwLength = sizeof (ram); + GlobalMemoryStatusEx(&ram); + return ram.ullTotalPhys * 1024; +#elif defined(__APPLE__) + uint64_t ram = 0; + size_t len = sizeof(ram); + if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { + return ram; + } + return 0; +#else + size_t ps = sysconf(_SC_PAGESIZE); + size_t pn = sysconf(_SC_PHYS_PAGES); + return ps * pn; +#endif +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h index db7a45b2d59..e55dd6dd136 100644 --- a/intern/cycles/util/util_system.h +++ b/intern/cycles/util/util_system.h @@ -42,6 +42,8 @@ bool system_cpu_support_sse41(); bool system_cpu_support_avx(); bool system_cpu_support_avx2(); +size_t system_physical_ram(); + CCL_NAMESPACE_END #endif /* __UTIL_SYSTEM_H__ */ diff --git a/release/scripts/startup/bl_operators/uvcalc_smart_project.py b/release/scripts/startup/bl_operators/uvcalc_smart_project.py index 411c318643b..25783653414 100644 --- a/release/scripts/startup/bl_operators/uvcalc_smart_project.py +++ b/release/scripts/startup/bl_operators/uvcalc_smart_project.py @@ -1067,6 +1067,7 @@ class SmartProject(Operator): island_margin = FloatProperty( name="Island Margin", description="Margin to reduce bleed from adjacent islands", + unit='LENGTH', subtype='DISTANCE', min=0.0, max=1.0, default=0.0, ) diff --git a/release/scripts/startup/bl_ui/properties_particle.py b/release/scripts/startup/bl_ui/properties_particle.py index fda3096a3f5..d52705bce6e 100644 --- a/release/scripts/startup/bl_ui/properties_particle.py +++ b/release/scripts/startup/bl_ui/properties_particle.py @@ -581,10 +581,6 @@ class PARTICLE_PT_physics(ParticleButtonsPanel, Panel): layout.row().prop(part, "physics_type", expand=True) row = layout.row() - col = row.column(align=True) - col.prop(part, "particle_size") - col.prop(part, "size_random", slider=True) - if part.physics_type != 'NO': col = row.column(align=True) col.prop(part, "mass") @@ -1088,7 +1084,8 @@ class PARTICLE_PT_render(ParticleButtonsPanel, Panel): col = row.column() col.label(text="") - if part.render_type in {'OBJECT', 'GROUP'} and not part.use_advanced_hair: + if part.type == 'EMITTER' or \ + (part.render_type in {'OBJECT', 'GROUP'} and part.type == 'HAIR' and not part.use_advanced_hair): row = layout.row(align=True) row.prop(part, "particle_size") row.prop(part, "size_random", slider=True) diff --git a/source/blender/blenkernel/BKE_subsurf.h b/source/blender/blenkernel/BKE_subsurf.h index 92170325113..d7b9d20d7b0 100644 --- a/source/blender/blenkernel/BKE_subsurf.h +++ b/source/blender/blenkernel/BKE_subsurf.h @@ -34,6 +34,9 @@ /* struct DerivedMesh is used directly */ #include "BKE_DerivedMesh.h" +/* Thread sync primitives used directly. */ +#include "BLI_threads.h" + struct CCGElem; struct DMFlagMat; struct DMGridAdjacency; @@ -140,6 +143,9 @@ typedef struct CCGDerivedMesh { } multires; struct EdgeHash *ehash; + + ThreadRWMutex loops_cache_rwlock; + ThreadRWMutex origindex_cache_rwlock; } CCGDerivedMesh; #ifdef WITH_OPENSUBDIV diff --git a/source/blender/blenkernel/intern/mesh_evaluate.c b/source/blender/blenkernel/intern/mesh_evaluate.c index c21c16adc85..68283b4a3aa 100644 --- a/source/blender/blenkernel/intern/mesh_evaluate.c +++ b/source/blender/blenkernel/intern/mesh_evaluate.c @@ -173,10 +173,11 @@ typedef struct MeshCalcNormalsData { const MLoop *mloop; MVert *mverts; float (*pnors)[3]; + float (*lnors_weighted)[3]; float (*vnors)[3]; } MeshCalcNormalsData; -static void mesh_calc_normals_poly_task_cb(void *userdata, const int pidx) +static void mesh_calc_normals_poly_cb(void *userdata, const int pidx) { MeshCalcNormalsData *data = userdata; const MPoly *mp = &data->mpolys[pidx]; @@ -184,7 +185,7 @@ static void mesh_calc_normals_poly_task_cb(void *userdata, const int pidx) BKE_mesh_calc_poly_normal(mp, data->mloop + mp->loopstart, data->mverts, data->pnors[pidx]); } -static void mesh_calc_normals_poly_accum_task_cb(void *userdata, const int pidx) +static void mesh_calc_normals_poly_prepare_cb(void *userdata, const int pidx) { MeshCalcNormalsData *data = userdata; const MPoly *mp = &data->mpolys[pidx]; @@ -193,7 +194,7 @@ static void mesh_calc_normals_poly_accum_task_cb(void *userdata, const int pidx) float pnor_temp[3]; float *pnor = data->pnors ? data->pnors[pidx] : pnor_temp; - float (*vnors)[3] = data->vnors; + float (*lnors_weighted)[3] = data->lnors_weighted; const int nverts = mp->totloop; float (*edgevecbuf)[3] = BLI_array_alloca(edgevecbuf, (size_t)nverts); @@ -220,42 +221,62 @@ static void mesh_calc_normals_poly_accum_task_cb(void *userdata, const int pidx) v_prev = v_curr; } if (UNLIKELY(normalize_v3(pnor) == 0.0f)) { - pnor[2] = 1.0f; /* other axis set to 0.0 */ + pnor[2] = 1.0f; /* other axes set to 0.0 */ } } /* accumulate angle weighted face normal */ - /* inline version of #accumulate_vertex_normals_poly_v3 */ + /* inline version of #accumulate_vertex_normals_poly_v3, + * split between this threaded callback and #mesh_calc_normals_poly_accum_cb. */ { const float *prev_edge = edgevecbuf[nverts - 1]; for (i = 0; i < nverts; i++) { + const int lidx = mp->loopstart + i; const float *cur_edge = edgevecbuf[i]; /* calculate angle between the two poly edges incident on * this vertex */ const float fac = saacos(-dot_v3v3(cur_edge, prev_edge)); - /* accumulate */ - for (int k = 3; k--; ) { - atomic_add_and_fetch_fl(&vnors[ml[i].v][k], pnor[k] * fac); - } + /* Store for later accumulation */ + mul_v3_v3fl(lnors_weighted[lidx], pnor, fac); + prev_edge = cur_edge; } } +} + +static void mesh_calc_normals_poly_accum_cb(void *userdata, const int lidx) +{ + MeshCalcNormalsData *data = userdata; + + add_v3_v3(data->vnors[data->mloop[lidx].v], data->lnors_weighted[lidx]); +} + +static void mesh_calc_normals_poly_finalize_cb(void *userdata, const int vidx) +{ + MeshCalcNormalsData *data = userdata; + + MVert *mv = &data->mverts[vidx]; + float *no = data->vnors[vidx]; + + if (UNLIKELY(normalize_v3(no) == 0.0f)) { + /* following Mesh convention; we use vertex coordinate itself for normal in this case */ + normalize_v3_v3(no, mv->co); + } + normal_float_to_short_v3(mv->no, no); } void BKE_mesh_calc_normals_poly( MVert *mverts, float (*r_vertnors)[3], int numVerts, const MLoop *mloop, const MPoly *mpolys, - int UNUSED(numLoops), int numPolys, float (*r_polynors)[3], + int numLoops, int numPolys, float (*r_polynors)[3], const bool only_face_normals) { + const bool do_threaded = (numPolys > BKE_MESH_OMP_LIMIT); float (*pnors)[3] = r_polynors; - float (*vnors)[3] = r_vertnors; - bool free_vnors = false; - int i; if (only_face_normals) { BLI_assert((pnors != NULL) || (numPolys == 0)); @@ -265,10 +286,14 @@ void BKE_mesh_calc_normals_poly( .mpolys = mpolys, .mloop = mloop, .mverts = mverts, .pnors = pnors, }; - BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_task_cb, (numPolys > BKE_MESH_OMP_LIMIT)); + BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_cb, do_threaded); return; } + float (*vnors)[3] = r_vertnors; + float (*lnors_weighted)[3] = MEM_mallocN(sizeof(*lnors_weighted) * (size_t)numLoops, __func__); + bool free_vnors = false; + /* first go through and calculate normals for all the polys */ if (vnors == NULL) { vnors = MEM_callocN(sizeof(*vnors) * (size_t)numVerts, __func__); @@ -279,26 +304,23 @@ void BKE_mesh_calc_normals_poly( } MeshCalcNormalsData data = { - .mpolys = mpolys, .mloop = mloop, .mverts = mverts, .pnors = pnors, .vnors = vnors, + .mpolys = mpolys, .mloop = mloop, .mverts = mverts, + .pnors = pnors, .lnors_weighted = lnors_weighted, .vnors = vnors }; - BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_accum_task_cb, (numPolys > BKE_MESH_OMP_LIMIT)); - - for (i = 0; i < numVerts; i++) { - MVert *mv = &mverts[i]; - float *no = vnors[i]; + /* Compute poly normals, and prepare weighted loop normals. */ + BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_prepare_cb, do_threaded); - if (UNLIKELY(normalize_v3(no) == 0.0f)) { - /* following Mesh convention; we use vertex coordinate itself for normal in this case */ - normalize_v3_v3(no, mv->co); - } + /* Actually accumulate weighted loop normals into vertex ones. */ + BLI_task_parallel_range(0, numLoops, &data, mesh_calc_normals_poly_accum_cb, do_threaded); - normal_float_to_short_v3(mv->no, no); - } + /* Normalize and validate computed vertex normals. */ + BLI_task_parallel_range(0, numVerts, &data, mesh_calc_normals_poly_finalize_cb, do_threaded); if (free_vnors) { MEM_freeN(vnors); } + MEM_freeN(lnors_weighted); } void BKE_mesh_calc_normals(Mesh *mesh) diff --git a/source/blender/blenkernel/intern/sequencer.c b/source/blender/blenkernel/intern/sequencer.c index 2319d36ab16..ee11a9806f3 100644 --- a/source/blender/blenkernel/intern/sequencer.c +++ b/source/blender/blenkernel/intern/sequencer.c @@ -4501,8 +4501,10 @@ Sequence *BKE_sequencer_foreground_frame_get(Scene *scene, int frame) for (seq = ed->seqbasep->first; seq; seq = seq->next) { if (seq->flag & SEQ_MUTE || seq->startdisp > frame || seq->enddisp <= frame) continue; - /* only use elements you can see - not */ - if (ELEM(seq->type, SEQ_TYPE_IMAGE, SEQ_TYPE_META, SEQ_TYPE_SCENE, SEQ_TYPE_MOVIE, SEQ_TYPE_COLOR)) { + /* Only use strips that generate an image, not ones that combine + * other strips or apply some effect. */ + if (ELEM(seq->type, SEQ_TYPE_IMAGE, SEQ_TYPE_META, SEQ_TYPE_SCENE, + SEQ_TYPE_MOVIE, SEQ_TYPE_COLOR, SEQ_TYPE_TEXT)) { if (seq->machine > best_machine) { best_seq = seq; best_machine = seq->machine; diff --git a/source/blender/blenkernel/intern/subsurf_ccg.c b/source/blender/blenkernel/intern/subsurf_ccg.c index 2f9a7090caf..b2f859ad1f5 100644 --- a/source/blender/blenkernel/intern/subsurf_ccg.c +++ b/source/blender/blenkernel/intern/subsurf_ccg.c @@ -90,9 +90,6 @@ /* assumes MLoop's are layed out 4 for each poly, in order */ #define USE_LOOP_LAYOUT_FAST -static ThreadRWMutex loops_cache_rwlock = BLI_RWLOCK_INITIALIZER; -static ThreadRWMutex origindex_cache_rwlock = BLI_RWLOCK_INITIALIZER; - static CCGDerivedMesh *getCCGDerivedMesh(CCGSubSurf *ss, int drawInteriorEdges, int useSubsurfUv, @@ -1492,21 +1489,24 @@ static void ccgDM_copyFinalLoopArray(DerivedMesh *dm, MLoop *mloop) /* DMFlagMat *faceFlags = ccgdm->faceFlags; */ /* UNUSED */ if (!ccgdm->ehash) { - BLI_rw_mutex_lock(&loops_cache_rwlock, THREAD_LOCK_WRITE); + BLI_rw_mutex_lock(&ccgdm->loops_cache_rwlock, THREAD_LOCK_WRITE); if (!ccgdm->ehash) { MEdge *medge; + EdgeHash *ehash; - ccgdm->ehash = BLI_edgehash_new_ex(__func__, ccgdm->dm.numEdgeData); + ehash = BLI_edgehash_new_ex(__func__, ccgdm->dm.numEdgeData); medge = ccgdm->dm.getEdgeArray((DerivedMesh *)ccgdm); for (i = 0; i < ccgdm->dm.numEdgeData; i++) { - BLI_edgehash_insert(ccgdm->ehash, medge[i].v1, medge[i].v2, SET_INT_IN_POINTER(i)); + BLI_edgehash_insert(ehash, medge[i].v1, medge[i].v2, SET_INT_IN_POINTER(i)); } + + atomic_cas_ptr((void**)&ccgdm->ehash, ccgdm->ehash, ehash); } - BLI_rw_mutex_unlock(&loops_cache_rwlock); + BLI_rw_mutex_unlock(&ccgdm->loops_cache_rwlock); } - BLI_rw_mutex_lock(&loops_cache_rwlock, THREAD_LOCK_READ); + BLI_rw_mutex_lock(&ccgdm->loops_cache_rwlock, THREAD_LOCK_READ); totface = ccgSubSurf_getNumFaces(ss); mv = mloop; for (index = 0; index < totface; index++) { @@ -1549,7 +1549,7 @@ static void ccgDM_copyFinalLoopArray(DerivedMesh *dm, MLoop *mloop) } } } - BLI_rw_mutex_unlock(&loops_cache_rwlock); + BLI_rw_mutex_unlock(&ccgdm->loops_cache_rwlock); } static void ccgDM_copyFinalPolyArray(DerivedMesh *dm, MPoly *mpoly) @@ -3796,6 +3796,10 @@ static void ccgDM_release(DerivedMesh *dm) MEM_freeN(ccgdm->edgeMap); MEM_freeN(ccgdm->faceMap); } + + BLI_rw_mutex_end(&ccgdm->loops_cache_rwlock); + BLI_rw_mutex_end(&ccgdm->origindex_cache_rwlock); + MEM_freeN(ccgdm); } } @@ -3810,14 +3814,14 @@ static void *ccgDM_get_vert_data_layer(DerivedMesh *dm, int type) int a, index, totnone, totorig; /* Avoid re-creation if the layer exists already */ - BLI_rw_mutex_lock(&origindex_cache_rwlock, THREAD_LOCK_READ); + BLI_rw_mutex_lock(&ccgdm->origindex_cache_rwlock, THREAD_LOCK_READ); origindex = DM_get_vert_data_layer(dm, CD_ORIGINDEX); - BLI_rw_mutex_unlock(&origindex_cache_rwlock); + BLI_rw_mutex_unlock(&ccgdm->origindex_cache_rwlock); if (origindex) { return origindex; } - BLI_rw_mutex_lock(&origindex_cache_rwlock, THREAD_LOCK_WRITE); + BLI_rw_mutex_lock(&ccgdm->origindex_cache_rwlock, THREAD_LOCK_WRITE); DM_add_vert_layer(dm, CD_ORIGINDEX, CD_CALLOC, NULL); origindex = DM_get_vert_data_layer(dm, CD_ORIGINDEX); @@ -3832,7 +3836,7 @@ static void *ccgDM_get_vert_data_layer(DerivedMesh *dm, int type) CCGVert *v = ccgdm->vertMap[index].vert; origindex[a] = ccgDM_getVertMapIndex(ccgdm->ss, v); } - BLI_rw_mutex_unlock(&origindex_cache_rwlock); + BLI_rw_mutex_unlock(&ccgdm->origindex_cache_rwlock); return origindex; } @@ -4784,6 +4788,9 @@ static CCGDerivedMesh *getCCGDerivedMesh(CCGSubSurf *ss, ccgdm->dm.numLoopData = ccgdm->dm.numPolyData * 4; ccgdm->dm.numTessFaceData = 0; + BLI_rw_mutex_init(&ccgdm->loops_cache_rwlock); + BLI_rw_mutex_init(&ccgdm->origindex_cache_rwlock); + return ccgdm; } diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c index eb7f186702b..afa20e3d766 100644 --- a/source/blender/blenlib/intern/task.c +++ b/source/blender/blenlib/intern/task.c @@ -1101,7 +1101,7 @@ static void task_parallel_range_ex( } task_scheduler = BLI_task_scheduler_get(); - task_pool = BLI_task_pool_create(task_scheduler, &state); + task_pool = BLI_task_pool_create_suspended(task_scheduler, &state); num_threads = BLI_task_scheduler_num_threads(task_scheduler); /* The idea here is to prevent creating task for each of the loop iterations @@ -1124,6 +1124,9 @@ static void task_parallel_range_ex( } num_tasks = min_ii(num_tasks, (stop - start) / state.chunk_size); + + /* NOTE: This way we are adding a memory barrier and ensure all worker + * threads can read and modify the value, without any locks. */ atomic_fetch_and_add_int32(&state.iter, 0); if (use_userdata_chunk) { @@ -1325,7 +1328,7 @@ void BLI_task_parallel_listbase( } task_scheduler = BLI_task_scheduler_get(); - task_pool = BLI_task_pool_create(task_scheduler, &state); + task_pool = BLI_task_pool_create_suspended(task_scheduler, &state); num_threads = BLI_task_scheduler_num_threads(task_scheduler); /* The idea here is to prevent creating task for each of the loop iterations @@ -1413,7 +1416,7 @@ void BLI_task_parallel_mempool( } task_scheduler = BLI_task_scheduler_get(); - task_pool = BLI_task_pool_create(task_scheduler, &state); + task_pool = BLI_task_pool_create_suspended(task_scheduler, &state); num_threads = BLI_task_scheduler_num_threads(task_scheduler); /* The idea here is to prevent creating task for each of the loop iterations diff --git a/source/blender/depsgraph/intern/builder/deg_builder_nodes.cc b/source/blender/depsgraph/intern/builder/deg_builder_nodes.cc index 60562641c93..3502ca69414 100644 --- a/source/blender/depsgraph/intern/builder/deg_builder_nodes.cc +++ b/source/blender/depsgraph/intern/builder/deg_builder_nodes.cc @@ -985,7 +985,7 @@ void DepsgraphNodeBuilder::build_obdata_geom(Object *object) // TODO: "Done" operation - /* Cloyth modifier. */ + /* Cloth modifier. */ LINKLIST_FOREACH (ModifierData *, md, &object->modifiers) { if (md->type == eModifierType_Cloth) { build_cloth(object); diff --git a/source/blender/depsgraph/intern/builder/deg_builder_relations.cc b/source/blender/depsgraph/intern/builder/deg_builder_relations.cc index 3fb19adbb5c..10cf6f906de 100644 --- a/source/blender/depsgraph/intern/builder/deg_builder_relations.cc +++ b/source/blender/depsgraph/intern/builder/deg_builder_relations.cc @@ -923,7 +923,7 @@ void DepsgraphRelationBuilder::build_animdata(ID *id) /* Animation curves and NLA. */ build_animdata_curves(id); /* Drivers. */ - build_animdata_drievrs(id); + build_animdata_drivers(id); } void DepsgraphRelationBuilder::build_animdata_curves(ID *id) @@ -992,7 +992,7 @@ void DepsgraphRelationBuilder::build_animdata_curves_targets(ID *id) } } -void DepsgraphRelationBuilder::build_animdata_drievrs(ID *id) +void DepsgraphRelationBuilder::build_animdata_drivers(ID *id) { AnimData *adt = BKE_animdata_from_id(id); if (adt == NULL) { @@ -1922,7 +1922,8 @@ void DepsgraphRelationBuilder::build_gpencil(bGPdata *gpd) // TODO: parent object (when that feature is implemented) } -void DepsgraphRelationBuilder::build_cachefile(CacheFile *cache_file) { +void DepsgraphRelationBuilder::build_cachefile(CacheFile *cache_file) +{ /* Animation. */ build_animdata(&cache_file->id); } diff --git a/source/blender/depsgraph/intern/builder/deg_builder_relations.h b/source/blender/depsgraph/intern/builder/deg_builder_relations.h index 9f661b8e825..9227957adb4 100644 --- a/source/blender/depsgraph/intern/builder/deg_builder_relations.h +++ b/source/blender/depsgraph/intern/builder/deg_builder_relations.h @@ -206,7 +206,7 @@ struct DepsgraphRelationBuilder void build_animdata(ID *id); void build_animdata_curves(ID *id); void build_animdata_curves_targets(ID *id); - void build_animdata_drievrs(ID *id); + void build_animdata_drivers(ID *id); void build_driver(ID *id, FCurve *fcurve); void build_driver_data(ID *id, FCurve *fcurve); void build_driver_variables(ID *id, FCurve *fcurve); diff --git a/source/blender/editors/armature/armature_relations.c b/source/blender/editors/armature/armature_relations.c index 1d63b3aee43..ccaa9ecb8de 100644 --- a/source/blender/editors/armature/armature_relations.c +++ b/source/blender/editors/armature/armature_relations.c @@ -126,7 +126,7 @@ typedef struct tJoinArmature_AdtFixData { GHash *names_map; } tJoinArmature_AdtFixData; -/* Callback to pass to void BKE_animdata_main_cb() for fixing driver ID's to point to the new ID */ +/* Callback to pass to BKE_animdata_main_cb() for fixing driver ID's to point to the new ID */ /* FIXME: For now, we only care about drivers here. When editing rigs, it's very rare to have animation * on the rigs being edited already, so it should be safe to skip these. */ diff --git a/source/blender/editors/interface/interface_layout.c b/source/blender/editors/interface/interface_layout.c index d43b7511977..30a18ddc8bc 100644 --- a/source/blender/editors/interface/interface_layout.c +++ b/source/blender/editors/interface/interface_layout.c @@ -1711,7 +1711,7 @@ void ui_but_add_search(uiBut *but, PointerRNA *ptr, PropertyRNA *prop, PointerRN } else if (but->type == UI_BTYPE_SEARCH_MENU) { /* In case we fail to find proper searchprop, so other code might have already set but->type to search menu... */ - but->type = UI_BTYPE_LABEL; + but->flag |= UI_BUT_DISABLED; } } |