diff options
54 files changed, 2108 insertions, 267 deletions
diff --git a/build_files/buildbot/slave_compile.py b/build_files/buildbot/slave_compile.py index 5e06c7057ce..d763ddfb3e0 100644 --- a/build_files/buildbot/slave_compile.py +++ b/build_files/buildbot/slave_compile.py @@ -73,6 +73,7 @@ if 'cmake' in builder: if builder.endswith('x86_64_10_6_cmake'): cmake_extra_options.append('-DCMAKE_OSX_ARCHITECTURES:STRING=x86_64') cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-hack/bin/nvcc') + cmake_extra_options.append('-DCUDA_NVCC8_EXECUTABLE=/usr/local/cuda8-hack/bin/nvcc') elif builder.startswith('win'): if builder.endswith('_vc2015'): @@ -89,6 +90,8 @@ if 'cmake' in builder: elif builder.startswith('win32'): bits = 32 cmake_options.extend(['-G', 'Visual Studio 12 2013']) + cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v7.5/bin/nvcc.exe') + cmake_extra_options.append('-DCUDA_NVCC8_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin/nvcc.exe') elif builder.startswith('linux'): tokens = builder.split("_") @@ -108,10 +111,14 @@ if 'cmake' in builder: cuda_chroot_name = 'buildbot_' + deb_name + '_x86_64' targets = ['player', 'blender', 'cuda'] + cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-7.5/bin/nvcc') + cmake_extra_options.append('-DCUDA_NVCC8_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc') + cmake_options.append("-C" + os.path.join(blender_dir, cmake_config_file)) # Prepare CMake options needed to configure cuda binaries compilation. cuda_cmake_options.append("-DWITH_CYCLES_CUDA_BINARIES=%s" % ('ON' if build_cubins else 'OFF')) + cuda_cmake_options.append("-DCYCLES_CUDA_BINARIES_ARCH=sm_20;sm_21;sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61") if build_cubins or 'cuda' in targets: if bits == 32: cuda_cmake_options.append("-DCUDA_64_BIT_DEVICE_CODE=OFF") diff --git a/intern/audaspace/jack/AUD_JackLibrary.cpp b/intern/audaspace/jack/AUD_JackLibrary.cpp index 63306ee0b15..9ed6862bbb9 100644 --- a/intern/audaspace/jack/AUD_JackLibrary.cpp +++ b/intern/audaspace/jack/AUD_JackLibrary.cpp @@ -44,7 +44,20 @@ static bool jack_supported = false; void AUD_jack_init(void) { #ifdef WITH_JACK_DYNLOAD - jack_handle = dlopen("libjack.so", RTLD_LAZY); + const char *names[] = {"libjack.so", + "libjack.so.0", + "libjack.so.1", + "libjack.so.2", + NULL}; + int index = 0; + while (names[index] != NULL) { + jack_handle = dlopen(names[index], RTLD_LAZY); + if (jack_handle != NULL) { + // Found existing library. + break; + } + ++index; + } if (!jack_handle) { return; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 2d404918a38..6a511ea7316 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -249,121 +249,161 @@ public: return DebugFlags().cuda.adaptive_compile; } + /* Common NVCC flags which stays the same regardless of shading model, + * kernel sources md5 and only depends on compiler or compilation settings. + */ + string compile_kernel_get_common_cflags( + const DeviceRequestedFeatures& requested_features) + { + const int cuda_version = cuewCompilerVersion(); + const int machine = system_cpu_bits(); + const string kernel_path = path_get("kernel"); + const string include = kernel_path; + string cflags = string_printf("-m%d " + "--ptxas-options=\"-v\" " + "--use_fast_math " + "-DNVCC " + "-D__KERNEL_CUDA_VERSION__=%d " + "-I\"%s\"", + machine, + cuda_version, + include.c_str()); + if(use_adaptive_compilation()) { + cflags += " " + requested_features.get_build_options(); + } + const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS"); + if(extra_cflags) { + cflags += string(" ") + string(extra_cflags); + } +#ifdef WITH_CYCLES_DEBUG + cflags += " -D__KERNEL_DEBUG__"; +#endif + return cflags; + } + + bool compile_check_compiler() { + const char *nvcc = cuewCompilerPath(); + if(nvcc == NULL) { + cuda_error_message("CUDA nvcc compiler not found. " + "Install CUDA toolkit in default location."); + return false; + } + const int cuda_version = cuewCompilerVersion(); + VLOG(1) << "Found nvcc " << nvcc + << ", CUDA version " << cuda_version + << "."; + const int major = cuda_version / 10, minor = cuda_version & 10; + if(cuda_version == 0) { + cuda_error_message("CUDA nvcc compiler version could not be parsed."); + return false; + } + if(cuda_version < 60) { + printf("Unsupported CUDA version %d.%d detected, " + "you need CUDA 7.5 or newer.\n", + major, minor); + return false; + } + else if(cuda_version != 75 && cuda_version != 80) { + printf("CUDA version %d.%d detected, build may succeed but only " + "CUDA 7.5 and 8.0 are officially supported.\n", + major, minor); + } + return true; + } + string compile_kernel(const DeviceRequestedFeatures& requested_features) { /* Compute cubin name. */ int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); - string cubin; - - /* Adaptive Compile. - * If enabled, always use that */ - bool use_adaptive_compile = use_adaptive_compilation(); /* Attempt to use kernel provided with Blender. */ - if(!use_adaptive_compile) { - cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); - VLOG(1) << "Testing for pre-compiled kernel " << cubin; + if(!use_adaptive_compilation()) { + const string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", + major, minor)); + VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; if(path_exists(cubin)) { - VLOG(1) << "Using precompiled kernel"; + VLOG(1) << "Using precompiled kernel."; return cubin; } } + const string common_cflags = + compile_kernel_get_common_cflags(requested_features); + /* Try to use locally compiled kernel. */ - string kernel_path = path_get("kernel"); - string md5 = path_files_md5_hash(kernel_path); - - string feature_build_options; - if(use_adaptive_compile) { - feature_build_options = requested_features.get_build_options(); - string device_md5 = util_md5_string(feature_build_options); - cubin = string_printf("cycles_kernel_%s_sm%d%d_%s.cubin", - device_md5.c_str(), - major, minor, - md5.c_str()); - } - else { - cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str()); - } + const string kernel_path = path_get("kernel"); + const string kernel_md5 = path_files_md5_hash(kernel_path); + + /* We include cflags into md5 so changing cuda toolkit or changing other + * compiler command line arguments makes sure cubin gets re-built. + */ + const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags); - cubin = path_user_get(path_join("cache", cubin)); - VLOG(1) << "Testing for locally compiled kernel " << cubin; - /* If exists already, use it. */ + const string cubin_file = string_printf("cycles_kernel_sm%d%d_%s.cubin", + major, minor, + cubin_md5.c_str()); + const string cubin = path_user_get(path_join("cache", cubin_file)); + VLOG(1) << "Testing for locally compiled kernel " << cubin << "."; if(path_exists(cubin)) { - VLOG(1) << "Using locally compiled kernel"; + VLOG(1) << "Using locally compiled kernel."; return cubin; } #ifdef _WIN32 if(have_precompiled_kernels()) { - if(major < 2) - cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor)); - else - cuda_error_message(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor)); + if(major < 2) { + cuda_error_message(string_printf( + "CUDA device requires compute capability 2.0 or up, " + "found %d.%d. Your GPU is not supported.", + major, minor)); + } + else { + cuda_error_message(string_printf( + "CUDA binary kernel for this graphics card compute " + "capability (%d.%d) not found.", + major, minor)); + } return ""; } #endif - /* If not, find CUDA compiler. */ - const char *nvcc = cuewCompilerPath(); - - if(nvcc == NULL) { - cuda_error_message("CUDA nvcc compiler not found. Install CUDA toolkit in default location."); - return ""; - } - - int cuda_version = cuewCompilerVersion(); - VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version; - - if(cuda_version == 0) { - cuda_error_message("CUDA nvcc compiler version could not be parsed."); - return ""; - } - if(cuda_version < 60) { - printf("Unsupported CUDA version %d.%d detected, you need CUDA 7.5.\n", cuda_version/10, cuda_version%10); + /* Compile. */ + if(!compile_check_compiler()) { return ""; } - else if(cuda_version != 75) - printf("CUDA version %d.%d detected, build may succeed but only CUDA 7.5 is officially supported.\n", cuda_version/10, cuda_version%10); - - /* Compile. */ - string kernel = path_join(kernel_path, path_join("kernels", path_join("cuda", "kernel.cu"))); - string include = kernel_path; - const int machine = system_cpu_bits(); - + const char *nvcc = cuewCompilerPath(); + const string kernel = path_join(kernel_path, + path_join("kernels", + path_join("cuda", "kernel.cu"))); double starttime = time_dt(); printf("Compiling CUDA kernel ...\n"); path_create_directories(cubin); - string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" " - "-o \"%s\" --ptxas-options=\"-v\" --use_fast_math -I\"%s\" " - "-DNVCC -D__KERNEL_CUDA_VERSION__=%d", - nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version); - - if(use_adaptive_compile) - command += " " + feature_build_options; - - const char* extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS"); - if(extra_cflags) { - command += string(" ") + string(extra_cflags); - } - -#ifdef WITH_CYCLES_DEBUG - command += " -D__KERNEL_DEBUG__"; -#endif + string command = string_printf("\"%s\" " + "-arch=sm_%d%d " + "--cubin \"%s\" " + "-o \"%s\" " + "%s ", + nvcc, + major, minor, + kernel.c_str(), + cubin.c_str(), + common_cflags.c_str()); printf("%s\n", command.c_str()); if(system(command.c_str()) == -1) { - cuda_error_message("Failed to execute compilation command, see console for details."); + cuda_error_message("Failed to execute compilation command, " + "see console for details."); return ""; } /* Verify if compilation succeeded */ if(!path_exists(cubin)) { - cuda_error_message("CUDA kernel compilation failed, see console for details."); + cuda_error_message("CUDA kernel compilation failed, " + "see console for details."); return ""; } @@ -964,11 +1004,11 @@ public: if(!background) { PixelMem pmem = pixel_mem_map[mem]; CUdeviceptr buffer; - + size_t bytes; cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0)); cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource)); - + return buffer; } @@ -1000,9 +1040,9 @@ public: glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW); else glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW); - + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - + glGenTextures(1, &pmem.cuTexId); glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); if(mem.data_type == TYPE_HALF) @@ -1012,7 +1052,7 @@ public: glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glBindTexture(GL_TEXTURE_2D, 0); - + CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); if(result == CUDA_SUCCESS) { @@ -1114,9 +1154,9 @@ public: else glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void*)offset); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - + glEnable(GL_TEXTURE_2D); - + if(transparent) { glEnable(GL_BLEND); glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); @@ -1181,7 +1221,7 @@ public: if(transparent) glDisable(GL_BLEND); - + glBindTexture(GL_TEXTURE_2D, 0); glDisable(GL_TEXTURE_2D); @@ -1197,12 +1237,12 @@ public: { if(task->type == DeviceTask::PATH_TRACE) { RenderTile tile; - + bool branched = task->integrator_branched; /* Upload Bindless Mapping */ load_bindless_mapping(); - + /* keep rendering tiles until done */ while(task->acquire_tile(this, tile)) { int start_sample = tile.start_sample; @@ -1339,7 +1379,7 @@ void device_cuda_info(vector<DeviceInfo>& devices) fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result)); return; } - + vector<DeviceInfo> display_devices; for(int num = 0; num < count; num++) { diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 28695a2ebf9..f4d154ca19e 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -248,12 +248,20 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_debug_flags "") endif() - set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") + set(cuda_nvcc_command ${CUDA_NVCC_EXECUTABLE}) + set(cuda_nvcc_version ${CUDA_VERSION}) + + if(DEFINED CUDA_NVCC8_EXECUTABLE AND ((${arch} STREQUAL "sm_60") OR (${arch} STREQUAL "sm_61"))) + set(cuda_nvcc_command ${CUDA_NVCC8_EXECUTABLE}) + set(cuda_nvcc_version "80") + endif() + + set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}") set(cuda_math_flags "--use_fast_math") add_custom_command( OUTPUT ${cuda_cubin} - COMMAND ${CUDA_NVCC_EXECUTABLE} + COMMAND ${cuda_nvcc_command} -arch=${arch} ${CUDA_NVCC_FLAGS} -m${CUDA_BITS} @@ -270,7 +278,6 @@ if(WITH_CYCLES_CUDA_BINARIES) -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC - DEPENDS ${cuda_sources}) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) @@ -278,6 +285,9 @@ if(WITH_CYCLES_CUDA_BINARIES) unset(cuda_extra_flags) unset(cuda_debug_flags) + + unset(cuda_nvcc_command) + unset(cuda_nvcc_version) endmacro() foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index 1d6fa303d3e..e9eeff31ecc 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -37,11 +37,16 @@ * */ -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - uint *num_hits) +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + uint *num_hits) { /* todo: * - likely and unlikely for if() statements diff --git a/intern/cycles/kernel/bvh/bvh_subsurface.h b/intern/cycles/kernel/bvh/bvh_subsurface.h index 18978efcfa3..d9623c94b2e 100644 --- a/intern/cycles/kernel/bvh/bvh_subsurface.h +++ b/intern/cycles/kernel/bvh/bvh_subsurface.h @@ -35,12 +35,17 @@ * */ -ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - SubsurfaceIntersection *ss_isect, - int subsurface_object, - uint *lcg_state, - int max_hits) +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + SubsurfaceIntersection *ss_isect, + int subsurface_object, + uint *lcg_state, + int max_hits) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h index 68a11b65ad7..b1a52968a26 100644 --- a/intern/cycles/kernel/bvh/bvh_traversal.h +++ b/intern/cycles/kernel/bvh/bvh_traversal.h @@ -40,16 +40,21 @@ * */ -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , uint *lcg_state, - float difl, - float extmax + , uint *lcg_state, + float difl, + float extmax #endif - ) + ) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h index 03499e94347..107373c17dc 100644 --- a/intern/cycles/kernel/bvh/bvh_volume.h +++ b/intern/cycles/kernel/bvh/bvh_volume.h @@ -36,10 +36,15 @@ * */ -ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h index 7eddc2891d0..1f6515c9862 100644 --- a/intern/cycles/kernel/bvh/bvh_volume_all.h +++ b/intern/cycles/kernel/bvh/bvh_volume_all.h @@ -36,11 +36,16 @@ * */ -ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif +uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + const uint visibility) { /* todo: * - test if pushing distance on the stack helps (for non shadow rays) diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 72096f4d873..19cb8a04d48 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -40,7 +40,15 @@ CCL_NAMESPACE_BEGIN -ccl_device int bsdf_sample(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float randu, float randv, float3 *eval, float3 *omega_in, differential3 *domega_in, float *pdf) +ccl_device_inline int bsdf_sample(KernelGlobals *kg, + ShaderData *sd, + const ShaderClosure *sc, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + differential3 *domega_in, + float *pdf) { int label; @@ -157,7 +165,16 @@ ccl_device int bsdf_sample(KernelGlobals *kg, ShaderData *sd, const ShaderClosur return label; } -ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf) +#ifndef __KERNEL_CUDS__ +ccl_device +#else +ccl_device_inline +#endif +float3 bsdf_eval(KernelGlobals *kg, + ShaderData *sd, + const ShaderClosure *sc, + const float3 omega_in, + float *pdf) { float3 eval; diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h index 8ed76bea525..9929246ae5c 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h @@ -62,7 +62,11 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough return 2.0f / (roughness*roughness) - 2.0f; } -ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device_inline float3 bsdf_ashikhmin_shirley_eval_reflect( + const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; float3 N = bsdf->N; diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h index afd4a8da62a..6ebe2f6a751 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h @@ -25,11 +25,18 @@ * energy is used. In combination with MIS, that is enough to produce an unbiased result, although * the balance heuristic isn't necessarily optimal anymore. */ -ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const bool wo_outside, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint* lcg_state +ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)( + float3 wi, + float3 wo, + const bool wo_outside, + const float3 color, + const float alpha_x, + const float alpha_y, + ccl_addr_space uint *lcg_state #ifdef MF_MULTI_GLASS - , const float eta + , const float eta #elif defined(MF_MULTI_GLOSSY) - , float3 *n, float3 *k + , float3 *n, float3 *k #endif ) { diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index e31d790dd84..b713ff56b2f 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -143,7 +143,7 @@ ccl_device float bssrdf_cubic_pdf(const ShaderClosure *sc, float r) } /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ -ccl_device float bssrdf_cubic_quintic_root_find(float xi) +ccl_device_inline float bssrdf_cubic_quintic_root_find(float xi) { /* newton-raphson iteration, usually succeeds in 2-4 iterations, except * outside 0.02 ... 0.98 where it can go up to 10, so overall performance @@ -257,7 +257,7 @@ ccl_device float bssrdf_burley_pdf(const ShaderClosure *sc, float r) * Returns scaled radius, meaning the result is to be scaled up by d. * Since there's no closed form solution we do Newton-Raphson method to find it. */ -ccl_device float bssrdf_burley_root_find(float xi) +ccl_device_inline float bssrdf_burley_root_find(float xi) { const float tolerance = 1e-6f; const int max_iteration_count = 10; @@ -403,7 +403,7 @@ ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float bssrdf_burley_sample(sc, xi, r, h); } -ccl_device float bssrdf_pdf(const ShaderClosure *sc, float r) +ccl_device_inline float bssrdf_pdf(const ShaderClosure *sc, float r) { if(sc->type == CLOSURE_BSSRDF_CUBIC_ID) return bssrdf_cubic_pdf(sc, r); diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 44734d1b70d..b16f0c9a99b 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -23,7 +23,11 @@ CCL_NAMESPACE_BEGIN /* Generic primitive attribute reading functions */ -ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, + const ShaderData *sd, + AttributeElement elem, + int offset, + float *dx, float *dy) { if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { if(subd_triangle_patch(kg, sd) == ~0) @@ -48,7 +52,12 @@ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData * } } -ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + AttributeElement elem, + int offset, + float3 *dx, + float3 *dy) { if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { if(subd_triangle_patch(kg, sd) == ~0) @@ -75,7 +84,7 @@ ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData /* Default UV coordinate */ -ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) +ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) { AttributeElement elem_uv; int offset_uv = find_attribute(kg, sd, ATTR_STD_UV, &elem_uv); @@ -144,7 +153,7 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) /* Motion vector for motion pass */ -ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) +ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) { /* center position */ float3 center; diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h index e4597aba56e..bf9be182345 100644 --- a/intern/cycles/kernel/geom/geom_subd_triangle.h +++ b/intern/cycles/kernel/geom/geom_subd_triangle.h @@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *sd) { - return kernel_tex_fetch(__tri_patch, ccl_fetch(sd, prim)); + return (ccl_fetch(sd, prim) != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, ccl_fetch(sd, prim)) : ~0; } /* UV coords of triangle within patch */ diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 2044aafc877..7c8182bc430 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -44,7 +44,9 @@ ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z) } #endif /* __KERNEL_GPU__ */ -ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P) +ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, + const ShaderData *sd, + float3 P) { /* todo: optimize this so it's just a single matrix multiplication when * possible (not motion blur), or perhaps even just translation + scale */ diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 9ee0b09529e..bfbf73df54f 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -18,8 +18,12 @@ CCL_NAMESPACE_BEGIN #ifdef __BAKING__ -ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng, - int pass_filter, int sample) +ccl_device_inline void compute_light_pass(KernelGlobals *kg, + ShaderData *sd, + PathRadiance *L, + RNG rng, + int pass_filter, + int sample) { /* initialize master radiance accumulator */ kernel_assert(kernel_data.film.use_light_pass); diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index f6c103d59dd..88514de514c 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -211,7 +211,10 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl /* Panorama Camera */ -ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray) +ccl_device_inline void camera_sample_panorama(KernelGlobals *kg, + float raster_x, float raster_y, + float lens_u, float lens_v, + ccl_addr_space Ray *ray) { Transform rastertocamera = kernel_data.cam.rastertocamera; float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f)); @@ -303,8 +306,12 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float /* Common */ -ccl_device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, float filter_v, - float lens_u, float lens_v, float time, ccl_addr_space Ray *ray) +ccl_device_inline void camera_sample(KernelGlobals *kg, + int x, int y, + float filter_u, float filter_v, + float lens_u, float lens_v, + float time, + ccl_addr_space Ray *ray) { /* pixel filter */ int filter_table_offset = kernel_data.film.filter_table_offset; diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 08f6f457805..a039b414006 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -36,7 +36,11 @@ /* Qualifier wrappers for different names on different devices */ #define ccl_device __device__ __inline__ -#define ccl_device_inline __device__ __inline__ +#if (__KERNEL_CUDA_VERSION__ == 80) && (__CUDA_ARCH__ < 500) +# define ccl_device_inline __device__ __forceinline__ +#else +# define ccl_device_inline __device__ __inline__ +#endif #define ccl_device_noinline __device__ __noinline__ #define ccl_global #define ccl_constant diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 93c4bd3f7d5..1e829eaa1fa 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -44,11 +44,11 @@ typedef struct LightSample { * * Note: light_p is modified when sample_coord is true. */ -ccl_device float area_light_sample(float3 P, - float3 *light_p, - float3 axisu, float3 axisv, - float randu, float randv, - bool sample_coord) +ccl_device_inline float area_light_sample(float3 P, + float3 *light_p, + float3 axisu, float3 axisv, + float randu, float randv, + bool sample_coord) { /* In our name system we're using P for the center, * which is o in the paper. @@ -268,11 +268,11 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals return false; } -ccl_device float background_portal_pdf(KernelGlobals *kg, - float3 P, - float3 direction, - int ignore_portal, - bool *is_possible) +ccl_device_inline float background_portal_pdf(KernelGlobals *kg, + float3 P, + float3 direction, + int ignore_portal, + bool *is_possible) { float portal_pdf = 0.0f; @@ -367,7 +367,10 @@ ccl_device float3 background_portal_sample(KernelGlobals *kg, return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf) +ccl_device_inline float3 background_light_sample(KernelGlobals *kg, + float3 P, + float randu, float randv, + float *pdf) { /* Probability of sampling portals instead of the map. */ float portal_sampling_pdf = kernel_data.integrator.portal_pdf; @@ -507,8 +510,11 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 return t*t/cos_pi; } -ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp, - float randu, float randv, float3 P, LightSample *ls) +ccl_device_inline void lamp_light_sample(KernelGlobals *kg, + int lamp, + float randu, float randv, + float3 P, + LightSample *ls) { float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0); float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1); diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index d5b31037723..1f08f3459e6 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -436,7 +436,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, #ifdef __SUBSURFACE__ -ccl_device bool kernel_path_subsurface_scatter( +ccl_device_inline bool kernel_path_subsurface_scatter( KernelGlobals *kg, ShaderData *sd, ShaderData *emission_sd, diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index 56516967d8f..64f1468eacf 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN #ifdef __BRANCHED_PATH__ -ccl_device void kernel_branched_path_ao(KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - PathRadiance *L, - PathState *state, - RNG *rng, - float3 throughput) +ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + PathRadiance *L, + PathState *state, + RNG *rng, + float3 throughput) { int num_samples = kernel_data.integrator.ao_samples; float num_samples_inv = 1.0f/num_samples; diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 74b1ae0ca32..250b8e92a45 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -222,8 +222,13 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_ #endif /* path tracing: bounce off or through surface to with new direction stored in ray */ -ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, ccl_addr_space RNG *rng, - ShaderData *sd, ccl_addr_space float3 *throughput, ccl_addr_space PathState *state, PathRadiance *L, ccl_addr_space Ray *ray) +ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg, + ccl_addr_space RNG *rng, + ShaderData *sd, + ccl_addr_space float3 *throughput, + ccl_addr_space PathState *state, + PathRadiance *L, + ccl_addr_space Ray *ray) { /* no BSDF? we can stop here */ if(ccl_fetch(sd, flag) & SD_BSDF) { diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index e45522a4641..5fd4f2fad4c 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ -18,8 +18,14 @@ CCL_NAMESPACE_BEGIN #ifdef __VOLUME_SCATTER__ -ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, - ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L) +ccl_device_inline void kernel_path_volume_connect_light( + KernelGlobals *kg, + RNG *rng, + ShaderData *sd, + ShaderData *emission_sd, + float3 throughput, + PathState *state, + PathRadiance *L) { #ifdef __EMISSION__ if(!kernel_data.integrator.use_direct_light) diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h index 8be6742699a..3437d83ed7d 100644 --- a/intern/cycles/kernel/kernel_projection.h +++ b/intern/cycles/kernel/kernel_projection.h @@ -130,7 +130,10 @@ ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float w return make_float2(u, v); } -ccl_device float3 fisheye_equisolid_to_direction(float u, float v, float lens, float fov, float width, float height) +ccl_device_inline float3 fisheye_equisolid_to_direction(float u, float v, + float lens, + float fov, + float width, float height) { u = (u - 0.5f) * width; v = (v - 0.5f) * height; @@ -189,7 +192,7 @@ ccl_device float2 direction_to_mirrorball(float3 dir) return make_float2(u, v); } -ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v) +ccl_device_inline float3 panorama_to_direction(KernelGlobals *kg, float u, float v) { switch(kernel_data.cam.panorama_type) { case PANORAMA_EQUIRECTANGULAR: @@ -205,7 +208,7 @@ ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v) } } -ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir) +ccl_device_inline float2 direction_to_panorama(KernelGlobals *kg, float3 dir) { switch(kernel_data.cam.panorama_type) { case PANORAMA_EQUIRECTANGULAR: @@ -221,9 +224,9 @@ ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir) } } -ccl_device float3 spherical_stereo_position(KernelGlobals *kg, - float3 dir, - float3 pos) +ccl_device_inline float3 spherical_stereo_position(KernelGlobals *kg, + float3 dir, + float3 pos) { float interocular_offset = kernel_data.cam.interocular_offset; diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index b7641c37d93..98d321c9c16 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -149,8 +149,16 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, /* ShaderData setup from BSSRDF scatter */ #ifdef __SUBSURFACE__ -ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd, - const Intersection *isect, const Ray *ray) +# ifndef __KERNEL_CUDS__ +ccl_device +# else +ccl_device_inline +# endif +void shader_setup_from_subsurface( + KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray) { bool backfacing = sd->flag & SD_BACKFACING; @@ -226,14 +234,14 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat /* ShaderData setup from position sampled on mesh */ -ccl_device void shader_setup_from_sample(KernelGlobals *kg, - ShaderData *sd, - const float3 P, - const float3 Ng, - const float3 I, - int shader, int object, int prim, - float u, float v, float t, - float time) +ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, + ShaderData *sd, + const float3 P, + const float3 Ng, + const float3 I, + int shader, int object, int prim, + float u, float v, float t, + float time) { /* vectors */ ccl_fetch(sd, P) = P; @@ -445,7 +453,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s /* Merging */ #if defined(__BRANCHED_PATH__) || defined(__VOLUME__) -ccl_device void shader_merge_closures(ShaderData *sd) +ccl_device_inline void shader_merge_closures(ShaderData *sd) { /* merge identical closures, better when we sample a single closure at a time */ for(int i = 0; i < sd->num_closure; i++) { @@ -530,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg, } #endif -ccl_device void shader_bsdf_eval(KernelGlobals *kg, - ShaderData *sd, - const float3 omega_in, - BsdfEval *eval, - float light_pdf, - bool use_mis) + +#ifndef __KERNEL_CUDS__ +ccl_device +#else +ccl_device_inline +#endif +void shader_bsdf_eval(KernelGlobals *kg, + ShaderData *sd, + const float3 omega_in, + BsdfEval *eval, + float light_pdf, + bool use_mis) { bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); @@ -554,9 +568,13 @@ ccl_device void shader_bsdf_eval(KernelGlobals *kg, } } -ccl_device int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd, - float randu, float randv, BsdfEval *bsdf_eval, - float3 *omega_in, differential3 *domega_in, float *pdf) +ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg, + ShaderData *sd, + float randu, float randv, + BsdfEval *bsdf_eval, + float3 *omega_in, + differential3 *domega_in, + float *pdf) { int sampled = 0; @@ -991,8 +1009,12 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData * /* Volume Evaluation */ -ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, - PathState *state, VolumeStack *stack, int path_flag, ShaderContext ctx) +ccl_device_inline void shader_eval_volume(KernelGlobals *kg, + ShaderData *sd, + PathState *state, + VolumeStack *stack, + int path_flag, + ShaderContext ctx) { /* reset closures once at the start, we will be accumulating the closures * for all volumes in the stack into a single array of closures */ diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 888df7f06fd..f404666177a 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -85,7 +85,11 @@ ccl_device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Sha return NULL; } -ccl_device float3 subsurface_scatter_eval(ShaderData *sd, ShaderClosure *sc, float disk_r, float r, bool all) +ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd, + ShaderClosure *sc, + float disk_r, + float r, + bool all) { #ifdef BSSRDF_MULTI_EVAL /* this is the veach one-sample model with balance heuristic, some pdf @@ -231,7 +235,7 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, /* Subsurface scattering step, from a point on the surface to other * nearby points on the same object. */ -ccl_device int subsurface_scatter_multi_intersect( +ccl_device_inline int subsurface_scatter_multi_intersect( KernelGlobals *kg, SubsurfaceIntersection* ss_isect, ShaderData *sd, diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 01c87e6d89d..9dafed9afd1 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -36,7 +36,11 @@ typedef struct VolumeShaderCoefficients { } VolumeShaderCoefficients; /* evaluate shader to get extinction coefficient at P */ -ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, float3 *extinction) +ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg, + ShaderData *sd, + PathState *state, + float3 P, + float3 *extinction) { sd->P = P; shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); @@ -58,7 +62,11 @@ ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *s } /* evaluate shader to get absorption, scattering and emission at P */ -ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, VolumeShaderCoefficients *coeff) +ccl_device_inline bool volume_shader_sample(KernelGlobals *kg, + ShaderData *sd, + PathState *state, + float3 P, + VolumeShaderCoefficients *coeff) { sd->P = P; shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, SHADER_CONTEXT_VOLUME); diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index 6c557684099..bd6013e9205 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u } } -ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +#ifndef __KERNEL_CUDS__ +ccl_device +#else +ccl_device_noinline +#endif +void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { NodeAttributeType type, mesh_type; AttributeElement elem; @@ -123,7 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float * } } -ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +#ifndef __KERNEL_CUDS__ +ccl_device +#else +ccl_device_noinline +#endif +void svm_node_attr_bump_dy(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { NodeAttributeType type, mesh_type; AttributeElement elem; diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h index bb06254c3a9..7d512f7ff4d 100644 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ b/intern/cycles/kernel/svm/svm_geometry.h @@ -18,7 +18,11 @@ CCL_NAMESPACE_BEGIN /* Geometry Node */ -ccl_device void svm_node_geometry(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device_inline void svm_node_geometry(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint type, + uint out_offset) { float3 data; @@ -94,7 +98,11 @@ ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *s /* Particle Info */ -ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device void svm_node_particle_info(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint type, + uint out_offset) { switch(type) { case NODE_INFO_PAR_INDEX: { @@ -146,7 +154,11 @@ ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float /* Hair Info */ -ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device void svm_node_hair_info(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint type, + uint out_offset) { float data; float3 data3; diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h index f959d90f309..368740f64c7 100644 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ b/intern/cycles/kernel/svm/svm_ramp.h @@ -21,12 +21,12 @@ CCL_NAMESPACE_BEGIN /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ -ccl_device float4 rgb_ramp_lookup(KernelGlobals *kg, - int offset, - float f, - bool interpolate, - bool extrapolate, - int table_size) +ccl_device_inline float4 rgb_ramp_lookup(KernelGlobals *kg, + int offset, + float f, + bool interpolate, + bool extrapolate, + int table_size) { if((f < 0.0f || f > 1.0f) && extrapolate) { float4 t0, dy; diff --git a/intern/cycles/kernel/svm/svm_ramp_util.h b/intern/cycles/kernel/svm/svm_ramp_util.h index 495d98cf250..9f2ce1276f9 100644 --- a/intern/cycles/kernel/svm/svm_ramp_util.h +++ b/intern/cycles/kernel/svm/svm_ramp_util.h @@ -21,11 +21,11 @@ CCL_NAMESPACE_BEGIN /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ -ccl_device float3 rgb_ramp_lookup(const float3 *ramp, - float f, - bool interpolate, - bool extrapolate, - int table_size) +ccl_device_inline float3 rgb_ramp_lookup(const float3 *ramp, + float f, + bool interpolate, + bool extrapolate, + int table_size) { if ((f < 0.0f || f > 1.0f) && extrapolate) { float3 t0, dy; diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index 276b6f26f5e..b39d6a3e009 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -99,12 +99,12 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, stack_store_float3(stack, out_offset, data); } -ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg, - ShaderData *sd, - int path_flag, - float *stack, - uint4 node, - int *offset) +ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, + ShaderData *sd, + int path_flag, + float *stack, + uint4 node, + int *offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; @@ -184,12 +184,12 @@ ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg, #endif } -ccl_device_inline void svm_node_tex_coord_bump_dy(KernelGlobals *kg, - ShaderData *sd, - int path_flag, - float *stack, - uint4 node, - int *offset) +ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, + ShaderData *sd, + int path_flag, + float *stack, + uint4 node, + int *offset) { #ifdef __RAY_DIFFERENTIALS__ float3 data; diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index 30ccd523add..6eed9bc1a99 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN /* Wireframe Node */ -ccl_device float wireframe(KernelGlobals *kg, - ShaderData *sd, - float size, - int pixel_size, - float3 *P) +ccl_device_inline float wireframe(KernelGlobals *kg, + ShaderData *sd, + float size, + int pixel_size, + float3 *P) { #ifdef __HAIR__ if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp index 073bafce98d..200a4c497cd 100644 --- a/intern/cycles/render/constant_fold.cpp +++ b/intern/cycles/render/constant_fold.cpp @@ -40,7 +40,8 @@ bool ConstantFolder::all_inputs_constant() const void ConstantFolder::make_constant(float value) const { - VLOG(1) << "Replacing " << node->name << " with constant " << value << "."; + VLOG(1) << "Folding " << node->name << "::" << output->name() << " to constant (" << value << ")."; + foreach(ShaderInput *sock, output->links) { sock->set(value); } @@ -50,6 +51,8 @@ void ConstantFolder::make_constant(float value) const void ConstantFolder::make_constant(float3 value) const { + VLOG(1) << "Folding " << node->name << "::" << output->name() << " to constant " << value << "."; + foreach(ShaderInput *sock, output->links) { sock->set(value); } @@ -90,6 +93,8 @@ void ConstantFolder::bypass(ShaderOutput *new_output) const { assert(new_output); + VLOG(1) << "Folding " << node->name << "::" << output->name() << " to socket " << new_output->parent->name << "::" << new_output->name() << "."; + /* Remove all outgoing links from socket and connect them to new_output instead. * The graph->relink method affects node inputs, so it's not safe to use in constant * folding if the node has multiple outputs and will thus be folded multiple times. */ @@ -105,6 +110,9 @@ void ConstantFolder::bypass(ShaderOutput *new_output) const void ConstantFolder::discard() const { assert(output->type() == SocketType::CLOSURE); + + VLOG(1) << "Discarding closure " << node->name << "."; + graph->disconnect(output); } diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp index 3eeb7ffc2bc..6e795ef896a 100644 --- a/intern/cycles/render/graph.cpp +++ b/intern/cycles/render/graph.cpp @@ -24,6 +24,7 @@ #include "util_debug.h" #include "util_foreach.h" #include "util_queue.h" +#include "util_logging.h" CCL_NAMESPACE_BEGIN @@ -543,6 +544,7 @@ void ShaderGraph::deduplicate_nodes() ShaderNodeSet scheduled, done; map<ustring, ShaderNodeSet> candidates; queue<ShaderNode*> traverse_queue; + int num_deduplicated = 0; /* Schedule nodes which doesn't have any dependencies. */ foreach(ShaderNode *node, nodes) { @@ -557,8 +559,10 @@ void ShaderGraph::deduplicate_nodes() traverse_queue.pop(); done.insert(node); /* Schedule the nodes which were depending on the current node. */ + bool has_output_links = false; foreach(ShaderOutput *output, node->outputs) { foreach(ShaderInput *input, output->links) { + has_output_links = true; if(scheduled.find(input->parent) != scheduled.end()) { /* Node might not be optimized yet but scheduled already * by other dependencies. No need to re-schedule it. @@ -572,6 +576,10 @@ void ShaderGraph::deduplicate_nodes() } } } + /* Only need to care about nodes that are actually used */ + if(!has_output_links) { + continue; + } /* Try to merge this node with another one. */ ShaderNode *merge_with = NULL; foreach(ShaderNode *other_node, candidates[node->type->name]) { @@ -585,11 +593,16 @@ void ShaderGraph::deduplicate_nodes() for(int i = 0; i < node->outputs.size(); ++i) { relink(node, node->outputs[i], merge_with->outputs[i]); } + num_deduplicated++; } else { candidates[node->type->name].insert(node); } } + + if(num_deduplicated > 0) { + VLOG(1) << "Deduplicated " << num_deduplicated << " nodes."; + } } void ShaderGraph::break_cycles(ShaderNode *node, vector<bool>& visited, vector<bool>& on_stack) diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index ae6042cef34..4cd77f8c6e1 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -238,14 +238,19 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen /* count */ size_t num_lights = 0; + size_t num_portals = 0; size_t num_background_lights = 0; size_t num_triangles = 0; bool background_mis = false; foreach(Light *light, scene->lights) { - if(light->is_enabled) + if(light->is_enabled) { num_lights++; + } + if(light->is_portal) { + num_portals++; + } } foreach(Object *object, scene->objects) { @@ -435,9 +440,9 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen device->tex_alloc("__light_distribution", dscene->light_distribution); /* Portals */ - if(num_background_lights > 0 && light_index != num_lights) { + if(num_portals > 0) { kintegrator->portal_offset = light_index; - kintegrator->num_portals = num_lights - light_index; + kintegrator->num_portals = num_portals; kintegrator->portal_pdf = background_mis? 0.5f: 1.0f; } else { @@ -601,10 +606,10 @@ void LightManager::device_update_points(Device *device, Scene *scene) { int num_scene_lights = scene->lights.size(); - int num_lights = 0; + int num_lights = 0; foreach(Light *light, scene->lights) { - if(light->is_enabled) { + if(light->is_enabled || light->is_portal) { num_lights++; } } diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index c377e899936..7caa801671d 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -1689,6 +1689,19 @@ void ConvertNode::constant_fold(const ConstantFolder& folder) } } } + else { + ShaderInput *in = inputs[0]; + ShaderNode *prev = in->link->parent; + + /* no-op conversion of A to B to A */ + if(prev->type == node_types[to][from]) { + ShaderInput *prev_in = prev->inputs[0]; + + if(SocketType::is_float3(from) && (to == SocketType::FLOAT || SocketType::is_float3(to)) && prev_in->link) { + folder.bypass(prev_in->link); + } + } + } } void ConvertNode::compile(SVMCompiler& compiler) diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt index a6bcf980df2..80fe893826a 100644 --- a/intern/cycles/test/CMakeLists.txt +++ b/intern/cycles/test/CMakeLists.txt @@ -26,12 +26,12 @@ set(ALL_CYCLES_LIBRARIES cycles_device cycles_bvh cycles_graph - cycles_kernel_osl cycles_util ${OPENIMAGEIO_LIBRARIES} ) if(WITH_CYCLES_OSL) list(APPEND ALL_CYCLES_LIBRARIES + cycles_kernel_osl ${OSL_LIBRARIES} ${LLVM_LIBRARIES} ) diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp index 4566894d490..633e517ce9f 100644 --- a/intern/cycles/test/render_graph_finalize_test.cpp +++ b/intern/cycles/test/render_graph_finalize_test.cpp @@ -40,6 +40,7 @@ public: : name_(name) { node_ = new T(); + node_->name = name; } const string& name() const { @@ -59,6 +60,13 @@ public: return *this; } + template<typename T2, typename V> + ShaderNodeBuilder& set(V T2::*pfield, V value) + { + static_cast<T*>(node_)->*pfield = value; + return *this; + } + protected: string name_; ShaderNode *node_; @@ -69,6 +77,7 @@ public: explicit ShaderGraphBuilder(ShaderGraph *graph) : graph_(graph) { + node_map_["Output"] = graph->output(); } ShaderNode *find_node(const string& name) @@ -110,6 +119,35 @@ public: return *this; } + /* Common input/output boilerplate. */ + ShaderGraphBuilder& add_attribute(const string &name) + { + return (*this) + .add_node(ShaderNodeBuilder<AttributeNode>(name) + .set(&AttributeNode::attribute, ustring(name))); + } + + ShaderGraphBuilder& output_closure(const string& from) + { + return (*this).add_connection(from, "Output::Surface"); + } + + ShaderGraphBuilder& output_color(const string& from) + { + return (*this) + .add_node(ShaderNodeBuilder<EmissionNode>("EmissionNode")) + .add_connection(from, "EmissionNode::Color") + .output_closure("EmissionNode::Emission"); + } + + ShaderGraphBuilder& output_value(const string& from) + { + return (*this) + .add_node(ShaderNodeBuilder<EmissionNode>("EmissionNode")) + .add_connection(from, "EmissionNode::Strength") + .output_closure("EmissionNode::Emission"); + } + protected: ShaderGraph *graph_; map<string, ShaderNode *> node_map_; @@ -127,21 +165,1366 @@ protected: ShaderGraph graph; \ ShaderGraphBuilder builder(&graph); \ +#define EXPECT_ANY_MESSAGE(log) \ + EXPECT_CALL(log, Log(_, _, _)).Times(AnyNumber()); \ + +#define CORRECT_INFO_MESSAGE(log, message) \ + EXPECT_CALL(log, Log(google::INFO, _, HasSubstr(message))); + +#define INVALID_INFO_MESSAGE(log, message) \ + EXPECT_CALL(log, Log(google::INFO, _, HasSubstr(message))).Times(0); + +/* + * Test deduplication of nodes that have inputs, some of them folded. + */ +TEST(render_graph, deduplicate_deep) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Value1::Value to constant (0.8)."); + CORRECT_INFO_MESSAGE(log, "Folding Value2::Value to constant (0.8)."); + CORRECT_INFO_MESSAGE(log, "Deduplicated 2 nodes."); + + builder + .add_node(ShaderNodeBuilder<GeometryNode>("Geometry1")) + .add_node(ShaderNodeBuilder<GeometryNode>("Geometry2")) + .add_node(ShaderNodeBuilder<ValueNode>("Value1") + .set(&ValueNode::value, 0.8f)) + .add_node(ShaderNodeBuilder<ValueNode>("Value2") + .set(&ValueNode::value, 0.8f)) + .add_node(ShaderNodeBuilder<NoiseTextureNode>("Noise1")) + .add_node(ShaderNodeBuilder<NoiseTextureNode>("Noise2")) + .add_node(ShaderNodeBuilder<MixNode>("Mix") + .set(&MixNode::type, NODE_MIX_BLEND) + .set("Fac", 0.5f)) + .add_connection("Geometry1::Parametric", "Noise1::Vector") + .add_connection("Value1::Value", "Noise1::Scale") + .add_connection("Noise1::Color", "Mix::Color1") + .add_connection("Geometry2::Parametric", "Noise2::Vector") + .add_connection("Value2::Value", "Noise2::Scale") + .add_connection("Noise2::Color", "Mix::Color2") + .output_color("Mix::Color"); + + graph.finalize(&scene); + + EXPECT_EQ(graph.nodes.size(), 5); +} + +/* + * Test RGB to BW node. + */ TEST(render_graph, constant_fold_rgb_to_bw) { DEFINE_COMMON_VARIABLES(builder, log); - EXPECT_CALL(log, Log(_, _, _)).Times(AnyNumber()); - EXPECT_CALL(log, Log(google::INFO, _, - HasSubstr("Replacing rgb_to_bw with constant 0.8."))); + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding RGBToBWNodeNode::Val to constant (0.8)."); + CORRECT_INFO_MESSAGE(log, "Folding convert_float_to_color::value_color to constant (0.8, 0.8, 0.8)."); builder - .add_node(ShaderNodeBuilder<OutputNode>("OutputNode")) - .add_node(ShaderNodeBuilder<EmissionNode>("EmissionNode")) .add_node(ShaderNodeBuilder<RGBToBWNode>("RGBToBWNodeNode") .set("Color", make_float3(0.8f, 0.8f, 0.8f))) - .add_connection("RGBToBWNodeNode::Val", "EmissionNode::Color") - .add_connection("EmissionNode::Emission", "OutputNode::Surface"); + .output_color("RGBToBWNodeNode::Val"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - folding of Emission nodes that don't emit to nothing. + */ +TEST(render_graph, constant_fold_emission1) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Discarding closure Emission."); + + builder + .add_node(ShaderNodeBuilder<EmissionNode>("Emission") + .set("Color", make_float3(0.0f, 0.0f, 0.0f))) + .output_closure("Emission::Emission"); + + graph.finalize(&scene); +} + +TEST(render_graph, constant_fold_emission2) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Discarding closure Emission."); + + builder + .add_node(ShaderNodeBuilder<EmissionNode>("Emission") + .set("Strength", 0.0f)) + .output_closure("Emission::Emission"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - folding of Background nodes that don't emit to nothing. + */ +TEST(render_graph, constant_fold_background1) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Discarding closure Background."); + + builder + .add_node(ShaderNodeBuilder<BackgroundNode>("Background") + .set("Color", make_float3(0.0f, 0.0f, 0.0f))) + .output_closure("Background::Background"); + + graph.finalize(&scene); +} + +TEST(render_graph, constant_fold_background2) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Discarding closure Background."); + + builder + .add_node(ShaderNodeBuilder<BackgroundNode>("Background") + .set("Strength", 0.0f)) + .output_closure("Background::Background"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of Add Closure with only one input. + */ +TEST(render_graph, constant_fold_shader_add) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding AddClosure1::Closure to socket Diffuse::BSDF."); + CORRECT_INFO_MESSAGE(log, "Folding AddClosure2::Closure to socket Diffuse::BSDF."); + INVALID_INFO_MESSAGE(log, "Folding AddClosure3"); + + builder + .add_node(ShaderNodeBuilder<DiffuseBsdfNode>("Diffuse")) + .add_node(ShaderNodeBuilder<AddClosureNode>("AddClosure1")) + .add_node(ShaderNodeBuilder<AddClosureNode>("AddClosure2")) + .add_node(ShaderNodeBuilder<AddClosureNode>("AddClosure3")) + .add_connection("Diffuse::BSDF", "AddClosure1::Closure1") + .add_connection("Diffuse::BSDF", "AddClosure2::Closure2") + .add_connection("AddClosure1::Closure", "AddClosure3::Closure1") + .add_connection("AddClosure2::Closure", "AddClosure3::Closure2") + .output_closure("AddClosure3::Closure"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of Mix Closure with 0 or 1 fac. + * - Folding of Mix Closure with both inputs folded to the same node. + */ +TEST(render_graph, constant_fold_shader_mix) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding MixClosure1::Closure to socket Diffuse::BSDF."); + CORRECT_INFO_MESSAGE(log, "Folding MixClosure2::Closure to socket Diffuse::BSDF."); + CORRECT_INFO_MESSAGE(log, "Folding MixClosure3::Closure to socket Diffuse::BSDF."); + + builder + .add_attribute("Attribute") + .add_node(ShaderNodeBuilder<DiffuseBsdfNode>("Diffuse")) + /* choose left */ + .add_node(ShaderNodeBuilder<MixClosureNode>("MixClosure1") + .set("Fac", 0.0f)) + .add_connection("Diffuse::BSDF", "MixClosure1::Closure1") + /* choose right */ + .add_node(ShaderNodeBuilder<MixClosureNode>("MixClosure2") + .set("Fac", 1.0f)) + .add_connection("Diffuse::BSDF", "MixClosure2::Closure2") + /* both inputs folded the same */ + .add_node(ShaderNodeBuilder<MixClosureNode>("MixClosure3")) + .add_connection("Attribute::Fac", "MixClosure3::Fac") + .add_connection("MixClosure1::Closure", "MixClosure3::Closure1") + .add_connection("MixClosure2::Closure", "MixClosure3::Closure2") + .output_closure("MixClosure3::Closure"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of Invert with all constant inputs. + */ +TEST(render_graph, constant_fold_invert) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Invert::Color to constant (0.68, 0.5, 0.32)."); + + builder + .add_node(ShaderNodeBuilder<InvertNode>("Invert") + .set("Fac", 0.8f) + .set("Color", make_float3(0.2f, 0.5f, 0.8f))) + .output_color("Invert::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of Invert with zero Fac. + */ +TEST(render_graph, constant_fold_invert_fac_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Invert::Color to socket Attribute::Color."); + + builder + .add_attribute("Attribute") + .add_node(ShaderNodeBuilder<InvertNode>("Invert") + .set("Fac", 0.0f)) + .add_connection("Attribute::Color", "Invert::Color") + .output_color("Invert::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of MixRGB Add with all constant inputs (clamp false). + */ +TEST(render_graph, constant_fold_mix_add) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding MixAdd::Color to constant (0.62, 1.14, 1.42)."); + + builder + .add_node(ShaderNodeBuilder<MixNode>("MixAdd") + .set(&MixNode::type, NODE_MIX_ADD) + .set(&MixNode::use_clamp, false) + .set("Fac", 0.8f) + .set("Color1", make_float3(0.3, 0.5, 0.7)) + .set("Color2", make_float3(0.4, 0.8, 0.9))) + .output_color("MixAdd::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of MixRGB Add with all constant inputs (clamp true). + */ +TEST(render_graph, constant_fold_mix_add_clamp) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding MixAdd::Color to constant (0.62, 1, 1)."); + + builder + .add_node(ShaderNodeBuilder<MixNode>("MixAdd") + .set(&MixNode::type, NODE_MIX_ADD) + .set(&MixNode::use_clamp, true) + .set("Fac", 0.8f) + .set("Color1", make_float3(0.3, 0.5, 0.7)) + .set("Color2", make_float3(0.4, 0.8, 0.9))) + .output_color("MixAdd::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - No folding on fac 0 for dodge. + */ +TEST(render_graph, constant_fold_part_mix_dodge_no_fac_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + INVALID_INFO_MESSAGE(log, "Folding "); + + builder + .add_attribute("Attribute1") + .add_attribute("Attribute2") + .add_node(ShaderNodeBuilder<MixNode>("Mix") + .set(&MixNode::type, NODE_MIX_DODGE) + .set(&MixNode::use_clamp, false) + .set("Fac", 0.0f)) + .add_connection("Attribute1::Color", "Mix::Color1") + .add_connection("Attribute2::Color", "Mix::Color2") + .output_color("Mix::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - No folding on fac 0 for light. + */ +TEST(render_graph, constant_fold_part_mix_light_no_fac_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + INVALID_INFO_MESSAGE(log, "Folding "); + + builder + .add_attribute("Attribute1") + .add_attribute("Attribute2") + .add_node(ShaderNodeBuilder<MixNode>("Mix") + .set(&MixNode::type, NODE_MIX_LIGHT) + .set(&MixNode::use_clamp, false) + .set("Fac", 0.0f)) + .add_connection("Attribute1::Color", "Mix::Color1") + .add_connection("Attribute2::Color", "Mix::Color2") + .output_color("Mix::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - No folding on fac 0 for burn. + */ +TEST(render_graph, constant_fold_part_mix_burn_no_fac_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + INVALID_INFO_MESSAGE(log, "Folding "); + + builder + .add_attribute("Attribute1") + .add_attribute("Attribute2") + .add_node(ShaderNodeBuilder<MixNode>("Mix") + .set(&MixNode::type, NODE_MIX_BURN) + .set(&MixNode::use_clamp, false) + .set("Fac", 0.0f)) + .add_connection("Attribute1::Color", "Mix::Color1") + .add_connection("Attribute2::Color", "Mix::Color2") + .output_color("Mix::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - No folding on fac 0 for clamped blend. + */ +TEST(render_graph, constant_fold_part_mix_blend_clamped_no_fac_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + INVALID_INFO_MESSAGE(log, "Folding "); + + builder + .add_attribute("Attribute1") + .add_attribute("Attribute2") + .add_node(ShaderNodeBuilder<MixNode>("Mix") + .set(&MixNode::type, NODE_MIX_BLEND) + .set(&MixNode::use_clamp, true) + .set("Fac", 0.0f)) + .add_connection("Attribute1::Color", "Mix::Color1") + .add_connection("Attribute2::Color", "Mix::Color2") + .output_color("Mix::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of Mix with 0 or 1 Fac. + * - Folding of Mix with both inputs folded to the same node. + */ +TEST(render_graph, constant_fold_part_mix_blend) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding MixBlend1::Color to socket Attribute1::Color."); + CORRECT_INFO_MESSAGE(log, "Folding MixBlend2::Color to socket Attribute1::Color."); + CORRECT_INFO_MESSAGE(log, "Folding MixBlend3::Color to socket Attribute1::Color."); + + builder + .add_attribute("Attribute1") + .add_attribute("Attribute2") + /* choose left */ + .add_node(ShaderNodeBuilder<MixNode>("MixBlend1") + .set(&MixNode::type, NODE_MIX_BLEND) + .set(&MixNode::use_clamp, false) + .set("Fac", 0.0f)) + .add_connection("Attribute1::Color", "MixBlend1::Color1") + .add_connection("Attribute2::Color", "MixBlend1::Color2") + /* choose right */ + .add_node(ShaderNodeBuilder<MixNode>("MixBlend2") + .set(&MixNode::type, NODE_MIX_BLEND) + .set(&MixNode::use_clamp, false) + .set("Fac", 1.0f)) + .add_connection("Attribute1::Color", "MixBlend2::Color2") + .add_connection("Attribute2::Color", "MixBlend2::Color1") + /* both inputs folded to Attribute1 */ + .add_node(ShaderNodeBuilder<MixNode>("MixBlend3") + .set(&MixNode::type, NODE_MIX_BLEND) + .set(&MixNode::use_clamp, false)) + .add_connection("Attribute1::Fac", "MixBlend3::Fac") + .add_connection("MixBlend1::Color", "MixBlend3::Color1") + .add_connection("MixBlend2::Color", "MixBlend3::Color2") + .output_color("MixBlend3::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - NOT folding of MixRGB Sub with the same inputs and fac NOT 1. + */ +TEST(render_graph, constant_fold_part_mix_sub_same_fac_bad) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + INVALID_INFO_MESSAGE(log, "Folding Mix::"); + + builder + .add_attribute("Attribute") + .add_node(ShaderNodeBuilder<MixNode>("Mix") + .set(&MixNode::type, NODE_MIX_SUB) + .set(&MixNode::use_clamp, true) + .set("Fac", 0.5f)) + .add_connection("Attribute::Color", "Mix::Color1") + .add_connection("Attribute::Color", "Mix::Color2") + .output_color("Mix::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of MixRGB Sub with the same inputs and fac 1. + */ +TEST(render_graph, constant_fold_part_mix_sub_same_fac_1) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Mix::Color to constant (0, 0, 0)."); + + builder + .add_attribute("Attribute") + .add_node(ShaderNodeBuilder<MixNode>("Mix") + .set(&MixNode::type, NODE_MIX_SUB) + .set(&MixNode::use_clamp, true) + .set("Fac", 1.0f)) + .add_connection("Attribute::Color", "Mix::Color1") + .add_connection("Attribute::Color", "Mix::Color2") + .output_color("Mix::Color"); + + graph.finalize(&scene); +} + +/* + * Graph for testing partial folds of MixRGB with one constant argument. + * Includes 4 tests: constant on each side with fac either unknown or 1. + */ +static void build_mix_partial_test_graph(ShaderGraphBuilder &builder, NodeMix type, float3 constval) +{ + builder + .add_attribute("Attribute") + /* constant on the left */ + .add_node(ShaderNodeBuilder<MixNode>("Mix_Cx_Fx") + .set(&MixNode::type, type) + .set(&MixNode::use_clamp, false) + .set("Color1", constval)) + .add_node(ShaderNodeBuilder<MixNode>("Mix_Cx_F1") + .set(&MixNode::type, type) + .set(&MixNode::use_clamp, false) + .set("Color1", constval) + .set("Fac", 1.0f)) + .add_connection("Attribute::Fac", "Mix_Cx_Fx::Fac") + .add_connection("Attribute::Color", "Mix_Cx_Fx::Color2") + .add_connection("Attribute::Color", "Mix_Cx_F1::Color2") + /* constant on the right */ + .add_node(ShaderNodeBuilder<MixNode>("Mix_xC_Fx") + .set(&MixNode::type, type) + .set(&MixNode::use_clamp, false) + .set("Color2", constval)) + .add_node(ShaderNodeBuilder<MixNode>("Mix_xC_F1") + .set(&MixNode::type, type) + .set(&MixNode::use_clamp, false) + .set("Color2", constval) + .set("Fac", 1.0f)) + .add_connection("Attribute::Fac", "Mix_xC_Fx::Fac") + .add_connection("Attribute::Color", "Mix_xC_Fx::Color1") + .add_connection("Attribute::Color", "Mix_xC_F1::Color1") + /* results of actual tests simply added up to connect to output */ + .add_node(ShaderNodeBuilder<MixNode>("Out12") + .set(&MixNode::type, NODE_MIX_ADD) + .set(&MixNode::use_clamp, true) + .set("Fac", 1.0f)) + .add_node(ShaderNodeBuilder<MixNode>("Out34") + .set(&MixNode::type, NODE_MIX_ADD) + .set(&MixNode::use_clamp, true) + .set("Fac", 1.0f)) + .add_node(ShaderNodeBuilder<MixNode>("Out1234") + .set(&MixNode::type, NODE_MIX_ADD) + .set(&MixNode::use_clamp, true) + .set("Fac", 1.0f)) + .add_connection("Mix_Cx_Fx::Color", "Out12::Color1") + .add_connection("Mix_Cx_F1::Color", "Out12::Color2") + .add_connection("Mix_xC_Fx::Color", "Out34::Color1") + .add_connection("Mix_xC_F1::Color", "Out34::Color2") + .add_connection("Out12::Color", "Out1234::Color1") + .add_connection("Out34::Color", "Out1234::Color2") + .output_color("Out1234::Color"); +} + +/* + * Tests: partial folding for RGB Add with known 0. + */ +TEST(render_graph, constant_fold_part_mix_add_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* 0 + X (fac 1) == X */ + INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color"); + CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color to socket Attribute::Color."); + /* X + 0 (fac ?) == X */ + CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color to socket Attribute::Color."); + CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to socket Attribute::Color."); + INVALID_INFO_MESSAGE(log, "Folding Out"); + + build_mix_partial_test_graph(builder, NODE_MIX_ADD, make_float3(0, 0, 0)); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for RGB Sub with known 0. + */ +TEST(render_graph, constant_fold_part_mix_sub_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color"); + INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color"); + /* X - 0 (fac ?) == X */ + CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color to socket Attribute::Color."); + CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to socket Attribute::Color."); + INVALID_INFO_MESSAGE(log, "Folding Out"); + + build_mix_partial_test_graph(builder, NODE_MIX_SUB, make_float3(0, 0, 0)); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for RGB Mul with known 1. + */ +TEST(render_graph, constant_fold_part_mix_mul_1) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* 1 * X (fac 1) == X */ + INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color"); + CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color to socket Attribute::Color."); + /* X * 1 (fac ?) == X */ + CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color to socket Attribute::Color."); + CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to socket Attribute::Color."); + INVALID_INFO_MESSAGE(log, "Folding Out"); + + build_mix_partial_test_graph(builder, NODE_MIX_MUL, make_float3(1, 1, 1)); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for RGB Div with known 1. + */ +TEST(render_graph, constant_fold_part_mix_div_1) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color"); + INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color"); + /* X / 1 (fac ?) == X */ + CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color to socket Attribute::Color."); + CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to socket Attribute::Color."); + INVALID_INFO_MESSAGE(log, "Folding Out"); + + build_mix_partial_test_graph(builder, NODE_MIX_DIV, make_float3(1, 1, 1)); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for RGB Mul with known 0. + */ +TEST(render_graph, constant_fold_part_mix_mul_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* 0 * ? (fac ?) == 0 */ + CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color to constant (0, 0, 0)."); + CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color to constant (0, 0, 0)."); + /* ? * 0 (fac 1) == 0 */ + INVALID_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color"); + CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to constant (0, 0, 0)."); + + CORRECT_INFO_MESSAGE(log, "Folding Out12::Color to constant (0, 0, 0)."); + INVALID_INFO_MESSAGE(log, "Folding Out1234"); + + build_mix_partial_test_graph(builder, NODE_MIX_MUL, make_float3(0, 0, 0)); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for RGB Div with known 0. + */ +TEST(render_graph, constant_fold_part_mix_div_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* 0 / ? (fac ?) == 0 */ + CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color to constant (0, 0, 0)."); + CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color to constant (0, 0, 0)."); + INVALID_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color"); + INVALID_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color"); + + CORRECT_INFO_MESSAGE(log, "Folding Out12::Color to constant (0, 0, 0)."); + INVALID_INFO_MESSAGE(log, "Folding Out1234"); + + build_mix_partial_test_graph(builder, NODE_MIX_DIV, make_float3(0, 0, 0)); + graph.finalize(&scene); +} + +/* + * Tests: Separate/Combine RGB with all constant inputs. + */ +TEST(render_graph, constant_fold_separate_combine_rgb) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding SeparateRGB::R to constant (0.3)."); + CORRECT_INFO_MESSAGE(log, "Folding SeparateRGB::G to constant (0.5)."); + CORRECT_INFO_MESSAGE(log, "Folding SeparateRGB::B to constant (0.7)."); + CORRECT_INFO_MESSAGE(log, "Folding CombineRGB::Image to constant (0.3, 0.5, 0.7)."); + + builder + .add_node(ShaderNodeBuilder<SeparateRGBNode>("SeparateRGB") + .set("Image", make_float3(0.3f, 0.5f, 0.7f))) + .add_node(ShaderNodeBuilder<CombineRGBNode>("CombineRGB")) + .add_connection("SeparateRGB::R", "CombineRGB::R") + .add_connection("SeparateRGB::G", "CombineRGB::G") + .add_connection("SeparateRGB::B", "CombineRGB::B") + .output_color("CombineRGB::Image"); + + graph.finalize(&scene); +} + +/* + * Tests: Separate/Combine XYZ with all constant inputs. + */ +TEST(render_graph, constant_fold_separate_combine_xyz) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding SeparateXYZ::X to constant (0.3)."); + CORRECT_INFO_MESSAGE(log, "Folding SeparateXYZ::Y to constant (0.5)."); + CORRECT_INFO_MESSAGE(log, "Folding SeparateXYZ::Z to constant (0.7)."); + CORRECT_INFO_MESSAGE(log, "Folding CombineXYZ::Vector to constant (0.3, 0.5, 0.7)."); + CORRECT_INFO_MESSAGE(log, "Folding convert_vector_to_color::value_color to constant (0.3, 0.5, 0.7)."); + + builder + .add_node(ShaderNodeBuilder<SeparateXYZNode>("SeparateXYZ") + .set("Vector", make_float3(0.3f, 0.5f, 0.7f))) + .add_node(ShaderNodeBuilder<CombineXYZNode>("CombineXYZ")) + .add_connection("SeparateXYZ::X", "CombineXYZ::X") + .add_connection("SeparateXYZ::Y", "CombineXYZ::Y") + .add_connection("SeparateXYZ::Z", "CombineXYZ::Z") + .output_color("CombineXYZ::Vector"); + + graph.finalize(&scene); +} + +/* + * Tests: Separate/Combine HSV with all constant inputs. + */ +TEST(render_graph, constant_fold_separate_combine_hsv) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding SeparateHSV::H to constant (0.583333)."); + CORRECT_INFO_MESSAGE(log, "Folding SeparateHSV::S to constant (0.571429)."); + CORRECT_INFO_MESSAGE(log, "Folding SeparateHSV::V to constant (0.7)."); + CORRECT_INFO_MESSAGE(log, "Folding CombineHSV::Color to constant (0.3, 0.5, 0.7)."); + + builder + .add_node(ShaderNodeBuilder<SeparateHSVNode>("SeparateHSV") + .set("Color", make_float3(0.3f, 0.5f, 0.7f))) + .add_node(ShaderNodeBuilder<CombineHSVNode>("CombineHSV")) + .add_connection("SeparateHSV::H", "CombineHSV::H") + .add_connection("SeparateHSV::S", "CombineHSV::S") + .add_connection("SeparateHSV::V", "CombineHSV::V") + .output_color("CombineHSV::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: Gamma with all constant inputs. + */ +TEST(render_graph, constant_fold_gamma) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Gamma::Color to constant (0.164317, 0.353553, 0.585662)."); + + builder + .add_node(ShaderNodeBuilder<GammaNode>("Gamma") + .set("Color", make_float3(0.3f, 0.5f, 0.7f)) + .set("Gamma", 1.5f)) + .output_color("Gamma::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: BrightnessContrast with all constant inputs. + */ +TEST(render_graph, constant_fold_bright_contrast) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding BrightContrast::Color to constant (0.16, 0.6, 1.04)."); + + builder + .add_node(ShaderNodeBuilder<BrightContrastNode>("BrightContrast") + .set("Color", make_float3(0.3f, 0.5f, 0.7f)) + .set("Bright", 0.1f) + .set("Contrast", 1.2f)) + .output_color("BrightContrast::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: blackbody with all constant inputs. + */ +TEST(render_graph, constant_fold_blackbody) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Blackbody::Color to constant (3.94163, 0.226523, 0)."); + + builder + .add_node(ShaderNodeBuilder<BlackbodyNode>("Blackbody") + .set("Temperature", 1200.0f)) + .output_color("Blackbody::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: Math with all constant inputs (clamp false). + */ +TEST(render_graph, constant_fold_math) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (1.6)."); + + builder + .add_node(ShaderNodeBuilder<MathNode>("Math") + .set(&MathNode::type, NODE_MATH_ADD) + .set(&MathNode::use_clamp, false) + .set("Value1", 0.7f) + .set("Value2", 0.9f)) + .output_value("Math::Value"); + + graph.finalize(&scene); +} + +/* + * Tests: Math with all constant inputs (clamp true). + */ +TEST(render_graph, constant_fold_math_clamp) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (1)."); + + builder + .add_node(ShaderNodeBuilder<MathNode>("Math") + .set(&MathNode::type, NODE_MATH_ADD) + .set(&MathNode::use_clamp, true) + .set("Value1", 0.7f) + .set("Value2", 0.9f)) + .output_value("Math::Value"); + + graph.finalize(&scene); +} + +/* + * Graph for testing partial folds of Math with one constant argument. + * Includes 2 tests: constant on each side. + */ +static void build_math_partial_test_graph(ShaderGraphBuilder &builder, NodeMath type, float constval) +{ + builder + .add_attribute("Attribute") + /* constant on the left */ + .add_node(ShaderNodeBuilder<MathNode>("Math_Cx") + .set(&MathNode::type, type) + .set(&MathNode::use_clamp, false) + .set("Value1", constval)) + .add_connection("Attribute::Fac", "Math_Cx::Value2") + /* constant on the right */ + .add_node(ShaderNodeBuilder<MathNode>("Math_xC") + .set(&MathNode::type, type) + .set(&MathNode::use_clamp, false) + .set("Value2", constval)) + .add_connection("Attribute::Fac", "Math_xC::Value1") + /* output sum */ + .add_node(ShaderNodeBuilder<MathNode>("Out") + .set(&MathNode::type, NODE_MATH_ADD) + .set(&MathNode::use_clamp, true)) + .add_connection("Math_Cx::Value", "Out::Value1") + .add_connection("Math_xC::Value", "Out::Value2") + .output_value("Out::Value"); +} + +/* + * Tests: partial folding for Math Add with known 0. + */ +TEST(render_graph, constant_fold_part_math_add_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* X + 0 == 0 + X == X */ + CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac."); + CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac."); + INVALID_INFO_MESSAGE(log, "Folding Out::"); + + build_math_partial_test_graph(builder, NODE_MATH_ADD, 0.0f); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for Math Sub with known 0. + */ +TEST(render_graph, constant_fold_part_math_sub_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* X - 0 == X */ + INVALID_INFO_MESSAGE(log, "Folding Math_Cx::"); + CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac."); + INVALID_INFO_MESSAGE(log, "Folding Out::"); + + build_math_partial_test_graph(builder, NODE_MATH_SUBTRACT, 0.0f); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for Math Mul with known 1. + */ +TEST(render_graph, constant_fold_part_math_mul_1) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* X * 1 == 1 * X == X */ + CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac."); + CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac."); + INVALID_INFO_MESSAGE(log, "Folding Out::"); + + build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 1.0f); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for Math Div with known 1. + */ +TEST(render_graph, constant_fold_part_math_div_1) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* X / 1 == X */ + INVALID_INFO_MESSAGE(log, "Folding Math_Cx::"); + CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac."); + INVALID_INFO_MESSAGE(log, "Folding Out::"); + + build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 1.0f); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for Math Mul with known 0. + */ +TEST(render_graph, constant_fold_part_math_mul_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* X * 0 == 0 * X == 0 */ + CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0)."); + CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (0)."); + CORRECT_INFO_MESSAGE(log, "Folding Out::Value to constant (0)"); + CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode."); + + build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 0.0f); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for Math Div with known 0. + */ +TEST(render_graph, constant_fold_part_math_div_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* 0 / X == 0 */ + CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0)."); + INVALID_INFO_MESSAGE(log, "Folding Math_xC::"); + INVALID_INFO_MESSAGE(log, "Folding Out::"); + + build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 0.0f); + graph.finalize(&scene); +} + +/* + * Tests: Vector Math with all constant inputs. + */ +TEST(render_graph, constant_fold_vector_math) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Value to constant (1)."); + CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Vector to constant (3, 0, 0)."); + CORRECT_INFO_MESSAGE(log, "Folding convert_vector_to_float::value_float to constant (1)."); + CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (2)."); + CORRECT_INFO_MESSAGE(log, "Folding convert_float_to_color::value_color to constant (2, 2, 2)."); + + builder + .add_node(ShaderNodeBuilder<VectorMathNode>("VectorMath") + .set(&VectorMathNode::type, NODE_VECTOR_MATH_SUBTRACT) + .set("Vector1", make_float3(1.3f, 0.5f, 0.7f)) + .set("Vector2", make_float3(-1.7f, 0.5f, 0.7f))) + .add_node(ShaderNodeBuilder<MathNode>("Math") + .set(&MathNode::type, NODE_MATH_ADD)) + .add_connection("VectorMath::Vector", "Math::Value1") + .add_connection("VectorMath::Value", "Math::Value2") + .output_color("Math::Value"); + + graph.finalize(&scene); +} + +/* + * Graph for testing partial folds of Vector Math with one constant argument. + * Includes 2 tests: constant on each side. + */ +static void build_vecmath_partial_test_graph(ShaderGraphBuilder &builder, NodeVectorMath type, float3 constval) +{ + builder + .add_attribute("Attribute") + /* constant on the left */ + .add_node(ShaderNodeBuilder<VectorMathNode>("Math_Cx") + .set(&VectorMathNode::type, type) + .set("Vector1", constval)) + .add_connection("Attribute::Vector", "Math_Cx::Vector2") + /* constant on the right */ + .add_node(ShaderNodeBuilder<VectorMathNode>("Math_xC") + .set(&VectorMathNode::type, type) + .set("Vector2", constval)) + .add_connection("Attribute::Vector", "Math_xC::Vector1") + /* output sum */ + .add_node(ShaderNodeBuilder<VectorMathNode>("Out") + .set(&VectorMathNode::type, NODE_VECTOR_MATH_ADD)) + .add_connection("Math_Cx::Vector", "Out::Vector1") + .add_connection("Math_xC::Vector", "Out::Vector2") + .output_color("Out::Vector"); +} + +/* + * Tests: partial folding for Vector Math Add with known 0. + */ +TEST(render_graph, constant_fold_part_vecmath_add_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* X + 0 == 0 + X == X */ + CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Vector to socket Attribute::Vector."); + CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to socket Attribute::Vector."); + INVALID_INFO_MESSAGE(log, "Folding Out::"); + + build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_ADD, make_float3(0,0,0)); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for Vector Math Sub with known 0. + */ +TEST(render_graph, constant_fold_part_vecmath_sub_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* X - 0 == X */ + INVALID_INFO_MESSAGE(log, "Folding Math_Cx::"); + CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to socket Attribute::Vector."); + INVALID_INFO_MESSAGE(log, "Folding Out::"); + + build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_SUBTRACT, make_float3(0,0,0)); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for Vector Math Dot Product with known 0. + */ +TEST(render_graph, constant_fold_part_vecmath_dot_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* X * 0 == 0 * X == X */ + CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Vector to constant (0, 0, 0)."); + CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to constant (0, 0, 0)."); + CORRECT_INFO_MESSAGE(log, "Folding Out::Vector to constant (0, 0, 0)."); + CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode."); + + build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_DOT_PRODUCT, make_float3(0,0,0)); + graph.finalize(&scene); +} + +/* + * Tests: partial folding for Vector Math Cross Product with known 0. + */ +TEST(render_graph, constant_fold_part_vecmath_cross_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + /* X * 0 == 0 * X == X */ + CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Vector to constant (0, 0, 0)."); + CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to constant (0, 0, 0)."); + CORRECT_INFO_MESSAGE(log, "Folding Out::Vector to constant (0, 0, 0)."); + CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode."); + + build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_CROSS_PRODUCT, make_float3(0,0,0)); + graph.finalize(&scene); +} + +/* + * Tests: Bump with no height input folded to Normal input. + */ +TEST(render_graph, constant_fold_bump) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Bump::Normal to socket Geometry1::Normal."); + + builder + .add_node(ShaderNodeBuilder<GeometryNode>("Geometry1")) + .add_node(ShaderNodeBuilder<BumpNode>("Bump")) + .add_connection("Geometry1::Normal", "Bump::Normal") + .output_color("Bump::Normal"); + + graph.finalize(&scene); +} + +/* + * Tests: Bump with no inputs folded to Geometry::Normal. + */ +TEST(render_graph, constant_fold_bump_no_input) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Bump::Normal to socket geometry::Normal."); + + builder + .add_node(ShaderNodeBuilder<BumpNode>("Bump")) + .output_color("Bump::Normal"); + + graph.finalize(&scene); +} + +template<class T> +void init_test_curve(array<T> &buffer, T start, T end, int steps) +{ + buffer.resize(steps); + + for (int i = 0; i < steps; i++) + buffer[i] = lerp(start, end, float(i)/(steps-1)); +} + +/* + * Tests: + * - Folding of RGB Curves with all constant inputs. + */ +TEST(render_graph, constant_fold_rgb_curves) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Curves::Color to constant (0.275, 0.5, 0.475)."); + + array<float3> curve; + init_test_curve(curve, make_float3(0.0f, 0.25f, 1.0f), make_float3(1.0f, 0.75f, 0.0f), 257); + + builder + .add_node(ShaderNodeBuilder<RGBCurvesNode>("Curves") + .set(&CurvesNode::curves, curve) + .set(&CurvesNode::min_x, 0.1f) + .set(&CurvesNode::max_x, 0.9f) + .set("Fac", 0.5f) + .set("Color", make_float3(0.3f, 0.5f, 0.7f))) + .output_color("Curves::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of RGB Curves with zero Fac. + */ +TEST(render_graph, constant_fold_rgb_curves_fac_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Curves::Color to socket Attribute::Color."); + + array<float3> curve; + init_test_curve(curve, make_float3(0.0f, 0.25f, 1.0f), make_float3(1.0f, 0.75f, 0.0f), 257); + + builder + .add_attribute("Attribute") + .add_node(ShaderNodeBuilder<RGBCurvesNode>("Curves") + .set(&CurvesNode::curves, curve) + .set(&CurvesNode::min_x, 0.1f) + .set(&CurvesNode::max_x, 0.9f) + .set("Fac", 0.0f)) + .add_connection("Attribute::Color", "Curves::Color") + .output_color("Curves::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of Vector Curves with all constant inputs. + */ +TEST(render_graph, constant_fold_vector_curves) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Curves::Vector to constant (0.275, 0.5, 0.475)."); + + array<float3> curve; + init_test_curve(curve, make_float3(0.0f, 0.25f, 1.0f), make_float3(1.0f, 0.75f, 0.0f), 257); + + builder + .add_node(ShaderNodeBuilder<VectorCurvesNode>("Curves") + .set(&CurvesNode::curves, curve) + .set(&CurvesNode::min_x, 0.1f) + .set(&CurvesNode::max_x, 0.9f) + .set("Fac", 0.5f) + .set("Vector", make_float3(0.3f, 0.5f, 0.7f))) + .output_color("Curves::Vector"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of Vector Curves with zero Fac. + */ +TEST(render_graph, constant_fold_vector_curves_fac_0) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Curves::Vector to socket Attribute::Vector."); + + array<float3> curve; + init_test_curve(curve, make_float3(0.0f, 0.25f, 1.0f), make_float3(1.0f, 0.75f, 0.0f), 257); + + builder + .add_attribute("Attribute") + .add_node(ShaderNodeBuilder<VectorCurvesNode>("Curves") + .set(&CurvesNode::curves, curve) + .set(&CurvesNode::min_x, 0.1f) + .set(&CurvesNode::max_x, 0.9f) + .set("Fac", 0.0f)) + .add_connection("Attribute::Vector", "Curves::Vector") + .output_color("Curves::Vector"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of Color Ramp with all constant inputs. + */ +TEST(render_graph, constant_fold_rgb_ramp) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Ramp::Color to constant (0.14, 0.39, 0.64)."); + CORRECT_INFO_MESSAGE(log, "Folding Ramp::Alpha to constant (0.89)."); + + array<float3> curve; + array<float> alpha; + init_test_curve(curve, make_float3(0.0f, 0.25f, 0.5f), make_float3(0.25f, 0.5f, 0.75f), 9); + init_test_curve(alpha, 0.75f, 1.0f, 9); + + builder + .add_node(ShaderNodeBuilder<RGBRampNode>("Ramp") + .set(&RGBRampNode::ramp, curve) + .set(&RGBRampNode::ramp_alpha, alpha) + .set(&RGBRampNode::interpolate, true) + .set("Fac", 0.56f)) + .add_node(ShaderNodeBuilder<MixNode>("Mix") + .set(&MixNode::type, NODE_MIX_ADD)) + .add_connection("Ramp::Color", "Mix::Color1") + .add_connection("Ramp::Alpha", "Mix::Color2") + .output_color("Mix::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of Color Ramp with all constant inputs (interpolate false). + */ +TEST(render_graph, constant_fold_rgb_ramp_flat) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Ramp::Color to constant (0.125, 0.375, 0.625)."); + CORRECT_INFO_MESSAGE(log, "Folding Ramp::Alpha to constant (0.875)."); + + array<float3> curve; + array<float> alpha; + init_test_curve(curve, make_float3(0.0f, 0.25f, 0.5f), make_float3(0.25f, 0.5f, 0.75f), 9); + init_test_curve(alpha, 0.75f, 1.0f, 9); + + builder + .add_node(ShaderNodeBuilder<RGBRampNode>("Ramp") + .set(&RGBRampNode::ramp, curve) + .set(&RGBRampNode::ramp_alpha, alpha) + .set(&RGBRampNode::interpolate, false) + .set("Fac", 0.56f)) + .add_node(ShaderNodeBuilder<MixNode>("Mix") + .set(&MixNode::type, NODE_MIX_ADD)) + .add_connection("Ramp::Color", "Mix::Color1") + .add_connection("Ramp::Alpha", "Mix::Color2") + .output_color("Mix::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of redundant conversion of float to color to float. + */ +TEST(render_graph, constant_fold_convert_float_color_float) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding Invert::Color to socket convert_float_to_color::value_color."); + CORRECT_INFO_MESSAGE(log, "Folding convert_color_to_float::value_float to socket Attribute::Fac."); + + builder + .add_attribute("Attribute") + .add_node(ShaderNodeBuilder<InvertNode>("Invert") + .set("Fac", 0.0f)) + .add_connection("Attribute::Fac", "Invert::Color") + .output_value("Invert::Color"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - Folding of redundant conversion of color to vector to color. + */ +TEST(render_graph, constant_fold_convert_color_vector_color) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding VecAdd::Vector to socket convert_color_to_vector::value_vector."); + CORRECT_INFO_MESSAGE(log, "Folding convert_vector_to_color::value_color to socket Attribute::Color."); + + builder + .add_attribute("Attribute") + .add_node(ShaderNodeBuilder<VectorMathNode>("VecAdd") + .set(&VectorMathNode::type, NODE_VECTOR_MATH_ADD) + .set("Vector2", make_float3(0,0,0))) + .add_connection("Attribute::Color", "VecAdd::Vector1") + .output_color("VecAdd::Vector"); + + graph.finalize(&scene); +} + +/* + * Tests: + * - NOT folding conversion of color to float to color. + */ +TEST(render_graph, constant_fold_convert_color_float_color) +{ + DEFINE_COMMON_VARIABLES(builder, log); + + EXPECT_ANY_MESSAGE(log); + CORRECT_INFO_MESSAGE(log, "Folding MathAdd::Value to socket convert_color_to_float::value_float."); + INVALID_INFO_MESSAGE(log, "Folding convert_float_to_color::"); + + builder + .add_attribute("Attribute") + .add_node(ShaderNodeBuilder<MathNode>("MathAdd") + .set(&MathNode::type, NODE_MATH_ADD) + .set("Value2", 0.0f)) + .add_connection("Attribute::Color", "MathAdd::Value1") + .output_color("MathAdd::Value"); graph.finalize(&scene); } diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 016f4a6a794..13aba0646d2 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -1477,10 +1477,10 @@ ccl_device bool ray_triangle_intersect( return true; } -ccl_device bool ray_triangle_intersect_uv( - float3 ray_P, float3 ray_D, float ray_t, - float3 v0, float3 v1, float3 v2, - float *isect_u, float *isect_v, float *isect_t) +ccl_device_inline bool ray_triangle_intersect_uv( + float3 ray_P, float3 ray_D, float ray_t, + float3 v0, float3 v1, float3 v2, + float *isect_u, float *isect_v, float *isect_t) { /* Calculate intersection */ float3 e1 = v1 - v0; diff --git a/intern/ghost/intern/GHOST_SystemCocoa.mm b/intern/ghost/intern/GHOST_SystemCocoa.mm index bce390732fe..4b8cb537ecf 100644 --- a/intern/ghost/intern/GHOST_SystemCocoa.mm +++ b/intern/ghost/intern/GHOST_SystemCocoa.mm @@ -289,6 +289,7 @@ extern "C" int GHOST_HACK_getFirstFile(char buf[FIRSTFILEBUFLG]) GHOST_SystemCocoa *systemCocoa; } - (void)setSystemCocoa:(GHOST_SystemCocoa *)sysCocoa; +- (void)applicationDidFinishLaunching:(NSNotification *)aNotification; - (BOOL)application:(NSApplication *)theApplication openFile:(NSString *)filename; - (NSApplicationTerminateReply)applicationShouldTerminate:(NSApplication *)sender; - (void)applicationWillTerminate:(NSNotification *)aNotification; @@ -302,6 +303,15 @@ extern "C" int GHOST_HACK_getFirstFile(char buf[FIRSTFILEBUFLG]) systemCocoa = sysCocoa; } +- (void)applicationDidFinishLaunching:(NSNotification *)aNotification +{ + // raise application to front, convenient when starting from the terminal + // and important for launching the animation player. we call this after the + // application finishes launching, as doing it earlier can make us end up + // with a frontmost window but an inactive application + [NSApp activateIgnoringOtherApps:YES]; +} + - (BOOL)application:(NSApplication *)theApplication openFile:(NSString *)filename { return systemCocoa->handleOpenDocumentRequest(filename); diff --git a/intern/ghost/intern/GHOST_WindowCocoa.mm b/intern/ghost/intern/GHOST_WindowCocoa.mm index 00e00b6a1ea..e23cf9ccd37 100644 --- a/intern/ghost/intern/GHOST_WindowCocoa.mm +++ b/intern/ghost/intern/GHOST_WindowCocoa.mm @@ -622,8 +622,6 @@ GHOST_WindowCocoa::GHOST_WindowCocoa( m_lionStyleFullScreen = true; } - [NSApp activateIgnoringOtherApps:YES]; // raise application to front, important for new blender instance animation play case - [pool drain]; } diff --git a/intern/smoke/intern/FLUID_3D.cpp b/intern/smoke/intern/FLUID_3D.cpp index 4faec894801..8a27818ff36 100644 --- a/intern/smoke/intern/FLUID_3D.cpp +++ b/intern/smoke/intern/FLUID_3D.cpp @@ -993,6 +993,9 @@ void FLUID_3D::project() copyBorderAll(_pressure, 0, _zRes); + // fix fluid compression caused in isolated components by obstacle movement + fixObstacleCompression(_divergence); + // solve Poisson equation solvePressurePre(_pressure, _divergence, _obstacles); @@ -1291,6 +1294,142 @@ void FLUID_3D::setObstacleBoundaries(float *_pressure, int zBegin, int zEnd) } // z-loop } +void FLUID_3D::floodFillComponent(int *buffer, size_t *queue, size_t limit, size_t pos, int from, int to) +{ + /* Flood 'from' cells with 'to' in the grid. Rely on (from != 0 && from != to && edges == 0) to stop. */ + int offsets[] = { -1, +1, -_xRes, +_xRes, -_slabSize, +_slabSize }; + size_t qend = 0; + + buffer[pos] = to; + queue[qend++] = pos; + + for (size_t qidx = 0; qidx < qend; qidx++) + { + pos = queue[qidx]; + + for (int i = 0; i < 6; i++) + { + size_t next = pos + offsets[i]; + + if (next < limit && buffer[next] == from) + { + buffer[next] = to; + queue[qend++] = next; + } + } + } +} + +void FLUID_3D::mergeComponents(int *buffer, size_t *queue, size_t cur, size_t other) +{ + /* Replace higher value with lower. */ + if (buffer[other] < buffer[cur]) + { + floodFillComponent(buffer, queue, cur, cur, buffer[cur], buffer[other]); + } + else if (buffer[cur] < buffer[other]) + { + floodFillComponent(buffer, queue, cur, other, buffer[other], buffer[cur]); + } +} + +void FLUID_3D::fixObstacleCompression(float *divergence) +{ + int x, y, z; + size_t index; + + /* Find compartments completely separated by obstacles. + * Edge of the domain is automatically component 0. */ + int *component = new int[_totalCells]; + size_t *queue = new size_t[_totalCells]; + + memset(component, 0, sizeof(int) * _totalCells); + + int next_id = 1; + + for (z = 1, index = _slabSize + _xRes + 1; z < _zRes - 1; z++, index += 2 * _xRes) + { + for (y = 1; y < _yRes - 1; y++, index += 2) + { + for (x = 1; x < _xRes - 1; x++, index++) + { + if(!_obstacles[index]) + { + /* Check for connection to the domain edge at iteration end. */ + if ((x == _xRes-2 && !_obstacles[index + 1]) || + (y == _yRes-2 && !_obstacles[index + _xRes]) || + (z == _zRes-2 && !_obstacles[index + _slabSize])) + { + component[index] = 0; + } + else { + component[index] = next_id; + } + + if (!_obstacles[index - 1]) + mergeComponents(component, queue, index, index - 1); + if (!_obstacles[index - _xRes]) + mergeComponents(component, queue, index, index - _xRes); + if (!_obstacles[index - _slabSize]) + mergeComponents(component, queue, index, index - _slabSize); + + if (component[index] == next_id) + next_id++; + } + } + } + } + + delete[] queue; + + /* Compute average divergence within each component. */ + float *total_divergence = new float[next_id]; + int *component_size = new int[next_id]; + + memset(total_divergence, 0, sizeof(float) * next_id); + memset(component_size, 0, sizeof(int) * next_id); + + for (z = 1, index = _slabSize + _xRes + 1; z < _zRes - 1; z++, index += 2 * _xRes) + { + for (y = 1; y < _yRes - 1; y++, index += 2) + { + for (x = 1; x < _xRes - 1; x++, index++) + { + if(!_obstacles[index]) + { + int ci = component[index]; + + component_size[ci]++; + total_divergence[ci] += divergence[index]; + } + } + } + } + + /* Adjust divergence to make the average zero in each component except the edge. */ + total_divergence[0] = 0.0f; + + for (z = 1, index = _slabSize + _xRes + 1; z < _zRes - 1; z++, index += 2 * _xRes) + { + for (y = 1; y < _yRes - 1; y++, index += 2) + { + for (x = 1; x < _xRes - 1; x++, index++) + { + if(!_obstacles[index]) + { + int ci = component[index]; + + divergence[index] -= total_divergence[ci] / component_size[ci]; + } + } + } + } + + delete[] component; + delete[] component_size; + delete[] total_divergence; +} + ////////////////////////////////////////////////////////////////////// // add buoyancy forces ////////////////////////////////////////////////////////////////////// @@ -1650,4 +1789,4 @@ void FLUID_3D::updateFlame(float *react, float *flame, int total_cells) else flame[index] = 0.0f; } -}
\ No newline at end of file +} diff --git a/intern/smoke/intern/FLUID_3D.h b/intern/smoke/intern/FLUID_3D.h index cd2147b2bee..fe20c10d71d 100644 --- a/intern/smoke/intern/FLUID_3D.h +++ b/intern/smoke/intern/FLUID_3D.h @@ -195,6 +195,8 @@ struct FLUID_3D void setObstacleBoundaries(float *_pressure, int zBegin, int zEnd); void setObstaclePressure(float *_pressure, int zBegin, int zEnd); + void fixObstacleCompression(float *divergence); + public: // advection, accessed e.g. by WTURBULENCE class //void advectMacCormack(); @@ -202,6 +204,9 @@ struct FLUID_3D void advectMacCormackEnd1(int zBegin, int zEnd); void advectMacCormackEnd2(int zBegin, int zEnd); + void floodFillComponent(int *components, size_t *queue, size_t limit, size_t start, int from, int to); + void mergeComponents(int *components, size_t *queue, size_t cur, size_t other); + /* burning */ float *_burning_rate; // RNA pointer float *_flame_smoke; // RNA pointer diff --git a/source/blender/blenkernel/intern/mesh_evaluate.c b/source/blender/blenkernel/intern/mesh_evaluate.c index 1c86fbcfe8e..fa113ef5eef 100644 --- a/source/blender/blenkernel/intern/mesh_evaluate.c +++ b/source/blender/blenkernel/intern/mesh_evaluate.c @@ -434,7 +434,7 @@ MLoopNorSpace *BKE_lnor_space_create(MLoopNorSpaceArray *lnors_spacearr) } /* This threshold is a bit touchy (usual float precision issue), this value seems OK. */ -#define LNOR_SPACE_TRIGO_THRESHOLD (1.0f - 1e-6f) +#define LNOR_SPACE_TRIGO_THRESHOLD (1.0f - 1e-4f) /* Should only be called once. * Beware, this modifies ref_vec and other_vec in place! diff --git a/source/blender/blenkernel/intern/particle_system.c b/source/blender/blenkernel/intern/particle_system.c index e7561ee699e..8e3e2f5d6d0 100644 --- a/source/blender/blenkernel/intern/particle_system.c +++ b/source/blender/blenkernel/intern/particle_system.c @@ -3311,7 +3311,7 @@ static float get_base_time_step(ParticleSettings *part) return 1.0f / (float) (part->subframes + 1); } /* Update time step size to suit current conditions. */ -static float update_timestep(ParticleSystem *psys, ParticleSimulationData *sim, float t_frac) +static void update_timestep(ParticleSystem *psys, ParticleSimulationData *sim) { float dt_target; if (sim->courant_num == 0.0f) @@ -3331,7 +3331,10 @@ static float update_timestep(ParticleSystem *psys, ParticleSimulationData *sim, psys->dt_frac = interpf(dt_target, psys->dt_frac, TIMESTEP_EXPANSION_FACTOR); else psys->dt_frac = dt_target; +} +static float sync_timestep(ParticleSystem *psys, float t_frac) +{ /* Sync with frame end if it's close. */ if (t_frac == 1.0f) return psys->dt_frac; @@ -3991,7 +3994,9 @@ static void system_step(ParticleSimulationData *sim, float cfra, const bool use_ printf("%f,%f,%f,%f\n", cfra+dframe+t_frac - 1.f, t_frac, dt_frac, sim->courant_num); #endif if (part->time_flag & PART_TIME_AUTOSF) - dt_frac = update_timestep(psys, sim, t_frac); + update_timestep(psys, sim); + /* Even without AUTOSF dt_frac may not add up to 1.0 due to float precision. */ + dt_frac = sync_timestep(psys, t_frac); } } } diff --git a/source/blender/blenkernel/intern/smoke.c b/source/blender/blenkernel/intern/smoke.c index c7e073a7fc1..43569f9ded2 100644 --- a/source/blender/blenkernel/intern/smoke.c +++ b/source/blender/blenkernel/intern/smoke.c @@ -707,6 +707,7 @@ typedef struct ObstaclesFromDMData { bool has_velocity; float *vert_vel; float *velocityX, *velocityY, *velocityZ; + int *num_obstacles; } ObstaclesFromDMData; static void obstacles_from_derivedmesh_task_cb(void *userdata, const int z) @@ -755,8 +756,10 @@ static void obstacles_from_derivedmesh_task_cb(void *userdata, const int z) /* tag obstacle cells */ data->obstacle_map[index] = 1; - if (data->has_velocity) + if (data->has_velocity) { data->obstacle_map[index] |= 8; + data->num_obstacles[index]++; + } } } } @@ -764,7 +767,7 @@ static void obstacles_from_derivedmesh_task_cb(void *userdata, const int z) static void obstacles_from_derivedmesh( Object *coll_ob, SmokeDomainSettings *sds, SmokeCollSettings *scs, - unsigned char *obstacle_map, float *velocityX, float *velocityY, float *velocityZ, float dt) + unsigned char *obstacle_map, float *velocityX, float *velocityY, float *velocityZ, int *num_obstacles, float dt) { if (!scs->dm) return; { @@ -835,7 +838,8 @@ static void obstacles_from_derivedmesh( .sds = sds, .mvert = mvert, .mloop = mloop, .looptri = looptri, .tree = &treeData, .obstacle_map = obstacle_map, .has_velocity = has_velocity, .vert_vel = vert_vel, - .velocityX = velocityX, .velocityY = velocityY, .velocityZ = velocityZ + .velocityX = velocityX, .velocityY = velocityY, .velocityZ = velocityZ, + .num_obstacles = num_obstacles }; BLI_task_parallel_range( sds->res_min[2], sds->res_max[2], &data, obstacles_from_derivedmesh_task_cb, true); @@ -871,6 +875,8 @@ static void update_obstacles(Scene *scene, Object *ob, SmokeDomainSettings *sds, float *b = smoke_get_color_b(sds->fluid); unsigned int z; + int *num_obstacles = MEM_callocN(sizeof(int) * sds->res[0] * sds->res[1] * sds->res[2], "smoke_num_obstacles"); + smoke_get_ob_velocity(sds->fluid, &velx, &vely, &velz); // TODO: delete old obstacle flags @@ -900,7 +906,7 @@ static void update_obstacles(Scene *scene, Object *ob, SmokeDomainSettings *sds, if ((smd2->type & MOD_SMOKE_TYPE_COLL) && smd2->coll) { SmokeCollSettings *scs = smd2->coll; - obstacles_from_derivedmesh(collob, sds, scs, obstacles, velx, vely, velz, dt); + obstacles_from_derivedmesh(collob, sds, scs, obstacles, velx, vely, velz, num_obstacles, dt); } } @@ -926,7 +932,15 @@ static void update_obstacles(Scene *scene, Object *ob, SmokeDomainSettings *sds, b[z] = 0; } } + /* average velocities from multiple obstacles in one cell */ + if (num_obstacles[z]) { + velx[z] /= num_obstacles[z]; + vely[z] /= num_obstacles[z]; + velz[z] /= num_obstacles[z]; + } } + + MEM_freeN(num_obstacles); } /********************************************************** diff --git a/source/blender/blenlib/intern/BLI_heap.c b/source/blender/blenlib/intern/BLI_heap.c index 0a8dafc2dc1..d7fd1caa8da 100644 --- a/source/blender/blenlib/intern/BLI_heap.c +++ b/source/blender/blenlib/intern/BLI_heap.c @@ -55,9 +55,10 @@ struct HeapNode_Chunk { * or we allocate past the reserved number. * * \note Optimize number for 64kb allocs. + * \note keep type in sync with tot_nodes in heap_node_alloc_chunk. */ #define HEAP_CHUNK_DEFAULT_NUM \ - ((MEM_SIZE_OPTIMAL((1 << 16) - sizeof(struct HeapNode_Chunk))) / sizeof(HeapNode)) + ((unsigned int)((MEM_SIZE_OPTIMAL((1 << 16) - sizeof(struct HeapNode_Chunk))) / sizeof(HeapNode))) struct Heap { unsigned int size; diff --git a/source/blender/compositor/nodes/COM_FilterNode.cpp b/source/blender/compositor/nodes/COM_FilterNode.cpp index 7493f24ba6b..e8b08ce2ce1 100644 --- a/source/blender/compositor/nodes/COM_FilterNode.cpp +++ b/source/blender/compositor/nodes/COM_FilterNode.cpp @@ -49,7 +49,7 @@ void FilterNode::convertToOperations(NodeConverter &converter, const CompositorC operation->set3x3Filter(-1, -1, -1, -1, 9, -1, -1, -1, -1); break; case CMP_FILT_LAPLACE: - operation = new ConvolutionFilterOperation(); + operation = new ConvolutionEdgeFilterOperation(); operation->set3x3Filter(-1 / 8.0f, -1 / 8.0f, -1 / 8.0f, -1 / 8.0f, 1.0f, -1 / 8.0f, -1 / 8.0f, -1 / 8.0f, -1 / 8.0f); break; case CMP_FILT_SOBEL: diff --git a/source/blender/editors/screen/screen_edit.c b/source/blender/editors/screen/screen_edit.c index 62aeca4b9d1..677a6472c72 100644 --- a/source/blender/editors/screen/screen_edit.c +++ b/source/blender/editors/screen/screen_edit.c @@ -1943,6 +1943,7 @@ ScrArea *ED_screen_state_toggle(bContext *C, wmWindow *win, ScrArea *sa, const s sc = ED_screen_add(win, oldscreen->scene, newname); sc->state = state; sc->redraws_flag = oldscreen->redraws_flag; + sc->temp = oldscreen->temp; /* timer */ sc->animtimer = oldscreen->animtimer; diff --git a/source/blender/editors/space_view3d/drawmesh.c b/source/blender/editors/space_view3d/drawmesh.c index f5289a0d245..74a50497164 100644 --- a/source/blender/editors/space_view3d/drawmesh.c +++ b/source/blender/editors/space_view3d/drawmesh.c @@ -435,12 +435,16 @@ static void draw_textured_begin(Scene *scene, View3D *v3d, RegionView3D *rv3d, O else { /* draw with lights in the scene otherwise */ solidtex = false; - if (v3d->flag2 & V3D_SHADELESS_TEX) + if (v3d->flag2 & V3D_SHADELESS_TEX) { Gtexdraw.is_lit = 0; - else - Gtexdraw.is_lit = GPU_scene_object_lights(scene, ob, v3d->lay, rv3d->viewmat, !rv3d->is_persp); + } + else { + Gtexdraw.is_lit = GPU_scene_object_lights( + scene, ob, v3d->localvd ? v3d->localvd->lay : v3d->lay, + rv3d->viewmat, !rv3d->is_persp); + } } - + rgba_float_to_uchar(obcol, ob->col); if (solidtex || v3d->drawtype == OB_TEXTURE) is_tex = true; diff --git a/source/blender/editors/transform/transform.c b/source/blender/editors/transform/transform.c index 8c906160a00..ef6cff19181 100644 --- a/source/blender/editors/transform/transform.c +++ b/source/blender/editors/transform/transform.c @@ -3997,10 +3997,8 @@ static void applyRotation(TransInfo *t, const int UNUSED(mval[2])) applySnapping(t, &final); - if (applyNumInput(&t->num, &final)) { - /* Clamp between -PI and PI */ - final = angle_wrap_rad(final); - } + /* Used to clamp final result in [-PI, PI[ range, no idea why, inheritance from 2.4x area, see T48998. */ + applyNumInput(&t->num, &final); t->values[0] = final; diff --git a/source/blender/makesrna/intern/rna_smoke.c b/source/blender/makesrna/intern/rna_smoke.c index 9a31952b84b..dad5577dc12 100644 --- a/source/blender/makesrna/intern/rna_smoke.c +++ b/source/blender/makesrna/intern/rna_smoke.c @@ -76,6 +76,21 @@ static void rna_Smoke_resetCache(Main *UNUSED(bmain), Scene *UNUSED(scene), Poin DAG_id_tag_update(ptr->id.data, OB_RECALC_DATA); } +static void rna_Smoke_cachetype_set(struct PointerRNA *ptr, int value) +{ + SmokeDomainSettings *settings = (SmokeDomainSettings *)ptr->data; + Object *ob = (Object *)ptr->id.data; + + if (value != settings->cache_file_format) { + /* Clear old caches. */ + PTCacheID id; + BKE_ptcache_id_from_smoke(&id, ob, settings->smd); + BKE_ptcache_id_clear(&id, PTCACHE_CLEAR_ALL, 0); + + settings->cache_file_format = value; + } +} + static void rna_Smoke_reset(Main *bmain, Scene *scene, PointerRNA *ptr) { SmokeDomainSettings *settings = (SmokeDomainSettings *)ptr->data; @@ -701,6 +716,7 @@ static void rna_def_smoke_domain_settings(BlenderRNA *brna) prop = RNA_def_property(srna, "cache_file_format", PROP_ENUM, PROP_NONE); RNA_def_property_enum_sdna(prop, NULL, "cache_file_format"); RNA_def_property_enum_items(prop, cache_file_type_items); + RNA_def_property_enum_funcs(prop, NULL, "rna_Smoke_cachetype_set", NULL); RNA_def_property_ui_text(prop, "File Format", "Select the file format to be used for caching"); RNA_def_property_update(prop, NC_OBJECT | ND_MODIFIER, "rna_Smoke_resetCache"); } |