diff options
author | Campbell Barton <ideasman42@gmail.com> | 2017-08-14 05:13:55 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2017-08-14 05:13:55 +0300 |
commit | 230be97284de444e6b2566f535141ca31647f6ab (patch) | |
tree | 2cedfed045fb8c015d0fe4fbf65de5623fff1a55 | |
parent | eccaa6da26b8fc58481367524725413349dd9059 (diff) | |
parent | a8fbe991cd5293f077fa946296b6caad18d0f5ab (diff) |
Merge branch 'master' into blender2.8
18 files changed, 175 insertions, 352 deletions
diff --git a/build_files/build_environment/cmake/options.cmake b/build_files/build_environment/cmake/options.cmake index 16d79d463f6..5618fc1255d 100644 --- a/build_files/build_environment/cmake/options.cmake +++ b/build_files/build_environment/cmake/options.cmake @@ -33,7 +33,8 @@ ELSE(BUILD_MODE STREQUAL "Debug") set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Release) ENDIF(BUILD_MODE STREQUAL "Debug") -set(DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/downloads) +option(DOWNLOAD_DIR "Path for downloaded files" ${CMAKE_CURRENT_SOURCE_DIR}/downloads) +file(TO_CMAKE_PATH ${DOWNLOAD_DIR} DOWNLOAD_DIR) set(PATCH_DIR ${CMAKE_CURRENT_SOURCE_DIR}/patches) set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/build) diff --git a/build_files/build_environment/windows/build_deps.cmd b/build_files/build_environment/windows/build_deps.cmd index 3e458816a5a..a18eb085e4f 100644 --- a/build_files/build_environment/windows/build_deps.cmd +++ b/build_files/build_environment/windows/build_deps.cmd @@ -54,7 +54,7 @@ set CMAKE_DEBUG_OPTIONS=-DWITH_OPTIMIZED_DEBUG=On if "%3" == "debug" set CMAKE_DEBUG_OPTIONS=-DWITH_OPTIMIZED_DEBUG=Off set SOURCE_DIR=%~dp0\.. -set BUILD_DIR=%~dp0\..\..\..\..\build_windows\deps +set BUILD_DIR=%cd%\build set HARVEST_DIR=%BUILD_DIR%\output set STAGING=%BUILD_DIR%\S @@ -62,7 +62,7 @@ rem for python module build set MSSdk=1 set DISTUTILS_USE_SDK=1 rem for python externals source to be shared between the various archs and compilers -mkdir %SOURCE_DIR%\downloads\externals +mkdir %BUILD_DIR%\downloads\externals REM Detect MSVC Installation if DEFINED VisualStudioVersion goto msvc_detect_finally @@ -95,11 +95,11 @@ if %ERRORLEVEL% NEQ 0 ( ) set StatusFile=%BUILD_DIR%\%1_%2.log -set path=%SOURCE_DIR%\downloads\mingw\mingw64\msys\1.0\bin\;%SOURCE_DIR%\downloads\nasm-2.12.01\;%path% +set path=%BUILD_DIR%\downloads\mingw\mingw64\msys\1.0\bin\;%BUILD_DIR%\downloads\nasm-2.12.01\;%path% mkdir %STAGING%\%BuildDir%%ARCH%R cd %Staging%\%BuildDir%%ARCH%R echo %DATE% %TIME% : Start > %StatusFile% -cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DBUILD_MODE=Release -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/ +cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DDOWNLOAD_DIR=%BUILD_DIR%/downloads -DBUILD_MODE=Release -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/ echo %DATE% %TIME% : Release Configuration done >> %StatusFile% msbuild /m "ll.vcxproj" /p:Configuration=Release /fl /flp:logfile=BlenderDeps_llvm.log msbuild /m "BlenderDependencies.sln" /p:Configuration=Release /fl /flp:logfile=BlenderDeps.log @@ -109,7 +109,7 @@ echo %DATE% %TIME% : Release Harvest done >> %StatusFile% cd %BUILD_DIR% mkdir %STAGING%\%BuildDir%%ARCH%D cd %Staging%\%BuildDir%%ARCH%D -cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DCMAKE_BUILD_TYPE=Debug -DBUILD_MODE=Debug -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/ %CMAKE_DEBUG_OPTIONS% +cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DDOWNLOAD_DIR=%BUILD_DIR%/downloads -DCMAKE_BUILD_TYPE=Debug -DBUILD_MODE=Debug -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/ %CMAKE_DEBUG_OPTIONS% echo %DATE% %TIME% : Debug Configuration done >> %StatusFile% msbuild /m "ll.vcxproj" /p:Configuration=Debug /fl /flp:logfile=BlenderDeps_llvm.log msbuild /m "BlenderDependencies.sln" /p:Configuration=Debug /fl /flp:logfile=BlenderDeps.log diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 9fe61515570..b4ca16bdb48 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -79,7 +79,6 @@ set(SRC_HEADERS kernel_compat_cpu.h kernel_compat_cuda.h kernel_compat_opencl.h - kernel_debug.h kernel_differential.h kernel_emission.h kernel_film.h diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index d139e28b013..82d3c153bf5 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -208,6 +208,7 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass) L->path_subsurface = make_float3(0.0f, 0.0f, 0.0f); L->path_scatter = make_float3(0.0f, 0.0f, 0.0f); + L->transparent = 0.0f; L->emission = make_float3(0.0f, 0.0f, 0.0f); L->background = make_float3(0.0f, 0.0f, 0.0f); L->ao = make_float3(0.0f, 0.0f, 0.0f); @@ -217,6 +218,7 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass) else #endif { + L->transparent = 0.0f; L->emission = make_float3(0.0f, 0.0f, 0.0f); } @@ -233,7 +235,14 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass) L->denoising_normal = make_float3(0.0f, 0.0f, 0.0f); L->denoising_albedo = make_float3(0.0f, 0.0f, 0.0f); L->denoising_depth = 0.0f; -#endif /* __DENOISING_FEATURES__ */ +#endif + +#ifdef __KERNEL_DEBUG__ + L->debug_data.num_bvh_traversed_nodes = 0; + L->debug_data.num_bvh_traversed_instances = 0; + L->debug_data.num_bvh_intersections = 0; + L->debug_data.num_ray_bounces = 0; +#endif } ccl_device_inline void path_radiance_bsdf_bounce(PathRadiance *L, ccl_addr_space float3 *throughput, diff --git a/intern/cycles/kernel/kernel_debug.h b/intern/cycles/kernel/kernel_debug.h deleted file mode 100644 index 5647bbae5b5..00000000000 --- a/intern/cycles/kernel/kernel_debug.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -ccl_device_inline void debug_data_init(DebugData *debug_data) -{ - debug_data->num_bvh_traversed_nodes = 0; - debug_data->num_bvh_traversed_instances = 0; - debug_data->num_bvh_intersections = 0; - debug_data->num_ray_bounces = 0; -} - -ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, - ccl_global float *buffer, - ccl_addr_space PathState *state, - DebugData *debug_data, - int sample) -{ - int flag = kernel_data.film.pass_flag; - if(flag & PASS_BVH_TRAVERSED_NODES) { - kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes, - sample, - debug_data->num_bvh_traversed_nodes); - } - if(flag & PASS_BVH_TRAVERSED_INSTANCES) { - kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances, - sample, - debug_data->num_bvh_traversed_instances); - } - if(flag & PASS_BVH_INTERSECTIONS) { - kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections, - sample, - debug_data->num_bvh_intersections); - } - if(flag & PASS_RAY_BOUNCES) { - kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces, - sample, - debug_data->num_ray_bounces); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 9cd7ffb181d..de65e8ef27b 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -194,6 +194,36 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, #endif /* __DENOISING_FEATURES__ */ } +#ifdef __KERNEL_DEBUG__ +ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, + ccl_global float *buffer, + PathRadiance *L, + int sample) +{ + int flag = kernel_data.film.pass_flag; + if(flag & PASS_BVH_TRAVERSED_NODES) { + kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes, + sample, + L->debug_data.num_bvh_traversed_nodes); + } + if(flag & PASS_BVH_TRAVERSED_INSTANCES) { + kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances, + sample, + L->debug_data.num_bvh_traversed_instances); + } + if(flag & PASS_BVH_INTERSECTIONS) { + kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections, + sample, + L->debug_data.num_bvh_intersections); + } + if(flag & PASS_RAY_BOUNCES) { + kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces, + sample, + L->debug_data.num_ray_bounces); + } +} +#endif /* __KERNEL_DEBUG__ */ + ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput) { @@ -334,10 +364,12 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global f } ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float *buffer, - int sample, PathRadiance *L, float alpha, bool is_shadow_catcher) + int sample, PathRadiance *L, bool is_shadow_catcher) { if(L) { float3 L_sum; + float alpha = 1.0f - L->transparent; + #ifdef __SHADOW_TRICKS__ if(is_shadow_catcher) { L_sum = path_radiance_sum_shadowcatcher(kg, L, &alpha); @@ -389,6 +421,11 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float * sample, L->denoising_depth); } #endif /* __DENOISING_FEATURES__ */ + + +#ifdef __KERNEL_DEBUG__ + kernel_write_debug_passes(kg, buffer, L, sample); +#endif } else { kernel_write_pass_float4(buffer, sample, make_float4(0.0f, 0.0f, 0.0f, 0.0f)); diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 2a801597649..c454228eab5 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -48,10 +48,6 @@ #include "kernel/kernel_path_volume.h" #include "kernel/kernel_path_subsurface.h" -#ifdef __KERNEL_DEBUG__ -# include "kernel/kernel_debug.h" -#endif - CCL_NAMESPACE_BEGIN ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, @@ -436,17 +432,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, #endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */ -ccl_device_inline float kernel_path_integrate(KernelGlobals *kg, - RNG *rng, - int sample, - Ray ray, - ccl_global float *buffer, - PathRadiance *L, - bool *is_shadow_catcher) +ccl_device_inline void kernel_path_integrate(KernelGlobals *kg, + RNG *rng, + int sample, + Ray ray, + ccl_global float *buffer, + PathRadiance *L, + bool *is_shadow_catcher) { /* initialize */ float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float L_transparent = 0.0f; path_radiance_init(L, kernel_data.film.use_light_pass); @@ -458,11 +453,6 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg, PathState state; path_state_init(kg, &emission_sd, &state, rng, sample, &ray); -#ifdef __KERNEL_DEBUG__ - DebugData debug_data; - debug_data_init(&debug_data); -#endif /* __KERNEL_DEBUG__ */ - #ifdef __SUBSURFACE__ SubsurfaceIndirectRays ss_indirect; kernel_path_subsurface_init_indirect(&ss_indirect); @@ -503,11 +493,11 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg, #ifdef __KERNEL_DEBUG__ if(state.flag & PATH_RAY_CAMERA) { - debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes; - debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; - debug_data.num_bvh_intersections += isect.num_intersections; + L->debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes; + L->debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; + L->debug_data.num_bvh_intersections += isect.num_intersections; } - debug_data.num_ray_bounces++; + L->debug_data.num_ray_bounces++; #endif /* __KERNEL_DEBUG__ */ #ifdef __LAMP_MIS__ @@ -622,7 +612,7 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg, if(!hit) { /* eval background shader if nothing hit */ if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) { - L_transparent += average(throughput); + L->transparent += average(throughput); #ifdef __PASSES__ if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) @@ -682,7 +672,7 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg, holdout_weight = shader_holdout_eval(kg, &sd); } /* any throughput is ok, should all be identical here */ - L_transparent += average(holdout_weight*throughput); + L->transparent += average(holdout_weight*throughput); } if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) { @@ -789,12 +779,6 @@ ccl_device_inline float kernel_path_integrate(KernelGlobals *kg, #ifdef __SHADOW_TRICKS__ *is_shadow_catcher = (state.flag & PATH_RAY_SHADOW_CATCHER) != 0; #endif /* __SHADOW_TRICKS__ */ - -#ifdef __KERNEL_DEBUG__ - kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample); -#endif /* __KERNEL_DEBUG__ */ - - return 1.0f - L_transparent; } ccl_device void kernel_path_trace(KernelGlobals *kg, @@ -819,14 +803,12 @@ ccl_device void kernel_path_trace(KernelGlobals *kg, bool is_shadow_catcher; if(ray.t != 0.0f) { - float alpha = kernel_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher); - kernel_write_result(kg, buffer, sample, &L, alpha, is_shadow_catcher); + kernel_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher); + kernel_write_result(kg, buffer, sample, &L, is_shadow_catcher); } else { - kernel_write_result(kg, buffer, sample, NULL, 0.0f, false); + kernel_write_result(kg, buffer, sample, NULL, false); } - - path_rng_end(kg, rng_state, rng); } #endif /* __SPLIT_KERNEL__ */ diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index 17facfa9a78..abc291bc7e3 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -269,17 +269,16 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, } #endif /* __SUBSURFACE__ */ -ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, - RNG *rng, - int sample, - Ray ray, - ccl_global float *buffer, - PathRadiance *L, - bool *is_shadow_catcher) +ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, + RNG *rng, + int sample, + Ray ray, + ccl_global float *buffer, + PathRadiance *L, + bool *is_shadow_catcher) { /* initialize */ float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float L_transparent = 0.0f; path_radiance_init(L, kernel_data.film.use_light_pass); @@ -291,11 +290,6 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, PathState state; path_state_init(kg, &emission_sd, &state, rng, sample, &ray); -#ifdef __KERNEL_DEBUG__ - DebugData debug_data; - debug_data_init(&debug_data); -#endif /* __KERNEL_DEBUG__ */ - /* Main Loop * Here we only handle transparency intersections from the camera ray. * Indirect bounces are handled in kernel_branched_path_surface_indirect_light(). @@ -326,10 +320,10 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, #endif /* __HAIR__ */ #ifdef __KERNEL_DEBUG__ - debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes; - debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; - debug_data.num_bvh_intersections += isect.num_intersections; - debug_data.num_ray_bounces++; + L->debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes; + L->debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; + L->debug_data.num_bvh_intersections += isect.num_intersections; + L->debug_data.num_ray_bounces++; #endif /* __KERNEL_DEBUG__ */ #ifdef __VOLUME__ @@ -482,7 +476,7 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, if(!hit) { /* eval background shader if nothing hit */ if(kernel_data.background.transparent) { - L_transparent += average(throughput); + L->transparent += average(throughput); #ifdef __PASSES__ if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) @@ -534,7 +528,7 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, holdout_weight = shader_holdout_eval(kg, &sd); } /* any throughput is ok, should all be identical here */ - L_transparent += average(holdout_weight*throughput); + L->transparent += average(holdout_weight*throughput); } if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) { break; @@ -637,12 +631,6 @@ ccl_device float kernel_branched_path_integrate(KernelGlobals *kg, #ifdef __SHADOW_TRICKS__ *is_shadow_catcher = (state.flag & PATH_RAY_SHADOW_CATCHER) != 0; #endif /* __SHADOW_TRICKS__ */ - -#ifdef __KERNEL_DEBUG__ - kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample); -#endif /* __KERNEL_DEBUG__ */ - - return 1.0f - L_transparent; } ccl_device void kernel_branched_path_trace(KernelGlobals *kg, @@ -667,14 +655,12 @@ ccl_device void kernel_branched_path_trace(KernelGlobals *kg, bool is_shadow_catcher; if(ray.t != 0.0f) { - float alpha = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher); - kernel_write_result(kg, buffer, sample, &L, alpha, is_shadow_catcher); + kernel_branched_path_integrate(kg, &rng, sample, ray, buffer, &L, &is_shadow_catcher); + kernel_write_result(kg, buffer, sample, &L, is_shadow_catcher); } else { - kernel_write_result(kg, buffer, sample, NULL, 0.0f, false); + kernel_write_result(kg, buffer, sample, NULL, false); } - - path_rng_end(kg, rng_state, rng); } #endif /* __SPLIT_KERNEL__ */ diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index e8a912ccc0b..073011ace31 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -18,6 +18,16 @@ CCL_NAMESPACE_BEGIN +/* Pseudo random numbers, uncomment this for debugging correlations. Only run + * this single threaded on a CPU for repeatable resutls. */ +//#define __DEBUG_CORRELATION__ + + +/* High Dimensional Sobol. + * + * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal + * to classic Van der Corput and Sobol sequences. */ + #ifdef __SOBOL__ /* Skip initial numbers that are not as well distributed, especially the @@ -26,47 +36,6 @@ CCL_NAMESPACE_BEGIN */ #define SOBOL_SKIP 64 -/* High Dimensional Sobol. */ - -/* Van der Corput radical inverse. */ -ccl_device uint van_der_corput(uint bits) -{ - bits = (bits << 16) | (bits >> 16); - bits = ((bits & 0x00ff00ff) << 8) | ((bits & 0xff00ff00) >> 8); - bits = ((bits & 0x0f0f0f0f) << 4) | ((bits & 0xf0f0f0f0) >> 4); - bits = ((bits & 0x33333333) << 2) | ((bits & 0xcccccccc) >> 2); - bits = ((bits & 0x55555555) << 1) | ((bits & 0xaaaaaaaa) >> 1); - return bits; -} - -/* Sobol radical inverse. */ -ccl_device uint sobol(uint i) -{ - uint r = 0; - for(uint v = 1U << 31; i; i >>= 1, v ^= v >> 1) { - if(i & 1) { - r ^= v; - } - } - return r; -} - -/* Inverse of sobol radical inverse. */ -ccl_device uint sobol_inverse(uint i) -{ - const uint msb = 1U << 31; - uint r = 0; - for(uint v = 1; i; i <<= 1, v ^= v << 1) { - if(i & msb) { - r ^= v; - } - } - return r; -} - -/* Multidimensional sobol with generator matrices - * dimension 0 and 1 are equal to van_der_corput() and sobol() respectively. - */ ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension) { uint result = 0; @@ -79,50 +48,31 @@ ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension) return result; } -/* Lookup index and x/y coordinate, assumes m is a power of two. */ -ccl_device uint sobol_lookup(const uint m, - const uint frame, - const uint ex, - const uint ey, - uint *x, uint *y) -{ - /* Shift is constant per frame. */ - const uint shift = frame << (m << 1); - const uint sobol_shift = sobol(shift); - /* Van der Corput is its own inverse. */ - const uint lower = van_der_corput(ex << (32 - m)); - /* Need to compensate for ey difference and shift. */ - const uint sobol_lower = sobol(lower); - const uint mask = ~-(1 << m) << (32 - m); /* Only m upper bits. */ - const uint delta = ((ey << (32 - m)) ^ sobol_lower ^ sobol_shift) & mask; - /* Only use m upper bits for the index (m is a power of two). */ - const uint sobol_result = delta | (delta >> m); - const uint upper = sobol_inverse(sobol_result); - const uint index = shift | upper | lower; - *x = van_der_corput(index); - *y = sobol_shift ^ sobol_result ^ sobol_lower; - return index; -} +#endif /* __SOBOL__ */ + ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension) { +#ifdef __DEBUG_CORRELATION__ + return (float)drand48(); +#endif + #ifdef __CMJ__ - if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) { +# ifdef __SOBOL__ + if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) +# endif + { /* Correlated multi-jitter. */ int p = *rng + dimension; return cmj_sample_1D(sample, num_samples, p); } #endif -#ifdef __SOBOL_FULL_SCREEN__ - uint result = sobol_dimension(kg, *rng, dimension); - float r = (float)result * (1.0f/(float)0xFFFFFFFF); - return r; -#else - /* Compute sobol sequence value using direction vectors. */ +#ifdef __SOBOL__ + /* Sobol sequence value using direction vectors. */ uint result = sobol_dimension(kg, sample + SOBOL_SKIP, dimension); float r = (float)result * (1.0f/(float)0xFFFFFFFF); @@ -145,19 +95,29 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, int dimension, float *fx, float *fy) { +#ifdef __DEBUG_CORRELATION__ + *fx = (float)drand48(); + *fy = (float)drand48(); + return; +#endif + #ifdef __CMJ__ - if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) { +# ifdef __SOBOL__ + if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) +# endif + { /* Correlated multi-jitter. */ int p = *rng + dimension; cmj_sample_2D(sample, num_samples, p, fx, fy); + return; } - else #endif - { - /* Sobol. */ - *fx = path_rng_1D(kg, rng, sample, num_samples, dimension); - *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1); - } + +#ifdef __SOBOL__ + /* Sobol. */ + *fx = path_rng_1D(kg, rng, sample, num_samples, dimension); + *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1); +#endif } ccl_device_inline void path_rng_init(KernelGlobals *kg, @@ -167,81 +127,13 @@ ccl_device_inline void path_rng_init(KernelGlobals *kg, int x, int y, float *fx, float *fy) { -#ifdef __SOBOL_FULL_SCREEN__ - uint px, py; - uint bits = 16; /* limits us to 65536x65536 and 65536 samples */ - uint size = 1 << bits; - uint frame = sample; - - *rng = sobol_lookup(bits, frame, x, y, &px, &py); - - *rng ^= kernel_data.integrator.seed; - - if(sample == 0) { - *fx = 0.5f; - *fy = 0.5f; - } - else { - *fx = size * (float)px * (1.0f/(float)0xFFFFFFFF) - x; - *fy = size * (float)py * (1.0f/(float)0xFFFFFFFF) - y; - } -#else + /* load state */ *rng = *rng_state; - *rng ^= kernel_data.integrator.seed; - if(sample == 0) { - *fx = 0.5f; - *fy = 0.5f; - } - else { - path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U, fx, fy); - } +#ifdef __DEBUG_CORRELATION__ + srand48(*rng + sample); #endif -} - -ccl_device void path_rng_end(KernelGlobals *kg, - ccl_global uint *rng_state, - RNG rng) -{ - /* nothing to do */ -} - -#else /* __SOBOL__ */ - -/* Linear Congruential Generator */ - -ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, - RNG *rng, - int sample, int num_samples, - int dimension) -{ - /* implicit mod 2^32 */ - *rng = (1103515245*(*rng) + 12345); - return (float)*rng * (1.0f/(float)0xFFFFFFFF); -} - -ccl_device_inline void path_rng_2D(KernelGlobals *kg, - RNG *rng, - int sample, int num_samples, - int dimension, - float *fx, float *fy) -{ - *fx = path_rng_1D(kg, rng, sample, num_samples, dimension); - *fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1); -} - -ccl_device void path_rng_init(KernelGlobals *kg, - ccl_global uint *rng_state, - int sample, int num_samples, - RNG *rng, - int x, int y, - float *fx, float *fy) -{ - /* load state */ - *rng = *rng_state; - - *rng ^= kernel_data.integrator.seed; if(sample == 0) { *fx = 0.5f; @@ -252,16 +144,6 @@ ccl_device void path_rng_init(KernelGlobals *kg, } } -ccl_device void path_rng_end(KernelGlobals *kg, - ccl_global uint *rng_state, - RNG rng) -{ - /* store state for next sample */ - *rng_state = rng; -} - -#endif /* __SOBOL__ */ - /* Linear Congruential Generator */ ccl_device uint lcg_step_uint(uint *rng) diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index d5f720778ff..f1b82eee352 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -468,11 +468,24 @@ typedef enum DenoiseFlag { DENOISING_CLEAN_ALL_PASSES = (1 << 8)-1, } DenoiseFlag; +#ifdef __KERNEL_DEBUG__ +/* NOTE: This is a runtime-only struct, alignment is not + * really important here. + */ +typedef struct DebugData { + int num_bvh_traversed_nodes; + int num_bvh_traversed_instances; + int num_bvh_intersections; + int num_ray_bounces; +} DebugData; +#endif + typedef ccl_addr_space struct PathRadiance { #ifdef __PASSES__ int use_light_pass; #endif + float transparent; float3 emission; #ifdef __PASSES__ float3 background; @@ -538,6 +551,10 @@ typedef ccl_addr_space struct PathRadiance { float3 denoising_albedo; float denoising_depth; #endif /* __DENOISING_FEATURES__ */ + +#ifdef __KERNEL_DEBUG__ + DebugData debug_data; +#endif /* __KERNEL_DEBUG__ */ } PathRadiance; typedef struct BsdfEval { @@ -1345,18 +1362,6 @@ typedef struct KernelData { } KernelData; static_assert_align(KernelData, 16); -#ifdef __KERNEL_DEBUG__ -/* NOTE: This is a runtime-only struct, alignment is not - * really important here. - */ -typedef ccl_addr_space struct DebugData { - int num_bvh_traversed_nodes; - int num_bvh_traversed_instances; - int num_bvh_intersections; - int num_ray_bounces; -} DebugData; -#endif - /* Declarations required for split kernel */ /* Macro for queues */ diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index 4c1fdd2d69c..6aa0d6948d0 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -79,14 +79,10 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, int stride = kernel_split_params.stride; ccl_global char *ray_state = kernel_split_state.ray_state; -#ifdef __KERNEL_DEBUG__ - DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; -#endif ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index]; RNG rng = kernel_split_state.rng[ray_index]; ccl_global float *buffer = kernel_split_params.buffer; @@ -111,15 +107,9 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { -#ifdef __KERNEL_DEBUG__ - kernel_write_debug_passes(kg, buffer, state, debug_data, sample); -#endif - /* accumulate result in output buffer */ bool is_shadow_catcher = (state->flag & PATH_RAY_SHADOW_CATCHER); - kernel_write_result(kg, buffer, sample, L, 1.0f - (*L_transparent), is_shadow_catcher); - - path_rng_end(kg, rng_state, rng); + kernel_write_result(kg, buffer, sample, L, is_shadow_catcher); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); } @@ -148,19 +138,15 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, &rng, ray); if(ray->t != 0.0f) { - /* Initialize throughput, L_transparent, Ray, PathState; + /* Initialize throughput, path radiance, Ray, PathState; * These rays proceed with path-iteration. */ *throughput = make_float3(1.0f, 1.0f, 1.0f); - *L_transparent = 0.0f; path_radiance_init(L, kernel_data.film.use_light_pass); path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, &rng, sample, ray); #ifdef __SUBSURFACE__ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); #endif -#ifdef __KERNEL_DEBUG__ - debug_data_init(debug_data); -#endif ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); enqueue_flag = 1; } @@ -169,7 +155,6 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* Accumulate result in output buffer. */ kernel_write_pass_float4(buffer, sample, L_rad); - path_rng_end(kg, rng_state, rng); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); } diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index 6f3297de342..2c042dfde6f 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -124,14 +124,25 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( /* zero the tiles pixels and initialize rng_state if this is the first sample */ if(start_sample == 0) { - parallel_for(kg, i, sw * sh * kernel_data.film.pass_stride) { - int pixel = i / kernel_data.film.pass_stride; - int pass = i % kernel_data.film.pass_stride; + int pass_stride = kernel_data.film.pass_stride; + +#ifdef __KERNEL_CPU__ + for(int y = sy; y < sy + sh; y++) { + int index = offset + y * stride; + memset(buffer + (sx + index) * pass_stride, 0, sizeof(float) * pass_stride * sw); + for(int x = sx; x < sx + sw; x++) { + rng_state[index + x] = hash_int_2d(x, y); + } + } +#else + parallel_for(kg, i, sw * sh * pass_stride) { + int pixel = i / pass_stride; + int pass = i % pass_stride; int x = sx + pixel % sw; int y = sy + pixel / sw; - int index = (offset + x + y*stride) * kernel_data.film.pass_stride + pass; + int index = (offset + x + y*stride) * pass_stride + pass; *(buffer + index) = 0.0f; } @@ -143,6 +154,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( int index = (offset + x + y*stride); *(rng_state + index) = hash_int_2d(x, y); } +#endif } #endif /* KERENL_STUB */ diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index adcb1bdc377..95f57fbff57 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -162,7 +162,8 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( holdout_weight = shader_holdout_eval(kg, sd); } /* any throughput is ok, should all be identical here */ - kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput); + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + L->transparent += average(holdout_weight*throughput); } if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { kernel_split_path_end(kg, ray_index); diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h index f0ebb90f60a..04d5769ef0d 100644 --- a/intern/cycles/kernel/split/kernel_indirect_background.h +++ b/intern/cycles/kernel/split/kernel_indirect_background.h @@ -54,12 +54,11 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg) PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index]; if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { /* eval background shader if nothing hit */ if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) { - *L_transparent = (*L_transparent) + average((*throughput)); + L->transparent += average((*throughput)); #ifdef __PASSES__ if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) #endif diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h index a7ecde7c80d..8315b0b2bd3 100644 --- a/intern/cycles/kernel/split/kernel_path_init.h +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -71,11 +71,10 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { &kernel_split_state.ray[ray_index]); if(kernel_split_state.ray[ray_index].t != 0.0f) { - /* Initialize throughput, L_transparent, Ray, PathState; + /* Initialize throughput, path radiance, Ray, PathState; * These rays proceed with path-iteration. */ kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); - kernel_split_state.L_transparent[ray_index] = 0.0f; path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass); path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], @@ -86,17 +85,12 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { #ifdef __SUBSURFACE__ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); #endif - -#ifdef __KERNEL_DEBUG__ - debug_data_init(&kernel_split_state.debug_data[ray_index]); -#endif } else { /* These rays do not participate in path-iteration. */ float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* Accumulate result in output buffer. */ kernel_write_pass_float4(buffer, my_sample, L_rad); - path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]); ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); } kernel_split_state.rng[ray_index] = rng; diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index 45984ca509b..5c6d90eecc4 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -59,9 +59,6 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg) return; } -#ifdef __KERNEL_DEBUG__ - DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; -#endif Intersection isect; PathState state = kernel_split_state.path_state[ray_index]; Ray ray = kernel_split_state.ray[ray_index]; @@ -97,12 +94,14 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg) kernel_split_state.isect[ray_index] = isect; #ifdef __KERNEL_DEBUG__ + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + if(state.flag & PATH_RAY_CAMERA) { - debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes; - debug_data->num_bvh_traversed_instances += isect.num_traversed_instances; - debug_data->num_bvh_intersections += isect.num_intersections; + L->debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes; + L->debug_data.num_bvh_traversed_instances += isect.num_traversed_instances; + L->debug_data.num_bvh_intersections += isect.num_intersections; } - debug_data->num_ray_bounces++; + L->debug_data.num_ray_bounces++; #endif if(!hit) { diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 4bb2f0d3d80..4f32c68d630 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -56,14 +56,6 @@ typedef struct SplitParams { /* SPLIT_DATA_ENTRY(type, name, num) */ -#if defined(WITH_CYCLES_DEBUG) || defined(__KERNEL_DEBUG__) -/* DebugData memory */ -# define SPLIT_DATA_DEBUG_ENTRIES \ - SPLIT_DATA_ENTRY(DebugData, debug_data, 1) -#else -# define SPLIT_DATA_DEBUG_ENTRIES -#endif /* DEBUG */ - #ifdef __BRANCHED_PATH__ typedef ccl_global struct SplitBranchedState { @@ -124,7 +116,6 @@ typedef ccl_global struct SplitBranchedState { #define SPLIT_DATA_ENTRIES \ SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ - SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ @@ -139,13 +130,11 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_DEBUG_ENTRIES \ /* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */ #define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \ SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ - SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ @@ -158,7 +147,6 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_DEBUG_ENTRIES \ /* struct that holds pointers to data in the shared state buffer */ typedef struct SplitData { diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 733b97e1432..aabca6c81fc 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -28,9 +28,9 @@ #endif #include "util/util_defines.h" -#include "util/util_optimization.h" #ifndef __KERNEL_GPU__ +# include "util/util_optimization.h" # include "util/util_simd.h" #endif |