diff options
-rw-r--r-- | intern/cycles/blender/addon/properties.py | 12 | ||||
-rw-r--r-- | intern/cycles/blender/addon/ui.py | 3 | ||||
-rw-r--r-- | intern/cycles/blender/blender_sync.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_jitter.h | 181 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_montecarlo.h | 32 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 175 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_random.h | 72 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 19 | ||||
-rw-r--r-- | intern/cycles/render/integrator.cpp | 7 | ||||
-rw-r--r-- | intern/cycles/render/integrator.h | 5 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 13 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 4 |
13 files changed, 417 insertions, 110 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index a94178f1b42..7ea84659764 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -114,6 +114,11 @@ enum_use_layer_samples = ( ('IGNORE', "Ignore", "Ignore per render layer number of samples"), ) +enum_sampling_pattern = ( + ('SOBOL', "Sobol", "Use Sobol random sampling pattern"), + ('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"), + ) + class CyclesRenderSettings(bpy.types.PropertyGroup): @classmethod @@ -219,6 +224,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default=1, ) + cls.sampling_pattern = EnumProperty( + name="Sampling Pattern", + description="Random sampling pattern used by the integrator", + items=enum_sampling_pattern, + default='SOBOL', + ) + cls.use_layer_samples = EnumProperty( name="Layer Samples", description="How to use per render layer sample settings", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index f9c516d1963..b35bdf8f511 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -85,6 +85,9 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel): sub.prop(cscene, "mesh_light_samples", text="Mesh Light") sub.prop(cscene, "subsurface_samples", text="Subsurface") + if cscene.feature_set == 'EXPERIMENTAL': + layout.row().prop(cscene, "sampling_pattern", text="Pattern") + for rl in scene.render.layers: if rl.samples > 0: layout.separator() diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index b324385134b..ef9ce85ddf8 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -199,6 +199,9 @@ void BlenderSync::sync_integrator() integrator->subsurface_samples = get_int(cscene, "subsurface_samples"); integrator->progressive = get_boolean(cscene, "progressive"); + if(experimental) + integrator->sampling_pattern = (SamplingPattern)RNA_enum_get(&cscene, "sampling_pattern"); + if(integrator->modified(previntegrator)) integrator->tag_update(scene); } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 41048c7b379..912a1321d67 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -33,6 +33,7 @@ set(SRC_HEADERS kernel_emission.h kernel_film.h kernel_globals.h + kernel_jitter.h kernel_light.h kernel_math.h kernel_montecarlo.h diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h new file mode 100644 index 00000000000..5ea44cd0cad --- /dev/null +++ b/intern/cycles/kernel/kernel_jitter.h @@ -0,0 +1,181 @@ +/* + * Copyright 2013, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +CCL_NAMESPACE_BEGIN + +/* "Correlated Multi-Jittered Sampling" + * Andrew Kensler, Pixar Technical Memo 13-01, 2013 */ + +/* todo: find good value, suggested 64 gives pattern on cornell box ceiling */ +#define CMJ_RANDOM_OFFSET_LIMIT 4096 + +__device_inline bool cmj_is_pow2(int i) +{ + return (i & (i - 1)) == 0; +} + +__device_inline int cmj_fast_mod_pow2(int a, int b) +{ + return (a & (b - 1)); +} + +/* a must be > 0 and b must be > 1 */ +__device_inline int cmj_fast_div_pow2(int a, int b) +{ +#ifdef __KERNEL_SSE2__ + return a >> __builtin_ctz(b); +#else + return a/b; +#endif +} + +__device_inline uint cmj_w_mask(uint w) +{ +#ifdef __KERNEL_SSE2__ + return ((1 << (32 - __builtin_clz(w))) - 1); +#else + w |= w >> 1; + w |= w >> 2; + w |= w >> 4; + w |= w >> 8; + w |= w >> 16; + + return w; +#endif +} + +__device_inline uint cmj_permute(uint i, uint l, uint p) +{ + uint w = l - 1; + + if((l & w) == 0) { + /* l is a power of two (fast) */ + i ^= p; + i *= 0xe170893d; + i ^= p >> 16; + i ^= (i & w) >> 4; + i ^= p >> 8; + i *= 0x0929eb3f; + i ^= p >> 23; + i ^= (i & w) >> 1; + i *= 1 | p >> 27; + i *= 0x6935fa69; + i ^= (i & w) >> 11; + i *= 0x74dcb303; + i ^= (i & w) >> 2; + i *= 0x9e501cc3; + i ^= (i & w) >> 2; + i *= 0xc860a3df; + i &= w; + i ^= i >> 5; + + return (i + p) & w; + } + else { + /* l is not a power of two (slow) */ + w = cmj_w_mask(w); + + do { + i ^= p; + i *= 0xe170893d; + i ^= p >> 16; + i ^= (i & w) >> 4; + i ^= p >> 8; + i *= 0x0929eb3f; + i ^= p >> 23; + i ^= (i & w) >> 1; + i *= 1 | p >> 27; + i *= 0x6935fa69; + i ^= (i & w) >> 11; + i *= 0x74dcb303; + i ^= (i & w) >> 2; + i *= 0x9e501cc3; + i ^= (i & w) >> 2; + i *= 0xc860a3df; + i &= w; + i ^= i >> 5; + } while (i >= l); + + return (i + p) % l; + } +} + +__device_inline uint cmj_hash(uint i, uint p) +{ + i ^= p; + i ^= i >> 17; + i ^= i >> 10; + i *= 0xb36534e5; + i ^= i >> 12; + i ^= i >> 21; + i *= 0x93fc4795; + i ^= 0xdf6e307f; + i ^= i >> 17; + i *= 1 | p >> 18; + + return i; +} + +__device_inline float cmj_randfloat(uint i, uint p) +{ + return cmj_hash(i, p) * (1.0f / 4294967808.0f); +} + +#ifdef __CMJ__ +__device_noinline float cmj_sample_1D(int s, int N, int p) +{ + uint x = cmj_permute(s, N, p * 0x68bc21eb); + float jx = cmj_randfloat(s, p * 0x967a889b); + + float invN = 1.0f/N; + return (x + jx)*invN; +} + +__device_noinline float2 cmj_sample_2D(int s, int N, int p) +{ + int m = float_to_int(sqrtf(N)); + int n = (N + m - 1)/m; + float invN = 1.0f/N; + float invm = 1.0f/m; + float invn = 1.0f/n; + + s = cmj_permute(s, N, p * 0x51633e2d); + + int sdivm, smodm; + + if(cmj_is_pow2(m)) { + sdivm = cmj_fast_div_pow2(s, m); + smodm = cmj_fast_mod_pow2(s, m); + } + else { + sdivm = float_to_int(s * invm); + smodm = s - sdivm*m; + } + + uint sx = cmj_permute(smodm, m, p * 0x68bc21eb); + uint sy = cmj_permute(sdivm, n, p * 0x02e5be93); + + float jx = cmj_randfloat(s, p * 0x967a889b); + float jy = cmj_randfloat(s, p * 0x368cc8b7); + + return make_float2((sx + (sy + jx)*invn)*invm, (s + jy)*invN); +} +#endif + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h index 2ae95084162..f608429da36 100644 --- a/intern/cycles/kernel/kernel_montecarlo.h +++ b/intern/cycles/kernel/kernel_montecarlo.h @@ -36,36 +36,12 @@ CCL_NAMESPACE_BEGIN /// Given values x and y on [0,1], convert them in place to values on -/// [-1,1] uniformly distributed over a unit sphere. This code is -/// derived from Peter Shirley, "Realistic Ray Tracing", p. 103. +/// [-1,1] uniformly distributed over a unit sphere. __device void to_unit_disk(float *x, float *y) { - float r, phi; - float a = 2.0f * (*x) - 1.0f; - float b = 2.0f * (*y) - 1.0f; - if(a > -b) { - if(a > b) { - r = a; - phi = M_PI_4_F *(b/a); - } - else { - r = b; - phi = M_PI_4_F *(2.0f - a/b); - } - } - else { - if(a < b) { - r = -a; - phi = M_PI_4_F *(4.0f + b/a); - } - else { - r = -b; - if(b != 0.0f) - phi = M_PI_4_F *(6.0f - a/b); - else - phi = 0.0f; - } - } + float phi = 2.0f * M_PI_F * (*x); + float r = sqrtf(*y); + *x = r * cosf(phi); *y = r * sinf(phi); } diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 5915dfed08b..866024ba303 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -233,7 +233,7 @@ __device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ra return result; } -__device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer) +__device float4 kernel_path_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer) { /* initialize */ PathRadiance L; @@ -249,6 +249,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, #endif PathState state; int rng_offset = PRNG_BASE_NUM; + int num_samples = kernel_data.integrator.aa_samples; path_state_init(&state); @@ -270,7 +271,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, } extmax = kernel_data.curve_kernel_data.maximum_width; - lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d); + lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d); } bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax); @@ -292,7 +293,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, light_ray.dP = ray.dP; /* intersect with lamp */ - float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT); + float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT); float3 emission; if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission)) @@ -323,7 +324,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, /* setup shading */ ShaderData sd; shader_setup_from_ray(kg, &sd, &isect, &ray); - float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF); + float rbsdf = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF); shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN); /* holdout */ @@ -373,12 +374,18 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, * mainly due to the mixed in MIS that we use. gives too many unneeded * shader evaluations, only need emission if we are going to terminate */ float probability = path_state_terminate_probability(kg, &state, throughput); - float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE); - if(terminate >= probability) + if(probability == 0.0f) { break; + } + else if(probability != 1.0f) { + float terminate = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_TERMINATE); + + if(terminate >= probability) + break; - throughput /= probability; + throughput /= probability; + } #ifdef __SUBSURFACE__ /* bssrdf scatter to a different location on the same object, replacing @@ -392,7 +399,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, /* do bssrdf scatter step if we picked a bssrdf closure */ if(sc) { - uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb); + uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb); subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); } } @@ -402,8 +409,9 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { /* todo: solve correlation */ - float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U); - float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V); + float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U); + float bsdf_u = bsdf_uv.x; + float bsdf_v = bsdf_uv.y; float ao_factor = kernel_data.background.ao_factor; float3 ao_N; @@ -436,10 +444,15 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, if(kernel_data.integrator.use_direct_light) { /* sample illumination from lights to find path contribution */ if(sd.flag & SD_BSDF_HAS_EVAL) { - float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT); - float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F); - float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U); - float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V); + float light_t = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT); +#ifdef __MULTI_CLOSURE__ + float light_o = 0.0f; +#else + float light_o = path_rng_1D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_F); +#endif + float2 light_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_LIGHT_U); + float light_u = light_uv.x; + float light_v = light_uv.y; Ray light_ray; BsdfEval L_light; @@ -471,8 +484,9 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, BsdfEval bsdf_eval; float3 bsdf_omega_in; differential3 bsdf_domega_in; - float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U); - float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V); + float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U); + float bsdf_u = bsdf_uv.x; + float bsdf_v = bsdf_uv.y; int label; label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval, @@ -524,8 +538,8 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, #ifdef __NON_PROGRESSIVE__ -__device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer, - float3 throughput, float num_samples_adjust, +__device void kernel_path_indirect(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer, + float3 throughput, int num_samples, int num_total_samples, float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L) { #ifdef __LAMP_MIS__ @@ -557,7 +571,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray light_ray.dP = ray.dP; /* intersect with lamp */ - float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT); + float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT); float3 emission; if(indirect_lamp_emission(kg, &light_ray, state.flag, ray_pdf, light_t, &emission)) @@ -578,7 +592,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray /* setup shading */ ShaderData sd; shader_setup_from_ray(kg, &sd, &isect, &ray); - float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF); + float rbsdf = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF); shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT); shader_merge_closures(kg, &sd); @@ -604,13 +618,19 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray /* path termination. this is a strange place to put the termination, it's * mainly due to the mixed in MIS that we use. gives too many unneeded * shader evaluations, only need emission if we are going to terminate */ - float probability = path_state_terminate_probability(kg, &state, throughput*num_samples_adjust); - float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE); + float probability = path_state_terminate_probability(kg, &state, throughput*num_samples); - if(terminate >= probability) + if(probability == 0.0f) { break; + } + else if(probability != 1.0f) { + float terminate = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_TERMINATE); + + if(terminate >= probability) + break; - throughput /= probability; + throughput /= probability; + } #ifdef __SUBSURFACE__ /* bssrdf scatter to a different location on the same object, replacing @@ -624,7 +644,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray /* do bssrdf scatter step if we picked a bssrdf closure */ if(sc) { - uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb); + uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb); subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); } } @@ -634,8 +654,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { /* todo: solve correlation */ - float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U); - float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V); + float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U); + float bsdf_u = bsdf_uv.x; + float bsdf_v = bsdf_uv.y; float ao_factor = kernel_data.background.ao_factor; float3 ao_N; @@ -668,10 +689,15 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray if(kernel_data.integrator.use_direct_light) { /* sample illumination from lights to find path contribution */ if(sd.flag & SD_BSDF_HAS_EVAL) { - float light_t = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT); - float light_o = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_F); - float light_u = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_U); - float light_v = path_rng(kg, rng, sample, rng_offset + PRNG_LIGHT_V); + float light_t = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT); +#ifdef __MULTI_CLOSURE__ + float light_o = 0.0f; +#else + float light_o = path_rng_1D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_F); +#endif + float2 light_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_LIGHT_U); + float light_u = light_uv.x; + float light_v = light_uv.y; Ray light_ray; BsdfEval L_light; @@ -704,8 +730,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray BsdfEval bsdf_eval; float3 bsdf_omega_in; differential3 bsdf_domega_in; - float bsdf_u = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_U); - float bsdf_v = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF_V); + float2 bsdf_uv = path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U); + float bsdf_u = bsdf_uv.x; + float bsdf_v = bsdf_uv.y; int label; label = shader_bsdf_sample(kg, &sd, bsdf_u, bsdf_v, &bsdf_eval, @@ -740,15 +767,17 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray } } -__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG *rng, int sample, +__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG rng, int sample, ShaderData *sd, float3 throughput, float num_samples_adjust, float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L, __global float *buffer) { + int aa_samples = kernel_data.integrator.aa_samples; + #ifdef __AO__ /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) { - int num_samples = ceil(kernel_data.integrator.ao_samples*num_samples_adjust); + int num_samples = ceil_to_int(kernel_data.integrator.ao_samples*num_samples_adjust); float num_samples_inv = num_samples_adjust/num_samples; float ao_factor = kernel_data.background.ao_factor; float3 ao_N; @@ -756,8 +785,9 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R for(int j = 0; j < num_samples; j++) { /* todo: solve correlation */ - float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U); - float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V); + float2 bsdf_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U); + float bsdf_u = bsdf_uv.x; + float bsdf_v = bsdf_uv.y; float3 ao_D; float ao_pdf; @@ -798,15 +828,17 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R /* lamp sampling */ for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { - int num_samples = ceil(num_samples_adjust*light_select_num_samples(kg, i)); + int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i)); float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); + RNG lamp_rng = cmj_hash(rng, i); if(kernel_data.integrator.pdf_triangles != 0.0f) num_samples_inv *= 0.5f; for(int j = 0; j < num_samples; j++) { - float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U); - float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V); + float2 light_uv = path_rng_2D(kg, lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U); + float light_u = light_uv.x; + float light_v = light_uv.y; if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) { /* trace shadow ray */ @@ -822,16 +854,17 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R /* mesh light sampling */ if(kernel_data.integrator.pdf_triangles != 0.0f) { - int num_samples = ceil(num_samples_adjust*kernel_data.integrator.mesh_light_samples); + int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples); float num_samples_inv = num_samples_adjust/num_samples; if(kernel_data.integrator.num_all_lights) num_samples_inv *= 0.5f; for(int j = 0; j < num_samples; j++) { - float light_t = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT); - float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U); - float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V); + float light_t = path_rng_1D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT); + float2 light_uv = path_rng_2D(kg, rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U); + float light_u = light_uv.x; + float light_v = light_uv.y; /* only sample triangle lights */ if(kernel_data.integrator.num_all_lights) @@ -869,9 +902,10 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R else num_samples = kernel_data.integrator.transmission_samples; - num_samples = ceil(num_samples_adjust*num_samples); + num_samples = ceil_to_int(num_samples_adjust*num_samples); float num_samples_inv = num_samples_adjust/num_samples; + RNG bsdf_rng = cmj_hash(rng, i); for(int j = 0; j < num_samples; j++) { /* sample BSDF */ @@ -879,8 +913,9 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R BsdfEval bsdf_eval; float3 bsdf_omega_in; differential3 bsdf_domega_in; - float bsdf_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_U); - float bsdf_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_BSDF_V); + float2 bsdf_uv = path_rng_2D(kg, bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U); + float bsdf_u = bsdf_uv.x; + float bsdf_v = bsdf_uv.y; int label; label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, @@ -918,7 +953,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R #endif kernel_path_indirect(kg, rng, sample*num_samples + j, bsdf_ray, buffer, - tp*num_samples_inv, num_samples, + tp*num_samples_inv, num_samples, aa_samples*num_samples, min_ray_pdf, bsdf_pdf, ps, rng_offset+PRNG_BOUNCE_NUM, L); /* for render passes, sum and reset indirect light pass variables @@ -929,7 +964,7 @@ __device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, R } } -__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer) +__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer) { /* initialize */ PathRadiance L; @@ -941,6 +976,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam float ray_pdf = 0.0f; PathState state; int rng_offset = PRNG_BASE_NUM; + int aa_samples = kernel_data.integrator.aa_samples; path_state_init(&state); @@ -961,7 +997,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam } extmax = kernel_data.curve_kernel_data.maximum_width; - lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d); + lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d); } if(!scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax)) { @@ -990,8 +1026,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam /* setup shading */ ShaderData sd; shader_setup_from_ray(kg, &sd, &isect, &ray); - float rbsdf = path_rng(kg, rng, sample, rng_offset + PRNG_BSDF); - shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN); + shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN); shader_merge_closures(kg, &sd); /* holdout */ @@ -1031,12 +1066,18 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam * mainly due to the mixed in MIS that we use. gives too many unneeded * shader evaluations, only need emission if we are going to terminate */ float probability = path_state_terminate_probability(kg, &state, throughput); - float terminate = path_rng(kg, rng, sample, rng_offset + PRNG_TERMINATE); - if(terminate >= probability) + if(probability == 0.0f) { break; + } + else if(probability != 1.0f) { + float terminate = path_rng_1D(kg, rng, sample, aa_samples, rng_offset + PRNG_TERMINATE); - throughput /= probability; + if(terminate >= probability) + break; + + throughput /= probability; + } } #ifdef __SUBSURFACE__ @@ -1049,7 +1090,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam continue; /* set up random number generator */ - uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb); + uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb); int num_samples = kernel_data.integrator.subsurface_samples; float num_samples_inv = 1.0f/num_samples; @@ -1112,22 +1153,26 @@ __device void kernel_path_trace(KernelGlobals *kg, float filter_u; float filter_v; + int num_samples = kernel_data.integrator.aa_samples; - path_rng_init(kg, rng_state, sample, &rng, x, y, &filter_u, &filter_v); + path_rng_init(kg, rng_state, sample, num_samples, &rng, x, y, &filter_u, &filter_v); /* sample camera ray */ Ray ray; - + float lens_u = 0.0f, lens_v = 0.0f; - float time = 0.0f; - + if(kernel_data.cam.aperturesize > 0.0f) { - lens_u = path_rng(kg, &rng, sample, PRNG_LENS_U); - lens_v = path_rng(kg, &rng, sample, PRNG_LENS_V); + float2 lens_uv = path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U); + lens_u = lens_uv.x; + lens_v = lens_uv.y; } + + float time = 0.0f; + #ifdef __CAMERA_MOTION__ if(kernel_data.cam.shuttertime != -1.0f) - time = path_rng(kg, &rng, sample, PRNG_TIME); + time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME); #endif camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, &ray); @@ -1139,10 +1184,10 @@ __device void kernel_path_trace(KernelGlobals *kg, #ifdef __NON_PROGRESSIVE__ if(kernel_data.integrator.progressive) #endif - L = kernel_path_progressive(kg, &rng, sample, ray, buffer); + L = kernel_path_progressive(kg, rng, sample, ray, buffer); #ifdef __NON_PROGRESSIVE__ else - L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer); + L = kernel_path_non_progressive(kg, rng, sample, ray, buffer); #endif } else diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index fc33e226051..ecf80b817d4 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -16,6 +16,8 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include "kernel_jitter.h" + CCL_NAMESPACE_BEGIN typedef uint RNG; @@ -100,10 +102,10 @@ __device uint sobol_lookup(const uint m, const uint frame, const uint ex, const return index; } -__device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension) +__device_inline float path_rng(KernelGlobals *kg, RNG rng, int sample, int dimension) { #ifdef __SOBOL_FULL_SCREEN__ - uint result = sobol_dimension(kg, *rng, dimension); + uint result = sobol_dimension(kg, rng, dimension); float r = (float)result * (1.0f/(float)0xFFFFFFFF); return r; #else @@ -115,15 +117,44 @@ __device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dime float shift; if(dimension & 1) - shift = (*rng >> 16)/((float)0xFFFF); + shift = (rng >> 16)*(1.0f/(float)0xFFFF); else - shift = (*rng & 0xFFFF)/((float)0xFFFF); + shift = (rng & 0xFFFF)*(1.0f/(float)0xFFFF); return r + shift - floorf(r + shift); #endif } -__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy) +__device_inline float path_rng_1D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension) +{ +#ifdef __CMJ__ + if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) { + /* correlated multi-jittered */ + int p = rng + dimension; + return cmj_sample_1D(sample, num_samples, p); + } +#endif + + /* sobol */ + return path_rng(kg, rng, sample, dimension); +} + +__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension) +{ +#ifdef __CMJ__ + if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) { + /* correlated multi-jittered */ + int p = rng + dimension; + return cmj_sample_2D(sample, num_samples, p); + } +#endif + + /* sobol */ + return make_float2(path_rng(kg, rng, sample, dimension), + path_rng(kg, rng, sample, dimension + 1)); +} + +__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy) { #ifdef __SOBOL_FULL_SCREEN__ uint px, py; @@ -153,8 +184,10 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, *fy = 0.5f; } else { - *fx = path_rng(kg, rng, sample, PRNG_FILTER_U); - *fy = path_rng(kg, rng, sample, PRNG_FILTER_V); + float2 fxy = path_rng_2D(kg, *rng, sample, num_samples, PRNG_FILTER_U); + + *fx = fxy.x; + *fy = fxy.y; } #endif } @@ -168,14 +201,25 @@ __device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng) /* Linear Congruential Generator */ -__device float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension) +__device float path_rng(KernelGlobals *kg, RNG& rng, int sample, int dimension) { /* implicit mod 2^32 */ - *rng = (1103515245*(*rng) + 12345); - return (float)*rng * (1.0f/(float)0xFFFFFFFF); + rng = (1103515245*(rng) + 12345); + return (float)rng * (1.0f/(float)0xFFFFFFFF); } -__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy) +__device_inline float path_rng_1D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension) +{ + return path_rng(kg, rng, sample, dimension); +} + +__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG& rng, int sample, int num_samples, int dimension) +{ + return make_float2(path_rng(kg, rng, sample, dimension), + path_rng(kg, rng, sample, dimension + 1)); +} + +__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy) { /* load state */ *rng = *rng_state; @@ -187,8 +231,10 @@ __device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sam *fy = 0.5f; } else { - *fx = path_rng(kg, rng, sample, PRNG_FILTER_U); - *fy = path_rng(kg, rng, sample, PRNG_FILTER_V); + float2 fxy = path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U); + + *fx = fxy.x; + *fy = fxy.y; } } diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index f165a1f3839..abdb609b55f 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -117,6 +117,7 @@ CCL_NAMESPACE_BEGIN #define __CAMERA_CLIPPING__ #define __INTERSECTION_REFINE__ #define __CLAMP_SAMPLE__ +#define __CMJ__ #ifdef __KERNEL_SHADING__ #define __SVM__ @@ -164,8 +165,10 @@ enum PathTraceDimension { PRNG_LENS_V = 3, #ifdef __CAMERA_MOTION__ PRNG_TIME = 4, - PRNG_UNUSED = 5, - PRNG_BASE_NUM = 6, + PRNG_UNUSED_0 = 5, + PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */ + PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */ + PRNG_BASE_NUM = 8, #else PRNG_BASE_NUM = 4, #endif @@ -181,6 +184,11 @@ enum PathTraceDimension { PRNG_BOUNCE_NUM = 8 }; +enum SamplingPattern { + SAMPLING_PATTERN_SOBOL = 0, + SAMPLING_PATTERN_CMJ = 1 +}; + /* these flags values correspond to raytypes in osl.cpp, so keep them in sync! * * for ray visibility tests in BVH traversal, the upper 20 bits are used for @@ -728,6 +736,7 @@ typedef struct KernelIntegrator { /* non-progressive */ int progressive; + int aa_samples; int diffuse_samples; int glossy_samples; int transmission_samples; @@ -736,7 +745,11 @@ typedef struct KernelIntegrator { int use_lamp_mis; int subsurface_samples; - int pad1, pad2, pad3; + /* sampler */ + int sampling_pattern; + + /* padding */ + int pad; } KernelIntegrator; typedef struct KernelBVH { diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index 731ffd9e271..cc369e7abc9 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -49,6 +49,7 @@ Integrator::Integrator() sample_clamp = 0.0f; motion_blur = false; + aa_samples = 0; diffuse_samples = 1; glossy_samples = 1; transmission_samples = 1; @@ -57,6 +58,8 @@ Integrator::Integrator() subsurface_samples = 1; progressive = true; + sampling_pattern = SAMPLING_PATTERN_SOBOL; + need_update = true; } @@ -104,6 +107,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->sample_clamp = (sample_clamp == 0.0f)? FLT_MAX: sample_clamp*3.0f; kintegrator->progressive = progressive; + kintegrator->aa_samples = aa_samples; kintegrator->diffuse_samples = diffuse_samples; kintegrator->glossy_samples = glossy_samples; kintegrator->transmission_samples = transmission_samples; @@ -111,6 +115,8 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->mesh_light_samples = mesh_light_samples; kintegrator->subsurface_samples = subsurface_samples; + kintegrator->sampling_pattern = sampling_pattern; + /* sobol directions table */ int max_samples = 1; @@ -160,6 +166,7 @@ bool Integrator::modified(const Integrator& integrator) seed == integrator.seed && sample_clamp == integrator.sample_clamp && progressive == integrator.progressive && + aa_samples == integrator.aa_samples && diffuse_samples == integrator.diffuse_samples && glossy_samples == integrator.glossy_samples && transmission_samples == integrator.transmission_samples && diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 9867e310d4d..fff24b506fb 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -19,6 +19,8 @@ #ifndef __INTEGRATOR_H__ #define __INTEGRATOR_H__ +#include "kernel_types.h" + CCL_NAMESPACE_BEGIN class Device; @@ -49,6 +51,7 @@ public: float sample_clamp; bool motion_blur; + int aa_samples; int diffuse_samples; int glossy_samples; int transmission_samples; @@ -58,6 +61,8 @@ public: bool progressive; + SamplingPattern sampling_pattern; + bool need_update; Integrator(); diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index bc847d5719c..44364418dcf 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -22,6 +22,7 @@ #include "buffers.h" #include "camera.h" #include "device.h" +#include "integrator.h" #include "scene.h" #include "session.h" @@ -728,6 +729,18 @@ void Session::update_scene() cam->tag_update(); } + /* number of samples is needed by multi jittered sampling pattern */ + Integrator *integrator = scene->integrator; + + if(integrator->sampling_pattern == SAMPLING_PATTERN_CMJ) { + int aa_samples = tile_manager.num_samples; + + if(aa_samples != integrator->aa_samples) { + integrator->aa_samples = aa_samples; + integrator->tag_update(scene); + } + } + /* update scene */ if(scene->need_update()) { progress.set_status("Updating Scene"); diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index fe1cb61ffa9..0bc67f0618a 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -59,6 +59,8 @@ /* SIMD Types */ +#ifndef __KERNEL_GPU__ + /* not enabled, globally applying it just gives slowdown, * but useful for testing. */ //#define __KERNEL_SSE__ @@ -88,11 +90,11 @@ #endif #ifndef _WIN32 -#ifndef __KERNEL_GPU__ #include <stdint.h> #endif + #endif CCL_NAMESPACE_BEGIN |