intern/cycles/kernel/kernel_random.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include "kernel/kernel_jitter.h"
#include "util/util_hash.h"

CCL_NAMESPACE_BEGIN

/* Pseudo random numbers, uncomment this for debugging correlations. Only run
 * this single threaded on a CPU for repeatable results. */
//#define __DEBUG_CORRELATION__

/* High Dimensional Sobol.
 *
 * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal
 * to classic Van der Corput and Sobol sequences. */

#ifdef __SOBOL__

/* Skip initial numbers that for some dimensions have clear patterns that
 * don't cover the entire sample space. Ideally we would have a better
 * progressive pattern that doesn't suffer from this problem, because even
 * with this offset some dimensions are quite poor.
 */
#  define SOBOL_SKIP 64

ccl_device uint sobol_dimension(const KernelGlobals *kg, int index, int dimension)
{
  uint result = 0;
  uint i = index + SOBOL_SKIP;
  for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
    j += x;
    result ^= __float_as_uint(kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1));
  }
  return result;
}

#endif /* __SOBOL__ */

ccl_device_forceinline float path_rng_1D(const KernelGlobals *kg,
                                         uint rng_hash,
                                         int sample,
                                         int dimension)
{
#ifdef __DEBUG_CORRELATION__
  return (float)drand48();
#endif

#ifdef __SOBOL__
  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ)
#endif
  {
    return pmj_sample_1D(kg, sample, rng_hash, dimension);
  }

#ifdef __SOBOL__
  /* Sobol sequence value using direction vectors. */
  uint result = sobol_dimension(kg, sample, dimension);
  float r = (float)result * (1.0f / (float)0xFFFFFFFF);

  /* Cranly-Patterson rotation using rng seed */
  float shift;

  /* Hash rng with dimension to solve correlation issues.
   * See T38710, T50116.
   */
  uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
  shift = tmp_rng * (1.0f / (float)0xFFFFFFFF);

  return r + shift - floorf(r + shift);
#endif
}

ccl_device_forceinline void path_rng_2D(
    const KernelGlobals *kg, uint rng_hash, int sample, int dimension, float *fx, float *fy)
{
#ifdef __DEBUG_CORRELATION__
  *fx = (float)drand48();
  *fy = (float)drand48();
  return;
#endif

#ifdef __SOBOL__
  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ)
#endif
  {
    pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy);

    return;
  }

#ifdef __SOBOL__
  /* Sobol. */
  *fx = path_rng_1D(kg, rng_hash, sample, dimension);
  *fy = path_rng_1D(kg, rng_hash, sample, dimension + 1);
#endif
}

/**
 * 1D hash recommended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020
 * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh
 * http://www.jcgt.org/published/0009/03/02/paper.pdf
 */
ccl_device_inline uint hash_iqint1(uint n)
{
  n = (n << 13U) ^ n;
  n = n * (n * n * 15731U + 789221U) + 1376312589U;

  return n;
}

/**
 * 2D hash recommended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020
 * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh
 * http://www.jcgt.org/published/0009/03/02/paper.pdf
 */
ccl_device_inline uint hash_iqnt2d(const uint x, const uint y)
{
  const uint qx = 1103515245U * ((x >> 1U) ^ (y));
  const uint qy = 1103515245U * ((y >> 1U) ^ (x));
  const uint n = 1103515245U * ((qx) ^ (qy >> 3U));

  return n;
}

ccl_device_inline uint path_rng_hash_init(const KernelGlobals *ccl_restrict kg,
                                          const int sample,
                                          const int x,
                                          const int y)
{
  const uint rng_hash = hash_iqnt2d(x, y) ^ kernel_data.integrator.seed;

#ifdef __DEBUG_CORRELATION__
  srand48(rng_hash + sample);
#else
  (void)sample;
#endif

  return rng_hash;
}

/* Linear Congruential Generator */

ccl_device uint lcg_step_uint(uint *rng)
{
  /* implicit mod 2^32 */
  *rng = (1103515245 * (*rng) + 12345);
  return *rng;
}

ccl_device float lcg_step_float(uint *rng)
{
  /* implicit mod 2^32 */
  *rng = (1103515245 * (*rng) + 12345);
  return (float)*rng * (1.0f / (float)0xFFFFFFFF);
}

ccl_device uint lcg_init(uint seed)
{
  uint rng = seed;
  lcg_step_uint(&rng);
  return rng;
}

ccl_device_inline uint lcg_state_init(const uint rng_hash,
                                      const uint rng_offset,
                                      const uint sample,
                                      const uint scramble)
{
  return lcg_init(rng_hash + rng_offset + sample * scramble);
}

ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
{
  /* Implicit mod 2^32 */
  *rng = (1103515245 * (*rng) + 12345);
  return (float)*rng * (1.0f / (float)0xFFFFFFFF);
}

ccl_device_inline bool sample_is_even(int pattern, int sample)
{
  if (pattern == SAMPLING_PATTERN_PMJ) {
    /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
     * We can use this to get divide sample sequence into two classes for easier variance
     * estimation. */
#if defined(__GNUC__) && !defined(__KERNEL_GPU__)
    return __builtin_popcount(sample & 0xaaaaaaaa) & 1;
#elif defined(__NVCC__)
    return __popc(sample & 0xaaaaaaaa) & 1;
#else
    /* TODO(Stefan): pop-count intrinsic for Windows with fallback for older CPUs. */
    int i = sample & 0xaaaaaaaa;
    i = i - ((i >> 1) & 0x55555555);
    i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
    i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
    return i & 1;
#endif
  }
  else {
    /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */
    return sample & 0x1;
  }
}

CCL_NAMESPACE_END