intern/cycles/kernel/film/adaptive_sampling.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2019-2022 Blender Foundation */

#pragma once

#include "kernel/film/write.h"

CCL_NAMESPACE_BEGIN

/* Tell whether the pixel which the given path state renders into should receive more samples.
 *
 * Returns true when the adaptive auxiliary pass is not in use, or when the W channel of that pass
 * is still zero for the pixel (the convergence check stores a non-zero value there once the pixel
 * is considered converged). */

ccl_device_forceinline bool film_need_sample_pixel(KernelGlobals kg,
                                                   ConstIntegratorState state,
                                                   ccl_global float *render_buffer)
{
  const bool has_adaptive_aux_pass = (kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
  if (!has_adaptive_aux_pass) {
    /* Without the auxiliary pass there is no convergence flag to consult: always sample. */
    return true;
  }

  /* Start of the passes of this pixel. The index is widened to 64 bit before it is multiplied by
   * the pass stride. */
  const uint32_t pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
  const uint64_t pixel_offset = (uint64_t)pixel_index * kernel_data.film.pass_stride;
  ccl_global float *pixel = render_buffer + pixel_offset;

  /* Fourth channel of the auxiliary pass holds the convergence flag. */
  const uint converged_flag_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
  const bool is_converged = !(pixel[converged_flag_offset] == 0.0f);

  return !is_converged;
}

/* Evaluate the adaptive sampling stopping criterion for the pixel at (x, y).
 *
 * The pixel is addressed as `offset + x + y * stride` inside `render_buffer`. Its error estimate
 * is compared against `threshold`, and the outcome is written to the W channel of the adaptive
 * auxiliary pass (1 when converged, 0 otherwise).
 *
 * When `reset` is false, a pixel whose flag is already non-zero is reported as converged without
 * being re-evaluated or modified.
 *
 * Returns true when the pixel is considered converged. */

ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg,
                                                         ccl_global float *render_buffer,
                                                         int x,
                                                         int y,
                                                         float threshold,
                                                         bool reset,
                                                         int offset,
                                                         int stride)
{
  kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
  kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED);

  /* Widen the pixel index to 64 bit before scaling it by the pass stride. */
  const int pixel_index = offset + x + y * stride;
  const uint64_t pixel_offset = (uint64_t)pixel_index * kernel_data.film.pass_stride;
  ccl_global float *pixel = render_buffer + pixel_offset;

  /* TODO(Stefan): Is this better in linear, sRGB or something else? */

  const float4 aux = kernel_read_pass_float4(pixel + kernel_data.film.pass_adaptive_aux_buffer);
  const bool was_converged = (aux.w != 0.0f);
  if (was_converged && !reset) {
    /* A pixel which was already flagged as converged keeps its state in this kernel, so leave
     * before doing any math.
     *
     * TODO(sergey): On a GPU it might be better to keep thread alive for better coherency? */
    return true;
  }

  const float4 combined = kernel_read_pass_float4(pixel + kernel_data.film.pass_combined);

  /* The sample count is obtained through __float_as_uint(), so the pass presumably stores an
   * integer counter in the bits of a float. */
  const float num_samples = __float_as_uint(pixel[kernel_data.film.pass_sample_count]);
  const float inv_num_samples = 1.0f / num_samples;

  /* The per pixel error as seen in section 2.1 of
   * "A hierarchical automatic stopping condition for Monte Carlo global illumination" */
  const float diff_r = fabsf(combined.x - aux.x);
  const float diff_g = fabsf(combined.y - aux.y);
  const float diff_b = fabsf(combined.z - aux.z);
  const float error_difference = (diff_r + diff_g + diff_b) * inv_num_samples;

  const float combined_sum = combined.x + combined.y + combined.z;
  const float error_normalize = sqrtf(combined_sum * inv_num_samples);

  /* The small epsilon in the divisor prevents a division by zero. */
  const float error = error_difference / (0.0001f + error_normalize);

  if (error < threshold) {
    pixel[kernel_data.film.pass_adaptive_aux_buffer + 3] = 1.0f;
    return true;
  }

  pixel[kernel_data.film.pass_adaptive_aux_buffer + 3] = 0.0f;
  return false;
}

/* Simple box filter over the convergence flags, applied in two passes (one along X and one
 * along Y). Whenever a pixel asks for more adaptive samples, its direct neighbors are made to
 * draw more samples as well.
 *
 * Horizontal pass: walks the pixels [start_x, start_x + width) of row `y` and clears the
 * convergence flag (W channel of the adaptive auxiliary pass) of every converged pixel which has
 * an unconverged pixel directly to its left or right. Every flag is read before this pass can
 * write to it, so a pixel which is cleared here does not spread any further. */

ccl_device void film_adaptive_sampling_filter_x(KernelGlobals kg,
                                                ccl_global float *render_buffer,
                                                int y,
                                                int start_x,
                                                int width,
                                                int offset,
                                                int stride)
{
  kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);

  const uint flag_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
  const int end_x = start_x + width;

  /* Whether the pixel visited in the previous iteration needed more samples. */
  bool left_needs_samples = false;

  for (int x = start_x; x < end_x; ++x) {
    const int pixel_index = offset + x + y * stride;
    ccl_global float *pixel = render_buffer +
                              (uint64_t)pixel_index * kernel_data.film.pass_stride;
    const bool needs_samples = (pixel[flag_offset] == 0.0f);

    if (needs_samples && !left_needs_samples && x > start_x) {
      /* First unconverged pixel after a converged one: wake up the left neighbor. */
      const int left_index = pixel_index - 1;
      ccl_global float *left_pixel = render_buffer +
                                     (uint64_t)left_index * kernel_data.film.pass_stride;
      left_pixel[flag_offset] = 0.0f;
    }
    else if (!needs_samples && left_needs_samples) {
      /* Converged pixel right after an unconverged one: wake up this pixel. */
      pixel[flag_offset] = 0.0f;
    }

    /* Remember the state which was read, not the one which might have just been written. */
    left_needs_samples = needs_samples;
  }
}

/* Vertical pass over the convergence flags: walks the pixels [start_y, start_y + height) of
 * column `x` and clears the convergence flag (W channel of the adaptive auxiliary pass) of every
 * converged pixel which has an unconverged pixel directly above or below it in the buffer.
 * Every flag is read before this pass can write to it, so a pixel which is cleared here does not
 * spread any further along the column. */

ccl_device void film_adaptive_sampling_filter_y(KernelGlobals kg,
                                                ccl_global float *render_buffer,
                                                int x,
                                                int start_y,
                                                int height,
                                                int offset,
                                                int stride)
{
  kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);

  const uint flag_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
  const int end_y = start_y + height;

  /* Whether the pixel visited in the previous iteration needed more samples. */
  bool prev_row_needs_samples = false;

  for (int y = start_y; y < end_y; ++y) {
    const int pixel_index = offset + x + y * stride;
    ccl_global float *pixel = render_buffer +
                              (uint64_t)pixel_index * kernel_data.film.pass_stride;
    const bool needs_samples = (pixel[flag_offset] == 0.0f);

    if (needs_samples && !prev_row_needs_samples && y > start_y) {
      /* First unconverged pixel after a converged one: wake up the pixel one row back. */
      const int prev_row_index = pixel_index - stride;
      ccl_global float *prev_row_pixel = render_buffer +
                                         (uint64_t)prev_row_index * kernel_data.film.pass_stride;
      prev_row_pixel[flag_offset] = 0.0f;
    }
    else if (!needs_samples && prev_row_needs_samples) {
      /* Converged pixel right after an unconverged one: wake up this pixel. */
      pixel[flag_offset] = 0.0f;
    }

    /* Remember the state which was read, not the one which might have just been written. */
    prev_row_needs_samples = needs_samples;
  }
}

CCL_NAMESPACE_END