diff options
Diffstat (limited to 'intern/cycles/kernel/film/film_id_passes.h')
-rw-r--r-- | intern/cycles/kernel/film/film_id_passes.h | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/intern/cycles/kernel/film/film_id_passes.h b/intern/cycles/kernel/film/film_id_passes.h new file mode 100644 index 00000000000..d5b8c90a828 --- /dev/null +++ b/intern/cycles/kernel/film/film_id_passes.h @@ -0,0 +1,106 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Element of ID pass stored in the render buffers. + * It is `float2` semantically, but it must be unaligned since the offset of ID passes in the + * render buffers might not meet expected by compiler alignment. */ +typedef struct IDPassBufferElement { + float x; + float y; +} IDPassBufferElement; + +ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, + int num_slots, + float id, + float weight) +{ + kernel_assert(id != ID_NONE); + if (weight == 0.0f) { + return; + } + + for (int slot = 0; slot < num_slots; slot++) { + ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; +#ifdef __ATOMIC_PASS_WRITE__ + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if (id_buffer[slot].x == ID_NONE) { + /* Use an atomic to claim this slot. + * If a different thread got here first, try again from this slot on. */ + float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id); + if (old_id != ID_NONE && old_id != id) { + continue; + } + atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. */ + else if (id_buffer[slot].x == id || slot == num_slots - 1) { + atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); + break; + } +#else /* __ATOMIC_PASS_WRITE__ */ + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if (id_buffer[slot].x == ID_NONE) { + id_buffer[slot].x = id; + id_buffer[slot].y = weight; + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. */ + else if (id_buffer[slot].x == id || slot == num_slots - 1) { + id_buffer[slot].y += weight; + break; + } +#endif /* __ATOMIC_PASS_WRITE__ */ + } +} + +ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots) +{ + ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; + for (int slot = 1; slot < num_slots; ++slot) { + if (id_buffer[slot].x == ID_NONE) { + return; + } + /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */ + int i = slot; + while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { + const IDPassBufferElement swap = id_buffer[i]; + id_buffer[i] = id_buffer[i - 1]; + id_buffer[i - 1] = swap; + --i; + } + } +} + +/* post-sorting for Cryptomatte */ +ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg, + ccl_global float *render_buffer, + int pixel_index) +{ + const int pass_stride = kernel_data.film.pass_stride; + const uint64_t render_buffer_offset = (uint64_t)pixel_index * pass_stride; + ccl_global float *cryptomatte_buffer = render_buffer + render_buffer_offset + + kernel_data.film.pass_cryptomatte; + kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); +} + +CCL_NAMESPACE_END |