/* * Copyright 2018 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ CCL_NAMESPACE_BEGIN ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, int num_slots, float id, float weight) { kernel_assert(id != ID_NONE); if (weight == 0.0f) { return; } for (int slot = 0; slot < num_slots; slot++) { ccl_global float2 *id_buffer = (ccl_global float2 *)buffer; #ifdef __ATOMIC_PASS_WRITE__ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ if (id_buffer[slot].x == ID_NONE) { /* Use an atomic to claim this slot. * If a different thread got here first, try again from this slot on. */ float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id); if (old_id != ID_NONE && old_id != id) { continue; } atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); break; } /* If there already is a slot for that ID, add the weight. * If no slot was found, add it to the last. */ else if (id_buffer[slot].x == id || slot == num_slots - 1) { atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); break; } #else /* __ATOMIC_PASS_WRITE__ */ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ if (id_buffer[slot].x == ID_NONE) { id_buffer[slot].x = id; id_buffer[slot].y = weight; break; } /* If there already is a slot for that ID, add the weight. * If no slot was found, add it to the last. */ else if (id_buffer[slot].x == id || slot == num_slots - 1) { id_buffer[slot].y += weight; break; } #endif /* __ATOMIC_PASS_WRITE__ */ } } ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots) { ccl_global float2 *id_buffer = (ccl_global float2 *)buffer; for (int slot = 1; slot < num_slots; ++slot) { if (id_buffer[slot].x == ID_NONE) { return; } /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */ int i = slot; while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { float2 swap = id_buffer[i]; id_buffer[i] = id_buffer[i - 1]; id_buffer[i - 1] = swap; --i; } } } #ifdef __KERNEL_GPU__ /* post-sorting for Cryptomatte */ ccl_device void kernel_cryptomatte_post( KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride) { if (sample - 1 == kernel_data.integrator.aa_samples) { int index = offset + x + y * stride; int pass_stride = kernel_data.film.pass_stride; ccl_global float *cryptomatte_buffer = buffer + index * pass_stride + kernel_data.film.pass_cryptomatte; kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); } } #endif CCL_NAMESPACE_END