/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright 2017 Blender Foundation. All rights reserved. */

/** \file
 * \ingroup gpu
 *
 * Custom select code for picking small regions (not efficient for large regions).
 * `gpu_select_pick_*` API.
 */

#include <float.h>
#include <stdlib.h>
#include <string.h>

#include "GPU_debug.h"
#include "GPU_framebuffer.h"
#include "GPU_immediate.h"
#include "GPU_select.h"
#include "GPU_state.h"

#include "MEM_guardedalloc.h"

#include "BLI_listbase.h"
#include "BLI_rect.h"
#include "BLI_utildefines.h"

#include "gpu_select_private.h"

#include "BLI_strict_flags.h"

/* #define DEBUG_PRINT */

/* Alloc number for depths */
#define ALLOC_DEPTHS 200

/* Z-depth of cleared depth buffer */
#define DEPTH_MAX 0xffffffff

/* -------------------------------------------------------------------- */
/** \name #SubRectStride
 * \{ */

/** For looping over a sub-region of a #rcti, could be moved into 'rct.c'. */
typedef struct SubRectStride {
  /** Start here. */
  uint start;
  /** Read these. */
  uint span;
  /** `len` times (read span 'len' times). */
  uint span_len;
  /** Skip those. */
  uint skip;
} SubRectStride;

/** We may want to change back to float if `uint` isn't well supported. */
typedef uint depth_t;

/**
 * Calculate values needed for looping over a sub-region (smaller buffer within a larger buffer).
 *
 * 'src' must be bigger than 'dst'.
 */
static void rect_subregion_stride_calc(const rcti *src, const rcti *dst, SubRectStride *r_sub)
{
  const int src_x = BLI_rcti_size_x(src);
  // const int src_y = BLI_rcti_size_y(src);
  const int dst_x = BLI_rcti_size_x(dst);
  const int dst_y = BLI_rcti_size_y(dst);
  const int x = dst->xmin - src->xmin;
  const int y = dst->ymin - src->ymin;

  BLI_assert(src->xmin <= dst->xmin && src->ymin <= dst->ymin && src->xmax >= dst->xmax &&
             src->ymax >= dst->ymax);
  BLI_assert(x >= 0 && y >= 0);

  r_sub->start = (uint)((src_x * y) + x);
  r_sub->span = (uint)dst_x;
  r_sub->span_len = (uint)dst_y;
  r_sub->skip = (uint)(src_x - dst_x);
}

/**
 * Ignore depth clearing as a change,
 * only check if it's been changed _and_ filled in (ignore clearing since XRAY does this).
 */
BLI_INLINE bool depth_is_filled(const depth_t *prev, const depth_t *curr)
{
  return (*prev != *curr) && (*curr != DEPTH_MAX);
}
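/* A worked example of #rect_subregion_stride_calc (illustrative only, not part of the build):
 * for an 8x4 `src` at (0, 0) and a 3x2 `dst` at (2, 1), the result is
 * `start = 8 * 1 + 2 = 10`, `span = 3`, `span_len = 2`, `skip = 8 - 3 = 5`.
 * The canonical iteration over the sub-region then reads:
 *
 * \code{.c}
 * const depth_t *curr = buf + sub_rect.start;
 * for (uint i = 0; i < sub_rect.span_len; i++) {
 *   const depth_t *curr_end = curr + sub_rect.span;
 *   for (; curr < curr_end; curr++) {
 *     // ... read *curr ...
 *   }
 *   curr += sub_rect.skip;
 * }
 * \endcode
 */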
/** \} */

/* -------------------------------------------------------------------- */
/** \name #DepthBufCache
 *
 * Result of reading #GPU_framebuffer_read_depth,
 * use for both cache and non-cached storage.
 * \{ */

/** Store result of #GPU_framebuffer_read_depth. */
typedef struct DepthBufCache {
  struct DepthBufCache *next, *prev;
  uint id;
  depth_t buf[0];
} DepthBufCache;

static DepthBufCache *depth_buf_malloc(uint rect_len)
{
  DepthBufCache *rect = MEM_mallocN(sizeof(DepthBufCache) + sizeof(depth_t) * rect_len, __func__);
  rect->id = SELECT_ID_NONE;
  return rect;
}

static bool depth_buf_rect_depth_any(const DepthBufCache *rect_depth, uint rect_len)
{
  const depth_t *curr = rect_depth->buf;
  for (uint i = 0; i < rect_len; i++, curr++) {
    if (*curr != DEPTH_MAX) {
      return true;
    }
  }
  return false;
}

static bool depth_buf_subrect_depth_any(const DepthBufCache *rect_depth,
                                        const SubRectStride *sub_rect)
{
  const depth_t *curr = rect_depth->buf + sub_rect->start;
  for (uint i = 0; i < sub_rect->span_len; i++) {
    const depth_t *curr_end = curr + sub_rect->span;
    for (; curr < curr_end; curr++) {
      if (*curr != DEPTH_MAX) {
        return true;
      }
    }
    curr += sub_rect->skip;
  }
  return false;
}

static bool depth_buf_rect_depth_any_filled(const DepthBufCache *rect_prev,
                                            const DepthBufCache *rect_curr,
                                            uint rect_len)
{
#if 0
  return memcmp(rect_prev->buf, rect_curr->buf, rect_len * sizeof(depth_t)) != 0;
#else
  const depth_t *prev = rect_prev->buf;
  const depth_t *curr = rect_curr->buf;
  for (uint i = 0; i < rect_len; i++, curr++, prev++) {
    if (depth_is_filled(prev, curr)) {
      return true;
    }
  }
  return false;
#endif
}

/**
 * Both buffers are the same size, just check if the sub-rect contains any differences.
 */
static bool depth_buf_subrect_depth_any_filled(const DepthBufCache *rect_src,
                                               const DepthBufCache *rect_dst,
                                               const SubRectStride *sub_rect)
{
  /* Same as above but different rectangle sizes. */
  const depth_t *prev = rect_src->buf + sub_rect->start;
  const depth_t *curr = rect_dst->buf + sub_rect->start;
  for (uint i = 0; i < sub_rect->span_len; i++) {
    const depth_t *curr_end = curr + sub_rect->span;
    for (; curr < curr_end; prev++, curr++) {
      if (depth_is_filled(prev, curr)) {
        return true;
      }
    }
    prev += sub_rect->skip;
    curr += sub_rect->skip;
  }
  return false;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name #DepthID
 *
 * Internal structure for storing hits.
 * \{ */

typedef struct DepthID {
  uint id;
  depth_t depth;
} DepthID;

static int depth_id_cmp(const void *v1, const void *v2)
{
  const DepthID *d1 = v1, *d2 = v2;
  if (d1->id < d2->id) {
    return -1;
  }
  if (d1->id > d2->id) {
    return 1;
  }
  return 0;
}

static int depth_cmp(const void *v1, const void *v2)
{
  const DepthID *d1 = v1, *d2 = v2;
  if (d1->depth < d2->depth) {
    return -1;
  }
  if (d1->depth > d2->depth) {
    return 1;
  }
  return 0;
}
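/* A sketch of how these comparators are used together in #gpu_select_pick_end (illustrative
 * only, `hits` / `hits_len` and the values are made up): given hits
 * `{id=7, d=20} {id=3, d=5} {id=7, d=10}`, sorting with #depth_id_cmp groups duplicate IDs so
 * only the nearest depth per ID is kept (`{3, 5} {7, 10}`), then #depth_cmp orders the unique
 * hits nearest-first:
 *
 * \code{.c}
 * qsort(hits, hits_len, sizeof(DepthID), depth_id_cmp); // group by ID
 * // ... de-duplicate, keeping the minimum depth per ID ...
 * qsort(hits, hits_len, sizeof(DepthID), depth_cmp);    // nearest first
 * \endcode
 */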
/** \} */

/* -------------------------------------------------------------------- */
/** \name Main Selection Begin/End/Load API
 * \{ */

/** Depth sorting. */
typedef struct GPUPickState {
  /** Cache on initialization. */
  GPUSelectResult *buffer;
  uint buffer_len;
  /** Mode of this operation. */
  eGPUSelectMode mode;

  /** GPU drawing, never use when `is_cached == true`. */
  struct {
    /** The current depth, accumulated while drawing. */
    DepthBufCache *rect_depth;
    /** Scratch buffer, avoid allocations every time (when not caching). */
    DepthBufCache *rect_depth_test;
    /** Pass to `GPU_framebuffer_read_depth(x, y, w, h)`. */
    int clip_readpixels[4];
    /** Set after first draw. */
    bool is_init;
    uint prev_id;
  } gpu;

  /**
   * `src`: data stored in 'cache' and 'gpu',
   * `dst`: use when cached region is smaller (where `src` -> `dst` isn't 1:1).
   */
  struct {
    rcti clip_rect;
    uint rect_len;
  } src, dst;

  /** Store cache between `GPU_select_cache_begin/end`. */
  bool use_cache;
  bool is_cached;
  struct {
    /**
     * Used for iterating over both source and destination buffers:
     * `src.clip_rect` -> `dst.clip_rect`.
     */
    SubRectStride sub_rect;
    /** List of #DepthBufCache, sized to 'src.clip_rect'. */
    ListBase bufs;
  } cache;

  /** Picking methods. */
  union {
    /** #GPU_SELECT_PICK_ALL */
    struct {
      DepthID *hits;
      uint hits_len;
      uint hits_len_alloc;
    } all;
    /** #GPU_SELECT_PICK_NEAREST */
    struct {
      uint *rect_id;
    } nearest;
  };

  /** Previous state to restore after drawing. */
  int viewport[4];
  int scissor[4];
  eGPUWriteMask write_mask;
  eGPUDepthTest depth_test;
} GPUPickState;

static GPUPickState g_pick_state = {0};

void gpu_select_pick_begin(GPUSelectResult *buffer,
                           const uint buffer_len,
                           const rcti *input,
                           eGPUSelectMode mode)
{
  GPUPickState *ps = &g_pick_state;

#ifdef DEBUG_PRINT
  printf("%s: mode=%d, use_cache=%d, is_cache=%d\n",
         __func__,
         (int)mode,
         ps->use_cache,
         ps->is_cached);
#endif

  GPU_debug_group_begin("Selection Pick");

  ps->buffer = buffer;
  ps->buffer_len = buffer_len;
  ps->mode = mode;

  const uint rect_len = (uint)(BLI_rcti_size_x(input) * BLI_rcti_size_y(input));
  ps->dst.clip_rect = *input;
  ps->dst.rect_len = rect_len;

  /* Skip GPU state setup entirely when a cache is available, since nothing is drawn. */
  if (ps->is_cached == false) {
    ps->write_mask = GPU_write_mask_get();
    ps->depth_test = GPU_depth_test_get();
    /* Save so #gpu_select_pick_end can restore the previous state. */
    GPU_viewport_size_get_i(ps->viewport);
    GPU_scissor_get(ps->scissor);

    /* Disable writing to the frame-buffer. */
    GPU_color_mask(false, false, false, false);

    GPU_depth_mask(true);
    /* Always use #GPU_DEPTH_LESS_EQUAL even though #GPU_SELECT_PICK_ALL always clears the buffer.
     * This is because individual objects themselves might have sections that overlap and we need
     * these to have the correct distance information. */
    GPU_depth_test(GPU_DEPTH_LESS_EQUAL);

    float viewport[4];
    GPU_viewport_size_get_f(viewport);

    ps->src.clip_rect = *input;
    ps->src.rect_len = rect_len;

    ps->gpu.clip_readpixels[0] = (int)viewport[0];
    ps->gpu.clip_readpixels[1] = (int)viewport[1];
    ps->gpu.clip_readpixels[2] = BLI_rcti_size_x(&ps->src.clip_rect);
    ps->gpu.clip_readpixels[3] = BLI_rcti_size_y(&ps->src.clip_rect);

    GPU_viewport(UNPACK4(ps->gpu.clip_readpixels));

    /* It's possible we don't want to clear the depth buffer,
     * so existing elements are masked by the current z-buffer. */
    GPU_clear_depth(1.0f);

    /* Scratch buffer (read new values here). */
    ps->gpu.rect_depth_test = depth_buf_malloc(rect_len);
    ps->gpu.rect_depth = depth_buf_malloc(rect_len);

    /* Set initial 'far' value. */
    for (uint i = 0; i < rect_len; i++) {
      ps->gpu.rect_depth->buf[i] = DEPTH_MAX;
    }

    ps->gpu.is_init = false;
    ps->gpu.prev_id = 0;
  }
  else {
    /* Using cache `ps->is_cached == true`. */
    /* `src.clip_rect` -> `dst.clip_rect`. */
    rect_subregion_stride_calc(&ps->src.clip_rect, &ps->dst.clip_rect, &ps->cache.sub_rect);
    BLI_assert(ps->gpu.rect_depth == NULL);
    BLI_assert(ps->gpu.rect_depth_test == NULL);
  }

  if (mode == GPU_SELECT_PICK_ALL) {
    ps->all.hits = MEM_mallocN(sizeof(*ps->all.hits) * ALLOC_DEPTHS, __func__);
    ps->all.hits_len = 0;
    ps->all.hits_len_alloc = ALLOC_DEPTHS;
  }
  else {
    /* Set to 0xff for #SELECT_ID_NONE. */
    ps->nearest.rect_id = MEM_mallocN(sizeof(uint) * ps->dst.rect_len, __func__);
    memset(ps->nearest.rect_id, 0xff, sizeof(uint) * ps->dst.rect_len);
  }
}
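/* A minimal sketch of how this API is driven (illustrative only; in Blender these calls are
 * reached through the public `GPU_select_*` wrappers, and `draw_object_with_id`,
 * `object_select_id` and `objects` are hypothetical stand-ins for the caller's drawing code):
 *
 * \code{.c}
 * GPUSelectResult buffer[64];
 * rcti rect;
 * BLI_rcti_init(&rect, x - 5, x + 5, y - 5, y + 5); // small region around the cursor
 *
 * gpu_select_pick_begin(buffer, ARRAY_SIZE(buffer), &rect, GPU_SELECT_PICK_NEAREST);
 * for (uint i = 0; i < objects_len; i++) {
 *   gpu_select_pick_load_id(object_select_id[i], false);
 *   draw_object_with_id(objects[i]);
 * }
 * const uint hits = gpu_select_pick_end(); // hits are written into `buffer`
 * \endcode
 */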
/**
 * Given two depth buffers that are known to differ, update the hit information.
 * Used for both cached and non-cached depth buffers.
 */
static void gpu_select_load_id_pass_all(const DepthBufCache *rect_curr)
{
  GPUPickState *ps = &g_pick_state;
  const uint id = rect_curr->id;
  /* Find the best depth for this pass and store in 'all.hits'. */
  depth_t depth_best = DEPTH_MAX;

#define EVAL_TEST() \
  if (depth_best > *curr) { \
    depth_best = *curr; \
  } \
  ((void)0)

  if (ps->is_cached == false) {
    const depth_t *curr = rect_curr->buf;
    BLI_assert(ps->src.rect_len == ps->dst.rect_len);
    const uint rect_len = ps->src.rect_len;
    for (uint i = 0; i < rect_len; i++, curr++) {
      EVAL_TEST();
    }
  }
  else {
    /* Same as above but different rectangle sizes. */
    const depth_t *curr = rect_curr->buf + ps->cache.sub_rect.start;
    for (uint i = 0; i < ps->cache.sub_rect.span_len; i++) {
      const depth_t *curr_end = curr + ps->cache.sub_rect.span;
      for (; curr < curr_end; curr++) {
        EVAL_TEST();
      }
      curr += ps->cache.sub_rect.skip;
    }
  }

#undef EVAL_TEST

  /* Ensure enough space. */
  if (UNLIKELY(ps->all.hits_len == ps->all.hits_len_alloc)) {
    ps->all.hits_len_alloc += ALLOC_DEPTHS;
    ps->all.hits = MEM_reallocN(ps->all.hits, ps->all.hits_len_alloc * sizeof(*ps->all.hits));
  }
  DepthID *d = &ps->all.hits[ps->all.hits_len++];
  d->id = id;
  d->depth = depth_best;
}

static void gpu_select_load_id_pass_nearest(const DepthBufCache *rect_prev,
                                            const DepthBufCache *rect_curr)
{
  GPUPickState *ps = &g_pick_state;
  const uint id = rect_curr->id;
  /* Keep track of each pixel's ID in `nearest.rect_id`. */
  if (id != SELECT_ID_NONE) {
    uint *id_ptr = ps->nearest.rect_id;

    /* Check against DEPTH_MAX because XRAY will clear the buffer,
     * so previously set values will become unset.
     * In this case just leave those IDs as-is. */
#define EVAL_TEST() \
  if (depth_is_filled(prev, curr)) { \
    *id_ptr = id; \
  } \
  ((void)0)

    if (ps->is_cached == false) {
      const depth_t *prev = rect_prev->buf;
      const depth_t *curr = rect_curr->buf;
      BLI_assert(ps->src.rect_len == ps->dst.rect_len);
      const uint rect_len = ps->src.rect_len;
      for (uint i = 0; i < rect_len; i++, curr++, prev++, id_ptr++) {
        EVAL_TEST();
      }
    }
    else {
      /* Same as above but different rectangle sizes. */
      const depth_t *prev = rect_prev->buf + ps->cache.sub_rect.start;
      const depth_t *curr = rect_curr->buf + ps->cache.sub_rect.start;
      for (uint i = 0; i < ps->cache.sub_rect.span_len; i++) {
        const depth_t *curr_end = curr + ps->cache.sub_rect.span;
        for (; curr < curr_end; prev++, curr++, id_ptr++) {
          EVAL_TEST();
        }
        prev += ps->cache.sub_rect.skip;
        curr += ps->cache.sub_rect.skip;
      }
    }

#undef EVAL_TEST
  }
}
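/* How #depth_is_filled drives the nearest pass above (illustrative depth values):
 *
 *   prev = DEPTH_MAX, curr = 0x1234     -> filled: the current ID claims this pixel.
 *   prev = 0x1234,    curr = 0x1000     -> filled: drawn nearer, the ID is overwritten.
 *   prev = 0x1234,    curr = DEPTH_MAX  -> not filled: an XRAY clear, keep the previous ID.
 *   prev == curr                        -> not filled: nothing was drawn at this pixel.
 */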
bool gpu_select_pick_load_id(uint id, bool end)
{
  GPUPickState *ps = &g_pick_state;

  if (ps->gpu.is_init) {
    if (id == ps->gpu.prev_id && !end) {
      /* No need to read if we are still drawing for the same id since
       * all these depths will be merged / de-duplicated in the end. */
      return true;
    }

    const uint rect_len = ps->src.rect_len;
    GPUFrameBuffer *fb = GPU_framebuffer_active_get();
    GPU_framebuffer_read_depth(
        fb, UNPACK4(ps->gpu.clip_readpixels), GPU_DATA_UINT, ps->gpu.rect_depth_test->buf);
    /* Perform an initial check since in most cases the array remains unchanged. */
    bool do_pass = false;
    if (g_pick_state.mode == GPU_SELECT_PICK_ALL) {
      if (depth_buf_rect_depth_any(ps->gpu.rect_depth_test, rect_len)) {
        ps->gpu.rect_depth_test->id = ps->gpu.prev_id;
        gpu_select_load_id_pass_all(ps->gpu.rect_depth_test);
        do_pass = true;
      }
    }
    else {
      if (depth_buf_rect_depth_any_filled(ps->gpu.rect_depth, ps->gpu.rect_depth_test, rect_len)) {
        ps->gpu.rect_depth_test->id = ps->gpu.prev_id;
        gpu_select_load_id_pass_nearest(ps->gpu.rect_depth, ps->gpu.rect_depth_test);
        do_pass = true;
      }
    }

    if (do_pass) {
      /* Store depth in cache. */
      if (ps->use_cache) {
        BLI_addtail(&ps->cache.bufs, ps->gpu.rect_depth);
        ps->gpu.rect_depth = depth_buf_malloc(ps->src.rect_len);
      }

      SWAP(DepthBufCache *, ps->gpu.rect_depth, ps->gpu.rect_depth_test);

      if (g_pick_state.mode == GPU_SELECT_PICK_ALL) {
        /* (fclem) This is to be on the safe side. I don't know if this is required. */
        bool prev_depth_mask = GPU_depth_mask_get();
        /* We want new depths every time. */
        GPU_depth_mask(true);
        GPU_clear_depth(1.0f);
        GPU_depth_mask(prev_depth_mask);
      }
    }
  }

  ps->gpu.is_init = true;
  ps->gpu.prev_id = id;

  return true;
}
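/* Sketch of the buffer rotation performed above across a sequence of IDs (illustrative only):
 *
 * \code{.c}
 * gpu_select_pick_load_id(1, false); // first call: only sets `is_init` & `prev_id`.
 * // ... draw with id 1 ...
 * gpu_select_pick_load_id(2, false); // reads back depth, attributed to id 1; on change,
 *                                    // swaps `rect_depth` <-> `rect_depth_test` (and when
 *                                    // caching, appends the old buffer to `cache.bufs`).
 * // ... draw with id 2 ...
 * gpu_select_pick_end();             // flushes the final pass for id 2.
 * \endcode
 */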
uint gpu_select_pick_end(void)
{
  GPUPickState *ps = &g_pick_state;

#ifdef DEBUG_PRINT
  printf("%s\n", __func__);
#endif

  if (ps->is_cached == false) {
    if (ps->gpu.is_init) {
      /* Force finishing the last pass. */
      gpu_select_pick_load_id(ps->gpu.prev_id, true);
    }
    GPU_write_mask(ps->write_mask);
    GPU_depth_test(ps->depth_test);
    GPU_viewport(UNPACK4(ps->viewport));
  }

  GPU_debug_group_end();

  /* Assign but never free directly since it may be in cache. */
  DepthBufCache *rect_depth_final;

  /* Store depth in cache. */
  if (ps->use_cache && !ps->is_cached) {
    BLI_addtail(&ps->cache.bufs, ps->gpu.rect_depth);
    ps->gpu.rect_depth = NULL;
    rect_depth_final = ps->cache.bufs.last;
  }
  else if (ps->is_cached) {
    rect_depth_final = ps->cache.bufs.last;
  }
  else {
    /* Common case, no cache. */
    rect_depth_final = ps->gpu.rect_depth;
  }

  uint maxhits = g_pick_state.buffer_len;
  DepthID *depth_data;
  uint depth_data_len = 0;

  if (g_pick_state.mode == GPU_SELECT_PICK_ALL) {
    depth_data = ps->all.hits;
    depth_data_len = ps->all.hits_len;
    /* Move ownership. */
    ps->all.hits = NULL;
    ps->all.hits_len = 0;
    ps->all.hits_len_alloc = 0;
  }
  else {
    /* #GPU_SELECT_PICK_NEAREST */

    /* Over-allocate (it's unlikely we have as many depths as pixels). */
    uint depth_data_len_first_pass = 0;
    depth_data = MEM_mallocN(ps->dst.rect_len * sizeof(*depth_data), __func__);

    /* Partially de-duplicating copy,
     * when contiguous IDs are found, update their closest depth.
     * This isn't essential but means there is less data to sort. */

#define EVAL_TEST(i_src, i_dst) \
  { \
    const uint id = ps->nearest.rect_id[i_dst]; \
    if (id != SELECT_ID_NONE) { \
      const depth_t depth = rect_depth_final->buf[i_src]; \
      if (depth_last == NULL || depth_last->id != id) { \
        DepthID *d = &depth_data[depth_data_len_first_pass++]; \
        d->id = id; \
        d->depth = depth; \
      } \
      else if (depth_last->depth > depth) { \
        depth_last->depth = depth; \
      } \
    } \
  } \
  ((void)0)

    {
      DepthID *depth_last = NULL;
      if (ps->is_cached == false) {
        for (uint i = 0; i < ps->src.rect_len; i++) {
          EVAL_TEST(i, i);
        }
      }
      else {
        /* Same as above but different rectangle sizes. */
        uint i_src = ps->cache.sub_rect.start, i_dst = 0;
        for (uint j = 0; j < ps->cache.sub_rect.span_len; j++) {
          const uint i_src_end = i_src + ps->cache.sub_rect.span;
          for (; i_src < i_src_end; i_src++, i_dst++) {
            EVAL_TEST(i_src, i_dst);
          }
          i_src += ps->cache.sub_rect.skip;
        }
      }
    }

#undef EVAL_TEST

    qsort(depth_data, depth_data_len_first_pass, sizeof(DepthID), depth_id_cmp);

    /* Sort by IDs then keep the best depth for each ID. */
    depth_data_len = 0;
    {
      DepthID *depth_last = NULL;
      for (uint i = 0; i < depth_data_len_first_pass; i++) {
        if (depth_last == NULL || depth_last->id != depth_data[i].id) {
          depth_last = &depth_data[depth_data_len++];
          *depth_last = depth_data[i];
        }
        else if (depth_last->depth > depth_data[i].depth) {
          depth_last->depth = depth_data[i].depth;
        }
      }
    }
  }

  /* Finally sort each unique (id, depth) pair by depth
   * so the final hit-list is sorted by depth (nearest first). */
  uint hits = 0;
  if (depth_data_len > maxhits) {
    hits = (uint)-1;
  }
  else {
    /* Leave sorting up to the caller. */
    qsort(depth_data, depth_data_len, sizeof(DepthID), depth_cmp);

    for (uint i = 0; i < depth_data_len; i++) {
#ifdef DEBUG_PRINT
      printf("  hit: %u: depth %u\n", depth_data[i].id, depth_data[i].depth);
#endif
      g_pick_state.buffer[hits].depth = depth_data[i].depth;
      g_pick_state.buffer[hits].id = depth_data[i].id;
      hits++;
    }
    BLI_assert(hits <= maxhits);
  }
  MEM_freeN(depth_data);

  MEM_SAFE_FREE(ps->gpu.rect_depth);
  MEM_SAFE_FREE(ps->gpu.rect_depth_test);

  if (g_pick_state.mode == GPU_SELECT_PICK_ALL) {
    /* 'hits' already freed as 'depth_data'. */
  }
  else {
    MEM_freeN(ps->nearest.rect_id);
    ps->nearest.rect_id = NULL;
  }

  if (ps->use_cache) {
    ps->is_cached = true;
  }

  return hits;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Caching
 *
 * Support multiple begin/end's reusing depth buffers.
 * \{ */

void gpu_select_pick_cache_begin(void)
{
  BLI_assert(g_pick_state.use_cache == false);
#ifdef DEBUG_PRINT
  printf("%s\n", __func__);
#endif
  g_pick_state.use_cache = true;
  g_pick_state.is_cached = false;
}

void gpu_select_pick_cache_end(void)
{
#ifdef DEBUG_PRINT
  printf("%s: with %d buffers\n", __func__, BLI_listbase_count(&g_pick_state.cache.bufs));
#endif
  g_pick_state.use_cache = false;
  g_pick_state.is_cached = false;

  BLI_freelistN(&g_pick_state.cache.bufs);
}

bool gpu_select_pick_is_cached(void)
{
  return g_pick_state.is_cached;
}

void gpu_select_pick_cache_load_id(void)
{
  BLI_assert(g_pick_state.is_cached == true);
  GPUPickState *ps = &g_pick_state;
#ifdef DEBUG_PRINT
  printf("%s (building depth from cache)\n", __func__);
#endif
  LISTBASE_FOREACH (DepthBufCache *, rect_depth, &ps->cache.bufs) {
    if (rect_depth->next != NULL) {
      /* We know the buffers differ, but this sub-region may not.
       * Double check before adding an id-pass. */
      if (g_pick_state.mode == GPU_SELECT_PICK_ALL) {
        if (depth_buf_subrect_depth_any(rect_depth->next, &ps->cache.sub_rect)) {
          gpu_select_load_id_pass_all(rect_depth->next);
        }
      }
      else {
        if (depth_buf_subrect_depth_any_filled(
                rect_depth, rect_depth->next, &ps->cache.sub_rect)) {
          gpu_select_load_id_pass_nearest(rect_depth, rect_depth->next);
        }
      }
    }
  }
}

/** \} */
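/* A minimal sketch of the caching flow (illustrative only; `draw_scene_with_ids` is a
 * hypothetical stand-in for the caller's drawing code, and `rect_b` must be contained within
 * `rect_a`). The first pick fills `cache.bufs`; later picks over sub-rectangles of the same
 * view replay the cached depth passes without drawing:
 *
 * \code{.c}
 * gpu_select_pick_cache_begin();
 *
 * gpu_select_pick_begin(buffer, buffer_len, &rect_a, GPU_SELECT_PICK_NEAREST);
 * draw_scene_with_ids(); // real drawing, depth passes are appended to the cache
 * uint hits_a = gpu_select_pick_end();
 *
 * gpu_select_pick_begin(buffer, buffer_len, &rect_b, GPU_SELECT_PICK_NEAREST);
 * if (gpu_select_pick_is_cached()) {
 *   gpu_select_pick_cache_load_id(); // no drawing, replay cached buffers over `rect_b`
 * }
 * uint hits_b = gpu_select_pick_end();
 *
 * gpu_select_pick_cache_end();
 * \endcode
 */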