From 90da8c5f4a8e1d8002b6e4d448f80833250289a3 Mon Sep 17 00:00:00 2001 From: YimingWu Date: Mon, 30 May 2022 09:49:31 +0800 Subject: LineArt: Cas --- .../gpencil_modifiers/intern/lineart/lineart_cpu.c | 96 ++++++++++++++-------- .../intern/lineart/lineart_intern.h | 3 +- 2 files changed, 65 insertions(+), 34 deletions(-) diff --git a/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.c b/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.c index f5e2c285230..e2329580594 100644 --- a/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.c +++ b/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.c @@ -3035,7 +3035,7 @@ static LineartVert *lineart_triangle_share_point(const LineartTriangle *l, } static bool lineart_triangle_2v_intersection_math( - LineartVert *v1, LineartVert *v2, LineartTriangle *t2, float *last, float *rv) + LineartVert *v1, LineartVert *v2, LineartTriangle *t2, double *last, double *rv) { double Lv[3]; double Rv[3]; @@ -3069,17 +3069,17 @@ static bool lineart_triangle_2v_intersection_math( return false; } - copy_v3fl_v3db(rv, gloc); + copy_v3_v3_db(rv, gloc); return true; } static bool lineart_triangle_intersect_math(LineartTriangle *tri, LineartTriangle *t2, - float *v1, - float *v2) + double *v1, + double *v2) { - float *next = v1, *last = NULL; + double *next = v1, *last = NULL; LineartVert *sv1, *sv2; LineartVert *share = lineart_triangle_share_point(t2, tri); @@ -3090,7 +3090,7 @@ static bool lineart_triangle_intersect_math(LineartTriangle *tri, lineart_triangle_get_other_verts(tri, share, &sv1, &sv2); - copy_v3fl_v3db(v1, share->gloc); + copy_v3_v3_db(v1, share->gloc); if (!lineart_triangle_2v_intersection_math(sv1, sv2, t2, 0, v2)) { lineart_triangle_get_other_verts(t2, share, &sv1, &sv2); @@ -3148,8 +3148,8 @@ static bool lineart_triangle_intersect_math(LineartTriangle *tri, } static void lineart_add_isec_thread(LineartIsecThread *th, - const float *v1, - const float *v2, + const double *v1, + const double *v2, LineartTriangle *tri1, LineartTriangle *tri2) { @@ -3163,14 +3163,14 @@ static void lineart_add_isec_thread(LineartIsecThread *th, th->array = new_array; } LineartIsecSingle *is = &th->array[th->current]; - copy_v3_v3(is->v1, v1); - copy_v3_v3(is->v2, v2); + copy_v3fl_v3db(is->v1, v1); + copy_v3fl_v3db(is->v2, v2); is->tri1 = tri1; is->tri2 = tri2; th->current++; } -#define LRT_ISECT_TRIANGLE_PER_THREAD 4096; +#define LRT_ISECT_TRIANGLE_PER_THREAD 4096 static bool lineart_schedule_new_triangle_task(LineartIsecThread *th) { @@ -3210,6 +3210,11 @@ static bool lineart_schedule_new_triangle_task(LineartIsecThread *th) return true; } +/* This function initializes two things: + * 1) Triangle array scheduling info, for each worker thread to get it's chunk from the scheduler. + * 2) Per-thread intersection result array. Does not store actual #LineartEdge, these results will + * be finalized by #lineart_create_edges_from_isec_data + */ static void lineart_init_isec_thread(LineartIsecData *d, LineartRenderBuffer *rb, int thread_count) { d->threads = MEM_callocN(sizeof(LineartIsecThread) * thread_count, "LineartIsecThread arr"); @@ -3225,14 +3230,8 @@ static void lineart_init_isec_thread(LineartIsecData *d, LineartRenderBuffer *rb it->max = 100; it->current = 0; it->thread_id = i; - } - -#define OBJ_PER_ISEC_THREAD 8 /* Largely arbitrary, no need to be big. */ - for (int i = 0; i < thread_count; i++) { - LineartIsecThread *it = &d->threads[i]; it->rb = rb; } -#undef OBJ_PER_ISEC_THREAD } static void lineart_destroy_isec_thread(LineartIsecData *d) @@ -3249,6 +3248,8 @@ static void lineart_triangle_intersect_in_bounding_area(LineartTriangle *tri, LineartIsecThread *th, int up_to) { + BLI_assert(th != NULL); + if (!th) { return; } @@ -3263,7 +3264,7 @@ static void lineart_triangle_intersect_in_bounding_area(LineartTriangle *tri, /* If this _is_ the smallest subdiv bounding area, then do the intersections there. */ for (int i = 0; i < up_to; i++) { do { - testing_triangle = (LineartTriangle *)lineart_atomic_load(&ba->linked_triangles[i]); + testing_triangle = (LineartTriangle *)atomic_load_z((size_t *)&ba->linked_triangles[i]); } while (UNLIKELY(testing_triangle == NULL)); tt = (LineartTriangleThread *)testing_triangle; @@ -3295,7 +3296,7 @@ static void lineart_triangle_intersect_in_bounding_area(LineartTriangle *tri, /* If we do need to compute intersection, then finally do it. */ - float iv1[3], iv2[3]; + double iv1[3], iv2[3]; if (lineart_triangle_intersect_math(tri, testing_triangle, iv1, iv2)) { lineart_add_isec_thread(th, iv1, iv2, tri, testing_triangle); } @@ -3789,13 +3790,16 @@ static void lineart_main_bounding_areas_connect_post(LineartRenderBuffer *rb) } /** - * Subdivide a tile after one tile contains too many triangles. + * Subdivide a tile after one tile contains too many triangles, then re-link triangles into all the + * child tiles. */ static void lineart_bounding_area_split(LineartRenderBuffer *rb, LineartBoundingArea *root, int recursive_level) { - root->child = lineart_mem_acquire_thread(&rb->render_data_pool, sizeof(LineartBoundingArea) * 4); + + LineartBoundingArea *ba = lineart_mem_acquire_thread(&rb->render_data_pool, + sizeof(LineartBoundingArea) * 4); LineartTriangle *tri; LineartBoundingArea *ba = root->child; @@ -3840,7 +3844,7 @@ static void lineart_bounding_area_split(LineartRenderBuffer *rb, for (int i = 0; i < root->triangle_count; i++) { do { - tri = (LineartTriangle *)lineart_atomic_load(&root->linked_triangles[i]); + tri = (LineartTriangle *)atomic_load_z((size_t *)&root->linked_triangles[i]); /* Need to wait for worker threads to fill in the last few triangles. */ } while (UNLIKELY(tri == NULL)); double b[4]; @@ -3848,6 +3852,9 @@ static void lineart_bounding_area_split(LineartRenderBuffer *rb, b[1] = MAX3(tri->v[0]->fbcoord[0], tri->v[1]->fbcoord[0], tri->v[2]->fbcoord[0]); b[2] = MAX3(tri->v[0]->fbcoord[1], tri->v[1]->fbcoord[1], tri->v[2]->fbcoord[1]); b[3] = MIN3(tri->v[0]->fbcoord[1], tri->v[1]->fbcoord[1], tri->v[2]->fbcoord[1]); + + /* Re-link triangles into child tiles, not doing intersection lines during this because this + * batch of triangles are all tested with each other for intersecctions. */ if (LRT_BOUND_AREA_CROSSES(b, &ba[0].l)) { lineart_bounding_area_link_triangle_cas( rb, &ba[0], tri, b, 0, recursive_level + 1, false, NULL); @@ -3865,6 +3872,10 @@ static void lineart_bounding_area_split(LineartRenderBuffer *rb, rb, &ba[3], tri, b, 0, recursive_level + 1, false, NULL); } } + + /* At this point the child tiles are fully initialized and it's safe for new triangles to be + * inserted, so assign root->child for #lineart_bounding_area_link_triangle_cas to use. */ + root->child = ba; } static bool lineart_bounding_area_edge_intersect(LineartRenderBuffer *UNUSED(fb), @@ -3945,8 +3956,17 @@ static bool lineart_bounding_area_triangle_intersect(LineartRenderBuffer *fb, } /** - * 1) Link triangles with bounding areas for later occlusion test. - * 2) Test triangles with existing(added previously) triangles for intersection lines. + * This function does two things: + * + * 1) Builds a quad-tree under rb->InitialBoundingAreas to achieve good geometry separation for + * fast overlapping test between triangles and lines. This acceleration structure makes the + * occlusion stage much faster. + * + * 2) Test triangles with other triangles that are previously linked into each tile + * (#LineartBoundingArea) for intersection lines. When splitting the tile into 4 children and + * re-linking triangles into the child tiles, intersections are inhibited so we don't get + * duplicated intersection lines. + * */ static void lineart_bounding_area_link_triangle_cas(LineartRenderBuffer *rb, LineartBoundingArea *root_ba, @@ -3964,9 +3984,8 @@ static void lineart_bounding_area_link_triangle_cas(LineartRenderBuffer *rb, LineartBoundingArea *old_ba = root_ba; if (old_ba->child) { - /* Wait for splitting thread to fully initialize child tiles. */ - BLI_spin_lock(&old_ba->lock); - BLI_spin_unlock(&old_ba->lock); + /* If old_ba->child is not NULL, then tile splitting is fully finished, safe to directly insert + * into child tiles. */ double *B1 = LRUB; double b[4]; if (!LRUB) { @@ -3976,7 +3995,7 @@ static void lineart_bounding_area_link_triangle_cas(LineartRenderBuffer *rb, b[3] = MIN3(tri->v[0]->fbcoord[1], tri->v[1]->fbcoord[1], tri->v[2]->fbcoord[1]); B1 = b; } - /* continue adding into the child */ + /* Continue adding into the child. */ for (int iba = 0; iba < 4; iba++) { if (LRT_BOUND_AREA_CROSSES(B1, &old_ba->child[iba].l)) { lineart_bounding_area_link_triangle_cas( @@ -3990,20 +4009,27 @@ static void lineart_bounding_area_link_triangle_cas(LineartRenderBuffer *rb, uint32_t old_tri_index = old_ba->triangle_count; uint32_t new_tri_index = old_tri_index + 1; uint32_t max_triangles = old_ba->max_triangle_count; + + /* If there are still space left in this tile for insertion. */ if (old_tri_index < max_triangles) { + + /* Use atomic compare and swap to get a index, we can't use atomic increment here because it + * could overflow the index when multiple threads are incrementing. */ uint32_t success_old_index = atomic_cas_uint32( &old_ba->triangle_count, old_tri_index, new_tri_index); + if (success_old_index != old_tri_index) { /* Too bad, other threads grabbed this index, we retry. */ continue; } + /* Successfully grabbed a viable index to insert the triangle into. * Insert into [old_tri_index] for correct array offset starting from [0]. */ - lineart_atomic_store(&old_ba->linked_triangles[old_tri_index], tri); + atomic_store_z((size_t *)&old_ba->linked_triangles[old_tri_index], (size_t)tri); /* Do intersections in place. */ - if (rb->use_intersections) { - lineart_triangle_intersect_in_bounding_area(tri, old_ba, th, old_tri_index - 1); + if (do_intersection && rb->use_intersections) { + lineart_triangle_intersect_in_bounding_area(tri, old_ba, th, old_tri_index); } break; } @@ -4389,7 +4415,8 @@ static void lineart_create_edges_from_isec_data(LineartIsecData *d) } /** - * Sequentially add triangles into render buffer. This also does intersection along the way. + * Sequentially add triangles into render buffer, intersection lines between those triangles will + * also be computed at the same time. */ static void lineart_main_add_triangles(LineartRenderBuffer *rb) { @@ -4398,6 +4425,8 @@ static void lineart_main_add_triangles(LineartRenderBuffer *rb) t_start = PIL_check_seconds_timer(); } + /* Initialize per-thread data for thread task scheduling information and storing intersection + * results. */ LineartIsecData d = {0}; lineart_init_isec_thread(&d, rb, rb->thread_count); @@ -4408,13 +4437,14 @@ static void lineart_main_add_triangles(LineartRenderBuffer *rb) BLI_task_pool_work_and_wait(tp); BLI_task_pool_free(tp); + /* Create actual lineart edges from intersection results. */ lineart_create_edges_from_isec_data(&d); lineart_destroy_isec_thread(&d); if (G.debug_value == 4000) { double t_elapsed = PIL_check_seconds_timer() - t_start; - printf("Line art intersection time: %lf\n", t_elapsed); + printf("Line art intersection time: %f\n", t_elapsed); } } diff --git a/source/blender/gpencil_modifiers/intern/lineart/lineart_intern.h b/source/blender/gpencil_modifiers/intern/lineart/lineart_intern.h index 6ecd7a724d2..65ba3a003f1 100644 --- a/source/blender/gpencil_modifiers/intern/lineart/lineart_intern.h +++ b/source/blender/gpencil_modifiers/intern/lineart/lineart_intern.h @@ -84,7 +84,8 @@ void lineart_count_and_print_render_buffer_memory(struct LineartRenderBuffer *rb #define LRT_BOUND_AREA_CROSSES(b1, b2) \ ((b1)[0] < (b2)[1] && (b1)[1] > (b2)[0] && (b1)[3] < (b2)[2] && (b1)[2] > (b2)[3]) -/* Initial bounding area row/column count */ +/* Initial bounding area row/column count, setting 10 is tested to be realitvely optimal for the + * performance under current CAS algorithm. */ #define LRT_BA_ROWS 10 #ifdef __cplusplus -- cgit v1.2.3