diff options
author | YimingWu <xp8110@outlook.com> | 2022-05-19 13:56:50 +0300 |
---|---|---|
committer | YimingWu <xp8110@outlook.com> | 2022-05-23 19:28:52 +0300 |
commit | 14a5a91e0e033d712134c112a4778b495bd73ba1 (patch) | |
tree | 69171257a3761fc273490306a5a1fe53d4ba87c7 /source/blender/gpencil_modifiers | |
parent | 54f357ed2ab1bfdbfd46ab30690f2a75f75ac478 (diff) |
LineArt: Use CAS for add_triangles.
Using the atomic "compare and swap" method in add_triangles stage
dramatically speeds up the add_triangles call and significantly reduced
the overall calculation time. This is currently the fastest method we
have experimented with so far.
Reviewed By: Sebastian Parborg (zeddb)
Differential Revision: https://developer.blender.org/D14953
Diffstat (limited to 'source/blender/gpencil_modifiers')
4 files changed, 624 insertions, 358 deletions
diff --git a/source/blender/gpencil_modifiers/CMakeLists.txt b/source/blender/gpencil_modifiers/CMakeLists.txt index 69fc26c99e9..c45209661f6 100644 --- a/source/blender/gpencil_modifiers/CMakeLists.txt +++ b/source/blender/gpencil_modifiers/CMakeLists.txt @@ -17,6 +17,7 @@ set(INC ../windowmanager ../../../intern/eigen ../../../intern/guardedalloc + ../../../intern/atomic # dna_type_offsets.h in BLO_read_write.h ${CMAKE_BINARY_DIR}/source/blender/makesdna/intern diff --git a/source/blender/gpencil_modifiers/intern/lineart/MOD_lineart.h b/source/blender/gpencil_modifiers/intern/lineart/MOD_lineart.h index ad3e1b5d7f2..06370cfb283 100644 --- a/source/blender/gpencil_modifiers/intern/lineart/MOD_lineart.h +++ b/source/blender/gpencil_modifiers/intern/lineart/MOD_lineart.h @@ -236,6 +236,9 @@ typedef struct LineartRenderBuffer { ListBase line_buffer_pointers; ListBase triangle_buffer_pointers; + LineartElementLinkNode *isect_scheduled_up_to; + int isect_scheduled_up_to_index; + /** This one's memory is not from main pool and is free()ed after culling stage. */ ListBase triangle_adjacent_pointers; @@ -429,15 +432,19 @@ typedef struct LineartBoundingArea { /** 1,2,3,4 quadrant */ struct LineartBoundingArea *child; + SpinLock lock; + ListBase lp; ListBase rp; ListBase up; ListBase bp; - uint16_t triangle_count; - uint16_t max_triangle_count; - uint16_t line_count; - uint16_t max_line_count; + /* Need uint32 for the atomic cas instruction. */ + uint32_t triangle_count; + uint32_t max_triangle_count; + uint32_t line_count; + uint32_t max_line_count; + uint32_t user_count; /* Use array for speeding up multiple accesses. */ struct LineartTriangle **linked_triangles; diff --git a/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.c b/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.c index fe1bbc3fc23..e6fce5b799d 100644 --- a/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.c +++ b/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.c @@ -52,6 +52,38 @@ #include "lineart_intern.h" +#include "atomic_ops.h" + +typedef struct LineartIsecSingle { + float v1[3], v2[3]; + LineartTriangle *tri1, *tri2; +} LineartIsecSingle; + +typedef struct LineartIsecThread { + int thread_id; + + /* Scheduled work range. */ + LineartElementLinkNode *pending_from; + LineartElementLinkNode *pending_to; + int index_from; + int index_to; + + /* Thread intersection result data. */ + LineartIsecSingle *array; + int current; + int max; + int count_test; + + /* For individual thread reference.*/ + LineartRenderBuffer *rb; +} LineartIsecThread; + +typedef struct LineartIsecData { + LineartRenderBuffer *rb; + LineartIsecThread *threads; + int thread_count; +} LineartIsecData; + static LineartBoundingArea *lineart_edge_first_bounding_area(LineartRenderBuffer *rb, LineartEdge *e); @@ -76,14 +108,6 @@ static bool lineart_get_edge_bounding_areas(LineartRenderBuffer *rb, int *colbegin, int *colend); -static void lineart_bounding_area_link_triangle(LineartRenderBuffer *rb, - LineartBoundingArea *root_ba, - LineartTriangle *tri, - double *LRUB, - int recursive, - int recursive_level, - bool do_intersection); - static bool lineart_triangle_edge_image_space_occlusion(SpinLock *spl, const LineartTriangle *tri, const LineartEdge *e, @@ -99,6 +123,19 @@ static bool lineart_triangle_edge_image_space_occlusion(SpinLock *spl, static void lineart_add_edge_to_array(LineartPendingEdges *pe, LineartEdge *e); +static void lineart_bounding_area_link_triangle_cas(LineartRenderBuffer *rb, + LineartBoundingArea *root_ba, + LineartTriangle *tri, + double *LRUB, + int recursive, + int recursive_level, + bool do_intersection, + struct LineartIsecThread *th); + +static void lineart_free_bounding_area_memory(LineartBoundingArea *ba, bool recursive); + +static void lineart_free_bounding_area_memories(LineartRenderBuffer *rb); + static LineartCache *lineart_init_cache(void); static void lineart_discard_segment(LineartRenderBuffer *rb, LineartEdgeSegment *es) @@ -312,29 +349,17 @@ BLI_INLINE bool lineart_occlusion_is_adjacent_intersection(LineartEdge *e, Linea (v2->base.flag && v2->intersecting_with == tri)); } -static void lineart_bounding_area_triangle_add(LineartRenderBuffer *rb, - LineartBoundingArea *ba, - LineartTriangle *tri) -{ /* In case of too many triangles concentrating in one point, do not add anymore, these triangles - * will be either narrower than a single pixel, or will still be added into the list of other - * less dense areas. */ - if (ba->triangle_count >= 65535) { - return; - } - if (ba->triangle_count >= ba->max_triangle_count) { - LineartTriangle **new_array = lineart_mem_acquire( - &rb->render_data_pool, sizeof(LineartTriangle *) * ba->max_triangle_count * 2); - memcpy(new_array, ba->linked_triangles, sizeof(LineartTriangle *) * ba->max_triangle_count); - ba->max_triangle_count *= 2; - ba->linked_triangles = new_array; - } - ba->linked_triangles[ba->triangle_count] = tri; - ba->triangle_count++; +static void lineart_bounding_area_triangle_reallocate(LineartBoundingArea *ba) +{ + LineartTriangle **new_array = MEM_callocN(sizeof(LineartTriangle *) * ba->max_triangle_count * 2, + "new ba_triangle_array"); + memcpy(new_array, ba->linked_triangles, sizeof(LineartTriangle *) * ba->max_triangle_count); + ba->max_triangle_count *= 2; + MEM_freeN(ba->linked_triangles); + ba->linked_triangles = new_array; } -static void lineart_bounding_area_line_add(LineartRenderBuffer *rb, - LineartBoundingArea *ba, - LineartEdge *e) +static void lineart_bounding_area_line_add(LineartBoundingArea *ba, LineartEdge *e) { /* In case of too many lines concentrating in one point, do not add anymore, these lines will * be either shorter than a single pixel, or will still be added into the list of other less @@ -343,10 +368,11 @@ static void lineart_bounding_area_line_add(LineartRenderBuffer *rb, return; } if (ba->line_count >= ba->max_line_count) { - LineartEdge **new_array = lineart_mem_acquire(&rb->render_data_pool, - sizeof(LineartEdge *) * ba->max_line_count * 2); + LineartEdge **new_array = MEM_mallocN(sizeof(LineartEdge *) * ba->max_line_count * 2, + "new ba_line_array"); memcpy(new_array, ba->linked_lines, sizeof(LineartEdge *) * ba->max_line_count); ba->max_line_count *= 2; + MEM_freeN(ba->linked_lines); ba->linked_lines = new_array; } ba->linked_lines[ba->line_count] = e; @@ -1679,6 +1705,7 @@ static void lineart_join_loose_edge_arr(LooseEdgeData *loose_data, LooseEdgeData sizeof(MEdge *) * to_be_joined->loose_count); loose_data->loose_count += to_be_joined->loose_count; MEM_freeN(to_be_joined->loose_array); + to_be_joined->loose_array = NULL; } static void lineart_add_loose_edge(LooseEdgeData *loose_data, MEdge *e) @@ -2099,7 +2126,7 @@ static void lineart_geometry_object_load(LineartObjectInfo *ob_info, LineartRend elem_link_node->element_count = allocate_la_e; elem_link_node->object_ref = orig_ob; - // Start of the edge/seg arr + /* Start of the edge/seg arr */ LineartEdge *la_edge; LineartEdgeSegment *la_seg; la_edge = la_edge_arr; @@ -2987,60 +3014,23 @@ static LineartVert *lineart_triangle_share_point(const LineartTriangle *l, return NULL; } -/** - * To save time and prevent overlapping lines when computing intersection lines. - */ -static bool lineart_vert_already_intersected_2v(LineartVertIntersection *vt, - LineartVertIntersection *v1, - LineartVertIntersection *v2) -{ - return ((vt->isec1 == v1->base.index && vt->isec2 == v2->base.index) || - (vt->isec2 == v2->base.index && vt->isec1 == v1->base.index)); -} - -static void lineart_vert_set_intersection_2v(LineartVert *vt, LineartVert *v1, LineartVert *v2) -{ - LineartVertIntersection *irv = (LineartVertIntersection *)vt; - irv->isec1 = v1->index; - irv->isec2 = v2->index; -} - -/** - * This tests a triangle against a virtual line represented by `v1---v2`. - * The vertices returned after repeated calls to this function - * is then used to create a triangle/triangle intersection line. - */ -static LineartVert *lineart_triangle_2v_intersection_test(LineartRenderBuffer *rb, - LineartVert *v1, - LineartVert *v2, - LineartTriangle *tri, - LineartTriangle *testing, - LineartVert *last) +static bool lineart_triangle_2v_intersection_math( + LineartVert *v1, LineartVert *v2, LineartTriangle *t2, double *last, double *rv) { double Lv[3]; double Rv[3]; double dot_l, dot_r; - LineartVert *result; double gloc[3]; LineartVert *l = v1, *r = v2; - for (LinkNode *ln = (void *)testing->intersecting_verts; ln; ln = ln->next) { - LineartVertIntersection *vt = ln->link; - if (vt->intersecting_with == tri && - lineart_vert_already_intersected_2v( - vt, (LineartVertIntersection *)l, (LineartVertIntersection *)r)) { - return (LineartVert *)vt; - } - } - - sub_v3_v3v3_db(Lv, l->gloc, testing->v[0]->gloc); - sub_v3_v3v3_db(Rv, r->gloc, testing->v[0]->gloc); + sub_v3_v3v3_db(Lv, l->gloc, t2->v[0]->gloc); + sub_v3_v3v3_db(Rv, r->gloc, t2->v[0]->gloc); - dot_l = dot_v3v3_db(Lv, testing->gn); - dot_r = dot_v3v3_db(Rv, testing->gn); + dot_l = dot_v3v3_db(Lv, t2->gn); + dot_r = dot_v3v3_db(Rv, t2->gn); if (dot_l * dot_r > 0 || (!dot_l && !dot_r)) { - return 0; + return false; } dot_l = fabs(dot_l); @@ -3050,184 +3040,204 @@ static LineartVert *lineart_triangle_2v_intersection_test(LineartRenderBuffer *r /* Due to precision issue, we might end up with the same point as the one we already detected. */ - if (last && LRT_DOUBLE_CLOSE_ENOUGH(last->gloc[0], gloc[0]) && - LRT_DOUBLE_CLOSE_ENOUGH(last->gloc[1], gloc[1]) && - LRT_DOUBLE_CLOSE_ENOUGH(last->gloc[2], gloc[2])) { - return NULL; + if (last && LRT_DOUBLE_CLOSE_ENOUGH(last[0], gloc[0]) && + LRT_DOUBLE_CLOSE_ENOUGH(last[1], gloc[1]) && LRT_DOUBLE_CLOSE_ENOUGH(last[2], gloc[2])) { + return false; } - if (!(lineart_point_inside_triangle3d( - gloc, testing->v[0]->gloc, testing->v[1]->gloc, testing->v[2]->gloc))) { - return NULL; + if (!(lineart_point_inside_triangle3d(gloc, t2->v[0]->gloc, t2->v[1]->gloc, t2->v[2]->gloc))) { + return false; } - /* This is an intersection vert, the size is bigger than LineartVert, - * allocated separately. */ - result = lineart_mem_acquire(&rb->render_data_pool, sizeof(LineartVertIntersection)); + copy_v3_v3_db(rv, gloc); - /* Indicate the data structure difference. */ - result->flag = LRT_VERT_HAS_INTERSECTION_DATA; - - copy_v3_v3_db(result->gloc, gloc); - - lineart_prepend_pool(&testing->intersecting_verts, &rb->render_data_pool, result); - - return result; + return true; } -/** - * Test if two triangles intersect. Generates one intersection line if the check succeeds. - */ -static LineartEdge *lineart_triangle_intersect(LineartRenderBuffer *rb, - LineartTriangle *tri, - LineartTriangle *testing) +static bool lineart_triangle_intersect_math(LineartTriangle *tri, + LineartTriangle *t2, + double *v1, + double *v2) { - LineartVert *v1 = 0, *v2 = 0; - LineartVert **next = &v1; - LineartEdge *result; - LineartVert *E0T = 0; - LineartVert *E1T = 0; - LineartVert *E2T = 0; - LineartVert *TE0 = 0; - LineartVert *TE1 = 0; - LineartVert *TE2 = 0; + double *next = v1, *last = NULL; LineartVert *sv1, *sv2; - double cl[3]; - double ZMin, ZMax; - ZMax = rb->far_clip; - ZMin = rb->near_clip; - copy_v3_v3_db(cl, rb->camera_pos); - LineartVert *share = lineart_triangle_share_point(testing, tri); + LineartVert *share = lineart_triangle_share_point(t2, tri); if (share) { /* If triangles have sharing points like `abc` and `acd`, then we only need to detect `bc` * against `acd` or `cd` against `abc`. */ - LineartVert *new_share; lineart_triangle_get_other_verts(tri, share, &sv1, &sv2); - v1 = new_share = lineart_mem_acquire(&rb->render_data_pool, (sizeof(LineartVertIntersection))); - - new_share->flag = LRT_VERT_HAS_INTERSECTION_DATA; + copy_v3_v3_db(v1, share->gloc); - copy_v3_v3_db(new_share->gloc, share->gloc); - - v2 = lineart_triangle_2v_intersection_test(rb, sv1, sv2, tri, testing, 0); - - if (v2 == NULL) { - lineart_triangle_get_other_verts(testing, share, &sv1, &sv2); - v2 = lineart_triangle_2v_intersection_test(rb, sv1, sv2, testing, tri, 0); - if (v2 == NULL) { - return 0; + if (!lineart_triangle_2v_intersection_math(sv1, sv2, t2, 0, v2)) { + lineart_triangle_get_other_verts(t2, share, &sv1, &sv2); + if (lineart_triangle_2v_intersection_math(sv1, sv2, tri, 0, v2)) { + return true; } - lineart_prepend_pool(&testing->intersecting_verts, &rb->render_data_pool, new_share); - } - else { - lineart_prepend_pool(&tri->intersecting_verts, &rb->render_data_pool, new_share); } } else { /* If not sharing any points, then we need to try all the possibilities. */ - E0T = lineart_triangle_2v_intersection_test(rb, tri->v[0], tri->v[1], tri, testing, 0); - if (E0T && (!(*next))) { - (*next) = E0T; - lineart_vert_set_intersection_2v((*next), tri->v[0], tri->v[1]); - next = &v2; - } - E1T = lineart_triangle_2v_intersection_test(rb, tri->v[1], tri->v[2], tri, testing, v1); - if (E1T && (!(*next))) { - (*next) = E1T; - lineart_vert_set_intersection_2v((*next), tri->v[1], tri->v[2]); - next = &v2; + if (lineart_triangle_2v_intersection_math(tri->v[0], tri->v[1], t2, 0, v1)) { + next = v2; + last = v1; } - if (!(*next)) { - E2T = lineart_triangle_2v_intersection_test(rb, tri->v[2], tri->v[0], tri, testing, v1); + + if (lineart_triangle_2v_intersection_math(tri->v[1], tri->v[2], t2, last, next)) { + if (last) { + return true; + } + next = v2; + last = v1; } - if (E2T && (!(*next))) { - (*next) = E2T; - lineart_vert_set_intersection_2v((*next), tri->v[2], tri->v[0]); - next = &v2; + if (lineart_triangle_2v_intersection_math(tri->v[2], tri->v[0], t2, last, next)) { + if (last) { + return true; + } + next = v2; + last = v1; } - if (!(*next)) { - TE0 = lineart_triangle_2v_intersection_test( - rb, testing->v[0], testing->v[1], testing, tri, v1); - } - if (TE0 && (!(*next))) { - (*next) = TE0; - lineart_vert_set_intersection_2v((*next), testing->v[0], testing->v[1]); - next = &v2; + if (lineart_triangle_2v_intersection_math(t2->v[0], t2->v[1], tri, last, next)) { + if (last) { + return true; + } + next = v2; + last = v1; } - if (!(*next)) { - TE1 = lineart_triangle_2v_intersection_test( - rb, testing->v[1], testing->v[2], testing, tri, v1); + if (lineart_triangle_2v_intersection_math(t2->v[1], t2->v[2], tri, last, next)) { + if (last) { + return true; + } + next = v2; + last = v1; } - if (TE1 && (!(*next))) { - (*next) = TE1; - lineart_vert_set_intersection_2v((*next), testing->v[1], testing->v[2]); - next = &v2; + if (lineart_triangle_2v_intersection_math(t2->v[2], t2->v[0], tri, last, next)) { + if (last) { + return true; + } + next = v2; + last = v1; } - if (!(*next)) { - TE2 = lineart_triangle_2v_intersection_test( - rb, testing->v[2], testing->v[0], testing, tri, v1); + } + return false; +} + +static void lineart_add_isec_thread(LineartIsecThread *th, + const double *v1, + const double *v2, + LineartTriangle *tri1, + LineartTriangle *tri2) +{ + if (th->current == th->max) { + + LineartIsecSingle *new_array = MEM_mallocN(sizeof(LineartIsecSingle) * th->max * 2, + "LineartIsecSingle"); + memcpy(new_array, th->array, sizeof(LineartIsecSingle) * th->max); + th->max *= 2; + MEM_freeN(th->array); + th->array = new_array; + } + LineartIsecSingle *is = &th->array[th->current]; + copy_v3fl_v3db(is->v1, v1); + copy_v3fl_v3db(is->v2, v2); + is->tri1 = tri1; + is->tri2 = tri2; + th->current++; +} + +#define LRT_ISECT_TRIANGLE_PER_THREAD 4096; + +static bool lineart_schedule_new_triangle_task(LineartIsecThread *th) +{ + LineartRenderBuffer *rb = th->rb; + int remaining = LRT_ISECT_TRIANGLE_PER_THREAD; + + BLI_spin_lock(&rb->lock_task); + LineartElementLinkNode *eln = rb->isect_scheduled_up_to; + + if (!eln) { + BLI_spin_unlock(&rb->lock_task); + return false; + } + + th->pending_from = eln; + th->index_from = rb->isect_scheduled_up_to_index; + + while (remaining > 0 && eln) { + int remaining_this_eln = eln->element_count - rb->isect_scheduled_up_to_index; + int added_count = MIN2(remaining, remaining_this_eln); + remaining -= added_count; + if (remaining || added_count == remaining_this_eln) { + eln = eln->next; + rb->isect_scheduled_up_to = eln; + rb->isect_scheduled_up_to_index = 0; } - if (TE2 && (!(*next))) { - (*next) = TE2; - lineart_vert_set_intersection_2v((*next), testing->v[2], testing->v[0]); - next = &v2; + else { + rb->isect_scheduled_up_to_index += added_count; } + } - if (!(*next)) { - return 0; - } + th->pending_to = eln ? eln : rb->triangle_buffer_pointers.last; + th->index_to = rb->isect_scheduled_up_to_index; + + BLI_spin_unlock(&rb->lock_task); + + return true; +} + +/* This function initializes two things: + * 1) Triangle array scheduling info, for each worker thread to get it's chunk from the scheduler. + * 2) Per-thread intersection result array. Does not store actual #LineartEdge, these results will + * be finalized by #lineart_create_edges_from_isec_data + */ +static void lineart_init_isec_thread(LineartIsecData *d, LineartRenderBuffer *rb, int thread_count) +{ + d->threads = MEM_callocN(sizeof(LineartIsecThread) * thread_count, "LineartIsecThread arr"); + d->rb = rb; + d->thread_count = thread_count; + + rb->isect_scheduled_up_to = rb->triangle_buffer_pointers.first; + rb->isect_scheduled_up_to_index = 0; + + for (int i = 0; i < thread_count; i++) { + LineartIsecThread *it = &d->threads[i]; + it->array = MEM_mallocN(sizeof(LineartIsecSingle) * 100, "LineartIsecSingle arr"); + it->max = 100; + it->current = 0; + it->thread_id = i; } - /* The intersection line has been generated only in geometry space, so we need to transform - * them as well. */ - mul_v4_m4v3_db(v1->fbcoord, rb->view_projection, v1->gloc); - mul_v4_m4v3_db(v2->fbcoord, rb->view_projection, v2->gloc); - if (rb->cam_is_persp) { - mul_v3db_db(v1->fbcoord, (1 / v1->fbcoord[3])); - mul_v3db_db(v2->fbcoord, (1 / v2->fbcoord[3])); - } - v1->fbcoord[0] -= rb->shift_x * 2; - v1->fbcoord[1] -= rb->shift_y * 2; - v2->fbcoord[0] -= rb->shift_x * 2; - v2->fbcoord[1] -= rb->shift_y * 2; - - /* This z transformation is not the same as the rest of the part, because the data don't go - * through normal perspective division calls in the pipeline, but this way the 3D result and - * occlusion on the generated line is correct, and we don't really use 2D for viewport stroke - * generation anyway. */ - v1->fbcoord[2] = ZMin * ZMax / (ZMax - fabs(v1->fbcoord[2]) * (ZMax - ZMin)); - v2->fbcoord[2] = ZMin * ZMax / (ZMax - fabs(v2->fbcoord[2]) * (ZMax - ZMin)); - - ((LineartVertIntersection *)v1)->intersecting_with = tri; - ((LineartVertIntersection *)v2)->intersecting_with = testing; - - result = lineart_mem_acquire(&rb->render_data_pool, sizeof(LineartEdge)); - result->v1 = v1; - result->v2 = v2; - result->t1 = tri; - result->t2 = testing; - - LineartEdgeSegment *es = lineart_mem_acquire(&rb->render_data_pool, sizeof(LineartEdgeSegment)); - BLI_addtail(&result->segments, es); - /* Don't need to OR flags right now, just a type mark. */ - result->flags = LRT_EDGE_FLAG_INTERSECTION; - result->intersection_mask = (tri->intersection_mask | testing->intersection_mask); - - lineart_add_edge_to_array(&rb->pending_edges, result); - - return result; +#define OBJ_PER_ISEC_THREAD 8 /* Largely arbitrary, no need to be big. */ + for (int i = 0; i < thread_count; i++) { + LineartIsecThread *it = &d->threads[i]; + it->rb = rb; + } +#undef OBJ_PER_ISEC_THREAD +} + +static void lineart_destroy_isec_thread(LineartIsecData *d) +{ + for (int i = 0; i < d->thread_count; i++) { + LineartIsecThread *it = &d->threads[i]; + MEM_freeN(it->array); + } + MEM_freeN(d->threads); } -static void lineart_triangle_intersect_in_bounding_area(LineartRenderBuffer *rb, - LineartTriangle *tri, - LineartBoundingArea *ba) +static void lineart_triangle_intersect_in_bounding_area(LineartTriangle *tri, + LineartBoundingArea *ba, + LineartIsecThread *th, + int up_to) { + if (!th) { + return; + } + /* Testing_triangle->testing[0] is used to store pairing triangle reference. * See definition of LineartTriangleThread for more info. */ LineartTriangle *testing_triangle; @@ -3235,24 +3245,18 @@ static void lineart_triangle_intersect_in_bounding_area(LineartRenderBuffer *rb, double *G0 = tri->v[0]->gloc, *G1 = tri->v[1]->gloc, *G2 = tri->v[2]->gloc; - /* If this is not the smallest subdiv bounding area. */ - if (ba->child) { - lineart_triangle_intersect_in_bounding_area(rb, tri, &ba->child[0]); - lineart_triangle_intersect_in_bounding_area(rb, tri, &ba->child[1]); - lineart_triangle_intersect_in_bounding_area(rb, tri, &ba->child[2]); - lineart_triangle_intersect_in_bounding_area(rb, tri, &ba->child[3]); - return; - } - /* If this _is_ the smallest subdiv bounding area, then do the intersections there. */ - for (int i = 0; i < ba->triangle_count; i++) { - testing_triangle = ba->linked_triangles[i]; + for (int i = 0; i < up_to; i++) { + do { + testing_triangle = (LineartTriangle *)lineart_atomic_load(&ba->linked_triangles[i]); + } while (UNLIKELY(testing_triangle == NULL)); + tt = (LineartTriangleThread *)testing_triangle; - if (testing_triangle == tri || tt->testing_e[0] == (LineartEdge *)tri) { + if (testing_triangle == tri || tt->testing_e[th->thread_id] == (LineartEdge *)tri) { continue; } - tt->testing_e[0] = (LineartEdge *)tri; + tt->testing_e[th->thread_id] = (LineartEdge *)tri; if ((testing_triangle->flags & LRT_TRIANGLE_NO_INTERSECTION) || ((testing_triangle->flags & LRT_TRIANGLE_INTERSECTION_ONLY) && @@ -3275,7 +3279,11 @@ static void lineart_triangle_intersect_in_bounding_area(LineartRenderBuffer *rb, } /* If we do need to compute intersection, then finally do it. */ - lineart_triangle_intersect(rb, tri, testing_triangle); + + double iv1[3], iv2[3]; + if (lineart_triangle_intersect_math(tri, testing_triangle, iv1, iv2)) { + lineart_add_isec_thread(th, iv1, iv2, tri, testing_triangle); + } } } @@ -3322,6 +3330,8 @@ static void lineart_destroy_render_data(LineartRenderBuffer *rb) MEM_freeN(rb->pending_edges.array); } + lineart_free_bounding_area_memories(rb); + lineart_mem_destroy(&rb->render_data_pool); } @@ -3460,14 +3470,13 @@ static LineartRenderBuffer *lineart_create_render_buffer(Scene *scene, BLI_spin_init(&rb->lock_cuts); BLI_spin_init(&rb->render_data_pool.lock_mem); + rb->thread_count = BKE_render_num_threads(&scene->r); + return rb; } -static int lineart_triangle_size_get(const Scene *scene, LineartRenderBuffer *rb) +static int lineart_triangle_size_get(LineartRenderBuffer *rb) { - if (rb->thread_count == 0) { - rb->thread_count = BKE_render_num_threads(&scene->r); - } return sizeof(LineartTriangle) + (sizeof(LineartEdge *) * (rb->thread_count)); } @@ -3481,6 +3490,14 @@ static void lineart_main_bounding_area_make_initial(LineartRenderBuffer *rb) int row, col; LineartBoundingArea *ba; + /* Always make sure the shortest side has at least LRT_BA_ROWS tiles. */ + if (rb->w > rb->h) { + sp_w = sp_h * rb->w / rb->h; + } + else { + sp_h = sp_w * rb->h / rb->w; + } + /* Because NDC (Normalized Device Coordinates) range is (-1,1), * so the span for each initial tile is double of that in the (0,1) range. */ double span_w = (double)1 / sp_w * 2.0; @@ -3498,7 +3515,7 @@ static void lineart_main_bounding_area_make_initial(LineartRenderBuffer *rb) /* Initialize tiles. */ for (row = 0; row < sp_h; row++) { for (col = 0; col < sp_w; col++) { - ba = &rb->initial_bounding_areas[row * LRT_BA_ROWS + col]; + ba = &rb->initial_bounding_areas[row * rb->tile_count_x + col]; /* Set the four direction limits. */ ba->l = span_w * col - 1.0; @@ -3512,34 +3529,12 @@ static void lineart_main_bounding_area_make_initial(LineartRenderBuffer *rb) /* Init linked_triangles array. */ ba->max_triangle_count = LRT_TILE_SPLITTING_TRIANGLE_LIMIT; ba->max_line_count = LRT_TILE_EDGE_COUNT_INITIAL; - ba->linked_triangles = lineart_mem_acquire( - &rb->render_data_pool, sizeof(LineartTriangle *) * ba->max_triangle_count); - ba->linked_lines = lineart_mem_acquire(&rb->render_data_pool, - sizeof(LineartEdge *) * ba->max_line_count); + ba->linked_triangles = MEM_callocN(sizeof(LineartTriangle *) * ba->max_triangle_count, + "ba_linked_triangles"); + ba->linked_lines = MEM_callocN(sizeof(LineartEdge *) * ba->max_line_count, + "ba_linked_lines"); - /* Link adjacent ones. */ - if (row) { - lineart_list_append_pointer_pool( - &ba->up, - &rb->render_data_pool, - &rb->initial_bounding_areas[(row - 1) * LRT_BA_ROWS + col]); - } - if (col) { - lineart_list_append_pointer_pool(&ba->lp, - &rb->render_data_pool, - &rb->initial_bounding_areas[row * LRT_BA_ROWS + col - 1]); - } - if (row != sp_h - 1) { - lineart_list_append_pointer_pool( - &ba->bp, - &rb->render_data_pool, - &rb->initial_bounding_areas[(row + 1) * LRT_BA_ROWS + col]); - } - if (col != sp_w - 1) { - lineart_list_append_pointer_pool(&ba->rp, - &rb->render_data_pool, - &rb->initial_bounding_areas[row * LRT_BA_ROWS + col + 1]); - } + BLI_spin_init(&ba->lock); } } } @@ -3687,16 +3682,68 @@ static void lineart_bounding_areas_connect_new(LineartRenderBuffer *rb, LineartB BLI_listbase_clear(&root->bp); } +static void lineart_bounding_areas_connect_recursive(LineartRenderBuffer *rb, + LineartBoundingArea *root) +{ + if (root->child) { + lineart_bounding_areas_connect_new(rb, root); + for (int i = 0; i < 4; i++) { + lineart_bounding_areas_connect_recursive(rb, &root->child[i]); + } + } +} + +static void lineart_main_bounding_areas_connect_post(LineartRenderBuffer *rb) +{ + int total_tile_initial = rb->tile_count_x * rb->tile_count_y; + int tiles_per_row = rb->tile_count_x; + + for (int row = 0; row < rb->tile_count_y; row++) { + for (int col = 0; col < rb->tile_count_x; col++) { + LineartBoundingArea *ba = &rb->initial_bounding_areas[row * tiles_per_row + col]; + /* Link adjacent ones. */ + if (row) { + lineart_list_append_pointer_pool( + &ba->up, + &rb->render_data_pool, + &rb->initial_bounding_areas[(row - 1) * tiles_per_row + col]); + } + if (col) { + lineart_list_append_pointer_pool( + &ba->lp, + &rb->render_data_pool, + &rb->initial_bounding_areas[row * tiles_per_row + col - 1]); + } + if (row != rb->tile_count_y - 1) { + lineart_list_append_pointer_pool( + &ba->bp, + &rb->render_data_pool, + &rb->initial_bounding_areas[(row + 1) * tiles_per_row + col]); + } + if (col != rb->tile_count_x - 1) { + lineart_list_append_pointer_pool( + &ba->rp, + &rb->render_data_pool, + &rb->initial_bounding_areas[row * tiles_per_row + col + 1]); + } + } + } + for (int i = 0; i < total_tile_initial; i++) { + lineart_bounding_areas_connect_recursive(rb, &rb->initial_bounding_areas[i]); + } +} + /** - * Subdivide a tile after one tile contains too many triangles. + * Subdivide a tile after one tile contains too many triangles, then re-link triangles into all the + * child tiles. */ static void lineart_bounding_area_split(LineartRenderBuffer *rb, LineartBoundingArea *root, int recursive_level) { - LineartBoundingArea *ba = lineart_mem_acquire(&rb->render_data_pool, - sizeof(LineartBoundingArea) * 4); + root->child = lineart_mem_acquire_thread(&rb->render_data_pool, sizeof(LineartBoundingArea) * 4); LineartTriangle *tri; + LineartBoundingArea *ba = root->child; ba[0].l = root->cx; ba[0].r = root->r; @@ -3726,39 +3773,45 @@ static void lineart_bounding_area_split(LineartRenderBuffer *rb, ba[3].cx = (ba[3].l + ba[3].r) / 2; ba[3].cy = (ba[3].u + ba[3].b) / 2; - root->child = ba; - - lineart_bounding_areas_connect_new(rb, root); - - /* Init linked_triangles array. */ + /* Init linked_triangles array and locks. */ for (int i = 0; i < 4; i++) { ba[i].max_triangle_count = LRT_TILE_SPLITTING_TRIANGLE_LIMIT; ba[i].max_line_count = LRT_TILE_EDGE_COUNT_INITIAL; - ba[i].linked_triangles = lineart_mem_acquire( - &rb->render_data_pool, sizeof(LineartTriangle *) * LRT_TILE_SPLITTING_TRIANGLE_LIMIT); - ba[i].linked_lines = lineart_mem_acquire(&rb->render_data_pool, - sizeof(LineartEdge *) * LRT_TILE_EDGE_COUNT_INITIAL); + ba[i].linked_triangles = MEM_callocN(sizeof(LineartTriangle *) * ba[i].max_triangle_count, + "ba_linked_triangles"); + ba[i].linked_lines = MEM_callocN(sizeof(LineartEdge *) * ba[i].max_line_count, + "ba_linked_lines"); + BLI_spin_init(&ba[i].lock); } for (int i = 0; i < root->triangle_count; i++) { - tri = root->linked_triangles[i]; - LineartBoundingArea *cba = root->child; + do { + tri = (LineartTriangle *)lineart_atomic_load(&root->linked_triangles[i]); + /* Need to wait for worker threads to fill in the last few triangles. */ + } while (UNLIKELY(tri == NULL)); double b[4]; b[0] = MIN3(tri->v[0]->fbcoord[0], tri->v[1]->fbcoord[0], tri->v[2]->fbcoord[0]); b[1] = MAX3(tri->v[0]->fbcoord[0], tri->v[1]->fbcoord[0], tri->v[2]->fbcoord[0]); b[2] = MAX3(tri->v[0]->fbcoord[1], tri->v[1]->fbcoord[1], tri->v[2]->fbcoord[1]); b[3] = MIN3(tri->v[0]->fbcoord[1], tri->v[1]->fbcoord[1], tri->v[2]->fbcoord[1]); - if (LRT_BOUND_AREA_CROSSES(b, &cba[0].l)) { - lineart_bounding_area_link_triangle(rb, &cba[0], tri, b, 0, recursive_level + 1, false); + + /* Re-link triangles into child tiles, not doing intersection lines during this because this + * batch of triangles are all tested with each other for intersecctions. */ + if (LRT_BOUND_AREA_CROSSES(b, &ba[0].l)) { + lineart_bounding_area_link_triangle_cas( + rb, &ba[0], tri, b, 0, recursive_level + 1, false, NULL); } - if (LRT_BOUND_AREA_CROSSES(b, &cba[1].l)) { - lineart_bounding_area_link_triangle(rb, &cba[1], tri, b, 0, recursive_level + 1, false); + if (LRT_BOUND_AREA_CROSSES(b, &ba[1].l)) { + lineart_bounding_area_link_triangle_cas( + rb, &ba[1], tri, b, 0, recursive_level + 1, false, NULL); } - if (LRT_BOUND_AREA_CROSSES(b, &cba[2].l)) { - lineart_bounding_area_link_triangle(rb, &cba[2], tri, b, 0, recursive_level + 1, false); + if (LRT_BOUND_AREA_CROSSES(b, &ba[2].l)) { + lineart_bounding_area_link_triangle_cas( + rb, &ba[2], tri, b, 0, recursive_level + 1, false, NULL); } - if (LRT_BOUND_AREA_CROSSES(b, &cba[3].l)) { - lineart_bounding_area_link_triangle(rb, &cba[3], tri, b, 0, recursive_level + 1, false); + if (LRT_BOUND_AREA_CROSSES(b, &ba[3].l)) { + lineart_bounding_area_link_triangle_cas( + rb, &ba[3], tri, b, 0, recursive_level + 1, false, NULL); } } @@ -3843,36 +3896,37 @@ static bool lineart_bounding_area_triangle_intersect(LineartRenderBuffer *fb, } /** - * 1) Link triangles with bounding areas for later occlusion test. - * 2) Test triangles with existing(added previously) triangles for intersection lines. + * This function does two things: + * + * 1) Builds a quad-tree under rb->InitialBoundingAreas to achieve good geometry separation for + * fast overlapping test between triangles and lines. This acceleration structure makes the + * occlusion stage much faster. + * + * 2) Test triangles with other triangles that are previously linked into each tile + * (#LineartBoundingArea) for intersection lines. When splitting the tile into 4 children and + * re-linking triangles into the child tiles, intersections are inhibited so we don't get + * duplicated intersection lines. + * */ -static void lineart_bounding_area_link_triangle(LineartRenderBuffer *rb, - LineartBoundingArea *root_ba, - LineartTriangle *tri, - double *LRUB, - int recursive, - int recursive_level, - bool do_intersection) +static void lineart_bounding_area_link_triangle_cas(LineartRenderBuffer *rb, + LineartBoundingArea *root_ba, + LineartTriangle *tri, + double *LRUB, + int recursive, + int recursive_level, + bool do_intersection, + struct LineartIsecThread *th) { if (!lineart_bounding_area_triangle_intersect(rb, tri, root_ba)) { return; } - if (root_ba->child == NULL) { - lineart_bounding_area_triangle_add(rb, root_ba, tri); - /* If splitting doesn't improve triangle separation, then shouldn't allow splitting anymore. - * Here we use recursive limit. This is especially useful in orthographic render, - * where a lot of faces could easily line up perfectly in image space, - * which can not be separated by simply slicing the image tile. */ - if (root_ba->triangle_count >= LRT_TILE_SPLITTING_TRIANGLE_LIMIT && recursive && - recursive_level < rb->tile_recursive_level) { - lineart_bounding_area_split(rb, root_ba, recursive_level); - } - if (recursive && do_intersection && rb->use_intersections) { - lineart_triangle_intersect_in_bounding_area(rb, tri, root_ba); - } - } - else { - LineartBoundingArea *ba = root_ba->child; + + LineartBoundingArea *old_ba = root_ba; + + if (old_ba->child) { + /* Wait for splitting thread to fully initialize child tiles. */ + BLI_spin_lock(&old_ba->lock); + BLI_spin_unlock(&old_ba->lock); double *B1 = LRUB; double b[4]; if (!LRUB) { @@ -3882,21 +3936,87 @@ static void lineart_bounding_area_link_triangle(LineartRenderBuffer *rb, b[3] = MIN3(tri->v[0]->fbcoord[1], tri->v[1]->fbcoord[1], tri->v[2]->fbcoord[1]); B1 = b; } - if (LRT_BOUND_AREA_CROSSES(B1, &ba[0].l)) { - lineart_bounding_area_link_triangle( - rb, &ba[0], tri, B1, recursive, recursive_level + 1, do_intersection); + /* continue adding into the child */ + for (int iba = 0; iba < 4; iba++) { + if (LRT_BOUND_AREA_CROSSES(B1, &old_ba->child[iba].l)) { + lineart_bounding_area_link_triangle_cas( + rb, &old_ba->child[iba], tri, B1, recursive, recursive_level + 1, do_intersection, th); + } } - if (LRT_BOUND_AREA_CROSSES(B1, &ba[1].l)) { - lineart_bounding_area_link_triangle( - rb, &ba[1], tri, B1, recursive, recursive_level + 1, do_intersection); + return; + } + + while (1) { + uint32_t old_tri_index = old_ba->triangle_count; + uint32_t new_tri_index = old_tri_index + 1; + uint32_t max_triangles = old_ba->max_triangle_count; + if (old_tri_index < max_triangles) { + uint32_t success_old_index = atomic_cas_uint32( + &old_ba->triangle_count, old_tri_index, new_tri_index); + if (success_old_index != old_tri_index) { + /* Too bad, other threads grabbed this index, we retry. */ + continue; + } + /* Successfully grabbed a viable index to insert the triangle into. + * Insert into [old_tri_index] for correct array offset starting from [0]. */ + lineart_atomic_store(&old_ba->linked_triangles[old_tri_index], tri); + + /* Do intersections in place. */ + if (do_intersection && rb->use_intersections) { + lineart_triangle_intersect_in_bounding_area(tri, old_ba, th, old_tri_index); + } + break; + } + else { /* We need to wait for either splitting or array extension to be done. */ + + /* Splitting/extending can only be operated by one thread. */ + BLI_spin_lock(&old_ba->lock); + + if (recursive_level < rb->tile_recursive_level) { + if (!old_ba->child) { + /* old_ba->child==NULL, means we are the thread that's doing the splitting. */ + lineart_bounding_area_split(rb, old_ba, recursive_level); + } /* Otherwise other thread has completed the splitting process. */ + } + else { + if (max_triangles == old_ba->max_triangle_count) { + /* Means we are the thread that's doing the extension. */ + lineart_bounding_area_triangle_reallocate(old_ba); + } /* Otherwise other thread has completed the extending the array. */ + } + + /* Job done, allow other threads to add into new tile. */ + BLI_spin_unlock(&old_ba->lock); + + /* Of course we still have our own triangle needs to be added. */ + lineart_bounding_area_link_triangle_cas( + rb, root_ba, tri, LRUB, recursive, recursive_level, do_intersection, th); + + break; } - if (LRT_BOUND_AREA_CROSSES(B1, &ba[2].l)) { - lineart_bounding_area_link_triangle( - rb, &ba[2], tri, B1, recursive, recursive_level + 1, do_intersection); + } +} + +static void lineart_free_bounding_area_memory(LineartBoundingArea *ba, bool recursive) +{ + if (ba->linked_lines) { + MEM_freeN(ba->linked_lines); + } + if (ba->linked_triangles) { + MEM_freeN(ba->linked_triangles); + } + if (recursive && ba->child) { + for (int i = 0; i < 4; i++) { + lineart_free_bounding_area_memory(&ba->child[i], recursive); } - if (LRT_BOUND_AREA_CROSSES(B1, &ba[3].l)) { - lineart_bounding_area_link_triangle( - rb, &ba[3], tri, B1, recursive, recursive_level + 1, do_intersection); + } +} +static void lineart_free_bounding_area_memories(LineartRenderBuffer *rb) +{ + for (int i = 0; i < rb->tile_count_y; i++) { + for (int j = 0; j < rb->tile_count_x; j++) { + lineart_free_bounding_area_memory(&rb->initial_bounding_areas[i * rb->tile_count_x + j], + true); } } } @@ -3906,7 +4026,7 @@ static void lineart_bounding_area_link_edge(LineartRenderBuffer *rb, LineartEdge *e) { if (root_ba->child == NULL) { - lineart_bounding_area_line_add(rb, root_ba, e); + lineart_bounding_area_line_add(root_ba, e); } else { if (lineart_bounding_area_edge_intersect( @@ -3940,7 +4060,7 @@ static void lineart_main_link_lines(LineartRenderBuffer *rb) for (row = r1; row != r2 + 1; row++) { for (col = c1; col != c2 + 1; col++) { lineart_bounding_area_link_edge( - rb, &rb->initial_bounding_areas[row * LRT_BA_ROWS + col], e); + rb, &rb->initial_bounding_areas[row * rb->tile_count_x + col], e); } } } @@ -4068,7 +4188,7 @@ LineartBoundingArea *MOD_lineart_get_parent_bounding_area(LineartRenderBuffer *r row = 0; } - return &rb->initial_bounding_areas[row * LRT_BA_ROWS + col]; + return &rb->initial_bounding_areas[row * rb->tile_count_x + col]; } static LineartBoundingArea *lineart_get_bounding_area(LineartRenderBuffer *rb, double x, double y) @@ -4090,7 +4210,7 @@ static LineartBoundingArea *lineart_get_bounding_area(LineartRenderBuffer *rb, d c = rb->tile_count_x - 1; } - iba = &rb->initial_bounding_areas[r * LRT_BA_ROWS + c]; + iba = &rb->initial_bounding_areas[r * rb->tile_count_x + c]; while (iba->child) { if (x > iba->cx) { if (y > iba->cy) { @@ -4121,39 +4241,144 @@ LineartBoundingArea *MOD_lineart_get_bounding_area(LineartRenderBuffer *rb, doub return NULL; } +static void lineart_add_triangles_worker(TaskPool *__restrict UNUSED(pool), LineartIsecThread *th) +{ + LineartRenderBuffer *rb = th->rb; + int _dir_control = 0; + while (lineart_schedule_new_triangle_task(th)) { + for (LineartElementLinkNode *eln = th->pending_from; eln != th->pending_to->next; + eln = eln->next) { + int index_start = eln == th->pending_from ? th->index_from : 0; + int index_end = eln == th->pending_to ? th->index_to : eln->element_count; + LineartTriangle *tri = (void *)(((uchar *)eln->pointer) + rb->triangle_size * index_start); + for (int ei = index_start; ei < index_end; ei++) { + int x1, x2, y1, y2; + int r, co; + if ((tri->flags & LRT_CULL_USED) || (tri->flags & LRT_CULL_DISCARD)) { + tri = (void *)(((uchar *)tri) + rb->triangle_size); + continue; + } + if (lineart_get_triangle_bounding_areas(rb, tri, &y1, &y2, &x1, &x2)) { + _dir_control++; + for (co = x1; co <= x2; co++) { + for (r = y1; r <= y2; r++) { + lineart_bounding_area_link_triangle_cas( + rb, + &rb->initial_bounding_areas[r * rb->tile_count_x + co], + tri, + 0, + 1, + 0, + (!(tri->flags & LRT_TRIANGLE_NO_INTERSECTION)), + th); + } + } + } /* Else throw away. */ + tri = (void *)(((uchar *)tri) + rb->triangle_size); + } + } + } +} + +static void lineart_create_edges_from_isec_data(LineartIsecData *d) +{ + LineartRenderBuffer *rb = d->rb; + double ZMax = rb->far_clip; + double ZMin = rb->near_clip; + + for (int i = 0; i < d->thread_count; i++) { + LineartIsecThread *th = &d->threads[i]; + if (G.debug_value == 4000) { + printf("Thread %d isec generated %d lines.\n", i, th->current); + } + if (!th->current) { + continue; + } + /* We don't care about removing duplicated vert in this method, chaning can handle that, and it + * saves us from using locks and look up tables. */ + LineartVertIntersection *v = lineart_mem_acquire( + &rb->render_data_pool, sizeof(LineartVertIntersection) * th->current * 2); + LineartEdge *e = lineart_mem_acquire(&rb->render_data_pool, sizeof(LineartEdge) * th->current); + LineartEdgeSegment *es = lineart_mem_acquire(&rb->render_data_pool, + sizeof(LineartEdgeSegment) * th->current); + for (int j = 0; j < th->current; j++) { + LineartVertIntersection *v1i = v; + LineartVertIntersection *v2i = v + 1; + LineartIsecSingle *is = &th->array[j]; + v1i->intersecting_with = is->tri1; + v2i->intersecting_with = is->tri2; + LineartVert *v1 = (LineartVert *)v1i; + LineartVert *v2 = (LineartVert *)v2i; + v1->flag |= LRT_VERT_HAS_INTERSECTION_DATA; + v2->flag |= LRT_VERT_HAS_INTERSECTION_DATA; + copy_v3db_v3fl(v1->gloc, is->v1); + copy_v3db_v3fl(v2->gloc, is->v2); + /* The intersection line has been generated only in geometry space, so we need to transform + * them as well. */ + mul_v4_m4v3_db(v1->fbcoord, rb->view_projection, v1->gloc); + mul_v4_m4v3_db(v2->fbcoord, rb->view_projection, v2->gloc); + mul_v3db_db(v1->fbcoord, (1 / v1->fbcoord[3])); + mul_v3db_db(v2->fbcoord, (1 / v2->fbcoord[3])); + + v1->fbcoord[0] -= rb->shift_x * 2; + v1->fbcoord[1] -= rb->shift_y * 2; + v2->fbcoord[0] -= rb->shift_x * 2; + v2->fbcoord[1] -= rb->shift_y * 2; + + /* This z transformation is not the same as the rest of the part, because the data don't go + * through normal perspective division calls in the pipeline, but this way the 3D result and + * occlusion on the generated line is correct, and we don't really use 2D for viewport stroke + * generation anyway. */ + v1->fbcoord[2] = ZMin * ZMax / (ZMax - fabs(v1->fbcoord[2]) * (ZMax - ZMin)); + v2->fbcoord[2] = ZMin * ZMax / (ZMax - fabs(v2->fbcoord[2]) * (ZMax - ZMin)); + e->v1 = v1; + e->v2 = v2; + e->t1 = is->tri1; + e->t2 = is->tri2; + e->flags = LRT_EDGE_FLAG_INTERSECTION; + e->intersection_mask = (is->tri1->intersection_mask | is->tri2->intersection_mask); + BLI_addtail(&e->segments, es); + + lineart_add_edge_to_array(&rb->pending_edges, e); + + v += 2; + e++; + es++; + } + } +} + /** - * Sequentially add triangles into render buffer. This also does intersection along the way. + * Sequentially add triangles into render buffer, intersection lines between those triangles will + * also be computed at the same time. */ static void lineart_main_add_triangles(LineartRenderBuffer *rb) { - LineartTriangle *tri; - int i, lim; - int x1, x2, y1, y2; - int r, co; + double t_start; + if (G.debug_value == 4000) { + t_start = PIL_check_seconds_timer(); + } - LISTBASE_FOREACH (LineartElementLinkNode *, eln, &rb->triangle_buffer_pointers) { - tri = eln->pointer; - lim = eln->element_count; - for (i = 0; i < lim; i++) { - if ((tri->flags & LRT_CULL_USED) || (tri->flags & LRT_CULL_DISCARD)) { - tri = (void *)(((uchar *)tri) + rb->triangle_size); - continue; - } - if (lineart_get_triangle_bounding_areas(rb, tri, &y1, &y2, &x1, &x2)) { - for (co = x1; co <= x2; co++) { - for (r = y1; r <= y2; r++) { - lineart_bounding_area_link_triangle(rb, - &rb->initial_bounding_areas[r * LRT_BA_ROWS + co], - tri, - 0, - 1, - 0, - (!(tri->flags & LRT_TRIANGLE_NO_INTERSECTION))); - } - } - } /* Else throw away. */ - tri = (void *)(((uchar *)tri) + rb->triangle_size); - } + /* Initialize per-thread data for thread task scheduling information and storing intersection + * results. */ + LineartIsecData d = {0}; + lineart_init_isec_thread(&d, rb, rb->thread_count); + + TaskPool *tp = BLI_task_pool_create(NULL, TASK_PRIORITY_HIGH); + for (int i = 0; i < rb->thread_count; i++) { + BLI_task_pool_push(tp, (TaskRunFunction)lineart_add_triangles_worker, &d.threads[i], 0, NULL); + } + BLI_task_pool_work_and_wait(tp); + BLI_task_pool_free(tp); + + /* Create actual lineart edges from intersection results. */ + lineart_create_edges_from_isec_data(&d); + + lineart_destroy_isec_thread(&d); + + if (G.debug_value == 4000) { + double t_elapsed = PIL_check_seconds_timer() - t_start; + printf("Line art intersection time: %lf\n", t_elapsed); } } @@ -4456,7 +4681,7 @@ bool MOD_lineart_compute_feature_lines(Depsgraph *depsgraph, /* Triangle thread testing data size varies depending on the thread count. * See definition of LineartTriangleThread for details. */ - rb->triangle_size = lineart_triangle_size_get(scene, rb); + rb->triangle_size = lineart_triangle_size_get(rb); /* FIXME(Yiming): See definition of int #LineartRenderBuffer::_source_type for detailed. */ rb->_source_type = lmd->source_type; @@ -4496,6 +4721,10 @@ bool MOD_lineart_compute_feature_lines(Depsgraph *depsgraph, * can do its job. */ lineart_main_add_triangles(rb); + /* Re-link bounding areas because they have been subdivided by worker threads and we need + * andjacent info. */ + lineart_main_bounding_areas_connect_post(rb); + /* Link lines to acceleration structure, this can only be done after perspective division, if * we do it after triangles being added, the acceleration structure has already been * subdivided, this way we do less list manipulations. */ diff --git a/source/blender/gpencil_modifiers/intern/lineart/lineart_intern.h b/source/blender/gpencil_modifiers/intern/lineart/lineart_intern.h index 1a197c8b4b7..e3e9866d2c7 100644 --- a/source/blender/gpencil_modifiers/intern/lineart/lineart_intern.h +++ b/source/blender/gpencil_modifiers/intern/lineart/lineart_intern.h @@ -82,7 +82,7 @@ void lineart_count_and_print_render_buffer_memory(struct LineartRenderBuffer *rb /* Initial bounding area row/column count, setting 4 is the simplest way algorithm could function * efficiently. */ -#define LRT_BA_ROWS 4 +#define LRT_BA_ROWS 10 #ifdef __cplusplus extern "C" { @@ -93,3 +93,32 @@ void lineart_sort_adjacent_items(LineartAdjacentEdge *ai, int length); #ifdef __cplusplus } #endif + +#ifndef __cplusplus /* Compatibility code for atomics, only for C. */ + +# if defined __has_include /* Try to use C11 atomics support. */ +# if __has_include(<stdatomic.h>) +# include <stdatomic.h> +# define lineart_atomic_load(p) atomic_load((volatile size_t *)p) +# define lineart_atomic_store(p, d) atomic_store((volatile size_t *)p, (size_t)d) +# endif +# endif + +# ifdef _MSC_VER /* Atomics walkaround for windows. */ +# define WIN32_LEAN_AND_MEAN +# include <windows.h> +# define lineart_atomic_load(p) (MemoryBarrier(), *(p)) +# define lineart_atomic_store(p, d) \ + do { \ + *(p) = (d); \ + MemoryBarrier(); \ + } while (0) +# endif + +# if !defined lineart_atomic_load /* Fallback */ +# include "atomic_ops.h" +# define lineart_atomic_load(p) atomic_add_and_fetch_z((size_t *)p, 0) +# define lineart_atomic_store(p, d) atomic_add_and_fetch_z((size_t *)p, (size_t)d) +# endif + +#endif /* !__cplusplus */ |