diff options
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/blender/addon/engine.py | 11 | ||||
-rw-r--r-- | intern/cycles/blender/addon/properties.py | 18 | ||||
-rw-r--r-- | intern/cycles/blender/addon/ui.py | 7 | ||||
-rw-r--r-- | intern/cycles/blender/blender_sync.cpp | 15 | ||||
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_mega.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_accumulate.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_passes.h | 73 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 86 | ||||
-rw-r--r-- | intern/cycles/render/buffers.cpp | 8 | ||||
-rw-r--r-- | intern/cycles/render/buffers.h | 1 | ||||
-rw-r--r-- | intern/cycles/render/film.cpp | 166 | ||||
-rw-r--r-- | intern/mikktspace/mikktspace.c | 363 |
16 files changed, 333 insertions, 426 deletions
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 5b2cb9fe39b..cbc21b79da8 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -237,10 +237,13 @@ def register_passes(engine, scene, srl): if srl.use_pass_environment: engine.register_pass(scene, srl, "Env", 3, "RGB", 'COLOR') crl = srl.cycles - if crl.pass_debug_bvh_traversed_nodes: engine.register_pass(scene, srl, "Debug BVH Traversed Nodes", 1, "X", 'VALUE') - if crl.pass_debug_bvh_traversed_instances: engine.register_pass(scene, srl, "Debug BVH Traversed Instances", 1, "X", 'VALUE') - if crl.pass_debug_bvh_intersections: engine.register_pass(scene, srl, "Debug BVH Intersections", 1, "X", 'VALUE') - if crl.pass_debug_ray_bounces: engine.register_pass(scene, srl, "Debug Ray Bounces", 1, "X", 'VALUE') + if crl.pass_debug_render_time: engine.register_pass(scene, srl, "Debug Render Time", 1, "X", 'VALUE') + if crl.pass_debug_bvh_traversed_nodes: engine.register_pass(scene, srl, "Debug BVH Traversed Nodes", 1, "X", 'VALUE') + if crl.pass_debug_bvh_traversed_instances: engine.register_pass(scene, srl, "Debug BVH Traversed Instances", 1, "X", 'VALUE') + if crl.pass_debug_bvh_intersections: engine.register_pass(scene, srl, "Debug BVH Intersections", 1, "X", 'VALUE') + if crl.pass_debug_ray_bounces: engine.register_pass(scene, srl, "Debug Ray Bounces", 1, "X", 'VALUE') + if crl.use_pass_volume_direct: engine.register_pass(scene, srl, "VolumeDir", 3, "RGB", 'COLOR') + if crl.use_pass_volume_indirect: engine.register_pass(scene, srl, "VolumeInd", 3, "RGB", 'COLOR') cscene = scene.cycles if crl.use_denoising and crl.denoising_store_passes and not cscene.use_progressive_refine: diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 95a6e302cc5..365184c382d 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1183,6 +1183,24 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): default=False, update=update_render_passes, ) + cls.pass_debug_render_time = BoolProperty( + name="Debug Render Time", + description="Render time in milliseconds per sample and pixel", + default=False, + update=update_render_passes, + ) + cls.use_pass_volume_direct = BoolProperty( + name="Volume Direct", + description="Deliver direct volumetric scattering pass", + default=False, + update=update_render_passes, + ) + cls.use_pass_volume_indirect = BoolProperty( + name="Volume Indirect", + description="Deliver indirect volumetric scattering pass", + default=False, + update=update_render_passes, + ) cls.use_denoising = BoolProperty( name="Use Denoising", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 9c8855d0165..af16de70a46 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -526,6 +526,10 @@ class CYCLES_RENDER_PT_layer_passes(CyclesButtonsPanel, Panel): row.prop(rl, "use_pass_subsurface_direct", text="Direct", toggle=True) row.prop(rl, "use_pass_subsurface_indirect", text="Indirect", toggle=True) row.prop(rl, "use_pass_subsurface_color", text="Color", toggle=True) + col.label(text="Volume:") + row = col.row(align=True) + row.prop(crl, "use_pass_volume_direct", text="Direct", toggle=True) + row.prop(crl, "use_pass_volume_indirect", text="Indirect", toggle=True) col.separator() col.prop(rl, "use_pass_emit", text="Emission") @@ -537,8 +541,9 @@ class CYCLES_RENDER_PT_layer_passes(CyclesButtonsPanel, Panel): sub.active = crl.use_denoising sub.prop(crl, "denoising_store_passes", text="Denoising") + col = layout.column() + col.prop(crl, "pass_debug_render_time") if _cycles.with_cycles_debug: - col = layout.column() col.prop(crl, "pass_debug_bvh_traversed_nodes") col.prop(crl, "pass_debug_bvh_traversed_instances") col.prop(crl, "pass_debug_bvh_intersections") diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index c428516deec..f29d7754dcf 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -481,11 +481,13 @@ PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass) MAP_PASS("GlossDir", PASS_GLOSSY_DIRECT); MAP_PASS("TransDir", PASS_TRANSMISSION_DIRECT); MAP_PASS("SubsurfaceDir", PASS_SUBSURFACE_DIRECT); + MAP_PASS("VolumeDir", PASS_VOLUME_DIRECT); MAP_PASS("DiffInd", PASS_DIFFUSE_INDIRECT); MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT); MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT); MAP_PASS("SubsurfaceInd", PASS_SUBSURFACE_INDIRECT); + MAP_PASS("VolumeInd", PASS_VOLUME_INDIRECT); MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR); MAP_PASS("GlossCol", PASS_GLOSSY_COLOR); @@ -503,6 +505,7 @@ PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass) MAP_PASS("Debug BVH Intersections", PASS_BVH_INTERSECTIONS); MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES); #endif + MAP_PASS("Debug Render Time", PASS_RENDER_TIME); #undef MAP_PASS return PASS_NONE; @@ -589,6 +592,18 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, Pass::add(PASS_RAY_BOUNCES, passes); } #endif + if(get_boolean(crp, "pass_debug_render_time")) { + b_engine.add_pass("Debug Render Time", 1, "X", b_srlay.name().c_str()); + Pass::add(PASS_RENDER_TIME, passes); + } + if(get_boolean(crp, "use_pass_volume_direct")) { + b_engine.add_pass("VolumeDir", 3, "RGB", b_srlay.name().c_str()); + Pass::add(PASS_VOLUME_DIRECT, passes); + } + if(get_boolean(crp, "use_pass_volume_indirect")) { + b_engine.add_pass("VolumeInd", 3, "RGB", b_srlay.name().c_str()); + Pass::add(PASS_VOLUME_INDIRECT, passes); + } return passes; } diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index ce02a5a932e..999b9230d29 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -689,6 +689,8 @@ public: void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) { + scoped_timer timer(&tile.buffers->render_time); + float *render_buffer = (float*)tile.buffer; int start_sample = tile.start_sample; int end_sample = tile.start_sample + tile.num_samples; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 35b451c7a9c..c9e1fcad8f2 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1436,6 +1436,8 @@ public: void path_trace(DeviceTask& task, RenderTile& rtile, device_vector<WorkTile>& work_tiles) { + scoped_timer timer(&rtile.buffers->render_time); + if(have_error()) return; diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp index 575ab73330e..ef39cfb5f7d 100644 --- a/intern/cycles/device/opencl/opencl_mega.cpp +++ b/intern/cycles/device/opencl/opencl_mega.cpp @@ -59,6 +59,8 @@ public: void path_trace(RenderTile& rtile, int sample) { + scoped_timer timer(&rtile.buffers->render_time); + /* Cast arguments to cl types. */ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); cl_mem d_buffer = CL_MEM_PTR(rtile.buffer); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 1073cfa6bf6..2d819080674 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -138,6 +138,8 @@ public: while(task->acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { assert(tile.task == RenderTile::PATH_TRACE); + scoped_timer timer(&tile.buffers->render_time); + split_kernel->path_trace(task, tile, kgbuffer, diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 366f25422fd..7c1b2a015e1 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -187,7 +187,6 @@ ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass) L->color_glossy = make_float3(0.0f, 0.0f, 0.0f); L->color_transmission = make_float3(0.0f, 0.0f, 0.0f); L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->color_scatter = make_float3(0.0f, 0.0f, 0.0f); L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f); L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 6bed73ad459..29451b6b8b6 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -170,19 +170,19 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, PathRadiance *L) { int flag = kernel_data.film.pass_flag; - if(flag & PASS_BVH_TRAVERSED_NODES) { + if(flag & PASSMASK(BVH_TRAVERSED_NODES)) { kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes, L->debug_data.num_bvh_traversed_nodes); } - if(flag & PASS_BVH_TRAVERSED_INSTANCES) { + if(flag & PASSMASK(BVH_TRAVERSED_INSTANCES)) { kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances, L->debug_data.num_bvh_traversed_instances); } - if(flag & PASS_BVH_INTERSECTIONS) { + if(flag & PASSMASK(BVH_INTERSECTIONS)) { kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections, L->debug_data.num_bvh_intersections); } - if(flag & PASS_RAY_BOUNCES) { + if(flag & PASSMASK(RAY_BOUNCES)) { kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces, L->debug_data.num_ray_bounces); } @@ -199,8 +199,9 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl return; int flag = kernel_data.film.pass_flag; + int light_flag = kernel_data.film.light_pass_flag; - if(!(flag & PASS_ALL)) + if(!((flag | light_flag) & PASS_ANY)) return; if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { @@ -209,29 +210,29 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { if(state->sample == 0) { - if(flag & PASS_DEPTH) { + if(flag & PASSMASK(DEPTH)) { float depth = camera_distance(kg, sd->P); kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); } - if(flag & PASS_OBJECT_ID) { + if(flag & PASSMASK(OBJECT_ID)) { float id = object_pass_id(kg, sd->object); kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); } - if(flag & PASS_MATERIAL_ID) { + if(flag & PASSMASK(MATERIAL_ID)) { float id = shader_pass_id(kg, sd); kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); } } - if(flag & PASS_NORMAL) { + if(flag & PASSMASK(NORMAL)) { float3 normal = shader_bsdf_average_normal(kg, sd); kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); } - if(flag & PASS_UV) { + if(flag & PASSMASK(UV)) { float3 uv = primitive_uv(kg, sd); kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); } - if(flag & PASS_MOTION) { + if(flag & PASSMASK(MOTION)) { float4 speed = primitive_motion_vector(kg, sd); kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); @@ -241,16 +242,16 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl } } - if(flag & (PASS_DIFFUSE_INDIRECT|PASS_DIFFUSE_COLOR|PASS_DIFFUSE_DIRECT)) + if(light_flag & PASSMASK_COMPONENT(DIFFUSE)) L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput; - if(flag & (PASS_GLOSSY_INDIRECT|PASS_GLOSSY_COLOR|PASS_GLOSSY_DIRECT)) + if(light_flag & PASSMASK_COMPONENT(GLOSSY)) L->color_glossy += shader_bsdf_glossy(kg, sd)*throughput; - if(flag & (PASS_TRANSMISSION_INDIRECT|PASS_TRANSMISSION_COLOR|PASS_TRANSMISSION_DIRECT)) + if(light_flag & PASSMASK_COMPONENT(TRANSMISSION)) L->color_transmission += shader_bsdf_transmission(kg, sd)*throughput; - if(flag & (PASS_SUBSURFACE_INDIRECT|PASS_SUBSURFACE_COLOR|PASS_SUBSURFACE_DIRECT)) + if(light_flag & PASSMASK_COMPONENT(SUBSURFACE)) L->color_subsurface += shader_bsdf_subsurface(kg, sd)*throughput; - if(flag & PASS_MIST) { + if(light_flag & PASSMASK(MIST)) { /* bring depth into 0..1 range */ float mist_start = kernel_data.film.mist_start; float mist_inv_depth = kernel_data.film.mist_inv_depth; @@ -280,49 +281,53 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L) { #ifdef __PASSES__ - int flag = kernel_data.film.pass_flag; + int light_flag = kernel_data.film.light_pass_flag; if(!kernel_data.film.use_light_pass) return; - if(flag & PASS_DIFFUSE_INDIRECT) + if(light_flag & PASSMASK(DIFFUSE_INDIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse); - if(flag & PASS_GLOSSY_INDIRECT) + if(light_flag & PASSMASK(GLOSSY_INDIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy); - if(flag & PASS_TRANSMISSION_INDIRECT) + if(light_flag & PASSMASK(TRANSMISSION_INDIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, L->indirect_transmission); - if(flag & PASS_SUBSURFACE_INDIRECT) + if(light_flag & PASSMASK(SUBSURFACE_INDIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, L->indirect_subsurface); - if(flag & PASS_DIFFUSE_DIRECT) + if(light_flag & PASSMASK(VOLUME_INDIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter); + if(light_flag & PASSMASK(DIFFUSE_DIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse); - if(flag & PASS_GLOSSY_DIRECT) + if(light_flag & PASSMASK(GLOSSY_DIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy); - if(flag & PASS_TRANSMISSION_DIRECT) + if(light_flag & PASSMASK(TRANSMISSION_DIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, L->direct_transmission); - if(flag & PASS_SUBSURFACE_DIRECT) + if(light_flag & PASSMASK(SUBSURFACE_DIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, L->direct_subsurface); + if(light_flag & PASSMASK(VOLUME_DIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter); - if(flag & PASS_EMISSION) + if(light_flag & PASSMASK(EMISSION)) kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission); - if(flag & PASS_BACKGROUND) + if(light_flag & PASSMASK(BACKGROUND)) kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background); - if(flag & PASS_AO) + if(light_flag & PASSMASK(AO)) kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao); - if(flag & PASS_DIFFUSE_COLOR) + if(light_flag & PASSMASK(DIFFUSE_COLOR)) kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse); - if(flag & PASS_GLOSSY_COLOR) + if(light_flag & PASSMASK(GLOSSY_COLOR)) kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy); - if(flag & PASS_TRANSMISSION_COLOR) + if(light_flag & PASSMASK(TRANSMISSION_COLOR)) kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, L->color_transmission); - if(flag & PASS_SUBSURFACE_COLOR) + if(light_flag & PASSMASK(SUBSURFACE_COLOR)) kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface); - if(flag & PASS_SHADOW) { + if(light_flag & PASSMASK(SHADOW)) { float4 shadow = L->shadow; shadow.w = kernel_data.film.pass_shadow_scale; kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow); } - if(flag & PASS_MIST) + if(light_flag & PASSMASK(MIST)) kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist); #endif } diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 207ba741e6f..791c25c6553 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -140,7 +140,7 @@ ccl_device_forceinline void kernel_path_background( L->transparent += average(throughput); #ifdef __PASSES__ - if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) + if(!(kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND))) #endif /* __PASSES__ */ return; } diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 5b8b760c48c..c4a9b3f4aa3 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -368,43 +368,59 @@ typedef enum ClosureLabel { /* Render Passes */ +#define PASS_NAME_JOIN(a, b) a ## _ ## b +#define PASSMASK(pass) (1 << ((PASS_NAME_JOIN(PASS, pass)) % 32)) + +#define PASSMASK_COMPONENT(comp) (PASSMASK(PASS_NAME_JOIN(comp, DIRECT)) | \ + PASSMASK(PASS_NAME_JOIN(comp, INDIRECT)) | \ + PASSMASK(PASS_NAME_JOIN(comp, COLOR))) + typedef enum PassType { PASS_NONE = 0, - PASS_COMBINED = (1 << 0), - PASS_DEPTH = (1 << 1), - PASS_NORMAL = (1 << 2), - PASS_UV = (1 << 3), - PASS_OBJECT_ID = (1 << 4), - PASS_MATERIAL_ID = (1 << 5), - PASS_DIFFUSE_COLOR = (1 << 6), - PASS_GLOSSY_COLOR = (1 << 7), - PASS_TRANSMISSION_COLOR = (1 << 8), - PASS_DIFFUSE_INDIRECT = (1 << 9), - PASS_GLOSSY_INDIRECT = (1 << 10), - PASS_TRANSMISSION_INDIRECT = (1 << 11), - PASS_DIFFUSE_DIRECT = (1 << 12), - PASS_GLOSSY_DIRECT = (1 << 13), - PASS_TRANSMISSION_DIRECT = (1 << 14), - PASS_EMISSION = (1 << 15), - PASS_BACKGROUND = (1 << 16), - PASS_AO = (1 << 17), - PASS_SHADOW = (1 << 18), - PASS_MOTION = (1 << 19), - PASS_MOTION_WEIGHT = (1 << 20), - PASS_MIST = (1 << 21), - PASS_SUBSURFACE_DIRECT = (1 << 22), - PASS_SUBSURFACE_INDIRECT = (1 << 23), - PASS_SUBSURFACE_COLOR = (1 << 24), - PASS_LIGHT = (1 << 25), /* no real pass, used to force use_light_pass */ + + /* Main passes */ + PASS_COMBINED = 1, + PASS_DEPTH, + PASS_NORMAL, + PASS_UV, + PASS_OBJECT_ID, + PASS_MATERIAL_ID, + PASS_MOTION, + PASS_MOTION_WEIGHT, #ifdef __KERNEL_DEBUG__ - PASS_BVH_TRAVERSED_NODES = (1 << 26), - PASS_BVH_TRAVERSED_INSTANCES = (1 << 27), - PASS_BVH_INTERSECTIONS = (1 << 28), - PASS_RAY_BOUNCES = (1 << 29), + PASS_BVH_TRAVERSED_NODES, + PASS_BVH_TRAVERSED_INSTANCES, + PASS_BVH_INTERSECTIONS, + PASS_RAY_BOUNCES, #endif + PASS_RENDER_TIME, + PASS_CATEGORY_MAIN_END = 31, + + PASS_MIST = 32, + PASS_EMISSION, + PASS_BACKGROUND, + PASS_AO, + PASS_SHADOW, + PASS_LIGHT, /* no real pass, used to force use_light_pass */ + PASS_DIFFUSE_DIRECT, + PASS_DIFFUSE_INDIRECT, + PASS_DIFFUSE_COLOR, + PASS_GLOSSY_DIRECT, + PASS_GLOSSY_INDIRECT, + PASS_GLOSSY_COLOR, + PASS_TRANSMISSION_DIRECT, + PASS_TRANSMISSION_INDIRECT, + PASS_TRANSMISSION_COLOR, + PASS_SUBSURFACE_DIRECT, + PASS_SUBSURFACE_INDIRECT, + PASS_SUBSURFACE_COLOR, + PASS_VOLUME_DIRECT, + PASS_VOLUME_INDIRECT, + /* No Scatter color since it's tricky to define what it would even mean. */ + PASS_CATEGORY_LIGHT_END = 63, } PassType; -#define PASS_ALL (~0) +#define PASS_ANY (~0) typedef enum DenoisingPassOffsets { DENOISING_PASS_NORMAL = 0, @@ -509,7 +525,6 @@ typedef ccl_addr_space struct PathRadiance { float3 color_glossy; float3 color_transmission; float3 color_subsurface; - float3 color_scatter; float3 direct_diffuse; float3 direct_glossy; @@ -1179,6 +1194,7 @@ static_assert_align(KernelCamera, 16); typedef struct KernelFilm { float exposure; int pass_flag; + int light_pass_flag; int pass_stride; int use_light_pass; @@ -1201,11 +1217,13 @@ typedef struct KernelFilm { int pass_glossy_indirect; int pass_transmission_indirect; int pass_subsurface_indirect; + int pass_volume_indirect; int pass_diffuse_direct; int pass_glossy_direct; int pass_transmission_direct; int pass_subsurface_direct; + int pass_volume_direct; int pass_emission; int pass_background; @@ -1215,7 +1233,6 @@ typedef struct KernelFilm { int pass_shadow; float pass_shadow_scale; int filter_table_offset; - int pass_pad2; int pass_mist; float mist_start; @@ -1225,7 +1242,8 @@ typedef struct KernelFilm { int pass_denoising_data; int pass_denoising_clean; int denoising_flags; - int pad; + + int pad1, pad2, pad3; #ifdef __KERNEL_DEBUG__ int pass_bvh_traversed_nodes; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index d76eaf793b4..7d4c3325a15 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -116,7 +116,7 @@ RenderTile::RenderTile() RenderBuffers::RenderBuffers(Device *device) : buffer(device, "RenderBuffers", MEM_READ_WRITE), - map_neighbor_copied(false) + map_neighbor_copied(false), render_time(0.0f) { } @@ -264,6 +264,12 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int } } #endif + else if(type == PASS_RENDER_TIME) { + float val = (float) (1000.0 * render_time/(params.width * params.height * sample)); + for(int i = 0; i < size; i++, pixels++) { + pixels[0] = val; + } + } else { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { float f = *in; diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 9fa0cdd4e27..028bfb83735 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -75,6 +75,7 @@ public: /* float buffer */ device_vector<float> buffer; bool map_neighbor_copied; + double render_time; explicit RenderBuffers(Device *device); ~RenderBuffers(); diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index c8213d258d5..6c8c929c2f9 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -85,83 +85,81 @@ void Pass::add(PassType type, array<Pass>& passes) pass.components = 1; pass.filter = false; break; - case PASS_DIFFUSE_COLOR: - case PASS_GLOSSY_COLOR: - case PASS_TRANSMISSION_COLOR: - case PASS_SUBSURFACE_COLOR: - pass.components = 4; - break; - case PASS_DIFFUSE_INDIRECT: + + case PASS_EMISSION: + case PASS_BACKGROUND: pass.components = 4; pass.exposure = true; - pass.divide_type = PASS_DIFFUSE_COLOR; break; - case PASS_GLOSSY_INDIRECT: + case PASS_AO: pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_GLOSSY_COLOR; break; - case PASS_TRANSMISSION_INDIRECT: + case PASS_SHADOW: pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_TRANSMISSION_COLOR; + pass.exposure = false; break; - case PASS_SUBSURFACE_INDIRECT: + case PASS_LIGHT: + /* This isn't a real pass, used by baking to see whether + * light data is needed or not. + * + * Set components to 0 so pass sort below happens in a + * determined way. + */ + pass.components = 0; + break; +#ifdef WITH_CYCLES_DEBUG + case PASS_BVH_TRAVERSED_NODES: + case PASS_BVH_TRAVERSED_INSTANCES: + case PASS_BVH_INTERSECTIONS: + case PASS_RAY_BOUNCES: + pass.components = 1; + pass.exposure = false; + break; +#endif + case PASS_RENDER_TIME: + /* This pass is handled entirely on the host side. */ + pass.components = 0; + break; + + case PASS_DIFFUSE_COLOR: + case PASS_GLOSSY_COLOR: + case PASS_TRANSMISSION_COLOR: + case PASS_SUBSURFACE_COLOR: pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_SUBSURFACE_COLOR; break; case PASS_DIFFUSE_DIRECT: + case PASS_DIFFUSE_INDIRECT: pass.components = 4; pass.exposure = true; pass.divide_type = PASS_DIFFUSE_COLOR; break; case PASS_GLOSSY_DIRECT: + case PASS_GLOSSY_INDIRECT: pass.components = 4; pass.exposure = true; pass.divide_type = PASS_GLOSSY_COLOR; break; case PASS_TRANSMISSION_DIRECT: + case PASS_TRANSMISSION_INDIRECT: pass.components = 4; pass.exposure = true; pass.divide_type = PASS_TRANSMISSION_COLOR; break; case PASS_SUBSURFACE_DIRECT: + case PASS_SUBSURFACE_INDIRECT: pass.components = 4; pass.exposure = true; pass.divide_type = PASS_SUBSURFACE_COLOR; break; - - case PASS_EMISSION: - case PASS_BACKGROUND: + case PASS_VOLUME_DIRECT: + case PASS_VOLUME_INDIRECT: pass.components = 4; pass.exposure = true; break; - case PASS_AO: - pass.components = 4; - break; - case PASS_SHADOW: - pass.components = 4; - pass.exposure = false; - break; - case PASS_LIGHT: - /* This isn't a real pass, used by baking to see whether - * light data is needed or not. - * - * Set components to 0 so pass sort below happens in a - * determined way. - */ - pass.components = 0; - break; -#ifdef WITH_CYCLES_DEBUG - case PASS_BVH_TRAVERSED_NODES: - case PASS_BVH_TRAVERSED_INSTANCES: - case PASS_BVH_INTERSECTIONS: - case PASS_RAY_BOUNCES: - pass.components = 1; - pass.exposure = false; + + default: + assert(false); break; -#endif } passes.push_back_slow(pass); @@ -318,7 +316,19 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) for(size_t i = 0; i < passes.size(); i++) { Pass& pass = passes[i]; - kfilm->pass_flag |= pass.type; + + if(pass.type == PASS_NONE) + continue; + + int pass_flag = (1 << (pass.type % 32)); + if(pass.type <= PASS_CATEGORY_MAIN_END) { + kfilm->pass_flag |= pass_flag; + } + else { + assert(pass.type <= PASS_CATEGORY_LIGHT_END); + kfilm->use_light_pass = 1; + kfilm->light_pass_flag |= pass_flag; + } switch(pass.type) { case PASS_COMBINED: @@ -327,10 +337,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) case PASS_DEPTH: kfilm->pass_depth = kfilm->pass_stride; break; - case PASS_MIST: - kfilm->pass_mist = kfilm->pass_stride; - kfilm->use_light_pass = 1; - break; case PASS_NORMAL: kfilm->pass_normal = kfilm->pass_stride; break; @@ -349,74 +355,67 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) case PASS_MATERIAL_ID: kfilm->pass_material_id = kfilm->pass_stride; break; + + case PASS_MIST: + kfilm->pass_mist = kfilm->pass_stride; + break; + case PASS_EMISSION: + kfilm->pass_emission = kfilm->pass_stride; + break; + case PASS_BACKGROUND: + kfilm->pass_background = kfilm->pass_stride; + break; + case PASS_AO: + kfilm->pass_ao = kfilm->pass_stride; + break; + case PASS_SHADOW: + kfilm->pass_shadow = kfilm->pass_stride; + break; + + case PASS_LIGHT: + break; + case PASS_DIFFUSE_COLOR: kfilm->pass_diffuse_color = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_GLOSSY_COLOR: kfilm->pass_glossy_color = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_TRANSMISSION_COLOR: kfilm->pass_transmission_color = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_SUBSURFACE_COLOR: kfilm->pass_subsurface_color = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_DIFFUSE_INDIRECT: kfilm->pass_diffuse_indirect = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_GLOSSY_INDIRECT: kfilm->pass_glossy_indirect = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_TRANSMISSION_INDIRECT: kfilm->pass_transmission_indirect = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_SUBSURFACE_INDIRECT: kfilm->pass_subsurface_indirect = kfilm->pass_stride; - kfilm->use_light_pass = 1; + break; + case PASS_VOLUME_INDIRECT: + kfilm->pass_volume_indirect = kfilm->pass_stride; break; case PASS_DIFFUSE_DIRECT: kfilm->pass_diffuse_direct = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_GLOSSY_DIRECT: kfilm->pass_glossy_direct = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_TRANSMISSION_DIRECT: kfilm->pass_transmission_direct = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; case PASS_SUBSURFACE_DIRECT: kfilm->pass_subsurface_direct = kfilm->pass_stride; - kfilm->use_light_pass = 1; break; - - case PASS_EMISSION: - kfilm->pass_emission = kfilm->pass_stride; - kfilm->use_light_pass = 1; - break; - case PASS_BACKGROUND: - kfilm->pass_background = kfilm->pass_stride; - kfilm->use_light_pass = 1; - break; - case PASS_AO: - kfilm->pass_ao = kfilm->pass_stride; - kfilm->use_light_pass = 1; - break; - case PASS_SHADOW: - kfilm->pass_shadow = kfilm->pass_stride; - kfilm->use_light_pass = 1; - break; - - case PASS_LIGHT: - kfilm->use_light_pass = 1; + case PASS_VOLUME_DIRECT: + kfilm->pass_volume_direct = kfilm->pass_stride; break; #ifdef WITH_CYCLES_DEBUG @@ -433,8 +432,11 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_ray_bounces = kfilm->pass_stride; break; #endif + case PASS_RENDER_TIME: + break; - case PASS_NONE: + default: + assert(false); break; } diff --git a/intern/mikktspace/mikktspace.c b/intern/mikktspace/mikktspace.c index f832b356ffe..ebf699c2428 100644 --- a/intern/mikktspace/mikktspace.c +++ b/intern/mikktspace/mikktspace.c @@ -447,305 +447,132 @@ typedef struct { int index; } STmpVert; -static const int g_iCells = 2048; -static const float g_iCells_fl = 2048.0f; +static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); -#ifdef _MSC_VER -# define NOINLINE __declspec(noinline) -#else -# define NOINLINE __attribute__ ((noinline)) -#endif +typedef unsigned int uint; -// it is IMPORTANT that this function is called to evaluate the hash since -// inlining could potentially reorder instructions and generate different -// results for the same effective input value fVal. -static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal) +static uint float_as_uint(const float v) { - const float fIndex = g_iCells_fl * ((fVal-fMin)/(fMax-fMin)); - const int iIndex = (int)fIndex; - return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1); + return *((uint*)(&v)); } -static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in); -static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries); -static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +#define HASH(x, y, z) (((x) * 73856093) ^ ((y) * 19349663) ^ ((z) * 83492791)) +#define HASH_F(x, y, z) HASH(float_as_uint(x), float_as_uint(y), float_as_uint(z)) -static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +/* Sort comp and data based on comp. + * comp2 and data2 are used as temporary storage. */ +static void radixsort_pair(uint *comp, int *data, uint *comp2, int *data2, int n) { + int shift = 0; + for(int pass = 0; pass < 4; pass++, shift+=8) { + int bins[257] = {0}; + /* Count number of elements per bin. */ + for(int i = 0; i < n; i++) { + bins[((comp[i] >> shift) & 0xff) + 1]++; + } + /* Compute prefix sum to find position of each bin in the sorted array. */ + for(int i = 2; i < 256; i++) { + bins[i] += bins[i-1]; + } + /* Insert the elements in their correct location based on their bin. */ + for(int i = 0; i < n; i++) { + int pos = bins[(comp[i] >> shift) & 0xff]++; + comp2[pos] = comp[i]; + data2[pos] = data[i]; + } - // Generate bounding box - int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL; - STmpVert * pTmpVert = NULL; - int i=0, iChannel=0, k=0, e=0; - int iMaxCount=0; - SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim; - float fMin, fMax; - for (i=1; i<(iNrTrianglesIn*3); i++) - { - const int index = piTriList_in_and_out[i]; - - const SVec3 vP = GetPosition(pContext, index); - if (vMin.x > vP.x) vMin.x = vP.x; - else if (vMax.x < vP.x) vMax.x = vP.x; - if (vMin.y > vP.y) vMin.y = vP.y; - else if (vMax.y < vP.y) vMax.y = vP.y; - if (vMin.z > vP.z) vMin.z = vP.z; - else if (vMax.z < vP.z) vMax.z = vP.z; + /* Swap arrays. */ + int *tmpdata = data; data = data2; data2 = tmpdata; + uint *tmpcomp = comp; comp = comp2; comp2 = tmpcomp; } +} - vDim = vsub(vMax,vMin); - iChannel = 0; - fMin = vMin.x; fMax=vMax.x; - if (vDim.y>vDim.x && vDim.y>vDim.z) - { - iChannel=1; - fMin = vMin.y; - fMax = vMax.y; - } - else if (vDim.z>vDim.x) - { - iChannel=2; - fMin = vMin.z; - fMax = vMax.z; - } +/* Merge identical vertices. + * To find vertices with identical position, normal and texcoord, we calculate a hash of the 9 values. + * Then, by sorting based on that hash, identical elements (having identical hashes) will be moved next to each other. + * Since there might be hash collisions, the elements of each block are then compared with each other and duplicates + * are merged. + */ +static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +{ + int numVertices = iNrTrianglesIn*3; - // make allocations - piHashTable = (int *) malloc(sizeof(int[3])*iNrTrianglesIn); - piHashCount = (int *) malloc(sizeof(int)*g_iCells); - piHashOffsets = (int *) malloc(sizeof(int)*g_iCells); - piHashCount2 = (int *) malloc(sizeof(int)*g_iCells); + uint *hashes = (uint*) malloc(sizeof(uint)*numVertices); + int *indices = (int*) malloc(sizeof(int)*numVertices); + uint *temp_hashes = (uint*) malloc(sizeof(uint)*numVertices); + int *temp_indices = (int*) malloc(sizeof(int)*numVertices); + + if(hashes == NULL || indices == NULL || temp_hashes == NULL || temp_indices == NULL) { + free(hashes); + free(indices); + free(temp_hashes); + free(temp_indices); - if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL) - { - if (piHashTable!=NULL) free(piHashTable); - if (piHashCount!=NULL) free(piHashCount); - if (piHashOffsets!=NULL) free(piHashOffsets); - if (piHashCount2!=NULL) free(piHashCount2); GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn); return; } - memset(piHashCount, 0, sizeof(int)*g_iCells); - memset(piHashCount2, 0, sizeof(int)*g_iCells); - // count amount of elements in each cell unit - for (i=0; i<(iNrTrianglesIn*3); i++) - { + for (int i = 0; i < numVertices; i++) { const int index = piTriList_in_and_out[i]; - const SVec3 vP = GetPosition(pContext, index); - const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z); - const int iCell = FindGridCell(fMin, fMax, fVal); - ++piHashCount[iCell]; - } - // evaluate start index of each cell. - piHashOffsets[0]=0; - for (k=1; k<g_iCells; k++) - piHashOffsets[k]=piHashOffsets[k-1]+piHashCount[k-1]; - - // insert vertices - for (i=0; i<(iNrTrianglesIn*3); i++) - { - const int index = piTriList_in_and_out[i]; const SVec3 vP = GetPosition(pContext, index); - const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z); - const int iCell = FindGridCell(fMin, fMax, fVal); - int * pTable = NULL; - - assert(piHashCount2[iCell]<piHashCount[iCell]); - pTable = &piHashTable[piHashOffsets[iCell]]; - pTable[piHashCount2[iCell]] = i; // vertex i has been inserted. - ++piHashCount2[iCell]; - } - for (k=0; k<g_iCells; k++) - assert(piHashCount2[k] == piHashCount[k]); // verify the count - free(piHashCount2); - - // find maximum amount of entries in any hash entry - iMaxCount = piHashCount[0]; - for (k=1; k<g_iCells; k++) - if (iMaxCount<piHashCount[k]) - iMaxCount=piHashCount[k]; - pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount); + const uint hashP = HASH_F(vP.x, vP.y, vP.z); + const SVec3 vN = GetNormal(pContext, index); + const uint hashN = HASH_F(vN.x, vN.y, vN.z); - // complete the merge - for (k=0; k<g_iCells; k++) - { - // extract table of cell k and amount of entries in it - int * pTable = &piHashTable[piHashOffsets[k]]; - const int iEntries = piHashCount[k]; - if (iEntries < 2) continue; + const SVec3 vT = GetTexCoord(pContext, index); + const uint hashT = HASH_F(vT.x, vT.y, vT.z); - if (pTmpVert!=NULL) - { - for (e=0; e<iEntries; e++) - { - int i = pTable[e]; - const SVec3 vP = GetPosition(pContext, piTriList_in_and_out[i]); - pTmpVert[e].vert[0] = vP.x; pTmpVert[e].vert[1] = vP.y; - pTmpVert[e].vert[2] = vP.z; pTmpVert[e].index = i; - } - MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, 0, iEntries-1); - } - else - MergeVertsSlow(piTriList_in_and_out, pContext, pTable, iEntries); + hashes[i] = HASH(hashP, hashN, hashT); + indices[i] = i; } - if (pTmpVert!=NULL) { free(pTmpVert); } - free(piHashTable); - free(piHashCount); - free(piHashOffsets); -} - -static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in) -{ - // make bbox - int c=0, l=0, channel=0; - float fvMin[3], fvMax[3]; - float dx=0, dy=0, dz=0, fSep=0; - for (c=0; c<3; c++) - { fvMin[c]=pTmpVert[iL_in].vert[c]; fvMax[c]=fvMin[c]; } - for (l=(iL_in+1); l<=iR_in; l++) { - for (c=0; c<3; c++) { - if (fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c]; - if (fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c]; + radixsort_pair(hashes, indices, temp_hashes, temp_indices, numVertices); + + free(temp_hashes); + free(temp_indices); + + /* Process blocks of vertices with the same hash. + * Vertices in the block might still be separate, but we know for sure that + * vertices in different blocks will never be identical. */ + int blockstart = 0; + while (blockstart < numVertices) { + /* Find end of this block (exclusive). */ + uint hash = hashes[blockstart]; + int blockend = blockstart+1; + for(; blockend < numVertices; blockend++) { + if(hashes[blockend] != hash) break; } - } - - dx = fvMax[0]-fvMin[0]; - dy = fvMax[1]-fvMin[1]; - dz = fvMax[2]-fvMin[2]; - - channel = 0; - if (dy>dx && dy>dz) channel=1; - else if (dz>dx) channel=2; - - fSep = 0.5f*(fvMax[channel]+fvMin[channel]); - // stop if all vertices are NaNs - if (!isfinite(fSep)) - return; - - // terminate recursion when the separation/average value - // is no longer strictly between fMin and fMax values. - if (fSep>=fvMax[channel] || fSep<=fvMin[channel]) - { - // complete the weld - for (l=iL_in; l<=iR_in; l++) - { - int i = pTmpVert[l].index; - const int index = piTriList_in_and_out[i]; - const SVec3 vP = GetPosition(pContext, index); - const SVec3 vN = GetNormal(pContext, index); - const SVec3 vT = GetTexCoord(pContext, index); - - tbool bNotFound = TTRUE; - int l2=iL_in, i2rec=-1; - while (l2<l && bNotFound) - { - const int i2 = pTmpVert[l2].index; - const int index2 = piTriList_in_and_out[i2]; - const SVec3 vP2 = GetPosition(pContext, index2); - const SVec3 vN2 = GetNormal(pContext, index2); - const SVec3 vT2 = GetTexCoord(pContext, index2); - i2rec=i2; - - //if (vP==vP2 && vN==vN2 && vT==vT2) - if (vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z && - vN.x==vN2.x && vN.y==vN2.y && vN.z==vN2.z && - vT.x==vT2.x && vT.y==vT2.y && vT.z==vT2.z) - bNotFound = TFALSE; - else - ++l2; - } - - // merge if previously found - if (!bNotFound) - piTriList_in_and_out[i] = piTriList_in_and_out[i2rec]; - } - } - else - { - int iL=iL_in, iR=iR_in; - assert((iR_in-iL_in)>0); // at least 2 entries - - // separate (by fSep) all points between iL_in and iR_in in pTmpVert[] - while (iL < iR) - { - tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE; - while ((!bReadyLeftSwap) && iL<iR) - { - assert(iL>=iL_in && iL<=iR_in); - bReadyLeftSwap = !(pTmpVert[iL].vert[channel]<fSep); - if (!bReadyLeftSwap) ++iL; - } - while ((!bReadyRightSwap) && iL<iR) - { - assert(iR>=iL_in && iR<=iR_in); - bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep; - if (!bReadyRightSwap) --iR; - } - assert( (iL<iR) || !(bReadyLeftSwap && bReadyRightSwap) ); - - if (bReadyLeftSwap && bReadyRightSwap) - { - const STmpVert sTmp = pTmpVert[iL]; - assert(iL<iR); - pTmpVert[iL] = pTmpVert[iR]; - pTmpVert[iR] = sTmp; - ++iL; --iR; + for(int i = blockstart; i < blockend; i++) { + int index1 = piTriList_in_and_out[indices[i]]; + const SVec3 vP = GetPosition(pContext, index1); + const SVec3 vN = GetNormal(pContext, index1); + const SVec3 vT = GetTexCoord(pContext, index1); + for(int i2 = i+1; i2 < blockend; i2++) { + int index2 = piTriList_in_and_out[indices[i2]]; + if(index1 == index2) continue; + + if(veq(vP, GetPosition(pContext, index2)) && + veq(vN, GetNormal(pContext, index2)) && + veq(vT, GetTexCoord(pContext, index2))) + { + piTriList_in_and_out[indices[i2]] = index1; + /* Once i2>i has been identified as a duplicate, we can stop since any + * i3>i2>i that is a duplicate of i (and therefore also i2) will also be + * compared to i2 and therefore be identified there anyways. */ + break; + } } } - assert(iL==(iR+1) || (iL==iR)); - if (iL==iR) - { - const tbool bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep; - if (bReadyRightSwap) ++iL; - else --iR; - } - - // only need to weld when there is more than 1 instance of the (x,y,z) - if (iL_in < iR) - MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL_in, iR); // weld all left of fSep - if (iL < iR_in) - MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL, iR_in); // weld all right of (or equal to) fSep + /* Advance to next block. */ + blockstart = blockend; } -} - -static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries) -{ - // this can be optimized further using a tree structure or more hashing. - int e=0; - for (e=0; e<iEntries; e++) - { - int i = pTable[e]; - const int index = piTriList_in_and_out[i]; - const SVec3 vP = GetPosition(pContext, index); - const SVec3 vN = GetNormal(pContext, index); - const SVec3 vT = GetTexCoord(pContext, index); - tbool bNotFound = TTRUE; - int e2=0, i2rec=-1; - while (e2<e && bNotFound) - { - const int i2 = pTable[e2]; - const int index2 = piTriList_in_and_out[i2]; - const SVec3 vP2 = GetPosition(pContext, index2); - const SVec3 vN2 = GetNormal(pContext, index2); - const SVec3 vT2 = GetTexCoord(pContext, index2); - i2rec = i2; - - if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2)) - bNotFound = TFALSE; - else - ++e2; - } - - // merge if previously found - if (!bNotFound) - piTriList_in_and_out[i] = piTriList_in_and_out[i2rec]; - } + free(hashes); + free(indices); } static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) |